From 6e30a7c98a9fda2f894e970e9cd637657f39c59d Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Wed, 5 Jun 2024 20:13:15 +0200 Subject: [PATCH 001/655] locking/atomic/x86: Introduce the read64_nonatomic macro to x86_32 with cx8 As described in commit: e73c4e34a0e9 ("locking/atomic/x86: Introduce arch_atomic64_read_nonatomic() to x86_32") the value preload before the CMPXCHG loop does not need to be atomic. Introduce the read64_nonatomic assembly macro to load the value from a atomic_t location in a faster non-atomic way and use it in atomic64_cx8_32.S. Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/20240605181424.3228-1-ubizjak@gmail.com --- arch/x86/lib/atomic64_cx8_32.S | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S index 90afb488b396..b2eff07d65e4 100644 --- a/arch/x86/lib/atomic64_cx8_32.S +++ b/arch/x86/lib/atomic64_cx8_32.S @@ -16,6 +16,11 @@ cmpxchg8b (\reg) .endm +.macro read64_nonatomic reg + movl (\reg), %eax + movl 4(\reg), %edx +.endm + SYM_FUNC_START(atomic64_read_cx8) read64 %ecx RET @@ -51,7 +56,7 @@ SYM_FUNC_START(atomic64_\func\()_return_cx8) movl %edx, %edi movl %ecx, %ebp - read64 %ecx + read64_nonatomic %ecx 1: movl %eax, %ebx movl %edx, %ecx @@ -79,7 +84,7 @@ addsub_return sub sub sbb SYM_FUNC_START(atomic64_\func\()_return_cx8) pushl %ebx - read64 %esi + read64_nonatomic %esi 1: movl %eax, %ebx movl %edx, %ecx From dce2a224763ce968445e14c43b49321936309c75 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Wed, 5 Jun 2024 20:13:16 +0200 Subject: [PATCH 002/655] locking/atomic/x86: Redeclare x86_32 arch_atomic64_{add,sub}() as void Correct the return type of x86_32 arch_atomic64_add() and arch_atomic64_sub() functions to 'void' and remove redundant return. Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/20240605181424.3228-2-ubizjak@gmail.com --- arch/x86/include/asm/atomic64_32.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index 8db2ec4d6cda..1f650b4dde50 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h @@ -163,20 +163,18 @@ static __always_inline s64 arch_atomic64_dec_return(atomic64_t *v) } #define arch_atomic64_dec_return arch_atomic64_dec_return -static __always_inline s64 arch_atomic64_add(s64 i, atomic64_t *v) +static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v) { __alternative_atomic64(add, add_return, ASM_OUTPUT2("+A" (i), "+c" (v)), ASM_NO_INPUT_CLOBBER("memory")); - return i; } -static __always_inline s64 arch_atomic64_sub(s64 i, atomic64_t *v) +static __always_inline void arch_atomic64_sub(s64 i, atomic64_t *v) { __alternative_atomic64(sub, sub_return, ASM_OUTPUT2("+A" (i), "+c" (v)), ASM_NO_INPUT_CLOBBER("memory")); - return i; } static __always_inline void arch_atomic64_inc(atomic64_t *v) From 4ea5e9deda3f4cbd471d29e6e99106e51be19c86 Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Fri, 12 Jul 2024 19:52:31 +0800 Subject: [PATCH 003/655] driver core: Fix size calculation of symlink name for devlink_(add|remove)_symlinks() devlink_(add|remove)_symlinks() kzalloc() memory to save symlink name for both supplier and consumer, but do not explicitly take into account consumer's prefix "consumer:", so cause disadvantages listed below: 1) it seems wrong for the algorithm to calculate memory size 2) readers maybe need to count characters one by one of both prefix strings to confirm calculated memory size 3) it is relatively easy to introduce new bug if either prefix string is modified in future solved by taking into account consumer's prefix as well. Signed-off-by: Zijun Hu Link: https://lore.kernel.org/r/20240712-devlink_fix-v3-1-fa1c5172ffc7@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 730cae66607c..3e82eaba4932 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -572,7 +572,11 @@ static int devlink_add_symlinks(struct device *dev) len = max(strlen(dev_bus_name(sup)) + strlen(dev_name(sup)), strlen(dev_bus_name(con)) + strlen(dev_name(con))); len += strlen(":"); - len += strlen("supplier:") + 1; + /* + * we kzalloc() memory for symlink name of both supplier and + * consumer, so explicitly take into account both prefix. + */ + len += max(strlen("supplier:"), strlen("consumer:")) + 1; buf = kzalloc(len, GFP_KERNEL); if (!buf) return -ENOMEM; @@ -623,7 +627,7 @@ static void devlink_remove_symlinks(struct device *dev) len = max(strlen(dev_bus_name(sup)) + strlen(dev_name(sup)), strlen(dev_bus_name(con)) + strlen(dev_name(con))); len += strlen(":"); - len += strlen("supplier:") + 1; + len += max(strlen("supplier:"), strlen("consumer:")) + 1; buf = kzalloc(len, GFP_KERNEL); if (!buf) { WARN(1, "Unable to properly free device link symlinks!\n"); From 6d8249ac29bc23260dfa9747eb398ce76012d73c Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Mon, 22 Jul 2024 22:48:10 +0800 Subject: [PATCH 004/655] driver core: Fix error handling in driver API device_rename() For class-device, device_rename() failure maybe cause unexpected link name within its class folder as explained below: /sys/class/.../old_name -> /sys/devices/.../old_name device_rename(..., new_name) and failed /sys/class/.../new_name -> /sys/devices/.../old_name Fixed by undoing renaming link if renaming kobject failed. Fixes: f349cf34731c ("driver core: Implement ns directory support for device classes.") Signed-off-by: Zijun Hu Link: https://lore.kernel.org/r/20240722-device_rename_fix-v2-1-77de1a6c6495@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 3e82eaba4932..72798133ed63 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -4516,9 +4516,11 @@ EXPORT_SYMBOL_GPL(device_destroy); */ int device_rename(struct device *dev, const char *new_name) { + struct subsys_private *sp = NULL; struct kobject *kobj = &dev->kobj; char *old_device_name = NULL; int error; + bool is_link_renamed = false; dev = get_device(dev); if (!dev) @@ -4533,7 +4535,7 @@ int device_rename(struct device *dev, const char *new_name) } if (dev->class) { - struct subsys_private *sp = class_to_subsys(dev->class); + sp = class_to_subsys(dev->class); if (!sp) { error = -EINVAL; @@ -4542,16 +4544,19 @@ int device_rename(struct device *dev, const char *new_name) error = sysfs_rename_link_ns(&sp->subsys.kobj, kobj, old_device_name, new_name, kobject_namespace(kobj)); - subsys_put(sp); if (error) goto out; + + is_link_renamed = true; } error = kobject_rename(kobj, new_name); - if (error) - goto out; - out: + if (error && is_link_renamed) + sysfs_rename_link_ns(&sp->subsys.kobj, kobj, new_name, + old_device_name, kobject_namespace(kobj)); + subsys_put(sp); + put_device(dev); kfree(old_device_name); From c0fd973c108cdc22a384854bc4b3e288a9717bb2 Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Wed, 24 Jul 2024 21:54:48 +0800 Subject: [PATCH 005/655] driver core: bus: Return -EIO instead of 0 when show/store invalid bus attribute Return -EIO instead of 0 for below erroneous bus attribute operations: - read a bus attribute without show(). - write a bus attribute without store(). Signed-off-by: Zijun Hu Link: https://lore.kernel.org/r/20240724-bus_fix-v2-1-5adbafc698fb@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/bus.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/base/bus.c b/drivers/base/bus.c index ffea0728b8b2..e5073fa82b95 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -152,7 +152,8 @@ static ssize_t bus_attr_show(struct kobject *kobj, struct attribute *attr, { struct bus_attribute *bus_attr = to_bus_attr(attr); struct subsys_private *subsys_priv = to_subsys_private(kobj); - ssize_t ret = 0; + /* return -EIO for reading a bus attribute without show() */ + ssize_t ret = -EIO; if (bus_attr->show) ret = bus_attr->show(subsys_priv->bus, buf); @@ -164,7 +165,8 @@ static ssize_t bus_attr_store(struct kobject *kobj, struct attribute *attr, { struct bus_attribute *bus_attr = to_bus_attr(attr); struct subsys_private *subsys_priv = to_subsys_private(kobj); - ssize_t ret = 0; + /* return -EIO for writing a bus attribute without store() */ + ssize_t ret = -EIO; if (bus_attr->store) ret = bus_attr->store(subsys_priv->bus, buf, count); From 0314647dec70bf0e856303dc70d00e9f1ba568ba Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Thu, 25 Jul 2024 23:40:55 +0800 Subject: [PATCH 006/655] driver core: Remove unused parameter for virtual_device_parent() Function struct kobject *virtual_device_parent(struct device *dev) does not use its parameter @dev, and the kobject returned also has nothing deal with specific device, so remove the unused parameter. Signed-off-by: Zijun Hu Link: https://lore.kernel.org/r/20240725-virtual_kobj_fix-v1-1-36335cae4544@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/base.h | 2 +- drivers/base/bus.c | 2 +- drivers/base/core.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/base/base.h b/drivers/base/base.h index 0b53593372d7..8cf04a557bdb 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -145,7 +145,7 @@ void auxiliary_bus_init(void); static inline void auxiliary_bus_init(void) { } #endif -struct kobject *virtual_device_parent(struct device *dev); +struct kobject *virtual_device_parent(void); int bus_add_device(struct device *dev); void bus_probe_device(struct device *dev); diff --git a/drivers/base/bus.c b/drivers/base/bus.c index e5073fa82b95..8bf04d5ef51d 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -1296,7 +1296,7 @@ int subsys_virtual_register(const struct bus_type *subsys, { struct kobject *virtual_dir; - virtual_dir = virtual_device_parent(NULL); + virtual_dir = virtual_device_parent(); if (!virtual_dir) return -ENOMEM; diff --git a/drivers/base/core.c b/drivers/base/core.c index 72798133ed63..1688e76cb64b 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -3171,7 +3171,7 @@ void device_initialize(struct device *dev) } EXPORT_SYMBOL_GPL(device_initialize); -struct kobject *virtual_device_parent(struct device *dev) +struct kobject *virtual_device_parent(void) { static struct kobject *virtual_dir = NULL; @@ -3249,7 +3249,7 @@ static struct kobject *get_device_parent(struct device *dev, * in a "glue" directory to prevent namespace collisions. */ if (parent == NULL) - parent_kobj = virtual_device_parent(dev); + parent_kobj = virtual_device_parent(); else if (parent->class && !dev->class->ns_type) { subsys_put(sp); return &parent->kobj; From 2bdf3b83515ead3b3fdf93610e4a3bb9a89bc852 Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Sat, 27 Jul 2024 14:08:34 +0800 Subject: [PATCH 007/655] driver core: bus: Add simple error handling for buses_init() Add simple error handling for buses_init() since it is easy to do. Signed-off-by: Zijun Hu Link: https://lore.kernel.org/r/20240727-buses_init-v1-1-e863295a2c0e@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/bus.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 8bf04d5ef51d..5bb1dca96242 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -1387,8 +1387,13 @@ int __init buses_init(void) return -ENOMEM; system_kset = kset_create_and_add("system", NULL, &devices_kset->kobj); - if (!system_kset) + if (!system_kset) { + /* Do error handling here as devices_init() do */ + kset_unregister(bus_kset); + bus_kset = NULL; + pr_err("%s: failed to create and add kset 'bus'\n", __func__); return -ENOMEM; + } return 0; } From bfa54a793ba77ef696755b66f3ac4ed00c7d1248 Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Sat, 27 Jul 2024 16:34:01 +0800 Subject: [PATCH 008/655] driver core: bus: Fix double free in driver API bus_register() For bus_register(), any error which happens after kset_register() will cause that @priv are freed twice, fixed by setting @priv with NULL after the first free. Signed-off-by: Zijun Hu Link: https://lore.kernel.org/r/20240727-bus_register_fix-v1-1-fed8dd0dba7a@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/bus.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 5bb1dca96242..abf090ace833 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -922,6 +922,8 @@ int bus_register(const struct bus_type *bus) bus_remove_file(bus, &bus_attr_uevent); bus_uevent_fail: kset_unregister(&priv->subsys); + /* Above kset_unregister() will kfree @priv */ + priv = NULL; out: kfree(priv); return retval; From 7886a61ebc1f3998df5950299cbe17272bf32c59 Mon Sep 17 00:00:00 2001 From: "Luis Claudio R. Goncalves" Date: Tue, 30 Jul 2024 14:45:47 -0300 Subject: [PATCH 009/655] lockdep: suggest the fix for "lockdep bfs error:-1" on print_bfs_bug When lockdep fails while performing the Breadth-first-search operation due to lack of memory, hint that increasing the value of the config switch LOCKDEP_CIRCULAR_QUEUE_BITS should fix the warning. Preface the scary backtrace with the suggestion: [ 163.849242] Increase LOCKDEP_CIRCULAR_QUEUE_BITS to avoid this warning: [ 163.849248] ------------[ cut here ]------------ [ 163.849250] lockdep bfs error:-1 [ 163.849263] WARNING: CPU: 24 PID: 2454 at kernel/locking/lockdep.c:2091 print_bfs_bug+0x27/0x40 ... Signed-off-by: Luis Claudio R. Goncalves Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Boqun Feng Reviewed-by: Waiman Long Link: https://lkml.kernel.org/r/Zqkmy0lS-9Sw0M9j@uudg.org --- kernel/locking/lockdep.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 726b22ce7d0b..fee21f3c3b99 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -2067,6 +2067,9 @@ static noinline void print_bfs_bug(int ret) /* * Breadth-first-search failed, graph got corrupted? */ + if (ret == BFS_EQUEUEFULL) + pr_warn("Increase LOCKDEP_CIRCULAR_QUEUE_BITS to avoid this warning:\n"); + WARN(1, "lockdep bfs error:%d\n", ret); } From d5934e76316e84eced836b6b2bafae1837d1cd58 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 29 Mar 2024 16:48:48 -0700 Subject: [PATCH 010/655] cleanup: Add usage and style documentation When proposing that PCI grow some new cleanup helpers for pci_dev_put() and pci_dev_{lock,unlock} [1], Bjorn had some fundamental questions about expectations and best practices. Upon reviewing an updated changelog with those details he recommended adding them to documentation in the header file itself. Add that documentation and link it into the rendering for Documentation/core-api/. Signed-off-by: Dan Williams Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Jonathan Cameron Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/171175585714.2192972.12661675876300167762.stgit@dwillia2-xfh.jf.intel.com --- Documentation/core-api/cleanup.rst | 8 ++ Documentation/core-api/index.rst | 1 + include/linux/cleanup.h | 136 +++++++++++++++++++++++++++++ 3 files changed, 145 insertions(+) create mode 100644 Documentation/core-api/cleanup.rst diff --git a/Documentation/core-api/cleanup.rst b/Documentation/core-api/cleanup.rst new file mode 100644 index 000000000000..527eb2f8ec6e --- /dev/null +++ b/Documentation/core-api/cleanup.rst @@ -0,0 +1,8 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=========================== +Scope-based Cleanup Helpers +=========================== + +.. kernel-doc:: include/linux/cleanup.h + :doc: scope-based cleanup helpers diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst index f147854700e4..b99d2fb3e2f1 100644 --- a/Documentation/core-api/index.rst +++ b/Documentation/core-api/index.rst @@ -35,6 +35,7 @@ Library functionality that is used throughout the kernel. kobject kref + cleanup assoc_array xarray maple_tree diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index d9e613803df1..9c6b4f2c0176 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -4,6 +4,142 @@ #include +/** + * DOC: scope-based cleanup helpers + * + * The "goto error" pattern is notorious for introducing subtle resource + * leaks. It is tedious and error prone to add new resource acquisition + * constraints into code paths that already have several unwind + * conditions. The "cleanup" helpers enable the compiler to help with + * this tedium and can aid in maintaining LIFO (last in first out) + * unwind ordering to avoid unintentional leaks. + * + * As drivers make up the majority of the kernel code base, here is an + * example of using these helpers to clean up PCI drivers. The target of + * the cleanups are occasions where a goto is used to unwind a device + * reference (pci_dev_put()), or unlock the device (pci_dev_unlock()) + * before returning. + * + * The DEFINE_FREE() macro can arrange for PCI device references to be + * dropped when the associated variable goes out of scope:: + * + * DEFINE_FREE(pci_dev_put, struct pci_dev *, if (_T) pci_dev_put(_T)) + * ... + * struct pci_dev *dev __free(pci_dev_put) = + * pci_get_slot(parent, PCI_DEVFN(0, 0)); + * + * The above will automatically call pci_dev_put() if @dev is non-NULL + * when @dev goes out of scope (automatic variable scope). If a function + * wants to invoke pci_dev_put() on error, but return @dev (i.e. without + * freeing it) on success, it can do:: + * + * return no_free_ptr(dev); + * + * ...or:: + * + * return_ptr(dev); + * + * The DEFINE_GUARD() macro can arrange for the PCI device lock to be + * dropped when the scope where guard() is invoked ends:: + * + * DEFINE_GUARD(pci_dev, struct pci_dev *, pci_dev_lock(_T), pci_dev_unlock(_T)) + * ... + * guard(pci_dev)(dev); + * + * The lifetime of the lock obtained by the guard() helper follows the + * scope of automatic variable declaration. Take the following example:: + * + * func(...) + * { + * if (...) { + * ... + * guard(pci_dev)(dev); // pci_dev_lock() invoked here + * ... + * } // <- implied pci_dev_unlock() triggered here + * } + * + * Observe the lock is held for the remainder of the "if ()" block not + * the remainder of "func()". + * + * Now, when a function uses both __free() and guard(), or multiple + * instances of __free(), the LIFO order of variable definition order + * matters. GCC documentation says: + * + * "When multiple variables in the same scope have cleanup attributes, + * at exit from the scope their associated cleanup functions are run in + * reverse order of definition (last defined, first cleanup)." + * + * When the unwind order matters it requires that variables be defined + * mid-function scope rather than at the top of the file. Take the + * following example and notice the bug highlighted by "!!":: + * + * LIST_HEAD(list); + * DEFINE_MUTEX(lock); + * + * struct object { + * struct list_head node; + * }; + * + * static struct object *alloc_add(void) + * { + * struct object *obj; + * + * lockdep_assert_held(&lock); + * obj = kzalloc(sizeof(*obj), GFP_KERNEL); + * if (obj) { + * LIST_HEAD_INIT(&obj->node); + * list_add(obj->node, &list): + * } + * return obj; + * } + * + * static void remove_free(struct object *obj) + * { + * lockdep_assert_held(&lock); + * list_del(&obj->node); + * kfree(obj); + * } + * + * DEFINE_FREE(remove_free, struct object *, if (_T) remove_free(_T)) + * static int init(void) + * { + * struct object *obj __free(remove_free) = NULL; + * int err; + * + * guard(mutex)(&lock); + * obj = alloc_add(); + * + * if (!obj) + * return -ENOMEM; + * + * err = other_init(obj); + * if (err) + * return err; // remove_free() called without the lock!! + * + * no_free_ptr(obj); + * return 0; + * } + * + * That bug is fixed by changing init() to call guard() and define + + * initialize @obj in this order:: + * + * guard(mutex)(&lock); + * struct object *obj __free(remove_free) = alloc_add(); + * + * Given that the "__free(...) = NULL" pattern for variables defined at + * the top of the function poses this potential interdependency problem + * the recommendation is to always define and assign variables in one + * statement and not group variable definitions at the top of the + * function when __free() is used. + * + * Lastly, given that the benefit of cleanup helpers is removal of + * "goto", and that the "goto" statement can jump between scopes, the + * expectation is that usage of "goto" and cleanup helpers is never + * mixed in the same function. I.e. for a given routine, convert all + * resources that need a "goto" cleanup to scope-based cleanup, or + * convert none of them. + */ + /* * DEFINE_FREE(name, type, free): * simple helper macro that defines the required wrapper for a __free() From 13c267f0c27e35ee9372d3cf0dde1ea09db02f13 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Tue, 28 May 2024 14:00:09 +0200 Subject: [PATCH 011/655] lockdep: Use str_plural() to fix Coccinelle warning Fixes the following Coccinelle/coccicheck warning reported by string_choices.cocci: opportunity for str_plural(depth) Acked-by: Waiman Long Signed-off-by: Thorsten Blum Signed-off-by: Boqun Feng Link: https://lore.kernel.org/r/20240528120008.403511-2-thorsten.blum@toblux.com --- kernel/locking/lockdep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index fee21f3c3b99..266f57f36f69 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -785,7 +785,7 @@ static void lockdep_print_held_locks(struct task_struct *p) printk("no locks held by %s/%d.\n", p->comm, task_pid_nr(p)); else printk("%d lock%s held by %s/%d:\n", depth, - depth > 1 ? "s" : "", p->comm, task_pid_nr(p)); + str_plural(depth), p->comm, task_pid_nr(p)); /* * It's not reliable to print a task's held locks if it's not sleeping * and it's not the current task. From a6f88ac32c6e63e69c595bfae220d8641704c9b7 Mon Sep 17 00:00:00 2001 From: Zhiguo Niu Date: Thu, 20 Jun 2024 22:54:34 +0000 Subject: [PATCH 012/655] lockdep: fix deadlock issue between lockdep and rcu There is a deadlock scenario between lockdep and rcu when rcu nocb feature is enabled, just as following call stack: rcuop/x -000|queued_spin_lock_slowpath(lock = 0xFFFFFF817F2A8A80, val = ?) -001|queued_spin_lock(inline) // try to hold nocb_gp_lock -001|do_raw_spin_lock(lock = 0xFFFFFF817F2A8A80) -002|__raw_spin_lock_irqsave(inline) -002|_raw_spin_lock_irqsave(lock = 0xFFFFFF817F2A8A80) -003|wake_nocb_gp_defer(inline) -003|__call_rcu_nocb_wake(rdp = 0xFFFFFF817F30B680) -004|__call_rcu_common(inline) -004|call_rcu(head = 0xFFFFFFC082EECC28, func = ?) -005|call_rcu_zapped(inline) -005|free_zapped_rcu(ch = ?)// hold graph lock -006|rcu_do_batch(rdp = 0xFFFFFF817F245680) -007|nocb_cb_wait(inline) -007|rcu_nocb_cb_kthread(arg = 0xFFFFFF817F245680) -008|kthread(_create = 0xFFFFFF80803122C0) -009|ret_from_fork(asm) rcuop/y -000|queued_spin_lock_slowpath(lock = 0xFFFFFFC08291BBC8, val = 0) -001|queued_spin_lock() -001|lockdep_lock() -001|graph_lock() // try to hold graph lock -002|lookup_chain_cache_add() -002|validate_chain() -003|lock_acquire -004|_raw_spin_lock_irqsave(lock = 0xFFFFFF817F211D80) -005|lock_timer_base(inline) -006|mod_timer(inline) -006|wake_nocb_gp_defer(inline)// hold nocb_gp_lock -006|__call_rcu_nocb_wake(rdp = 0xFFFFFF817F2A8680) -007|__call_rcu_common(inline) -007|call_rcu(head = 0xFFFFFFC0822E0B58, func = ?) -008|call_rcu_hurry(inline) -008|rcu_sync_call(inline) -008|rcu_sync_func(rhp = 0xFFFFFFC0822E0B58) -009|rcu_do_batch(rdp = 0xFFFFFF817F266680) -010|nocb_cb_wait(inline) -010|rcu_nocb_cb_kthread(arg = 0xFFFFFF817F266680) -011|kthread(_create = 0xFFFFFF8080363740) -012|ret_from_fork(asm) rcuop/x and rcuop/y are rcu nocb threads with the same nocb gp thread. This patch release the graph lock before lockdep call_rcu. Fixes: a0b0fd53e1e6 ("locking/lockdep: Free lock classes that are no longer in use") Cc: stable@vger.kernel.org Cc: Boqun Feng Cc: Waiman Long Cc: Carlos Llamas Cc: Bart Van Assche Signed-off-by: Zhiguo Niu Signed-off-by: Xuewen Yan Reviewed-by: Waiman Long Reviewed-by: Carlos Llamas Reviewed-by: Bart Van Assche Signed-off-by: Carlos Llamas Acked-by: Paul E. McKenney Signed-off-by: Boqun Feng Link: https://lore.kernel.org/r/20240620225436.3127927-1-cmllamas@google.com --- kernel/locking/lockdep.c | 50 ++++++++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 266f57f36f69..b172ead28f1c 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -6186,25 +6186,27 @@ static struct pending_free *get_pending_free(void) static void free_zapped_rcu(struct rcu_head *cb); /* - * Schedule an RCU callback if no RCU callback is pending. Must be called with - * the graph lock held. - */ -static void call_rcu_zapped(struct pending_free *pf) +* See if we need to queue an RCU callback, must called with +* the lockdep lock held, returns false if either we don't have +* any pending free or the callback is already scheduled. +* Otherwise, a call_rcu() must follow this function call. +*/ +static bool prepare_call_rcu_zapped(struct pending_free *pf) { WARN_ON_ONCE(inside_selftest()); if (list_empty(&pf->zapped)) - return; + return false; if (delayed_free.scheduled) - return; + return false; delayed_free.scheduled = true; WARN_ON_ONCE(delayed_free.pf + delayed_free.index != pf); delayed_free.index ^= 1; - call_rcu(&delayed_free.rcu_head, free_zapped_rcu); + return true; } /* The caller must hold the graph lock. May be called from RCU context. */ @@ -6230,6 +6232,7 @@ static void free_zapped_rcu(struct rcu_head *ch) { struct pending_free *pf; unsigned long flags; + bool need_callback; if (WARN_ON_ONCE(ch != &delayed_free.rcu_head)) return; @@ -6241,14 +6244,18 @@ static void free_zapped_rcu(struct rcu_head *ch) pf = delayed_free.pf + (delayed_free.index ^ 1); __free_zapped_classes(pf); delayed_free.scheduled = false; - - /* - * If there's anything on the open list, close and start a new callback. - */ - call_rcu_zapped(delayed_free.pf + delayed_free.index); - + need_callback = + prepare_call_rcu_zapped(delayed_free.pf + delayed_free.index); lockdep_unlock(); raw_local_irq_restore(flags); + + /* + * If there's pending free and its callback has not been scheduled, + * queue an RCU callback. + */ + if (need_callback) + call_rcu(&delayed_free.rcu_head, free_zapped_rcu); + } /* @@ -6288,6 +6295,7 @@ static void lockdep_free_key_range_reg(void *start, unsigned long size) { struct pending_free *pf; unsigned long flags; + bool need_callback; init_data_structures_once(); @@ -6295,10 +6303,11 @@ static void lockdep_free_key_range_reg(void *start, unsigned long size) lockdep_lock(); pf = get_pending_free(); __lockdep_free_key_range(pf, start, size); - call_rcu_zapped(pf); + need_callback = prepare_call_rcu_zapped(pf); lockdep_unlock(); raw_local_irq_restore(flags); - + if (need_callback) + call_rcu(&delayed_free.rcu_head, free_zapped_rcu); /* * Wait for any possible iterators from look_up_lock_class() to pass * before continuing to free the memory they refer to. @@ -6392,6 +6401,7 @@ static void lockdep_reset_lock_reg(struct lockdep_map *lock) struct pending_free *pf; unsigned long flags; int locked; + bool need_callback = false; raw_local_irq_save(flags); locked = graph_lock(); @@ -6400,11 +6410,13 @@ static void lockdep_reset_lock_reg(struct lockdep_map *lock) pf = get_pending_free(); __lockdep_reset_lock(pf, lock); - call_rcu_zapped(pf); + need_callback = prepare_call_rcu_zapped(pf); graph_unlock(); out_irq: raw_local_irq_restore(flags); + if (need_callback) + call_rcu(&delayed_free.rcu_head, free_zapped_rcu); } /* @@ -6448,6 +6460,7 @@ void lockdep_unregister_key(struct lock_class_key *key) struct pending_free *pf; unsigned long flags; bool found = false; + bool need_callback = false; might_sleep(); @@ -6468,11 +6481,14 @@ void lockdep_unregister_key(struct lock_class_key *key) if (found) { pf = get_pending_free(); __lockdep_free_key_range(pf, key, 1); - call_rcu_zapped(pf); + need_callback = prepare_call_rcu_zapped(pf); } lockdep_unlock(); raw_local_irq_restore(flags); + if (need_callback) + call_rcu(&delayed_free.rcu_head, free_zapped_rcu); + /* Wait until is_dynamic_key() has finished accessing k->hash_entry. */ synchronize_rcu(); } From 39dea484e2bb9066abbc01e2c5e03b6917b0b775 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Mon, 15 Jul 2024 10:42:17 +0200 Subject: [PATCH 013/655] locking/lockdep: Simplify character output in seq_line() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Single characters should be put into a sequence. Thus use the corresponding function “seq_putc” for one selected call. This issue was transformed by using the Coccinelle software. Suggested-by: Christophe Jaillet Signed-off-by: Markus Elfring Signed-off-by: Boqun Feng Link: https://lore.kernel.org/r/e346d688-7b01-462f-867c-ba52b7790d19@web.de --- kernel/locking/lockdep_proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c index e2bfb1db589d..6db0f43fc4df 100644 --- a/kernel/locking/lockdep_proc.c +++ b/kernel/locking/lockdep_proc.c @@ -424,7 +424,7 @@ static void seq_line(struct seq_file *m, char c, int offset, int length) for (i = 0; i < offset; i++) seq_puts(m, " "); for (i = 0; i < length; i++) - seq_printf(m, "%c", c); + seq_putc(m, c); seq_puts(m, "\n"); } From 18ec12c97b39ff6aa15beb8d2b25d15cd44b87d8 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 12 Aug 2024 16:06:58 +0800 Subject: [PATCH 014/655] driver core: Fix a potential null-ptr-deref in module_add_driver() Inject fault while probing of-fpga-region, if kasprintf() fails in module_add_driver(), the second sysfs_remove_link() in exit path will cause null-ptr-deref as below because kernfs_name_hash() will call strlen() with NULL driver_name. Fix it by releasing resources based on the exit path sequence. KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] Mem abort info: ESR = 0x0000000096000005 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x05: level 1 translation fault Data abort info: ISV = 0, ISS = 0x00000005, ISS2 = 0x00000000 CM = 0, WnR = 0, TnD = 0, TagAccess = 0 GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 [dfffffc000000000] address between user and kernel address ranges Internal error: Oops: 0000000096000005 [#1] PREEMPT SMP Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: of_fpga_region(+) fpga_region fpga_bridge cfg80211 rfkill 8021q garp mrp stp llc ipv6 [last unloaded: of_fpga_region] CPU: 2 UID: 0 PID: 2036 Comm: modprobe Not tainted 6.11.0-rc2-g6a0e38264012 #295 Hardware name: linux,dummy-virt (DT) pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : strlen+0x24/0xb0 lr : kernfs_name_hash+0x1c/0xc4 sp : ffffffc081f97380 x29: ffffffc081f97380 x28: ffffffc081f97b90 x27: ffffff80c821c2a0 x26: ffffffedac0be418 x25: 0000000000000000 x24: ffffff80c09d2000 x23: 0000000000000000 x22: 0000000000000000 x21: 0000000000000000 x20: 0000000000000000 x19: 0000000000000000 x18: 0000000000001840 x17: 0000000000000000 x16: 0000000000000000 x15: 1ffffff8103f2e42 x14: 00000000f1f1f1f1 x13: 0000000000000004 x12: ffffffb01812d61d x11: 1ffffff01812d61c x10: ffffffb01812d61c x9 : dfffffc000000000 x8 : 0000004fe7ed29e4 x7 : ffffff80c096b0e7 x6 : 0000000000000001 x5 : ffffff80c096b0e0 x4 : 1ffffffdb990efa2 x3 : 0000000000000000 x2 : 0000000000000000 x1 : dfffffc000000000 x0 : 0000000000000000 Call trace: strlen+0x24/0xb0 kernfs_name_hash+0x1c/0xc4 kernfs_find_ns+0x118/0x2e8 kernfs_remove_by_name_ns+0x80/0x100 sysfs_remove_link+0x74/0xa8 module_add_driver+0x278/0x394 bus_add_driver+0x1f0/0x43c driver_register+0xf4/0x3c0 __platform_driver_register+0x60/0x88 of_fpga_region_init+0x20/0x1000 [of_fpga_region] do_one_initcall+0x110/0x788 do_init_module+0x1dc/0x5c8 load_module+0x3c38/0x4cac init_module_from_file+0xd4/0x128 idempotent_init_module+0x2cc/0x528 __arm64_sys_finit_module+0xac/0x100 invoke_syscall+0x6c/0x258 el0_svc_common.constprop.0+0x160/0x22c do_el0_svc+0x44/0x5c el0_svc+0x48/0xb8 el0t_64_sync_handler+0x13c/0x158 el0t_64_sync+0x190/0x194 Code: f2fbffe1 a90157f4 12000802 aa0003f5 (38e16861) ---[ end trace 0000000000000000 ]--- Kernel panic - not syncing: Oops: Fatal exception Fixes: 85d2b0aa1703 ("module: don't ignore sysfs_create_link() failures") Signed-off-by: Jinjie Ruan Link: https://lore.kernel.org/r/20240812080658.2791982-1-ruanjinjie@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/module.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/base/module.c b/drivers/base/module.c index f742ad2a21da..c4eaa1158d54 100644 --- a/drivers/base/module.c +++ b/drivers/base/module.c @@ -66,27 +66,31 @@ int module_add_driver(struct module *mod, const struct device_driver *drv) driver_name = make_driver_name(drv); if (!driver_name) { ret = -ENOMEM; - goto out; + goto out_remove_kobj; } module_create_drivers_dir(mk); if (!mk->drivers_dir) { ret = -EINVAL; - goto out; + goto out_free_driver_name; } ret = sysfs_create_link(mk->drivers_dir, &drv->p->kobj, driver_name); if (ret) - goto out; + goto out_remove_drivers_dir; kfree(driver_name); return 0; -out: - sysfs_remove_link(&drv->p->kobj, "module"); + +out_remove_drivers_dir: sysfs_remove_link(mk->drivers_dir, driver_name); + +out_free_driver_name: kfree(driver_name); +out_remove_kobj: + sysfs_remove_link(&drv->p->kobj, "module"); return ret; } From aa8d1f48d353b0469bff357183ee9df137d15ef0 Mon Sep 17 00:00:00 2001 From: Yan Zhao Date: Wed, 3 Jul 2024 10:10:43 +0800 Subject: [PATCH 015/655] KVM: x86/mmu: Introduce a quirk to control memslot zap behavior Introduce the quirk KVM_X86_QUIRK_SLOT_ZAP_ALL to allow users to select KVM's behavior when a memslot is moved or deleted for KVM_X86_DEFAULT_VM VMs. Make sure KVM behave as if the quirk is always disabled for non-KVM_X86_DEFAULT_VM VMs. The KVM_X86_QUIRK_SLOT_ZAP_ALL quirk offers two behavior options: - when enabled: Invalidate/zap all SPTEs ("zap-all"), - when disabled: Precisely zap only the leaf SPTEs within the range of the moving/deleting memory slot ("zap-slot-leafs-only"). "zap-all" is today's KVM behavior to work around a bug [1] where the changing the zapping behavior of memslot move/deletion would cause VM instability for VMs with an Nvidia GPU assigned; while "zap-slot-leafs-only" allows for more precise zapping of SPTEs within the memory slot range, improving performance in certain scenarios [2], and meeting the functional requirements for TDX. Previous attempts to select "zap-slot-leafs-only" include a per-VM capability approach [3] (which was not preferred because the root cause of the bug remained unidentified) and a per-memslot flag approach [4]. Sean and Paolo finally recommended the implementation of this quirk and explained that it's the least bad option [5]. By default, the quirk is enabled on KVM_X86_DEFAULT_VM VMs to use "zap-all". Users have the option to disable the quirk to select "zap-slot-leafs-only" for specific KVM_X86_DEFAULT_VM VMs that are unaffected by this bug. For non-KVM_X86_DEFAULT_VM VMs, the "zap-slot-leafs-only" behavior is always selected without user's opt-in, regardless of if the user opts for "zap-all". This is because it is assumed until proven otherwise that non- KVM_X86_DEFAULT_VM VMs will not be exposed to the bug [1], and most importantly, it's because TDX must have "zap-slot-leafs-only" always selected. In TDX's case a memslot's GPA range can be a mixture of "private" or "shared" memory. Shared is roughly analogous to how EPT is handled for normal VMs, but private GPAs need lots of special treatment: 1) "zap-all" would require to zap private root page or non-leaf entries or at least leaf-entries beyond the deleting memslot scope. However, TDX demands that the root page of the private page table remains unchanged, with leaf entries being zapped before non-leaf entries, and any dropped private guest pages must be re-accepted by the guest. 2) if "zap-all" zaps only shared page tables, it would result in private pages still being mapped when the memslot is gone. This may affect even other processes if later the gmem fd was whole punched, causing the pages being freed on the host while still mapped in the TD, because there's no pgoff to the gfn information to zap the private page table after memslot is gone. So, simply go "zap-slot-leafs-only" as if the quirk is always disabled for non-KVM_X86_DEFAULT_VM VMs to avoid manual opt-in for every VM type [6] or complicating quirk disabling interface (current quirk disabling interface is limited, no way to query quirks, or force them to be disabled). Add a new function kvm_mmu_zap_memslot_leafs() to implement "zap-slot-leafs-only". This function does not call kvm_unmap_gfn_range(), bypassing special handling to APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, as 1) The APIC_ACCESS_PAGE_PRIVATE_MEMSLOT cannot be created by users, nor can it be moved. It is only deleted by KVM when APICv is permanently inhibited. 2) kvm_vcpu_reload_apic_access_page() effectively does nothing when APIC_ACCESS_PAGE_PRIVATE_MEMSLOT is deleted. 3) Avoid making all cpus request of KVM_REQ_APIC_PAGE_RELOAD can save on costly IPIs. Suggested-by: Kai Huang Suggested-by: Sean Christopherson Suggested-by: Paolo Bonzini Link: https://patchwork.kernel.org/project/kvm/patch/20190205210137.1377-11-sean.j.christopherson@intel.com [1] Link: https://patchwork.kernel.org/project/kvm/patch/20190205210137.1377-11-sean.j.christopherson@intel.com/#25054908 [2] Link: https://lore.kernel.org/kvm/20200713190649.GE29725@linux.intel.com/T/#mabc0119583dacf621025e9d873c85f4fbaa66d5c [3] Link: https://lore.kernel.org/all/20240515005952.3410568-3-rick.p.edgecombe@intel.com [4] Link: https://lore.kernel.org/all/7df9032d-83e4-46a1-ab29-6c7973a2ab0b@redhat.com [5] Link: https://lore.kernel.org/all/ZnGa550k46ow2N3L@google.com [6] Co-developed-by: Rick Edgecombe Signed-off-by: Rick Edgecombe Signed-off-by: Yan Zhao Message-ID: <20240703021043.13881-1-yan.y.zhao@intel.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 8 +++++++ arch/x86/include/asm/kvm_host.h | 3 ++- arch/x86/include/uapi/asm/kvm.h | 1 + arch/x86/kvm/mmu/mmu.c | 42 ++++++++++++++++++++++++++++++++- 4 files changed, 52 insertions(+), 2 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index b3be87489108..b4d1cf2e4628 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -8082,6 +8082,14 @@ KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS By default, KVM emulates MONITOR/MWAIT (if guest CPUID on writes to MISC_ENABLE if KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT is disabled. + +KVM_X86_QUIRK_SLOT_ZAP_ALL By default, KVM invalidates all SPTEs in + fast way for memslot deletion when VM type + is KVM_X86_DEFAULT_VM. + When this quirk is disabled or when VM type + is other than KVM_X86_DEFAULT_VM, KVM zaps + only leaf SPTEs that are within the range of + the memslot being deleted. =================================== ============================================ 7.32 KVM_CAP_MAX_VCPU_ID diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4a68cb3eba78..e4fc362ba3da 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -2345,7 +2345,8 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages); KVM_X86_QUIRK_OUT_7E_INC_RIP | \ KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT | \ KVM_X86_QUIRK_FIX_HYPERCALL_INSN | \ - KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS) + KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS | \ + KVM_X86_QUIRK_SLOT_ZAP_ALL) /* * KVM previously used a u32 field in kvm_run to indicate the hypercall was diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index bf57a824f722..a8debbf2f702 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -439,6 +439,7 @@ struct kvm_sync_regs { #define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4) #define KVM_X86_QUIRK_FIX_HYPERCALL_INSN (1 << 5) #define KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS (1 << 6) +#define KVM_X86_QUIRK_SLOT_ZAP_ALL (1 << 7) #define KVM_STATE_NESTED_FORMAT_VMX 0 #define KVM_STATE_NESTED_FORMAT_SVM 1 diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 928cf84778b0..f107ec2557c1 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -6997,10 +6997,50 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm) kvm_mmu_zap_all(kvm); } +/* + * Zapping leaf SPTEs with memslot range when a memslot is moved/deleted. + * + * Zapping non-leaf SPTEs, a.k.a. not-last SPTEs, isn't required, worst + * case scenario we'll have unused shadow pages lying around until they + * are recycled due to age or when the VM is destroyed. + */ +static void kvm_mmu_zap_memslot_leafs(struct kvm *kvm, struct kvm_memory_slot *slot) +{ + struct kvm_gfn_range range = { + .slot = slot, + .start = slot->base_gfn, + .end = slot->base_gfn + slot->npages, + .may_block = true, + }; + bool flush = false; + + write_lock(&kvm->mmu_lock); + + if (kvm_memslots_have_rmaps(kvm)) + flush = kvm_handle_gfn_range(kvm, &range, kvm_zap_rmap); + + if (tdp_mmu_enabled) + flush = kvm_tdp_mmu_unmap_gfn_range(kvm, &range, flush); + + if (flush) + kvm_flush_remote_tlbs_memslot(kvm, slot); + + write_unlock(&kvm->mmu_lock); +} + +static inline bool kvm_memslot_flush_zap_all(struct kvm *kvm) +{ + return kvm->arch.vm_type == KVM_X86_DEFAULT_VM && + kvm_check_has_quirk(kvm, KVM_X86_QUIRK_SLOT_ZAP_ALL); +} + void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) { - kvm_mmu_zap_all_fast(kvm); + if (kvm_memslot_flush_zap_all(kvm)) + kvm_mmu_zap_all_fast(kvm); + else + kvm_mmu_zap_memslot_leafs(kvm, slot); } void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen) From b4ed2c67d275b85b2ab07d54f88bebd5998d61d8 Mon Sep 17 00:00:00 2001 From: Yan Zhao Date: Wed, 3 Jul 2024 10:11:19 +0800 Subject: [PATCH 016/655] KVM: selftests: Test slot move/delete with slot zap quirk enabled/disabled Update set_memory_region_test to make sure memslot move and deletion function correctly both when slot zap quirk KVM_X86_QUIRK_SLOT_ZAP_ALL is enabled and disabled. Signed-off-by: Yan Zhao Message-ID: <20240703021119.13904-1-yan.y.zhao@intel.com> Signed-off-by: Paolo Bonzini --- .../selftests/kvm/set_memory_region_test.c | 29 ++++++++++++++----- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index bb8002084f52..a8267628e9ed 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -175,7 +175,7 @@ static void guest_code_move_memory_region(void) GUEST_DONE(); } -static void test_move_memory_region(void) +static void test_move_memory_region(bool disable_slot_zap_quirk) { pthread_t vcpu_thread; struct kvm_vcpu *vcpu; @@ -184,6 +184,9 @@ static void test_move_memory_region(void) vm = spawn_vm(&vcpu, &vcpu_thread, guest_code_move_memory_region); + if (disable_slot_zap_quirk) + vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_SLOT_ZAP_ALL); + hva = addr_gpa2hva(vm, MEM_REGION_GPA); /* @@ -266,7 +269,7 @@ static void guest_code_delete_memory_region(void) GUEST_ASSERT(0); } -static void test_delete_memory_region(void) +static void test_delete_memory_region(bool disable_slot_zap_quirk) { pthread_t vcpu_thread; struct kvm_vcpu *vcpu; @@ -276,6 +279,9 @@ static void test_delete_memory_region(void) vm = spawn_vm(&vcpu, &vcpu_thread, guest_code_delete_memory_region); + if (disable_slot_zap_quirk) + vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_SLOT_ZAP_ALL); + /* Delete the memory region, the guest should not die. */ vm_mem_region_delete(vm, MEM_REGION_SLOT); wait_for_vcpu(); @@ -553,7 +559,10 @@ int main(int argc, char *argv[]) { #ifdef __x86_64__ int i, loops; + int j, disable_slot_zap_quirk = 0; + if (kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_SLOT_ZAP_ALL) + disable_slot_zap_quirk = 1; /* * FIXME: the zero-memslot test fails on aarch64 and s390x because * KVM_RUN fails with ENOEXEC or EFAULT. @@ -579,13 +588,17 @@ int main(int argc, char *argv[]) else loops = 10; - pr_info("Testing MOVE of in-use region, %d loops\n", loops); - for (i = 0; i < loops; i++) - test_move_memory_region(); + for (j = 0; j <= disable_slot_zap_quirk; j++) { + pr_info("Testing MOVE of in-use region, %d loops, slot zap quirk %s\n", + loops, j ? "disabled" : "enabled"); + for (i = 0; i < loops; i++) + test_move_memory_region(!!j); - pr_info("Testing DELETE of in-use region, %d loops\n", loops); - for (i = 0; i < loops; i++) - test_delete_memory_region(); + pr_info("Testing DELETE of in-use region, %d loops, slot zap quirk %s\n", + loops, j ? "disabled" : "enabled"); + for (i = 0; i < loops; i++) + test_delete_memory_region(!!j); + } #endif return 0; From 218f6415004a881d116e254eeb837358aced55ab Mon Sep 17 00:00:00 2001 From: Yan Zhao Date: Wed, 3 Jul 2024 10:12:06 +0800 Subject: [PATCH 017/655] KVM: selftests: Allow slot modification stress test with quirk disabled Add a new user option to memslot_modification_stress_test to allow testing with slot zap quirk KVM_X86_QUIRK_SLOT_ZAP_ALL disabled. Signed-off-by: Yan Zhao Message-ID: <20240703021206.13923-1-yan.y.zhao@intel.com> Signed-off-by: Paolo Bonzini --- .../kvm/memslot_modification_stress_test.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c index 49f162573126..e3343f0df9e1 100644 --- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c +++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c @@ -79,6 +79,7 @@ struct test_params { useconds_t delay; uint64_t nr_iterations; bool partition_vcpu_memory_access; + bool disable_slot_zap_quirk; }; static void run_test(enum vm_guest_mode mode, void *arg) @@ -89,6 +90,13 @@ static void run_test(enum vm_guest_mode mode, void *arg) vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1, VM_MEM_SRC_ANONYMOUS, p->partition_vcpu_memory_access); +#ifdef __x86_64__ + if (p->disable_slot_zap_quirk) + vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_SLOT_ZAP_ALL); + + pr_info("Memslot zap quirk %s\n", p->disable_slot_zap_quirk ? + "disabled" : "enabled"); +#endif pr_info("Finished creating vCPUs\n"); @@ -107,11 +115,12 @@ static void run_test(enum vm_guest_mode mode, void *arg) static void help(char *name) { puts(""); - printf("usage: %s [-h] [-m mode] [-d delay_usec]\n" + printf("usage: %s [-h] [-m mode] [-d delay_usec] [-q]\n" " [-b memory] [-v vcpus] [-o] [-i iterations]\n", name); guest_modes_help(); printf(" -d: add a delay between each iteration of adding and\n" " deleting a memslot in usec.\n"); + printf(" -q: Disable memslot zap quirk.\n"); printf(" -b: specify the size of the memory region which should be\n" " accessed by each vCPU. e.g. 10M or 3G.\n" " Default: 1G\n"); @@ -137,7 +146,7 @@ int main(int argc, char *argv[]) guest_modes_append_default(); - while ((opt = getopt(argc, argv, "hm:d:b:v:oi:")) != -1) { + while ((opt = getopt(argc, argv, "hm:d:qb:v:oi:")) != -1) { switch (opt) { case 'm': guest_modes_cmdline(optarg); @@ -160,6 +169,12 @@ int main(int argc, char *argv[]) case 'i': p.nr_iterations = atoi_positive("Number of iterations", optarg); break; + case 'q': + p.disable_slot_zap_quirk = true; + + TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & + KVM_X86_QUIRK_SLOT_ZAP_ALL); + break; case 'h': default: help(argv[0]); From 61de4c34b51c5b9c7ef8229eb246346254638446 Mon Sep 17 00:00:00 2001 From: Yan Zhao Date: Wed, 3 Jul 2024 10:12:19 +0800 Subject: [PATCH 018/655] KVM: selftests: Test memslot move in memslot_perf_test with quirk disabled Add a new user option to memslot_perf_test to allow testing memslot move with quirk KVM_X86_QUIRK_SLOT_ZAP_ALL disabled. Signed-off-by: Yan Zhao Message-ID: <20240703021219.13939-1-yan.y.zhao@intel.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/memslot_perf_test.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index 579a64f97333..893366982f77 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -113,6 +113,7 @@ static_assert(ATOMIC_BOOL_LOCK_FREE == 2, "atomic bool is not lockless"); static sem_t vcpu_ready; static bool map_unmap_verify; +static bool disable_slot_zap_quirk; static bool verbose; #define pr_info_v(...) \ @@ -578,6 +579,9 @@ static bool test_memslot_move_prepare(struct vm_data *data, uint32_t guest_page_size = data->vm->page_size; uint64_t movesrcgpa, movetestgpa; + if (disable_slot_zap_quirk) + vm_enable_cap(data->vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_SLOT_ZAP_ALL); + movesrcgpa = vm_slot2gpa(data, data->nslots - 1); if (isactive) { @@ -896,6 +900,7 @@ static void help(char *name, struct test_args *targs) pr_info(" -h: print this help screen.\n"); pr_info(" -v: enable verbose mode (not for benchmarking).\n"); pr_info(" -d: enable extra debug checks.\n"); + pr_info(" -q: Disable memslot zap quirk during memslot move.\n"); pr_info(" -s: specify memslot count cap (-1 means no cap; currently: %i)\n", targs->nslots); pr_info(" -f: specify the first test to run (currently: %i; max %zu)\n", @@ -954,7 +959,7 @@ static bool parse_args(int argc, char *argv[], uint32_t max_mem_slots; int opt; - while ((opt = getopt(argc, argv, "hvds:f:e:l:r:")) != -1) { + while ((opt = getopt(argc, argv, "hvdqs:f:e:l:r:")) != -1) { switch (opt) { case 'h': default: @@ -966,6 +971,11 @@ static bool parse_args(int argc, char *argv[], case 'd': map_unmap_verify = true; break; + case 'q': + disable_slot_zap_quirk = true; + TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & + KVM_X86_QUIRK_SLOT_ZAP_ALL); + break; case 's': targs->nslots = atoi_paranoid(optarg); if (targs->nslots <= 1 && targs->nslots != -1) { From fda1dd3c54ef3c4200c2e77634a91610da973950 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Thu, 18 Jul 2024 17:50:37 -0700 Subject: [PATCH 019/655] find: Switch from inline to __always_inline 'inline' keyword is only a recommendation for compiler. If it decides to not inline find_bit nodemask functions, the whole small_const_nbits() machinery doesn't work. This is how a standard GCC 11.3.0 does for my x86_64 build now. This patch replaces 'inline' directive with unconditional '__always_inline' to make sure that there's always a chance for compile-time optimization. It doesn't change size of kernel image, according to bloat-o-meter. [[ Brian: split out from: Subject: [PATCH 1/3] bitmap: switch from inline to __always_inline https://lore.kernel.org/all/20221027043810.350460-2-yury.norov@gmail.com/ But rewritten, as there were too many conflicts. ]] Co-developed-by: Brian Norris Signed-off-by: Brian Norris Reviewed-by: Kees Cook Reviewed-by: Nathan Chancellor Signed-off-by: Yury Norov --- include/linux/find.h | 50 ++++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/include/linux/find.h b/include/linux/find.h index 5dfca4225fef..68685714bc18 100644 --- a/include/linux/find.h +++ b/include/linux/find.h @@ -52,7 +52,7 @@ unsigned long _find_next_bit_le(const unsigned long *addr, unsigned * Returns the bit number for the next set bit * If no bits are set, returns @size. */ -static inline +static __always_inline unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { @@ -81,7 +81,7 @@ unsigned long find_next_bit(const unsigned long *addr, unsigned long size, * Returns the bit number for the next set bit * If no bits are set, returns @size. */ -static inline +static __always_inline unsigned long find_next_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long offset) @@ -112,7 +112,7 @@ unsigned long find_next_and_bit(const unsigned long *addr1, * Returns the bit number for the next set bit * If no bits are set, returns @size. */ -static inline +static __always_inline unsigned long find_next_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long offset) @@ -142,7 +142,7 @@ unsigned long find_next_andnot_bit(const unsigned long *addr1, * Returns the bit number for the next set bit * If no bits are set, returns @size. */ -static inline +static __always_inline unsigned long find_next_or_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long offset) @@ -171,7 +171,7 @@ unsigned long find_next_or_bit(const unsigned long *addr1, * Returns the bit number of the next zero bit * If no bits are zero, returns @size. */ -static inline +static __always_inline unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { @@ -198,7 +198,7 @@ unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, * Returns the bit number of the first set bit. * If no bits are set, returns @size. */ -static inline +static __always_inline unsigned long find_first_bit(const unsigned long *addr, unsigned long size) { if (small_const_nbits(size)) { @@ -224,7 +224,7 @@ unsigned long find_first_bit(const unsigned long *addr, unsigned long size) * Returns the bit number of the N'th set bit. * If no such, returns >= @size. */ -static inline +static __always_inline unsigned long find_nth_bit(const unsigned long *addr, unsigned long size, unsigned long n) { if (n >= size) @@ -249,7 +249,7 @@ unsigned long find_nth_bit(const unsigned long *addr, unsigned long size, unsign * Returns the bit number of the N'th set bit. * If no such, returns @size. */ -static inline +static __always_inline unsigned long find_nth_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long n) { @@ -276,7 +276,7 @@ unsigned long find_nth_and_bit(const unsigned long *addr1, const unsigned long * * Returns the bit number of the N'th set bit. * If no such, returns @size. */ -static inline +static __always_inline unsigned long find_nth_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long n) { @@ -332,7 +332,7 @@ unsigned long find_nth_and_andnot_bit(const unsigned long *addr1, * Returns the bit number for the next set bit * If no bits are set, returns @size. */ -static inline +static __always_inline unsigned long find_first_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size) @@ -357,7 +357,7 @@ unsigned long find_first_and_bit(const unsigned long *addr1, * Returns the bit number for the first set bit * If no bits are set, returns @size. */ -static inline +static __always_inline unsigned long find_first_and_and_bit(const unsigned long *addr1, const unsigned long *addr2, const unsigned long *addr3, @@ -381,7 +381,7 @@ unsigned long find_first_and_and_bit(const unsigned long *addr1, * Returns the bit number of the first cleared bit. * If no bits are zero, returns @size. */ -static inline +static __always_inline unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) { if (small_const_nbits(size)) { @@ -402,7 +402,7 @@ unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) * * Returns the bit number of the last set bit, or size. */ -static inline +static __always_inline unsigned long find_last_bit(const unsigned long *addr, unsigned long size) { if (small_const_nbits(size)) { @@ -425,7 +425,7 @@ unsigned long find_last_bit(const unsigned long *addr, unsigned long size) * Returns the bit number for the next set bit, or first set bit up to @offset * If no bits are set, returns @size. */ -static inline +static __always_inline unsigned long find_next_and_bit_wrap(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long offset) @@ -448,7 +448,7 @@ unsigned long find_next_and_bit_wrap(const unsigned long *addr1, * Returns the bit number for the next set bit, or first set bit up to @offset * If no bits are set, returns @size. */ -static inline +static __always_inline unsigned long find_next_bit_wrap(const unsigned long *addr, unsigned long size, unsigned long offset) { @@ -465,7 +465,7 @@ unsigned long find_next_bit_wrap(const unsigned long *addr, * Helper for for_each_set_bit_wrap(). Make sure you're doing right thing * before using it alone. */ -static inline +static __always_inline unsigned long __for_each_wrap(const unsigned long *bitmap, unsigned long size, unsigned long start, unsigned long n) { @@ -506,20 +506,20 @@ extern unsigned long find_next_clump8(unsigned long *clump, #if defined(__LITTLE_ENDIAN) -static inline unsigned long find_next_zero_bit_le(const void *addr, - unsigned long size, unsigned long offset) +static __always_inline +unsigned long find_next_zero_bit_le(const void *addr, unsigned long size, unsigned long offset) { return find_next_zero_bit(addr, size, offset); } -static inline unsigned long find_next_bit_le(const void *addr, - unsigned long size, unsigned long offset) +static __always_inline +unsigned long find_next_bit_le(const void *addr, unsigned long size, unsigned long offset) { return find_next_bit(addr, size, offset); } -static inline unsigned long find_first_zero_bit_le(const void *addr, - unsigned long size) +static __always_inline +unsigned long find_first_zero_bit_le(const void *addr, unsigned long size) { return find_first_zero_bit(addr, size); } @@ -527,7 +527,7 @@ static inline unsigned long find_first_zero_bit_le(const void *addr, #elif defined(__BIG_ENDIAN) #ifndef find_next_zero_bit_le -static inline +static __always_inline unsigned long find_next_zero_bit_le(const void *addr, unsigned long size, unsigned long offset) { @@ -546,7 +546,7 @@ unsigned long find_next_zero_bit_le(const void *addr, unsigned #endif #ifndef find_first_zero_bit_le -static inline +static __always_inline unsigned long find_first_zero_bit_le(const void *addr, unsigned long size) { if (small_const_nbits(size)) { @@ -560,7 +560,7 @@ unsigned long find_first_zero_bit_le(const void *addr, unsigned long size) #endif #ifndef find_next_bit_le -static inline +static __always_inline unsigned long find_next_bit_le(const void *addr, unsigned long size, unsigned long offset) { From ed8cd2b3bd9f070120528fc5403a2d0b5afe07f8 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Thu, 18 Jul 2024 17:50:38 -0700 Subject: [PATCH 020/655] bitmap: Switch from inline to __always_inline 'inline' keyword is only a recommendation for compiler. If it decides to not inline bitmap functions, the whole small_const_nbits() machinery doesn't work. This is how a standard GCC 11.3.0 does for my x86_64 build now. This patch replaces 'inline' directive with unconditional '__always_inline' to make sure that there's always a chance for compile-time optimization. It doesn't change size of kernel image, according to bloat-o-meter. [[ Brian: split out from: Subject: [PATCH 1/3] bitmap: switch from inline to __always_inline https://lore.kernel.org/all/20221027043810.350460-2-yury.norov@gmail.com/ But rewritten, as there were too many conflicts. ]] Co-developed-by: Brian Norris Signed-off-by: Brian Norris Reviewed-by: Kees Cook Reviewed-by: Nathan Chancellor Signed-off-by: Yury Norov --- include/linux/bitmap.h | 140 ++++++++++++++++++++++------------------- 1 file changed, 76 insertions(+), 64 deletions(-) diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 8c4768c44a01..0406f48f0a6a 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -203,12 +203,12 @@ unsigned long bitmap_find_next_zero_area_off(unsigned long *map, * the bit offset of all zero areas this function finds is multiples of that * power of 2. A @align_mask of 0 means no alignment is required. */ -static inline unsigned long -bitmap_find_next_zero_area(unsigned long *map, - unsigned long size, - unsigned long start, - unsigned int nr, - unsigned long align_mask) +static __always_inline +unsigned long bitmap_find_next_zero_area(unsigned long *map, + unsigned long size, + unsigned long start, + unsigned int nr, + unsigned long align_mask) { return bitmap_find_next_zero_area_off(map, size, start, nr, align_mask, 0); @@ -228,7 +228,7 @@ void bitmap_fold(unsigned long *dst, const unsigned long *orig, #define bitmap_size(nbits) (ALIGN(nbits, BITS_PER_LONG) / BITS_PER_BYTE) -static inline void bitmap_zero(unsigned long *dst, unsigned int nbits) +static __always_inline void bitmap_zero(unsigned long *dst, unsigned int nbits) { unsigned int len = bitmap_size(nbits); @@ -238,7 +238,7 @@ static inline void bitmap_zero(unsigned long *dst, unsigned int nbits) memset(dst, 0, len); } -static inline void bitmap_fill(unsigned long *dst, unsigned int nbits) +static __always_inline void bitmap_fill(unsigned long *dst, unsigned int nbits) { unsigned int len = bitmap_size(nbits); @@ -248,8 +248,8 @@ static inline void bitmap_fill(unsigned long *dst, unsigned int nbits) memset(dst, 0xff, len); } -static inline void bitmap_copy(unsigned long *dst, const unsigned long *src, - unsigned int nbits) +static __always_inline +void bitmap_copy(unsigned long *dst, const unsigned long *src, unsigned int nbits) { unsigned int len = bitmap_size(nbits); @@ -262,8 +262,8 @@ static inline void bitmap_copy(unsigned long *dst, const unsigned long *src, /* * Copy bitmap and clear tail bits in last word. */ -static inline void bitmap_copy_clear_tail(unsigned long *dst, - const unsigned long *src, unsigned int nbits) +static __always_inline +void bitmap_copy_clear_tail(unsigned long *dst, const unsigned long *src, unsigned int nbits) { bitmap_copy(dst, src, nbits); if (nbits % BITS_PER_LONG) @@ -306,16 +306,18 @@ void bitmap_to_arr64(u64 *buf, const unsigned long *bitmap, unsigned int nbits); bitmap_copy_clear_tail((unsigned long *)(buf), (const unsigned long *)(bitmap), (nbits)) #endif -static inline bool bitmap_and(unsigned long *dst, const unsigned long *src1, - const unsigned long *src2, unsigned int nbits) +static __always_inline +bool bitmap_and(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0; return __bitmap_and(dst, src1, src2, nbits); } -static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, - const unsigned long *src2, unsigned int nbits) +static __always_inline +void bitmap_or(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = *src1 | *src2; @@ -323,8 +325,9 @@ static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, __bitmap_or(dst, src1, src2, nbits); } -static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1, - const unsigned long *src2, unsigned int nbits) +static __always_inline +void bitmap_xor(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = *src1 ^ *src2; @@ -332,16 +335,17 @@ static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1, __bitmap_xor(dst, src1, src2, nbits); } -static inline bool bitmap_andnot(unsigned long *dst, const unsigned long *src1, - const unsigned long *src2, unsigned int nbits) +static __always_inline +bool bitmap_andnot(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return (*dst = *src1 & ~(*src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0; return __bitmap_andnot(dst, src1, src2, nbits); } -static inline void bitmap_complement(unsigned long *dst, const unsigned long *src, - unsigned int nbits) +static __always_inline +void bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = ~(*src); @@ -356,8 +360,8 @@ static inline void bitmap_complement(unsigned long *dst, const unsigned long *sr #endif #define BITMAP_MEM_MASK (BITMAP_MEM_ALIGNMENT - 1) -static inline bool bitmap_equal(const unsigned long *src1, - const unsigned long *src2, unsigned int nbits) +static __always_inline +bool bitmap_equal(const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return !((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits)); @@ -376,10 +380,9 @@ static inline bool bitmap_equal(const unsigned long *src1, * * Returns: True if (*@src1 | *@src2) == *@src3, false otherwise */ -static inline bool bitmap_or_equal(const unsigned long *src1, - const unsigned long *src2, - const unsigned long *src3, - unsigned int nbits) +static __always_inline +bool bitmap_or_equal(const unsigned long *src1, const unsigned long *src2, + const unsigned long *src3, unsigned int nbits) { if (!small_const_nbits(nbits)) return __bitmap_or_equal(src1, src2, src3, nbits); @@ -387,9 +390,8 @@ static inline bool bitmap_or_equal(const unsigned long *src1, return !(((*src1 | *src2) ^ *src3) & BITMAP_LAST_WORD_MASK(nbits)); } -static inline bool bitmap_intersects(const unsigned long *src1, - const unsigned long *src2, - unsigned int nbits) +static __always_inline +bool bitmap_intersects(const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0; @@ -397,8 +399,8 @@ static inline bool bitmap_intersects(const unsigned long *src1, return __bitmap_intersects(src1, src2, nbits); } -static inline bool bitmap_subset(const unsigned long *src1, - const unsigned long *src2, unsigned int nbits) +static __always_inline +bool bitmap_subset(const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return ! ((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits)); @@ -406,7 +408,8 @@ static inline bool bitmap_subset(const unsigned long *src1, return __bitmap_subset(src1, src2, nbits); } -static inline bool bitmap_empty(const unsigned long *src, unsigned nbits) +static __always_inline +bool bitmap_empty(const unsigned long *src, unsigned nbits) { if (small_const_nbits(nbits)) return ! (*src & BITMAP_LAST_WORD_MASK(nbits)); @@ -414,7 +417,8 @@ static inline bool bitmap_empty(const unsigned long *src, unsigned nbits) return find_first_bit(src, nbits) == nbits; } -static inline bool bitmap_full(const unsigned long *src, unsigned int nbits) +static __always_inline +bool bitmap_full(const unsigned long *src, unsigned int nbits) { if (small_const_nbits(nbits)) return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits)); @@ -448,8 +452,8 @@ unsigned long bitmap_weight_andnot(const unsigned long *src1, return __bitmap_weight_andnot(src1, src2, nbits); } -static __always_inline void bitmap_set(unsigned long *map, unsigned int start, - unsigned int nbits) +static __always_inline +void bitmap_set(unsigned long *map, unsigned int start, unsigned int nbits) { if (__builtin_constant_p(nbits) && nbits == 1) __set_bit(start, map); @@ -464,8 +468,8 @@ static __always_inline void bitmap_set(unsigned long *map, unsigned int start, __bitmap_set(map, start, nbits); } -static __always_inline void bitmap_clear(unsigned long *map, unsigned int start, - unsigned int nbits) +static __always_inline +void bitmap_clear(unsigned long *map, unsigned int start, unsigned int nbits) { if (__builtin_constant_p(nbits) && nbits == 1) __clear_bit(start, map); @@ -480,8 +484,9 @@ static __always_inline void bitmap_clear(unsigned long *map, unsigned int start, __bitmap_clear(map, start, nbits); } -static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *src, - unsigned int shift, unsigned int nbits) +static __always_inline +void bitmap_shift_right(unsigned long *dst, const unsigned long *src, + unsigned int shift, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = (*src & BITMAP_LAST_WORD_MASK(nbits)) >> shift; @@ -489,8 +494,9 @@ static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *s __bitmap_shift_right(dst, src, shift, nbits); } -static inline void bitmap_shift_left(unsigned long *dst, const unsigned long *src, - unsigned int shift, unsigned int nbits) +static __always_inline +void bitmap_shift_left(unsigned long *dst, const unsigned long *src, + unsigned int shift, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = (*src << shift) & BITMAP_LAST_WORD_MASK(nbits); @@ -498,11 +504,12 @@ static inline void bitmap_shift_left(unsigned long *dst, const unsigned long *sr __bitmap_shift_left(dst, src, shift, nbits); } -static inline void bitmap_replace(unsigned long *dst, - const unsigned long *old, - const unsigned long *new, - const unsigned long *mask, - unsigned int nbits) +static __always_inline +void bitmap_replace(unsigned long *dst, + const unsigned long *old, + const unsigned long *new, + const unsigned long *mask, + unsigned int nbits) { if (small_const_nbits(nbits)) *dst = (*old & ~(*mask)) | (*new & *mask); @@ -545,8 +552,9 @@ static inline void bitmap_replace(unsigned long *dst, * bitmap_gather() can be seen as the 'reverse' bitmap_scatter() operation. * See bitmap_scatter() for details related to this relationship. */ -static inline void bitmap_scatter(unsigned long *dst, const unsigned long *src, - const unsigned long *mask, unsigned int nbits) +static __always_inline +void bitmap_scatter(unsigned long *dst, const unsigned long *src, + const unsigned long *mask, unsigned int nbits) { unsigned int n = 0; unsigned int bit; @@ -599,8 +607,9 @@ static inline void bitmap_scatter(unsigned long *dst, const unsigned long *src, * bitmap_scatter(res, src, mask, n) and a call to * bitmap_scatter(res, result, mask, n) will lead to the same res value. */ -static inline void bitmap_gather(unsigned long *dst, const unsigned long *src, - const unsigned long *mask, unsigned int nbits) +static __always_inline +void bitmap_gather(unsigned long *dst, const unsigned long *src, + const unsigned long *mask, unsigned int nbits) { unsigned int n = 0; unsigned int bit; @@ -611,9 +620,9 @@ static inline void bitmap_gather(unsigned long *dst, const unsigned long *src, __assign_bit(n++, dst, test_bit(bit, src)); } -static inline void bitmap_next_set_region(unsigned long *bitmap, - unsigned int *rs, unsigned int *re, - unsigned int end) +static __always_inline +void bitmap_next_set_region(unsigned long *bitmap, unsigned int *rs, + unsigned int *re, unsigned int end) { *rs = find_next_bit(bitmap, end, *rs); *re = find_next_zero_bit(bitmap, end, *rs + 1); @@ -628,7 +637,8 @@ static inline void bitmap_next_set_region(unsigned long *bitmap, * This is the complement to __bitmap_find_free_region() and releases * the found region (by clearing it in the bitmap). */ -static inline void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order) +static __always_inline +void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order) { bitmap_clear(bitmap, pos, BIT(order)); } @@ -644,7 +654,8 @@ static inline void bitmap_release_region(unsigned long *bitmap, unsigned int pos * Returns: 0 on success, or %-EBUSY if specified region wasn't * free (not all bits were zero). */ -static inline int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order) +static __always_inline +int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order) { unsigned int len = BIT(order); @@ -668,7 +679,8 @@ static inline int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos * Returns: the bit offset in bitmap of the allocated region, * or -errno on failure. */ -static inline int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order) +static __always_inline +int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order) { unsigned int pos, end; /* scans bitmap by regions of size order */ @@ -722,7 +734,7 @@ static inline int bitmap_find_free_region(unsigned long *bitmap, unsigned int bi * That is ``(u32 *)(&val)[0]`` gets the upper 32 bits, * but we expect the lower 32-bits of u64. */ -static inline void bitmap_from_u64(unsigned long *dst, u64 mask) +static __always_inline void bitmap_from_u64(unsigned long *dst, u64 mask) { bitmap_from_arr64(dst, &mask, 64); } @@ -737,9 +749,8 @@ static inline void bitmap_from_u64(unsigned long *dst, u64 mask) * @map memory region. For @nbits = 0 and @nbits > BITS_PER_LONG the return * value is undefined. */ -static inline unsigned long bitmap_read(const unsigned long *map, - unsigned long start, - unsigned long nbits) +static __always_inline +unsigned long bitmap_read(const unsigned long *map, unsigned long start, unsigned long nbits) { size_t index = BIT_WORD(start); unsigned long offset = start % BITS_PER_LONG; @@ -772,8 +783,9 @@ static inline unsigned long bitmap_read(const unsigned long *map, * * For @nbits == 0 and @nbits > BITS_PER_LONG no writes are performed. */ -static inline void bitmap_write(unsigned long *map, unsigned long value, - unsigned long start, unsigned long nbits) +static __always_inline +void bitmap_write(unsigned long *map, unsigned long value, + unsigned long start, unsigned long nbits) { size_t index; unsigned long offset; From ab6b1010dab68f6d4bf063517db4ce2d63554bc6 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Thu, 18 Jul 2024 17:50:39 -0700 Subject: [PATCH 021/655] cpumask: Switch from inline to __always_inline On recent (v6.6+) builds with Clang (based on Clang 18.0.0) and certain configurations [0], I'm finding that (lack of) inlining decisions may lead to section mismatch warnings like the following: WARNING: modpost: vmlinux.o: section mismatch in reference: cpumask_andnot (section: .text) -> cpuhp_bringup_cpus_parallel.tmp_mask (section: .init.data) ERROR: modpost: Section mismatches detected. or more confusingly: WARNING: modpost: vmlinux: section mismatch in reference: cpumask_andnot+0x5f (section: .text) -> efi_systab_phys (section: .init.data) The first warning makes a little sense, because cpuhp_bringup_cpus_parallel() (an __init function) calls cpumask_andnot() on tmp_mask (an __initdata symbol). If the compiler doesn't inline cpumask_andnot(), this may appear like a mismatch. The second warning makes less sense, but might be because efi_systab_phys and cpuhp_bringup_cpus_parallel.tmp_mask are laid out near each other, and the latter isn't a proper C symbol definition. In any case, it seems a reasonable solution to suggest more strongly to the compiler that these cpumask macros *must* be inlined, as 'inline' is just a recommendation. This change has been previously proposed in the past as: Subject: [PATCH 1/3] bitmap: switch from inline to __always_inline https://lore.kernel.org/all/20221027043810.350460-2-yury.norov@gmail.com/ But the change has been split up, to separately justify the cpumask changes (which drive my work) and the bitmap/const optimizations (that Yury separately proposed for other reasons). This ends up as somewhere between a "rebase" and "rewrite" -- I had to rewrite most of the patch. According to bloat-o-meter, vmlinux decreases minimally in size (-0.00% to -0.01%, depending on the version of GCC or Clang and .config in question) with this series of changes: gcc 13.2.0, x86_64_defconfig -3005 bytes, Before=21944501, After=21941496, chg -0.01% clang 16.0.6, x86_64_defconfig -105 bytes, Before=22571692, After=22571587, chg -0.00% gcc 9.5.0, x86_64_defconfig -1771 bytes, Before=21557598, After=21555827, chg -0.01% clang 18.0_pre516547 (ChromiumOS toolchain), x86_64_defconfig -191 bytes, Before=22615339, After=22615148, chg -0.00% clang 18.0_pre516547 (ChromiumOS toolchain), based on ChromiumOS config + gcov -979 bytes, Before=76294783, After=76293804, chg -0.00% [0] CONFIG_HOTPLUG_PARALLEL=y ('select'ed for x86 as of [1]) and CONFIG_GCOV_PROFILE_ALL. [1] commit 0c7ffa32dbd6 ("x86/smpboot/64: Implement arch_cpuhp_init_parallel_bringup() and enable it") Co-developed-by: Brian Norris Signed-off-by: Brian Norris Reviewed-by: Kees Cook Reviewed-by: Nathan Chancellor Signed-off-by: Yury Norov --- include/linux/cpumask.h | 212 +++++++++++++++++++++------------------- 1 file changed, 112 insertions(+), 100 deletions(-) diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 801a7e524113..9e3f3c96607d 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -30,7 +30,7 @@ extern unsigned int nr_cpu_ids; #endif -static inline void set_nr_cpu_ids(unsigned int nr) +static __always_inline void set_nr_cpu_ids(unsigned int nr) { #if (NR_CPUS == 1) || defined(CONFIG_FORCE_NR_CPUS) WARN_ON(nr != nr_cpu_ids); @@ -149,7 +149,7 @@ static __always_inline unsigned int cpumask_check(unsigned int cpu) * * Return: >= nr_cpu_ids if no cpus set. */ -static inline unsigned int cpumask_first(const struct cpumask *srcp) +static __always_inline unsigned int cpumask_first(const struct cpumask *srcp) { return find_first_bit(cpumask_bits(srcp), small_cpumask_bits); } @@ -160,7 +160,7 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp) * * Return: >= nr_cpu_ids if all cpus are set. */ -static inline unsigned int cpumask_first_zero(const struct cpumask *srcp) +static __always_inline unsigned int cpumask_first_zero(const struct cpumask *srcp) { return find_first_zero_bit(cpumask_bits(srcp), small_cpumask_bits); } @@ -172,7 +172,7 @@ static inline unsigned int cpumask_first_zero(const struct cpumask *srcp) * * Return: >= nr_cpu_ids if no cpus set in both. See also cpumask_next_and(). */ -static inline +static __always_inline unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask *srcp2) { return find_first_and_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits); @@ -186,7 +186,7 @@ unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask * * Return: >= nr_cpu_ids if no cpus set in all. */ -static inline +static __always_inline unsigned int cpumask_first_and_and(const struct cpumask *srcp1, const struct cpumask *srcp2, const struct cpumask *srcp3) @@ -201,7 +201,7 @@ unsigned int cpumask_first_and_and(const struct cpumask *srcp1, * * Return: >= nr_cpumask_bits if no CPUs set. */ -static inline unsigned int cpumask_last(const struct cpumask *srcp) +static __always_inline unsigned int cpumask_last(const struct cpumask *srcp) { return find_last_bit(cpumask_bits(srcp), small_cpumask_bits); } @@ -213,7 +213,7 @@ static inline unsigned int cpumask_last(const struct cpumask *srcp) * * Return: >= nr_cpu_ids if no further cpus set. */ -static inline +static __always_inline unsigned int cpumask_next(int n, const struct cpumask *srcp) { /* -1 is a legal arg here. */ @@ -229,7 +229,8 @@ unsigned int cpumask_next(int n, const struct cpumask *srcp) * * Return: >= nr_cpu_ids if no further cpus unset. */ -static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) +static __always_inline +unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) { /* -1 is a legal arg here. */ if (n != -1) @@ -239,18 +240,21 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) #if NR_CPUS == 1 /* Uniprocessor: there is only one valid CPU */ -static inline unsigned int cpumask_local_spread(unsigned int i, int node) +static __always_inline +unsigned int cpumask_local_spread(unsigned int i, int node) { return 0; } -static inline unsigned int cpumask_any_and_distribute(const struct cpumask *src1p, - const struct cpumask *src2p) +static __always_inline +unsigned int cpumask_any_and_distribute(const struct cpumask *src1p, + const struct cpumask *src2p) { return cpumask_first_and(src1p, src2p); } -static inline unsigned int cpumask_any_distribute(const struct cpumask *srcp) +static __always_inline +unsigned int cpumask_any_distribute(const struct cpumask *srcp) { return cpumask_first(srcp); } @@ -269,9 +273,9 @@ unsigned int cpumask_any_distribute(const struct cpumask *srcp); * * Return: >= nr_cpu_ids if no further cpus set in both. */ -static inline +static __always_inline unsigned int cpumask_next_and(int n, const struct cpumask *src1p, - const struct cpumask *src2p) + const struct cpumask *src2p) { /* -1 is a legal arg here. */ if (n != -1) @@ -291,7 +295,7 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p, for_each_set_bit(cpu, cpumask_bits(mask), small_cpumask_bits) #if NR_CPUS == 1 -static inline +static __always_inline unsigned int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap) { cpumask_check(start); @@ -394,7 +398,7 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta * Often used to find any cpu but smp_processor_id() in a mask. * Return: >= nr_cpu_ids if no cpus set. */ -static inline +static __always_inline unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) { unsigned int i; @@ -414,7 +418,7 @@ unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) * * Returns >= nr_cpu_ids if no cpus set. */ -static inline +static __always_inline unsigned int cpumask_any_and_but(const struct cpumask *mask1, const struct cpumask *mask2, unsigned int cpu) @@ -436,7 +440,8 @@ unsigned int cpumask_any_and_but(const struct cpumask *mask1, * * Return: >= nr_cpu_ids if such cpu doesn't exist. */ -static inline unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *srcp) +static __always_inline +unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *srcp) { return find_nth_bit(cpumask_bits(srcp), small_cpumask_bits, cpumask_check(cpu)); } @@ -449,7 +454,7 @@ static inline unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *s * * Return: >= nr_cpu_ids if such cpu doesn't exist. */ -static inline +static __always_inline unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1, const struct cpumask *srcp2) { @@ -465,7 +470,7 @@ unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1, * * Return: >= nr_cpu_ids if such cpu doesn't exist. */ -static inline +static __always_inline unsigned int cpumask_nth_andnot(unsigned int cpu, const struct cpumask *srcp1, const struct cpumask *srcp2) { @@ -508,12 +513,14 @@ unsigned int cpumask_nth_and_andnot(unsigned int cpu, const struct cpumask *srcp * @cpu: cpu number (< nr_cpu_ids) * @dstp: the cpumask pointer */ -static __always_inline void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) +static __always_inline +void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) { set_bit(cpumask_check(cpu), cpumask_bits(dstp)); } -static __always_inline void __cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) +static __always_inline +void __cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) { __set_bit(cpumask_check(cpu), cpumask_bits(dstp)); } @@ -557,7 +564,8 @@ static __always_inline void __cpumask_assign_cpu(int cpu, struct cpumask *dstp, * * Return: true if @cpu is set in @cpumask, else returns false */ -static __always_inline bool cpumask_test_cpu(int cpu, const struct cpumask *cpumask) +static __always_inline +bool cpumask_test_cpu(int cpu, const struct cpumask *cpumask) { return test_bit(cpumask_check(cpu), cpumask_bits((cpumask))); } @@ -571,7 +579,8 @@ static __always_inline bool cpumask_test_cpu(int cpu, const struct cpumask *cpum * * Return: true if @cpu is set in old bitmap of @cpumask, else returns false */ -static __always_inline bool cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask) +static __always_inline +bool cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask) { return test_and_set_bit(cpumask_check(cpu), cpumask_bits(cpumask)); } @@ -585,7 +594,8 @@ static __always_inline bool cpumask_test_and_set_cpu(int cpu, struct cpumask *cp * * Return: true if @cpu is set in old bitmap of @cpumask, else returns false */ -static __always_inline bool cpumask_test_and_clear_cpu(int cpu, struct cpumask *cpumask) +static __always_inline +bool cpumask_test_and_clear_cpu(int cpu, struct cpumask *cpumask) { return test_and_clear_bit(cpumask_check(cpu), cpumask_bits(cpumask)); } @@ -594,7 +604,7 @@ static __always_inline bool cpumask_test_and_clear_cpu(int cpu, struct cpumask * * cpumask_setall - set all cpus (< nr_cpu_ids) in a cpumask * @dstp: the cpumask pointer */ -static inline void cpumask_setall(struct cpumask *dstp) +static __always_inline void cpumask_setall(struct cpumask *dstp) { if (small_const_nbits(small_cpumask_bits)) { cpumask_bits(dstp)[0] = BITMAP_LAST_WORD_MASK(nr_cpumask_bits); @@ -607,7 +617,7 @@ static inline void cpumask_setall(struct cpumask *dstp) * cpumask_clear - clear all cpus (< nr_cpu_ids) in a cpumask * @dstp: the cpumask pointer */ -static inline void cpumask_clear(struct cpumask *dstp) +static __always_inline void cpumask_clear(struct cpumask *dstp) { bitmap_zero(cpumask_bits(dstp), large_cpumask_bits); } @@ -620,9 +630,9 @@ static inline void cpumask_clear(struct cpumask *dstp) * * Return: false if *@dstp is empty, else returns true */ -static inline bool cpumask_and(struct cpumask *dstp, - const struct cpumask *src1p, - const struct cpumask *src2p) +static __always_inline +bool cpumask_and(struct cpumask *dstp, const struct cpumask *src1p, + const struct cpumask *src2p) { return bitmap_and(cpumask_bits(dstp), cpumask_bits(src1p), cpumask_bits(src2p), small_cpumask_bits); @@ -634,8 +644,9 @@ static inline bool cpumask_and(struct cpumask *dstp, * @src1p: the first input * @src2p: the second input */ -static inline void cpumask_or(struct cpumask *dstp, const struct cpumask *src1p, - const struct cpumask *src2p) +static __always_inline +void cpumask_or(struct cpumask *dstp, const struct cpumask *src1p, + const struct cpumask *src2p) { bitmap_or(cpumask_bits(dstp), cpumask_bits(src1p), cpumask_bits(src2p), small_cpumask_bits); @@ -647,9 +658,9 @@ static inline void cpumask_or(struct cpumask *dstp, const struct cpumask *src1p, * @src1p: the first input * @src2p: the second input */ -static inline void cpumask_xor(struct cpumask *dstp, - const struct cpumask *src1p, - const struct cpumask *src2p) +static __always_inline +void cpumask_xor(struct cpumask *dstp, const struct cpumask *src1p, + const struct cpumask *src2p) { bitmap_xor(cpumask_bits(dstp), cpumask_bits(src1p), cpumask_bits(src2p), small_cpumask_bits); @@ -663,9 +674,9 @@ static inline void cpumask_xor(struct cpumask *dstp, * * Return: false if *@dstp is empty, else returns true */ -static inline bool cpumask_andnot(struct cpumask *dstp, - const struct cpumask *src1p, - const struct cpumask *src2p) +static __always_inline +bool cpumask_andnot(struct cpumask *dstp, const struct cpumask *src1p, + const struct cpumask *src2p) { return bitmap_andnot(cpumask_bits(dstp), cpumask_bits(src1p), cpumask_bits(src2p), small_cpumask_bits); @@ -678,8 +689,8 @@ static inline bool cpumask_andnot(struct cpumask *dstp, * * Return: true if the cpumasks are equal, false if not */ -static inline bool cpumask_equal(const struct cpumask *src1p, - const struct cpumask *src2p) +static __always_inline +bool cpumask_equal(const struct cpumask *src1p, const struct cpumask *src2p) { return bitmap_equal(cpumask_bits(src1p), cpumask_bits(src2p), small_cpumask_bits); @@ -694,9 +705,9 @@ static inline bool cpumask_equal(const struct cpumask *src1p, * Return: true if first cpumask ORed with second cpumask == third cpumask, * otherwise false */ -static inline bool cpumask_or_equal(const struct cpumask *src1p, - const struct cpumask *src2p, - const struct cpumask *src3p) +static __always_inline +bool cpumask_or_equal(const struct cpumask *src1p, const struct cpumask *src2p, + const struct cpumask *src3p) { return bitmap_or_equal(cpumask_bits(src1p), cpumask_bits(src2p), cpumask_bits(src3p), small_cpumask_bits); @@ -710,8 +721,8 @@ static inline bool cpumask_or_equal(const struct cpumask *src1p, * Return: true if first cpumask ANDed with second cpumask is non-empty, * otherwise false */ -static inline bool cpumask_intersects(const struct cpumask *src1p, - const struct cpumask *src2p) +static __always_inline +bool cpumask_intersects(const struct cpumask *src1p, const struct cpumask *src2p) { return bitmap_intersects(cpumask_bits(src1p), cpumask_bits(src2p), small_cpumask_bits); @@ -724,8 +735,8 @@ static inline bool cpumask_intersects(const struct cpumask *src1p, * * Return: true if *@src1p is a subset of *@src2p, else returns false */ -static inline bool cpumask_subset(const struct cpumask *src1p, - const struct cpumask *src2p) +static __always_inline +bool cpumask_subset(const struct cpumask *src1p, const struct cpumask *src2p) { return bitmap_subset(cpumask_bits(src1p), cpumask_bits(src2p), small_cpumask_bits); @@ -737,7 +748,7 @@ static inline bool cpumask_subset(const struct cpumask *src1p, * * Return: true if srcp is empty (has no bits set), else false */ -static inline bool cpumask_empty(const struct cpumask *srcp) +static __always_inline bool cpumask_empty(const struct cpumask *srcp) { return bitmap_empty(cpumask_bits(srcp), small_cpumask_bits); } @@ -748,7 +759,7 @@ static inline bool cpumask_empty(const struct cpumask *srcp) * * Return: true if srcp is full (has all bits set), else false */ -static inline bool cpumask_full(const struct cpumask *srcp) +static __always_inline bool cpumask_full(const struct cpumask *srcp) { return bitmap_full(cpumask_bits(srcp), nr_cpumask_bits); } @@ -759,7 +770,7 @@ static inline bool cpumask_full(const struct cpumask *srcp) * * Return: count of bits set in *srcp */ -static inline unsigned int cpumask_weight(const struct cpumask *srcp) +static __always_inline unsigned int cpumask_weight(const struct cpumask *srcp) { return bitmap_weight(cpumask_bits(srcp), small_cpumask_bits); } @@ -771,8 +782,8 @@ static inline unsigned int cpumask_weight(const struct cpumask *srcp) * * Return: count of bits set in both *srcp1 and *srcp2 */ -static inline unsigned int cpumask_weight_and(const struct cpumask *srcp1, - const struct cpumask *srcp2) +static __always_inline +unsigned int cpumask_weight_and(const struct cpumask *srcp1, const struct cpumask *srcp2) { return bitmap_weight_and(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits); } @@ -784,8 +795,9 @@ static inline unsigned int cpumask_weight_and(const struct cpumask *srcp1, * * Return: count of bits set in both *srcp1 and *srcp2 */ -static inline unsigned int cpumask_weight_andnot(const struct cpumask *srcp1, - const struct cpumask *srcp2) +static __always_inline +unsigned int cpumask_weight_andnot(const struct cpumask *srcp1, + const struct cpumask *srcp2) { return bitmap_weight_andnot(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits); } @@ -796,8 +808,8 @@ static inline unsigned int cpumask_weight_andnot(const struct cpumask *srcp1, * @srcp: the input to shift * @n: the number of bits to shift by */ -static inline void cpumask_shift_right(struct cpumask *dstp, - const struct cpumask *srcp, int n) +static __always_inline +void cpumask_shift_right(struct cpumask *dstp, const struct cpumask *srcp, int n) { bitmap_shift_right(cpumask_bits(dstp), cpumask_bits(srcp), n, small_cpumask_bits); @@ -809,8 +821,8 @@ static inline void cpumask_shift_right(struct cpumask *dstp, * @srcp: the input to shift * @n: the number of bits to shift by */ -static inline void cpumask_shift_left(struct cpumask *dstp, - const struct cpumask *srcp, int n) +static __always_inline +void cpumask_shift_left(struct cpumask *dstp, const struct cpumask *srcp, int n) { bitmap_shift_left(cpumask_bits(dstp), cpumask_bits(srcp), n, nr_cpumask_bits); @@ -821,8 +833,8 @@ static inline void cpumask_shift_left(struct cpumask *dstp, * @dstp: the result * @srcp: the input cpumask */ -static inline void cpumask_copy(struct cpumask *dstp, - const struct cpumask *srcp) +static __always_inline +void cpumask_copy(struct cpumask *dstp, const struct cpumask *srcp) { bitmap_copy(cpumask_bits(dstp), cpumask_bits(srcp), large_cpumask_bits); } @@ -858,8 +870,8 @@ static inline void cpumask_copy(struct cpumask *dstp, * * Return: -errno, or 0 for success. */ -static inline int cpumask_parse_user(const char __user *buf, int len, - struct cpumask *dstp) +static __always_inline +int cpumask_parse_user(const char __user *buf, int len, struct cpumask *dstp) { return bitmap_parse_user(buf, len, cpumask_bits(dstp), nr_cpumask_bits); } @@ -872,8 +884,8 @@ static inline int cpumask_parse_user(const char __user *buf, int len, * * Return: -errno, or 0 for success. */ -static inline int cpumask_parselist_user(const char __user *buf, int len, - struct cpumask *dstp) +static __always_inline +int cpumask_parselist_user(const char __user *buf, int len, struct cpumask *dstp) { return bitmap_parselist_user(buf, len, cpumask_bits(dstp), nr_cpumask_bits); @@ -886,7 +898,7 @@ static inline int cpumask_parselist_user(const char __user *buf, int len, * * Return: -errno, or 0 for success. */ -static inline int cpumask_parse(const char *buf, struct cpumask *dstp) +static __always_inline int cpumask_parse(const char *buf, struct cpumask *dstp) { return bitmap_parse(buf, UINT_MAX, cpumask_bits(dstp), nr_cpumask_bits); } @@ -898,7 +910,7 @@ static inline int cpumask_parse(const char *buf, struct cpumask *dstp) * * Return: -errno, or 0 for success. */ -static inline int cpulist_parse(const char *buf, struct cpumask *dstp) +static __always_inline int cpulist_parse(const char *buf, struct cpumask *dstp) { return bitmap_parselist(buf, cpumask_bits(dstp), nr_cpumask_bits); } @@ -908,7 +920,7 @@ static inline int cpulist_parse(const char *buf, struct cpumask *dstp) * * Return: size to allocate for a &struct cpumask in bytes */ -static inline unsigned int cpumask_size(void) +static __always_inline unsigned int cpumask_size(void) { return bitmap_size(large_cpumask_bits); } @@ -920,7 +932,7 @@ static inline unsigned int cpumask_size(void) bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node); -static inline +static __always_inline bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) { return alloc_cpumask_var_node(mask, flags | __GFP_ZERO, node); @@ -938,13 +950,13 @@ bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) * * Return: %true if allocation succeeded, %false if not */ -static inline +static __always_inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) { return alloc_cpumask_var_node(mask, flags, NUMA_NO_NODE); } -static inline +static __always_inline bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) { return alloc_cpumask_var(mask, flags | __GFP_ZERO); @@ -954,7 +966,7 @@ void alloc_bootmem_cpumask_var(cpumask_var_t *mask); void free_cpumask_var(cpumask_var_t mask); void free_bootmem_cpumask_var(cpumask_var_t mask); -static inline bool cpumask_available(cpumask_var_t mask) +static __always_inline bool cpumask_available(cpumask_var_t mask) { return mask != NULL; } @@ -964,43 +976,43 @@ static inline bool cpumask_available(cpumask_var_t mask) #define this_cpu_cpumask_var_ptr(x) this_cpu_ptr(x) #define __cpumask_var_read_mostly -static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) +static __always_inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) { return true; } -static inline bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, +static __always_inline bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) { return true; } -static inline bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) +static __always_inline bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) { cpumask_clear(*mask); return true; } -static inline bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, +static __always_inline bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) { cpumask_clear(*mask); return true; } -static inline void alloc_bootmem_cpumask_var(cpumask_var_t *mask) +static __always_inline void alloc_bootmem_cpumask_var(cpumask_var_t *mask) { } -static inline void free_cpumask_var(cpumask_var_t mask) +static __always_inline void free_cpumask_var(cpumask_var_t mask) { } -static inline void free_bootmem_cpumask_var(cpumask_var_t mask) +static __always_inline void free_bootmem_cpumask_var(cpumask_var_t mask) { } -static inline bool cpumask_available(cpumask_var_t mask) +static __always_inline bool cpumask_available(cpumask_var_t mask) { return true; } @@ -1058,7 +1070,7 @@ void set_cpu_online(unsigned int cpu, bool online); ((struct cpumask *)(1 ? (bitmap) \ : (void *)sizeof(__check_is_bitmap(bitmap)))) -static inline int __check_is_bitmap(const unsigned long *bitmap) +static __always_inline int __check_is_bitmap(const unsigned long *bitmap) { return 1; } @@ -1073,7 +1085,7 @@ static inline int __check_is_bitmap(const unsigned long *bitmap) extern const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)]; -static inline const struct cpumask *get_cpu_mask(unsigned int cpu) +static __always_inline const struct cpumask *get_cpu_mask(unsigned int cpu) { const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG]; p -= cpu / BITS_PER_LONG; @@ -1100,32 +1112,32 @@ static __always_inline unsigned int num_online_cpus(void) #define num_present_cpus() cpumask_weight(cpu_present_mask) #define num_active_cpus() cpumask_weight(cpu_active_mask) -static inline bool cpu_online(unsigned int cpu) +static __always_inline bool cpu_online(unsigned int cpu) { return cpumask_test_cpu(cpu, cpu_online_mask); } -static inline bool cpu_enabled(unsigned int cpu) +static __always_inline bool cpu_enabled(unsigned int cpu) { return cpumask_test_cpu(cpu, cpu_enabled_mask); } -static inline bool cpu_possible(unsigned int cpu) +static __always_inline bool cpu_possible(unsigned int cpu) { return cpumask_test_cpu(cpu, cpu_possible_mask); } -static inline bool cpu_present(unsigned int cpu) +static __always_inline bool cpu_present(unsigned int cpu) { return cpumask_test_cpu(cpu, cpu_present_mask); } -static inline bool cpu_active(unsigned int cpu) +static __always_inline bool cpu_active(unsigned int cpu) { return cpumask_test_cpu(cpu, cpu_active_mask); } -static inline bool cpu_dying(unsigned int cpu) +static __always_inline bool cpu_dying(unsigned int cpu) { return cpumask_test_cpu(cpu, cpu_dying_mask); } @@ -1138,32 +1150,32 @@ static inline bool cpu_dying(unsigned int cpu) #define num_present_cpus() 1U #define num_active_cpus() 1U -static inline bool cpu_online(unsigned int cpu) +static __always_inline bool cpu_online(unsigned int cpu) { return cpu == 0; } -static inline bool cpu_possible(unsigned int cpu) +static __always_inline bool cpu_possible(unsigned int cpu) { return cpu == 0; } -static inline bool cpu_enabled(unsigned int cpu) +static __always_inline bool cpu_enabled(unsigned int cpu) { return cpu == 0; } -static inline bool cpu_present(unsigned int cpu) +static __always_inline bool cpu_present(unsigned int cpu) { return cpu == 0; } -static inline bool cpu_active(unsigned int cpu) +static __always_inline bool cpu_active(unsigned int cpu) { return cpu == 0; } -static inline bool cpu_dying(unsigned int cpu) +static __always_inline bool cpu_dying(unsigned int cpu) { return false; } @@ -1197,7 +1209,7 @@ static inline bool cpu_dying(unsigned int cpu) * Return: the length of the (null-terminated) @buf string, zero if * nothing is copied. */ -static inline ssize_t +static __always_inline ssize_t cpumap_print_to_pagebuf(bool list, char *buf, const struct cpumask *mask) { return bitmap_print_to_pagebuf(list, buf, cpumask_bits(mask), @@ -1220,9 +1232,9 @@ cpumap_print_to_pagebuf(bool list, char *buf, const struct cpumask *mask) * Return: the length of how many bytes have been copied, excluding * terminating '\0'. */ -static inline ssize_t -cpumap_print_bitmask_to_buf(char *buf, const struct cpumask *mask, - loff_t off, size_t count) +static __always_inline +ssize_t cpumap_print_bitmask_to_buf(char *buf, const struct cpumask *mask, + loff_t off, size_t count) { return bitmap_print_bitmask_to_buf(buf, cpumask_bits(mask), nr_cpu_ids, off, count) - 1; @@ -1242,9 +1254,9 @@ cpumap_print_bitmask_to_buf(char *buf, const struct cpumask *mask, * Return: the length of how many bytes have been copied, excluding * terminating '\0'. */ -static inline ssize_t -cpumap_print_list_to_buf(char *buf, const struct cpumask *mask, - loff_t off, size_t count) +static __always_inline +ssize_t cpumap_print_list_to_buf(char *buf, const struct cpumask *mask, + loff_t off, size_t count) { return bitmap_print_list_to_buf(buf, cpumask_bits(mask), nr_cpu_ids, off, count) - 1; From 54c9e0085bd1015e524d8f4d3c4e78a7bc77ffca Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Thu, 18 Jul 2024 17:50:40 -0700 Subject: [PATCH 022/655] nodemask: Switch from inline to __always_inline 'inline' keyword is only a recommendation for compiler. If it decides to not inline nodemask functions, the whole small_const_nbits() machinery doesn't work. This is how a standard GCC 11.3.0 does for my x86_64 build now. This patch replaces 'inline' directive with unconditional '__always_inline' to make sure that there's always a chance for compile-time optimization. It doesn't change size of kernel image, according to bloat-o-meter. [[ Brian: split out from: Subject: [PATCH 1/3] bitmap: switch from inline to __always_inline https://lore.kernel.org/all/20221027043810.350460-2-yury.norov@gmail.com/ But rewritten, as there were too many conflicts. ]] Co-developed-by: Brian Norris Signed-off-by: Brian Norris Reviewed-by: Kees Cook Reviewed-by: Nathan Chancellor Signed-off-by: Yury Norov --- include/linux/nodemask.h | 86 ++++++++++++++++++++-------------------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index b61438313a73..9fd7a0ce9c1a 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -107,11 +107,11 @@ extern nodemask_t _unused_nodemask_arg_; */ #define nodemask_pr_args(maskp) __nodemask_pr_numnodes(maskp), \ __nodemask_pr_bits(maskp) -static inline unsigned int __nodemask_pr_numnodes(const nodemask_t *m) +static __always_inline unsigned int __nodemask_pr_numnodes(const nodemask_t *m) { return m ? MAX_NUMNODES : 0; } -static inline const unsigned long *__nodemask_pr_bits(const nodemask_t *m) +static __always_inline const unsigned long *__nodemask_pr_bits(const nodemask_t *m) { return m ? m->bits : NULL; } @@ -132,19 +132,19 @@ static __always_inline void __node_set(int node, volatile nodemask_t *dstp) } #define node_clear(node, dst) __node_clear((node), &(dst)) -static inline void __node_clear(int node, volatile nodemask_t *dstp) +static __always_inline void __node_clear(int node, volatile nodemask_t *dstp) { clear_bit(node, dstp->bits); } #define nodes_setall(dst) __nodes_setall(&(dst), MAX_NUMNODES) -static inline void __nodes_setall(nodemask_t *dstp, unsigned int nbits) +static __always_inline void __nodes_setall(nodemask_t *dstp, unsigned int nbits) { bitmap_fill(dstp->bits, nbits); } #define nodes_clear(dst) __nodes_clear(&(dst), MAX_NUMNODES) -static inline void __nodes_clear(nodemask_t *dstp, unsigned int nbits) +static __always_inline void __nodes_clear(nodemask_t *dstp, unsigned int nbits) { bitmap_zero(dstp->bits, nbits); } @@ -154,14 +154,14 @@ static inline void __nodes_clear(nodemask_t *dstp, unsigned int nbits) #define node_test_and_set(node, nodemask) \ __node_test_and_set((node), &(nodemask)) -static inline bool __node_test_and_set(int node, nodemask_t *addr) +static __always_inline bool __node_test_and_set(int node, nodemask_t *addr) { return test_and_set_bit(node, addr->bits); } #define nodes_and(dst, src1, src2) \ __nodes_and(&(dst), &(src1), &(src2), MAX_NUMNODES) -static inline void __nodes_and(nodemask_t *dstp, const nodemask_t *src1p, +static __always_inline void __nodes_and(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits); @@ -169,7 +169,7 @@ static inline void __nodes_and(nodemask_t *dstp, const nodemask_t *src1p, #define nodes_or(dst, src1, src2) \ __nodes_or(&(dst), &(src1), &(src2), MAX_NUMNODES) -static inline void __nodes_or(nodemask_t *dstp, const nodemask_t *src1p, +static __always_inline void __nodes_or(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits); @@ -177,7 +177,7 @@ static inline void __nodes_or(nodemask_t *dstp, const nodemask_t *src1p, #define nodes_xor(dst, src1, src2) \ __nodes_xor(&(dst), &(src1), &(src2), MAX_NUMNODES) -static inline void __nodes_xor(nodemask_t *dstp, const nodemask_t *src1p, +static __always_inline void __nodes_xor(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits); @@ -185,7 +185,7 @@ static inline void __nodes_xor(nodemask_t *dstp, const nodemask_t *src1p, #define nodes_andnot(dst, src1, src2) \ __nodes_andnot(&(dst), &(src1), &(src2), MAX_NUMNODES) -static inline void __nodes_andnot(nodemask_t *dstp, const nodemask_t *src1p, +static __always_inline void __nodes_andnot(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits); @@ -193,7 +193,7 @@ static inline void __nodes_andnot(nodemask_t *dstp, const nodemask_t *src1p, #define nodes_complement(dst, src) \ __nodes_complement(&(dst), &(src), MAX_NUMNODES) -static inline void __nodes_complement(nodemask_t *dstp, +static __always_inline void __nodes_complement(nodemask_t *dstp, const nodemask_t *srcp, unsigned int nbits) { bitmap_complement(dstp->bits, srcp->bits, nbits); @@ -201,7 +201,7 @@ static inline void __nodes_complement(nodemask_t *dstp, #define nodes_equal(src1, src2) \ __nodes_equal(&(src1), &(src2), MAX_NUMNODES) -static inline bool __nodes_equal(const nodemask_t *src1p, +static __always_inline bool __nodes_equal(const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { return bitmap_equal(src1p->bits, src2p->bits, nbits); @@ -209,7 +209,7 @@ static inline bool __nodes_equal(const nodemask_t *src1p, #define nodes_intersects(src1, src2) \ __nodes_intersects(&(src1), &(src2), MAX_NUMNODES) -static inline bool __nodes_intersects(const nodemask_t *src1p, +static __always_inline bool __nodes_intersects(const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { return bitmap_intersects(src1p->bits, src2p->bits, nbits); @@ -217,33 +217,33 @@ static inline bool __nodes_intersects(const nodemask_t *src1p, #define nodes_subset(src1, src2) \ __nodes_subset(&(src1), &(src2), MAX_NUMNODES) -static inline bool __nodes_subset(const nodemask_t *src1p, +static __always_inline bool __nodes_subset(const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { return bitmap_subset(src1p->bits, src2p->bits, nbits); } #define nodes_empty(src) __nodes_empty(&(src), MAX_NUMNODES) -static inline bool __nodes_empty(const nodemask_t *srcp, unsigned int nbits) +static __always_inline bool __nodes_empty(const nodemask_t *srcp, unsigned int nbits) { return bitmap_empty(srcp->bits, nbits); } #define nodes_full(nodemask) __nodes_full(&(nodemask), MAX_NUMNODES) -static inline bool __nodes_full(const nodemask_t *srcp, unsigned int nbits) +static __always_inline bool __nodes_full(const nodemask_t *srcp, unsigned int nbits) { return bitmap_full(srcp->bits, nbits); } #define nodes_weight(nodemask) __nodes_weight(&(nodemask), MAX_NUMNODES) -static inline int __nodes_weight(const nodemask_t *srcp, unsigned int nbits) +static __always_inline int __nodes_weight(const nodemask_t *srcp, unsigned int nbits) { return bitmap_weight(srcp->bits, nbits); } #define nodes_shift_right(dst, src, n) \ __nodes_shift_right(&(dst), &(src), (n), MAX_NUMNODES) -static inline void __nodes_shift_right(nodemask_t *dstp, +static __always_inline void __nodes_shift_right(nodemask_t *dstp, const nodemask_t *srcp, int n, int nbits) { bitmap_shift_right(dstp->bits, srcp->bits, n, nbits); @@ -251,7 +251,7 @@ static inline void __nodes_shift_right(nodemask_t *dstp, #define nodes_shift_left(dst, src, n) \ __nodes_shift_left(&(dst), &(src), (n), MAX_NUMNODES) -static inline void __nodes_shift_left(nodemask_t *dstp, +static __always_inline void __nodes_shift_left(nodemask_t *dstp, const nodemask_t *srcp, int n, int nbits) { bitmap_shift_left(dstp->bits, srcp->bits, n, nbits); @@ -261,13 +261,13 @@ static inline void __nodes_shift_left(nodemask_t *dstp, > MAX_NUMNODES, then the silly min_ts could be dropped. */ #define first_node(src) __first_node(&(src)) -static inline unsigned int __first_node(const nodemask_t *srcp) +static __always_inline unsigned int __first_node(const nodemask_t *srcp) { return min_t(unsigned int, MAX_NUMNODES, find_first_bit(srcp->bits, MAX_NUMNODES)); } #define next_node(n, src) __next_node((n), &(src)) -static inline unsigned int __next_node(int n, const nodemask_t *srcp) +static __always_inline unsigned int __next_node(int n, const nodemask_t *srcp) { return min_t(unsigned int, MAX_NUMNODES, find_next_bit(srcp->bits, MAX_NUMNODES, n+1)); } @@ -277,7 +277,7 @@ static inline unsigned int __next_node(int n, const nodemask_t *srcp) * the first node in src if needed. Returns MAX_NUMNODES if src is empty. */ #define next_node_in(n, src) __next_node_in((n), &(src)) -static inline unsigned int __next_node_in(int node, const nodemask_t *srcp) +static __always_inline unsigned int __next_node_in(int node, const nodemask_t *srcp) { unsigned int ret = __next_node(node, srcp); @@ -286,7 +286,7 @@ static inline unsigned int __next_node_in(int node, const nodemask_t *srcp) return ret; } -static inline void init_nodemask_of_node(nodemask_t *mask, int node) +static __always_inline void init_nodemask_of_node(nodemask_t *mask, int node) { nodes_clear(*mask); node_set(node, *mask); @@ -304,7 +304,7 @@ static inline void init_nodemask_of_node(nodemask_t *mask, int node) }) #define first_unset_node(mask) __first_unset_node(&(mask)) -static inline unsigned int __first_unset_node(const nodemask_t *maskp) +static __always_inline unsigned int __first_unset_node(const nodemask_t *maskp) { return min_t(unsigned int, MAX_NUMNODES, find_first_zero_bit(maskp->bits, MAX_NUMNODES)); @@ -338,21 +338,21 @@ static inline unsigned int __first_unset_node(const nodemask_t *maskp) #define nodemask_parse_user(ubuf, ulen, dst) \ __nodemask_parse_user((ubuf), (ulen), &(dst), MAX_NUMNODES) -static inline int __nodemask_parse_user(const char __user *buf, int len, +static __always_inline int __nodemask_parse_user(const char __user *buf, int len, nodemask_t *dstp, int nbits) { return bitmap_parse_user(buf, len, dstp->bits, nbits); } #define nodelist_parse(buf, dst) __nodelist_parse((buf), &(dst), MAX_NUMNODES) -static inline int __nodelist_parse(const char *buf, nodemask_t *dstp, int nbits) +static __always_inline int __nodelist_parse(const char *buf, nodemask_t *dstp, int nbits) { return bitmap_parselist(buf, dstp->bits, nbits); } #define node_remap(oldbit, old, new) \ __node_remap((oldbit), &(old), &(new), MAX_NUMNODES) -static inline int __node_remap(int oldbit, +static __always_inline int __node_remap(int oldbit, const nodemask_t *oldp, const nodemask_t *newp, int nbits) { return bitmap_bitremap(oldbit, oldp->bits, newp->bits, nbits); @@ -360,7 +360,7 @@ static inline int __node_remap(int oldbit, #define nodes_remap(dst, src, old, new) \ __nodes_remap(&(dst), &(src), &(old), &(new), MAX_NUMNODES) -static inline void __nodes_remap(nodemask_t *dstp, const nodemask_t *srcp, +static __always_inline void __nodes_remap(nodemask_t *dstp, const nodemask_t *srcp, const nodemask_t *oldp, const nodemask_t *newp, int nbits) { bitmap_remap(dstp->bits, srcp->bits, oldp->bits, newp->bits, nbits); @@ -368,7 +368,7 @@ static inline void __nodes_remap(nodemask_t *dstp, const nodemask_t *srcp, #define nodes_onto(dst, orig, relmap) \ __nodes_onto(&(dst), &(orig), &(relmap), MAX_NUMNODES) -static inline void __nodes_onto(nodemask_t *dstp, const nodemask_t *origp, +static __always_inline void __nodes_onto(nodemask_t *dstp, const nodemask_t *origp, const nodemask_t *relmapp, int nbits) { bitmap_onto(dstp->bits, origp->bits, relmapp->bits, nbits); @@ -376,7 +376,7 @@ static inline void __nodes_onto(nodemask_t *dstp, const nodemask_t *origp, #define nodes_fold(dst, orig, sz) \ __nodes_fold(&(dst), &(orig), sz, MAX_NUMNODES) -static inline void __nodes_fold(nodemask_t *dstp, const nodemask_t *origp, +static __always_inline void __nodes_fold(nodemask_t *dstp, const nodemask_t *origp, int sz, int nbits) { bitmap_fold(dstp->bits, origp->bits, sz, nbits); @@ -418,22 +418,22 @@ enum node_states { extern nodemask_t node_states[NR_NODE_STATES]; #if MAX_NUMNODES > 1 -static inline int node_state(int node, enum node_states state) +static __always_inline int node_state(int node, enum node_states state) { return node_isset(node, node_states[state]); } -static inline void node_set_state(int node, enum node_states state) +static __always_inline void node_set_state(int node, enum node_states state) { __node_set(node, &node_states[state]); } -static inline void node_clear_state(int node, enum node_states state) +static __always_inline void node_clear_state(int node, enum node_states state) { __node_clear(node, &node_states[state]); } -static inline int num_node_state(enum node_states state) +static __always_inline int num_node_state(enum node_states state) { return nodes_weight(node_states[state]); } @@ -443,11 +443,11 @@ static inline int num_node_state(enum node_states state) #define first_online_node first_node(node_states[N_ONLINE]) #define first_memory_node first_node(node_states[N_MEMORY]) -static inline unsigned int next_online_node(int nid) +static __always_inline unsigned int next_online_node(int nid) { return next_node(nid, node_states[N_ONLINE]); } -static inline unsigned int next_memory_node(int nid) +static __always_inline unsigned int next_memory_node(int nid) { return next_node(nid, node_states[N_MEMORY]); } @@ -455,13 +455,13 @@ static inline unsigned int next_memory_node(int nid) extern unsigned int nr_node_ids; extern unsigned int nr_online_nodes; -static inline void node_set_online(int nid) +static __always_inline void node_set_online(int nid) { node_set_state(nid, N_ONLINE); nr_online_nodes = num_node_state(N_ONLINE); } -static inline void node_set_offline(int nid) +static __always_inline void node_set_offline(int nid) { node_clear_state(nid, N_ONLINE); nr_online_nodes = num_node_state(N_ONLINE); @@ -469,20 +469,20 @@ static inline void node_set_offline(int nid) #else -static inline int node_state(int node, enum node_states state) +static __always_inline int node_state(int node, enum node_states state) { return node == 0; } -static inline void node_set_state(int node, enum node_states state) +static __always_inline void node_set_state(int node, enum node_states state) { } -static inline void node_clear_state(int node, enum node_states state) +static __always_inline void node_clear_state(int node, enum node_states state) { } -static inline int num_node_state(enum node_states state) +static __always_inline int num_node_state(enum node_states state) { return 1; } @@ -502,7 +502,7 @@ static inline int num_node_state(enum node_states state) #endif -static inline int node_random(const nodemask_t *maskp) +static __always_inline int node_random(const nodemask_t *maskp) { #if defined(CONFIG_NUMA) && (MAX_NUMNODES > 1) int w, bit; From 7193b5c4758368c3a0de0cf4fa54bc8697caaf86 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 8 Apr 2024 17:43:37 -0400 Subject: [PATCH 023/655] alpha: no need to include asm/xchg.h twice We used to generate different helpers for local and full {cmp,}xchg(); these days the barriers are in arch_{cmp,}xchg() instead and generated helpers are identical for local and full cases. No need for those parametrized includes of asm/xchg.h - we might as well insert its contents directly in asm/cmpxchg.h and do it only once. Signed-off-by: Al Viro --- arch/alpha/include/asm/cmpxchg.h | 239 ++++++++++++++++++++++++++++-- arch/alpha/include/asm/xchg.h | 246 ------------------------------- 2 files changed, 223 insertions(+), 262 deletions(-) delete mode 100644 arch/alpha/include/asm/xchg.h diff --git a/arch/alpha/include/asm/cmpxchg.h b/arch/alpha/include/asm/cmpxchg.h index 91d4a4d9258c..ae1b96479d0c 100644 --- a/arch/alpha/include/asm/cmpxchg.h +++ b/arch/alpha/include/asm/cmpxchg.h @@ -3,17 +3,232 @@ #define _ALPHA_CMPXCHG_H /* - * Atomic exchange routines. + * Atomic exchange. + * Since it can be used to implement critical sections + * it must clobber "memory" (also for interrupts in UP). */ -#define ____xchg(type, args...) __arch_xchg ## type ## _local(args) -#define ____cmpxchg(type, args...) __cmpxchg ## type ## _local(args) -#include +static inline unsigned long +____xchg_u8(volatile char *m, unsigned long val) +{ + unsigned long ret, tmp, addr64; + + __asm__ __volatile__( + " andnot %4,7,%3\n" + " insbl %1,%4,%1\n" + "1: ldq_l %2,0(%3)\n" + " extbl %2,%4,%0\n" + " mskbl %2,%4,%2\n" + " or %1,%2,%2\n" + " stq_c %2,0(%3)\n" + " beq %2,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64) + : "r" ((long)m), "1" (val) : "memory"); + + return ret; +} + +static inline unsigned long +____xchg_u16(volatile short *m, unsigned long val) +{ + unsigned long ret, tmp, addr64; + + __asm__ __volatile__( + " andnot %4,7,%3\n" + " inswl %1,%4,%1\n" + "1: ldq_l %2,0(%3)\n" + " extwl %2,%4,%0\n" + " mskwl %2,%4,%2\n" + " or %1,%2,%2\n" + " stq_c %2,0(%3)\n" + " beq %2,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64) + : "r" ((long)m), "1" (val) : "memory"); + + return ret; +} + +static inline unsigned long +____xchg_u32(volatile int *m, unsigned long val) +{ + unsigned long dummy; + + __asm__ __volatile__( + "1: ldl_l %0,%4\n" + " bis $31,%3,%1\n" + " stl_c %1,%2\n" + " beq %1,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + : "=&r" (val), "=&r" (dummy), "=m" (*m) + : "rI" (val), "m" (*m) : "memory"); + + return val; +} + +static inline unsigned long +____xchg_u64(volatile long *m, unsigned long val) +{ + unsigned long dummy; + + __asm__ __volatile__( + "1: ldq_l %0,%4\n" + " bis $31,%3,%1\n" + " stq_c %1,%2\n" + " beq %1,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + : "=&r" (val), "=&r" (dummy), "=m" (*m) + : "rI" (val), "m" (*m) : "memory"); + + return val; +} + +/* This function doesn't exist, so you'll get a linker error + if something tries to do an invalid xchg(). */ +extern void __xchg_called_with_bad_pointer(void); + +static __always_inline unsigned long +____xchg(volatile void *ptr, unsigned long x, int size) +{ + return + size == 1 ? ____xchg_u8(ptr, x) : + size == 2 ? ____xchg_u16(ptr, x) : + size == 4 ? ____xchg_u32(ptr, x) : + size == 8 ? ____xchg_u64(ptr, x) : + (__xchg_called_with_bad_pointer(), x); +} + +/* + * Atomic compare and exchange. Compare OLD with MEM, if identical, + * store NEW in MEM. Return the initial value in MEM. Success is + * indicated by comparing RETURN with OLD. + */ + +static inline unsigned long +____cmpxchg_u8(volatile char *m, unsigned char old, unsigned char new) +{ + unsigned long prev, tmp, cmp, addr64; + + __asm__ __volatile__( + " andnot %5,7,%4\n" + " insbl %1,%5,%1\n" + "1: ldq_l %2,0(%4)\n" + " extbl %2,%5,%0\n" + " cmpeq %0,%6,%3\n" + " beq %3,2f\n" + " mskbl %2,%5,%2\n" + " or %1,%2,%2\n" + " stq_c %2,0(%4)\n" + " beq %2,3f\n" + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64) + : "r" ((long)m), "Ir" (old), "1" (new) : "memory"); + + return prev; +} + +static inline unsigned long +____cmpxchg_u16(volatile short *m, unsigned short old, unsigned short new) +{ + unsigned long prev, tmp, cmp, addr64; + + __asm__ __volatile__( + " andnot %5,7,%4\n" + " inswl %1,%5,%1\n" + "1: ldq_l %2,0(%4)\n" + " extwl %2,%5,%0\n" + " cmpeq %0,%6,%3\n" + " beq %3,2f\n" + " mskwl %2,%5,%2\n" + " or %1,%2,%2\n" + " stq_c %2,0(%4)\n" + " beq %2,3f\n" + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64) + : "r" ((long)m), "Ir" (old), "1" (new) : "memory"); + + return prev; +} + +static inline unsigned long +____cmpxchg_u32(volatile int *m, int old, int new) +{ + unsigned long prev, cmp; + + __asm__ __volatile__( + "1: ldl_l %0,%5\n" + " cmpeq %0,%3,%1\n" + " beq %1,2f\n" + " mov %4,%1\n" + " stl_c %1,%2\n" + " beq %1,3f\n" + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : "=&r"(prev), "=&r"(cmp), "=m"(*m) + : "r"((long) old), "r"(new), "m"(*m) : "memory"); + + return prev; +} + +static inline unsigned long +____cmpxchg_u64(volatile long *m, unsigned long old, unsigned long new) +{ + unsigned long prev, cmp; + + __asm__ __volatile__( + "1: ldq_l %0,%5\n" + " cmpeq %0,%3,%1\n" + " beq %1,2f\n" + " mov %4,%1\n" + " stq_c %1,%2\n" + " beq %1,3f\n" + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : "=&r"(prev), "=&r"(cmp), "=m"(*m) + : "r"((long) old), "r"(new), "m"(*m) : "memory"); + + return prev; +} + +/* This function doesn't exist, so you'll get a linker error + if something tries to do an invalid cmpxchg(). */ +extern void __cmpxchg_called_with_bad_pointer(void); + +static __always_inline unsigned long +____cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, + int size) +{ + return + size == 1 ? ____cmpxchg_u8(ptr, old, new) : + size == 2 ? ____cmpxchg_u16(ptr, old, new) : + size == 4 ? ____cmpxchg_u32(ptr, old, new) : + size == 8 ? ____cmpxchg_u64(ptr, old, new) : + (__cmpxchg_called_with_bad_pointer(), old); +} #define xchg_local(ptr, x) \ ({ \ __typeof__(*(ptr)) _x_ = (x); \ - (__typeof__(*(ptr))) __arch_xchg_local((ptr), (unsigned long)_x_,\ + (__typeof__(*(ptr))) ____xchg((ptr), (unsigned long)_x_, \ sizeof(*(ptr))); \ }) @@ -21,7 +236,7 @@ ({ \ __typeof__(*(ptr)) _o_ = (o); \ __typeof__(*(ptr)) _n_ = (n); \ - (__typeof__(*(ptr))) __cmpxchg_local((ptr), (unsigned long)_o_, \ + (__typeof__(*(ptr))) ____cmpxchg((ptr), (unsigned long)_o_, \ (unsigned long)_n_, \ sizeof(*(ptr))); \ }) @@ -32,12 +247,6 @@ cmpxchg_local((ptr), (o), (n)); \ }) -#undef ____xchg -#undef ____cmpxchg -#define ____xchg(type, args...) __arch_xchg ##type(args) -#define ____cmpxchg(type, args...) __cmpxchg ##type(args) -#include - /* * The leading and the trailing memory barriers guarantee that these * operations are fully ordered. @@ -48,7 +257,7 @@ __typeof__(*(ptr)) _x_ = (x); \ smp_mb(); \ __ret = (__typeof__(*(ptr))) \ - __arch_xchg((ptr), (unsigned long)_x_, sizeof(*(ptr))); \ + ____xchg((ptr), (unsigned long)_x_, sizeof(*(ptr))); \ smp_mb(); \ __ret; \ }) @@ -59,7 +268,7 @@ __typeof__(*(ptr)) _o_ = (o); \ __typeof__(*(ptr)) _n_ = (n); \ smp_mb(); \ - __ret = (__typeof__(*(ptr))) __cmpxchg((ptr), \ + __ret = (__typeof__(*(ptr))) ____cmpxchg((ptr), \ (unsigned long)_o_, (unsigned long)_n_, sizeof(*(ptr)));\ smp_mb(); \ __ret; \ @@ -71,6 +280,4 @@ arch_cmpxchg((ptr), (o), (n)); \ }) -#undef ____cmpxchg - #endif /* _ALPHA_CMPXCHG_H */ diff --git a/arch/alpha/include/asm/xchg.h b/arch/alpha/include/asm/xchg.h deleted file mode 100644 index 7adb80c6746a..000000000000 --- a/arch/alpha/include/asm/xchg.h +++ /dev/null @@ -1,246 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ALPHA_CMPXCHG_H -#error Do not include xchg.h directly! -#else -/* - * xchg/xchg_local and cmpxchg/cmpxchg_local share the same code - * except that local version do not have the expensive memory barrier. - * So this file is included twice from asm/cmpxchg.h. - */ - -/* - * Atomic exchange. - * Since it can be used to implement critical sections - * it must clobber "memory" (also for interrupts in UP). - */ - -static inline unsigned long -____xchg(_u8, volatile char *m, unsigned long val) -{ - unsigned long ret, tmp, addr64; - - __asm__ __volatile__( - " andnot %4,7,%3\n" - " insbl %1,%4,%1\n" - "1: ldq_l %2,0(%3)\n" - " extbl %2,%4,%0\n" - " mskbl %2,%4,%2\n" - " or %1,%2,%2\n" - " stq_c %2,0(%3)\n" - " beq %2,2f\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64) - : "r" ((long)m), "1" (val) : "memory"); - - return ret; -} - -static inline unsigned long -____xchg(_u16, volatile short *m, unsigned long val) -{ - unsigned long ret, tmp, addr64; - - __asm__ __volatile__( - " andnot %4,7,%3\n" - " inswl %1,%4,%1\n" - "1: ldq_l %2,0(%3)\n" - " extwl %2,%4,%0\n" - " mskwl %2,%4,%2\n" - " or %1,%2,%2\n" - " stq_c %2,0(%3)\n" - " beq %2,2f\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64) - : "r" ((long)m), "1" (val) : "memory"); - - return ret; -} - -static inline unsigned long -____xchg(_u32, volatile int *m, unsigned long val) -{ - unsigned long dummy; - - __asm__ __volatile__( - "1: ldl_l %0,%4\n" - " bis $31,%3,%1\n" - " stl_c %1,%2\n" - " beq %1,2f\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - : "=&r" (val), "=&r" (dummy), "=m" (*m) - : "rI" (val), "m" (*m) : "memory"); - - return val; -} - -static inline unsigned long -____xchg(_u64, volatile long *m, unsigned long val) -{ - unsigned long dummy; - - __asm__ __volatile__( - "1: ldq_l %0,%4\n" - " bis $31,%3,%1\n" - " stq_c %1,%2\n" - " beq %1,2f\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - : "=&r" (val), "=&r" (dummy), "=m" (*m) - : "rI" (val), "m" (*m) : "memory"); - - return val; -} - -/* This function doesn't exist, so you'll get a linker error - if something tries to do an invalid xchg(). */ -extern void __xchg_called_with_bad_pointer(void); - -static __always_inline unsigned long -____xchg(, volatile void *ptr, unsigned long x, int size) -{ - switch (size) { - case 1: - return ____xchg(_u8, ptr, x); - case 2: - return ____xchg(_u16, ptr, x); - case 4: - return ____xchg(_u32, ptr, x); - case 8: - return ____xchg(_u64, ptr, x); - } - __xchg_called_with_bad_pointer(); - return x; -} - -/* - * Atomic compare and exchange. Compare OLD with MEM, if identical, - * store NEW in MEM. Return the initial value in MEM. Success is - * indicated by comparing RETURN with OLD. - */ - -static inline unsigned long -____cmpxchg(_u8, volatile char *m, unsigned char old, unsigned char new) -{ - unsigned long prev, tmp, cmp, addr64; - - __asm__ __volatile__( - " andnot %5,7,%4\n" - " insbl %1,%5,%1\n" - "1: ldq_l %2,0(%4)\n" - " extbl %2,%5,%0\n" - " cmpeq %0,%6,%3\n" - " beq %3,2f\n" - " mskbl %2,%5,%2\n" - " or %1,%2,%2\n" - " stq_c %2,0(%4)\n" - " beq %2,3f\n" - "2:\n" - ".subsection 2\n" - "3: br 1b\n" - ".previous" - : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64) - : "r" ((long)m), "Ir" (old), "1" (new) : "memory"); - - return prev; -} - -static inline unsigned long -____cmpxchg(_u16, volatile short *m, unsigned short old, unsigned short new) -{ - unsigned long prev, tmp, cmp, addr64; - - __asm__ __volatile__( - " andnot %5,7,%4\n" - " inswl %1,%5,%1\n" - "1: ldq_l %2,0(%4)\n" - " extwl %2,%5,%0\n" - " cmpeq %0,%6,%3\n" - " beq %3,2f\n" - " mskwl %2,%5,%2\n" - " or %1,%2,%2\n" - " stq_c %2,0(%4)\n" - " beq %2,3f\n" - "2:\n" - ".subsection 2\n" - "3: br 1b\n" - ".previous" - : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64) - : "r" ((long)m), "Ir" (old), "1" (new) : "memory"); - - return prev; -} - -static inline unsigned long -____cmpxchg(_u32, volatile int *m, int old, int new) -{ - unsigned long prev, cmp; - - __asm__ __volatile__( - "1: ldl_l %0,%5\n" - " cmpeq %0,%3,%1\n" - " beq %1,2f\n" - " mov %4,%1\n" - " stl_c %1,%2\n" - " beq %1,3f\n" - "2:\n" - ".subsection 2\n" - "3: br 1b\n" - ".previous" - : "=&r"(prev), "=&r"(cmp), "=m"(*m) - : "r"((long) old), "r"(new), "m"(*m) : "memory"); - - return prev; -} - -static inline unsigned long -____cmpxchg(_u64, volatile long *m, unsigned long old, unsigned long new) -{ - unsigned long prev, cmp; - - __asm__ __volatile__( - "1: ldq_l %0,%5\n" - " cmpeq %0,%3,%1\n" - " beq %1,2f\n" - " mov %4,%1\n" - " stq_c %1,%2\n" - " beq %1,3f\n" - "2:\n" - ".subsection 2\n" - "3: br 1b\n" - ".previous" - : "=&r"(prev), "=&r"(cmp), "=m"(*m) - : "r"((long) old), "r"(new), "m"(*m) : "memory"); - - return prev; -} - -/* This function doesn't exist, so you'll get a linker error - if something tries to do an invalid cmpxchg(). */ -extern void __cmpxchg_called_with_bad_pointer(void); - -static __always_inline unsigned long -____cmpxchg(, volatile void *ptr, unsigned long old, unsigned long new, - int size) -{ - switch (size) { - case 1: - return ____cmpxchg(_u8, ptr, old, new); - case 2: - return ____cmpxchg(_u16, ptr, old, new); - case 4: - return ____cmpxchg(_u32, ptr, old, new); - case 8: - return ____cmpxchg(_u64, ptr, old, new); - } - __cmpxchg_called_with_bad_pointer(); - return old; -} - -#endif From 92a10d3861491d09e73b35db7113dd049659f588 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 30 Jul 2024 22:15:16 +0200 Subject: [PATCH 024/655] runtime constants: move list of constants to vmlinux.lds.h Refactor the list of constant variables into a macro. This should make it easier to add more constants in the future. Signed-off-by: Jann Horn Reviewed-by: Alexander Gordeev Reviewed-by: Thomas Gleixner Acked-by: Arnd Bergmann Acked-by: Will Deacon Signed-off-by: Arnd Bergmann --- arch/arm64/kernel/vmlinux.lds.S | 3 +-- arch/s390/kernel/vmlinux.lds.S | 3 +-- arch/x86/kernel/vmlinux.lds.S | 3 +-- include/asm-generic/vmlinux.lds.h | 4 ++++ 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 55a8e310ea12..58d89d997d05 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -264,8 +264,7 @@ SECTIONS EXIT_DATA } - RUNTIME_CONST(shift, d_hash_shift) - RUNTIME_CONST(ptr, dentry_hashtable) + RUNTIME_CONST_VARIABLES PERCPU_SECTION(L1_CACHE_BYTES) HYPERVISOR_PERCPU_SECTION diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index e67cd409b858..5e2e44596975 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -191,8 +191,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); INIT_DATA_SECTION(0x100) - RUNTIME_CONST(shift, d_hash_shift) - RUNTIME_CONST(ptr, dentry_hashtable) + RUNTIME_CONST_VARIABLES PERCPU_SECTION(0x100) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 6e73403e874f..6726be89b7a6 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -357,8 +357,7 @@ SECTIONS PERCPU_SECTION(INTERNODE_CACHE_BYTES) #endif - RUNTIME_CONST(shift, d_hash_shift) - RUNTIME_CONST(ptr, dentry_hashtable) + RUNTIME_CONST_VARIABLES . = ALIGN(PAGE_SIZE); diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 1ae44793132a..2157d8802351 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -918,6 +918,10 @@ #define RUNTIME_CONST(t,x) NAMED_SECTION(runtime_##t##_##x) +#define RUNTIME_CONST_VARIABLES \ + RUNTIME_CONST(shift, d_hash_shift) \ + RUNTIME_CONST(ptr, dentry_hashtable) + /* Alignment must be consistent with (kunit_suite *) in include/kunit/test.h */ #define KUNIT_TABLE() \ . = ALIGN(8); \ From 09c38ad044e61f4ef3b321669361b161d5a5242f Mon Sep 17 00:00:00 2001 From: Hariharan Mari Date: Thu, 4 Jul 2024 13:14:00 +0200 Subject: [PATCH 025/655] KVM: s390: Fix SORTL and DFLTCC instruction format error in __insn32_query The __insn32_query() function incorrectly uses the RRF instruction format for both the SORTL (RRE format) and DFLTCC (RRF format) instructions. To fix this issue, add separate query functions for SORTL and DFLTCC that use the appropriate instruction formats. Additionally pass the query operand as a pointer to the entire array of 32 elements to slightly optimize performance and readability. Fixes: d668139718a9 ("KVM: s390: provide query function for instructions returning 32 byte") Suggested-by: Heiko Carstens Reviewed-by: Juergen Christ Signed-off-by: Hariharan Mari Signed-off-by: Janosch Frank --- arch/s390/kvm/kvm-s390.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 0fd96860fc45..bb7134faaebf 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -348,20 +348,29 @@ static inline int plo_test_bit(unsigned char nr) return cc == 0; } -static __always_inline void __insn32_query(unsigned int opcode, u8 *query) +static __always_inline void __sortl_query(u8 (*query)[32]) { asm volatile( " lghi 0,0\n" - " lgr 1,%[query]\n" + " la 1,%[query]\n" /* Parameter registers are ignored */ - " .insn rrf,%[opc] << 16,2,4,6,0\n" + " .insn rre,0xb9380000,2,4\n" + : [query] "=R" (*query) : - : [query] "d" ((unsigned long)query), [opc] "i" (opcode) - : "cc", "memory", "0", "1"); + : "cc", "0", "1"); } -#define INSN_SORTL 0xb938 -#define INSN_DFLTCC 0xb939 +static __always_inline void __dfltcc_query(u8 (*query)[32]) +{ + asm volatile( + " lghi 0,0\n" + " la 1,%[query]\n" + /* Parameter registers are ignored */ + " .insn rrf,0xb9390000,2,4,6,0\n" + : [query] "=R" (*query) + : + : "cc", "0", "1"); +} static void __init kvm_s390_cpu_feat_init(void) { @@ -415,10 +424,10 @@ static void __init kvm_s390_cpu_feat_init(void) kvm_s390_available_subfunc.kdsa); if (test_facility(150)) /* SORTL */ - __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl); + __sortl_query(&kvm_s390_available_subfunc.sortl); if (test_facility(151)) /* DFLTCC */ - __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc); + __dfltcc_query(&kvm_s390_available_subfunc.dfltcc); if (MACHINE_HAS_ESOP) allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP); From 252b6fd2e186b793b960fa28ffccb07ccc4d5f51 Mon Sep 17 00:00:00 2001 From: Christoph Schlameuss Date: Wed, 7 Aug 2024 17:45:03 +0200 Subject: [PATCH 026/655] selftests: kvm: s390: Define page sizes in shared header Multiple test cases need page size and shift definitions. By moving the definitions to a single architecture specific header we limit the repetition. Make use of PAGE_SIZE, PAGE_SHIFT and PAGE_MASK defines in existing code. Signed-off-by: Christoph Schlameuss Reviewed-by: Claudio Imbrenda Reviewed-by: Janosch Frank Link: https://lore.kernel.org/r/20240807154512.316936-2-schlameuss@linux.ibm.com Signed-off-by: Janosch Frank Message-ID: <20240807154512.316936-2-schlameuss@linux.ibm.com> --- tools/testing/selftests/kvm/include/s390x/processor.h | 5 +++++ tools/testing/selftests/kvm/lib/s390x/processor.c | 10 +++++----- tools/testing/selftests/kvm/s390x/cmma_test.c | 7 ++++--- tools/testing/selftests/kvm/s390x/memop.c | 4 +--- tools/testing/selftests/kvm/s390x/tprot.c | 5 ++--- 5 files changed, 17 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/kvm/include/s390x/processor.h b/tools/testing/selftests/kvm/include/s390x/processor.h index 255c9b990f4c..481bd2fd6a32 100644 --- a/tools/testing/selftests/kvm/include/s390x/processor.h +++ b/tools/testing/selftests/kvm/include/s390x/processor.h @@ -21,6 +21,11 @@ #define PAGE_PROTECT 0x200 /* HW read-only bit */ #define PAGE_NOEXEC 0x100 /* HW no-execute bit */ +/* Page size definitions */ +#define PAGE_SHIFT 12 +#define PAGE_SIZE BIT_ULL(PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE - 1)) + /* Is there a portable way to do this? */ static inline void cpu_relax(void) { diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c index 4ad4492eea1d..20cfe970e3e3 100644 --- a/tools/testing/selftests/kvm/lib/s390x/processor.c +++ b/tools/testing/selftests/kvm/lib/s390x/processor.c @@ -14,7 +14,7 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) { vm_paddr_t paddr; - TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x", + TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x", vm->page_size); if (vm->pgd_created) @@ -79,7 +79,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa) } /* Fill in page table entry */ - idx = (gva >> 12) & 0x0ffu; /* page index */ + idx = (gva >> PAGE_SHIFT) & 0x0ffu; /* page index */ if (!(entry[idx] & PAGE_INVALID)) fprintf(stderr, "WARNING: PTE for gpa=0x%"PRIx64" already set!\n", gpa); @@ -91,7 +91,7 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) int ri, idx; uint64_t *entry; - TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x", + TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x", vm->page_size); entry = addr_gpa2hva(vm, vm->pgd); @@ -103,7 +103,7 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN); } - idx = (gva >> 12) & 0x0ffu; /* page index */ + idx = (gva >> PAGE_SHIFT) & 0x0ffu; /* page index */ TEST_ASSERT(!(entry[idx] & PAGE_INVALID), "No page mapping for vm virtual address 0x%lx", gva); @@ -168,7 +168,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) struct kvm_sregs sregs; struct kvm_vcpu *vcpu; - TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x", + TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x", vm->page_size); stack_vaddr = __vm_vaddr_alloc(vm, stack_size, diff --git a/tools/testing/selftests/kvm/s390x/cmma_test.c b/tools/testing/selftests/kvm/s390x/cmma_test.c index b39033844756..e32dd59703a0 100644 --- a/tools/testing/selftests/kvm/s390x/cmma_test.c +++ b/tools/testing/selftests/kvm/s390x/cmma_test.c @@ -17,16 +17,17 @@ #include "kvm_util.h" #include "kselftest.h" #include "ucall_common.h" +#include "processor.h" #define MAIN_PAGE_COUNT 512 #define TEST_DATA_PAGE_COUNT 512 #define TEST_DATA_MEMSLOT 1 -#define TEST_DATA_START_GFN 4096 +#define TEST_DATA_START_GFN PAGE_SIZE #define TEST_DATA_TWO_PAGE_COUNT 256 #define TEST_DATA_TWO_MEMSLOT 2 -#define TEST_DATA_TWO_START_GFN 8192 +#define TEST_DATA_TWO_START_GFN (2 * PAGE_SIZE) static char cmma_value_buf[MAIN_PAGE_COUNT + TEST_DATA_PAGE_COUNT]; @@ -66,7 +67,7 @@ static void guest_dirty_test_data(void) " lghi 5,%[page_count]\n" /* r5 += r1 */ "2: agfr 5,1\n" - /* r2 = r1 << 12 */ + /* r2 = r1 << PAGE_SHIFT */ "1: sllg 2,1,12(0)\n" /* essa(r4, r2, SET_STABLE) */ " .insn rrf,0xb9ab0000,4,2,1,0\n" diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c index f2df7416be84..4374b4cd2a80 100644 --- a/tools/testing/selftests/kvm/s390x/memop.c +++ b/tools/testing/selftests/kvm/s390x/memop.c @@ -16,6 +16,7 @@ #include "kvm_util.h" #include "kselftest.h" #include "ucall_common.h" +#include "processor.h" enum mop_target { LOGICAL, @@ -226,9 +227,6 @@ static void memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo, #define CHECK_N_DO(f, ...) ({ f(__VA_ARGS__, CHECK_ONLY); f(__VA_ARGS__); }) -#define PAGE_SHIFT 12 -#define PAGE_SIZE (1ULL << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE - 1)) #define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38)) #define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39)) diff --git a/tools/testing/selftests/kvm/s390x/tprot.c b/tools/testing/selftests/kvm/s390x/tprot.c index 7a742a673b7c..12d5e1cb62e3 100644 --- a/tools/testing/selftests/kvm/s390x/tprot.c +++ b/tools/testing/selftests/kvm/s390x/tprot.c @@ -9,9 +9,8 @@ #include "kvm_util.h" #include "kselftest.h" #include "ucall_common.h" +#include "processor.h" -#define PAGE_SHIFT 12 -#define PAGE_SIZE (1 << PAGE_SHIFT) #define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38)) #define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39)) @@ -151,7 +150,7 @@ static enum stage perform_next_stage(int *i, bool mapped_0) * instead. * In order to skip these tests we detect this inside the guest */ - skip = tests[*i].addr < (void *)4096 && + skip = tests[*i].addr < (void *)PAGE_SIZE && tests[*i].expected != TRANSL_UNAVAIL && !mapped_0; if (!skip) { From 845482188e3890269927c47316bcbacee9f71a3f Mon Sep 17 00:00:00 2001 From: Christoph Schlameuss Date: Wed, 7 Aug 2024 17:45:04 +0200 Subject: [PATCH 027/655] selftests: kvm: s390: Add kvm_s390_sie_block definition for userspace tests Subsequent tests do require direct manipulation of the SIE control block. This commit introduces the SIE control block definition for use within the selftests. There are already definitions of this within the kernel. This differs in two ways. * This is the first definition of this in userspace. * In the context of the selftests this does not require atomicity for the flags. With the userspace definition of the SIE block layout now being present we can reuse the values in other tests where applicable. Signed-off-by: Christoph Schlameuss Reviewed-by: Janosch Frank Link: https://lore.kernel.org/r/20240807154512.316936-3-schlameuss@linux.ibm.com Signed-off-by: Janosch Frank Message-ID: <20240807154512.316936-3-schlameuss@linux.ibm.com> --- .../testing/selftests/kvm/include/s390x/sie.h | 240 ++++++++++++++++++ .../testing/selftests/kvm/s390x/debug_test.c | 4 +- 2 files changed, 242 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/kvm/include/s390x/sie.h diff --git a/tools/testing/selftests/kvm/include/s390x/sie.h b/tools/testing/selftests/kvm/include/s390x/sie.h new file mode 100644 index 000000000000..160acd4a1db9 --- /dev/null +++ b/tools/testing/selftests/kvm/include/s390x/sie.h @@ -0,0 +1,240 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Definition for kernel virtual machines on s390. + * + * Adapted copy of struct definition kvm_s390_sie_block from + * arch/s390/include/asm/kvm_host.h for use in userspace selftest programs. + * + * Copyright IBM Corp. 2008, 2024 + * + * Authors: + * Christoph Schlameuss + * Carsten Otte + */ + +#ifndef SELFTEST_KVM_SIE_H +#define SELFTEST_KVM_SIE_H + +#include + +struct kvm_s390_sie_block { +#define CPUSTAT_STOPPED 0x80000000 +#define CPUSTAT_WAIT 0x10000000 +#define CPUSTAT_ECALL_PEND 0x08000000 +#define CPUSTAT_STOP_INT 0x04000000 +#define CPUSTAT_IO_INT 0x02000000 +#define CPUSTAT_EXT_INT 0x01000000 +#define CPUSTAT_RUNNING 0x00800000 +#define CPUSTAT_RETAINED 0x00400000 +#define CPUSTAT_TIMING_SUB 0x00020000 +#define CPUSTAT_SIE_SUB 0x00010000 +#define CPUSTAT_RRF 0x00008000 +#define CPUSTAT_SLSV 0x00004000 +#define CPUSTAT_SLSR 0x00002000 +#define CPUSTAT_ZARCH 0x00000800 +#define CPUSTAT_MCDS 0x00000100 +#define CPUSTAT_KSS 0x00000200 +#define CPUSTAT_SM 0x00000080 +#define CPUSTAT_IBS 0x00000040 +#define CPUSTAT_GED2 0x00000010 +#define CPUSTAT_G 0x00000008 +#define CPUSTAT_GED 0x00000004 +#define CPUSTAT_J 0x00000002 +#define CPUSTAT_P 0x00000001 + __u32 cpuflags; /* 0x0000 */ + __u32: 1; /* 0x0004 */ + __u32 prefix : 18; + __u32: 1; + __u32 ibc : 12; + __u8 reserved08[4]; /* 0x0008 */ +#define PROG_IN_SIE BIT(0) + __u32 prog0c; /* 0x000c */ + union { + __u8 reserved10[16]; /* 0x0010 */ + struct { + __u64 pv_handle_cpu; + __u64 pv_handle_config; + }; + }; +#define PROG_BLOCK_SIE BIT(0) +#define PROG_REQUEST BIT(1) + __u32 prog20; /* 0x0020 */ + __u8 reserved24[4]; /* 0x0024 */ + __u64 cputm; /* 0x0028 */ + __u64 ckc; /* 0x0030 */ + __u64 epoch; /* 0x0038 */ + __u32 svcc; /* 0x0040 */ +#define LCTL_CR0 0x8000 +#define LCTL_CR6 0x0200 +#define LCTL_CR9 0x0040 +#define LCTL_CR10 0x0020 +#define LCTL_CR11 0x0010 +#define LCTL_CR14 0x0002 + __u16 lctl; /* 0x0044 */ + __s16 icpua; /* 0x0046 */ +#define ICTL_OPEREXC 0x80000000 +#define ICTL_PINT 0x20000000 +#define ICTL_LPSW 0x00400000 +#define ICTL_STCTL 0x00040000 +#define ICTL_ISKE 0x00004000 +#define ICTL_SSKE 0x00002000 +#define ICTL_RRBE 0x00001000 +#define ICTL_TPROT 0x00000200 + __u32 ictl; /* 0x0048 */ +#define ECA_CEI 0x80000000 +#define ECA_IB 0x40000000 +#define ECA_SIGPI 0x10000000 +#define ECA_MVPGI 0x01000000 +#define ECA_AIV 0x00200000 +#define ECA_VX 0x00020000 +#define ECA_PROTEXCI 0x00002000 +#define ECA_APIE 0x00000008 +#define ECA_SII 0x00000001 + __u32 eca; /* 0x004c */ +#define ICPT_INST 0x04 +#define ICPT_PROGI 0x08 +#define ICPT_INSTPROGI 0x0C +#define ICPT_EXTREQ 0x10 +#define ICPT_EXTINT 0x14 +#define ICPT_IOREQ 0x18 +#define ICPT_WAIT 0x1c +#define ICPT_VALIDITY 0x20 +#define ICPT_STOP 0x28 +#define ICPT_OPEREXC 0x2C +#define ICPT_PARTEXEC 0x38 +#define ICPT_IOINST 0x40 +#define ICPT_KSS 0x5c +#define ICPT_MCHKREQ 0x60 +#define ICPT_INT_ENABLE 0x64 +#define ICPT_PV_INSTR 0x68 +#define ICPT_PV_NOTIFY 0x6c +#define ICPT_PV_PREF 0x70 + __u8 icptcode; /* 0x0050 */ + __u8 icptstatus; /* 0x0051 */ + __u16 ihcpu; /* 0x0052 */ + __u8 reserved54; /* 0x0054 */ +#define IICTL_CODE_NONE 0x00 +#define IICTL_CODE_MCHK 0x01 +#define IICTL_CODE_EXT 0x02 +#define IICTL_CODE_IO 0x03 +#define IICTL_CODE_RESTART 0x04 +#define IICTL_CODE_SPECIFICATION 0x10 +#define IICTL_CODE_OPERAND 0x11 + __u8 iictl; /* 0x0055 */ + __u16 ipa; /* 0x0056 */ + __u32 ipb; /* 0x0058 */ + __u32 scaoh; /* 0x005c */ +#define FPF_BPBC 0x20 + __u8 fpf; /* 0x0060 */ +#define ECB_GS 0x40 +#define ECB_TE 0x10 +#define ECB_SPECI 0x08 +#define ECB_SRSI 0x04 +#define ECB_HOSTPROTINT 0x02 +#define ECB_PTF 0x01 + __u8 ecb; /* 0x0061 */ +#define ECB2_CMMA 0x80 +#define ECB2_IEP 0x20 +#define ECB2_PFMFI 0x08 +#define ECB2_ESCA 0x04 +#define ECB2_ZPCI_LSI 0x02 + __u8 ecb2; /* 0x0062 */ +#define ECB3_AISI 0x20 +#define ECB3_AISII 0x10 +#define ECB3_DEA 0x08 +#define ECB3_AES 0x04 +#define ECB3_RI 0x01 + __u8 ecb3; /* 0x0063 */ +#define ESCA_SCAOL_MASK ~0x3fU + __u32 scaol; /* 0x0064 */ + __u8 sdf; /* 0x0068 */ + __u8 epdx; /* 0x0069 */ + __u8 cpnc; /* 0x006a */ + __u8 reserved6b; /* 0x006b */ + __u32 todpr; /* 0x006c */ +#define GISA_FORMAT1 0x00000001 + __u32 gd; /* 0x0070 */ + __u8 reserved74[12]; /* 0x0074 */ + __u64 mso; /* 0x0080 */ + __u64 msl; /* 0x0088 */ + __u64 psw_mask; /* 0x0090 */ + __u64 psw_addr; /* 0x0098 */ + __u64 gg14; /* 0x00a0 */ + __u64 gg15; /* 0x00a8 */ + __u8 reservedb0[8]; /* 0x00b0 */ +#define HPID_KVM 0x4 +#define HPID_VSIE 0x5 + __u8 hpid; /* 0x00b8 */ + __u8 reservedb9[7]; /* 0x00b9 */ + union { + struct { + __u32 eiparams; /* 0x00c0 */ + __u16 extcpuaddr; /* 0x00c4 */ + __u16 eic; /* 0x00c6 */ + }; + __u64 mcic; /* 0x00c0 */ + } __packed; + __u32 reservedc8; /* 0x00c8 */ + union { + struct { + __u16 pgmilc; /* 0x00cc */ + __u16 iprcc; /* 0x00ce */ + }; + __u32 edc; /* 0x00cc */ + } __packed; + union { + struct { + __u32 dxc; /* 0x00d0 */ + __u16 mcn; /* 0x00d4 */ + __u8 perc; /* 0x00d6 */ + __u8 peratmid; /* 0x00d7 */ + }; + __u64 faddr; /* 0x00d0 */ + } __packed; + __u64 peraddr; /* 0x00d8 */ + __u8 eai; /* 0x00e0 */ + __u8 peraid; /* 0x00e1 */ + __u8 oai; /* 0x00e2 */ + __u8 armid; /* 0x00e3 */ + __u8 reservede4[4]; /* 0x00e4 */ + union { + __u64 tecmc; /* 0x00e8 */ + struct { + __u16 subchannel_id; /* 0x00e8 */ + __u16 subchannel_nr; /* 0x00ea */ + __u32 io_int_parm; /* 0x00ec */ + __u32 io_int_word; /* 0x00f0 */ + }; + } __packed; + __u8 reservedf4[8]; /* 0x00f4 */ +#define CRYCB_FORMAT_MASK 0x00000003 +#define CRYCB_FORMAT0 0x00000000 +#define CRYCB_FORMAT1 0x00000001 +#define CRYCB_FORMAT2 0x00000003 + __u32 crycbd; /* 0x00fc */ + __u64 gcr[16]; /* 0x0100 */ + union { + __u64 gbea; /* 0x0180 */ + __u64 sidad; + }; + __u8 reserved188[8]; /* 0x0188 */ + __u64 sdnxo; /* 0x0190 */ + __u8 reserved198[8]; /* 0x0198 */ + __u32 fac; /* 0x01a0 */ + __u8 reserved1a4[20]; /* 0x01a4 */ + __u64 cbrlo; /* 0x01b8 */ + __u8 reserved1c0[8]; /* 0x01c0 */ +#define ECD_HOSTREGMGMT 0x20000000 +#define ECD_MEF 0x08000000 +#define ECD_ETOKENF 0x02000000 +#define ECD_ECC 0x00200000 + __u32 ecd; /* 0x01c8 */ + __u8 reserved1cc[18]; /* 0x01cc */ + __u64 pp; /* 0x01de */ + __u8 reserved1e6[2]; /* 0x01e6 */ + __u64 itdba; /* 0x01e8 */ + __u64 riccbd; /* 0x01f0 */ + __u64 gvrd; /* 0x01f8 */ +} __packed __aligned(512); + +#endif /* SELFTEST_KVM_SIE_H */ diff --git a/tools/testing/selftests/kvm/s390x/debug_test.c b/tools/testing/selftests/kvm/s390x/debug_test.c index 84313fb27529..ad8095968601 100644 --- a/tools/testing/selftests/kvm/s390x/debug_test.c +++ b/tools/testing/selftests/kvm/s390x/debug_test.c @@ -2,12 +2,12 @@ /* Test KVM debugging features. */ #include "kvm_util.h" #include "test_util.h" +#include "sie.h" #include #define __LC_SVC_NEW_PSW 0x1c0 #define __LC_PGM_NEW_PSW 0x1d0 -#define ICPT_INSTRUCTION 0x04 #define IPA0_DIAG 0x8300 #define PGM_SPECIFICATION 0x06 @@ -85,7 +85,7 @@ static void test_step_pgm_diag(void) vm = test_step_int_1(&vcpu, test_step_pgm_diag_guest_code, __LC_PGM_NEW_PSW, new_psw); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); - TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_INSTRUCTION); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_INST); TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa & 0xff00, IPA0_DIAG); vcpu_ioctl(vcpu, KVM_S390_IRQ, &irq); vcpu_run(vcpu); From 011901fc222435f5bf2d6accb392f749ddebdfe7 Mon Sep 17 00:00:00 2001 From: Christoph Schlameuss Date: Wed, 7 Aug 2024 17:45:05 +0200 Subject: [PATCH 028/655] selftests: kvm: s390: Add s390x ucontrol test suite with hpage test Add test suite to validate the s390x architecture specific ucontrol KVM interface. Make use of the selftest test harness. * uc_cap_hpage testcase verifies that a ucontrol VM cannot be run with hugepages. To allow testing of the ucontrol interface the kernel needs a non-default config containing CONFIG_KVM_S390_UCONTROL. This config needs to be set to built-in (y) as this cannot be built as module. Signed-off-by: Christoph Schlameuss Reviewed-by: Janosch Frank Link: https://lore.kernel.org/r/20240807154512.316936-4-schlameuss@linux.ibm.com Signed-off-by: Janosch Frank Message-ID: <20240807154512.316936-4-schlameuss@linux.ibm.com> --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 1 + tools/testing/selftests/kvm/s390x/config | 2 + .../selftests/kvm/s390x/ucontrol_test.c | 76 +++++++++++++++++++ 4 files changed, 80 insertions(+) create mode 100644 tools/testing/selftests/kvm/s390x/config create mode 100644 tools/testing/selftests/kvm/s390x/ucontrol_test.c diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 6d9381d60172..f2a30a58cd71 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -5,3 +5,4 @@ !*.h !*.S !*.sh +!config diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 48d32c5aa3eb..b3dc0a5bf0d4 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -186,6 +186,7 @@ TEST_GEN_PROGS_s390x += s390x/tprot TEST_GEN_PROGS_s390x += s390x/cmma_test TEST_GEN_PROGS_s390x += s390x/debug_test TEST_GEN_PROGS_s390x += s390x/shared_zeropage_test +TEST_GEN_PROGS_s390x += s390x/ucontrol_test TEST_GEN_PROGS_s390x += demand_paging_test TEST_GEN_PROGS_s390x += dirty_log_test TEST_GEN_PROGS_s390x += guest_print_test diff --git a/tools/testing/selftests/kvm/s390x/config b/tools/testing/selftests/kvm/s390x/config new file mode 100644 index 000000000000..23270f2d679f --- /dev/null +++ b/tools/testing/selftests/kvm/s390x/config @@ -0,0 +1,2 @@ +CONFIG_KVM=y +CONFIG_KVM_S390_UCONTROL=y diff --git a/tools/testing/selftests/kvm/s390x/ucontrol_test.c b/tools/testing/selftests/kvm/s390x/ucontrol_test.c new file mode 100644 index 000000000000..cd68e7e37d35 --- /dev/null +++ b/tools/testing/selftests/kvm/s390x/ucontrol_test.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Test code for the s390x kvm ucontrol interface + * + * Copyright IBM Corp. 2024 + * + * Authors: + * Christoph Schlameuss + */ +#include "kselftest_harness.h" +#include "kvm_util.h" + +#include +#include + +/* so directly declare capget to check caps without libcap */ +int capget(cap_user_header_t header, cap_user_data_t data); + +/** + * In order to create user controlled virtual machines on S390, + * check KVM_CAP_S390_UCONTROL and use the flag KVM_VM_S390_UCONTROL + * as privileged user (SYS_ADMIN). + */ +void require_ucontrol_admin(void) +{ + struct __user_cap_data_struct data[_LINUX_CAPABILITY_U32S_3]; + struct __user_cap_header_struct hdr = { + .version = _LINUX_CAPABILITY_VERSION_3, + }; + int rc; + + rc = capget(&hdr, data); + TEST_ASSERT_EQ(0, rc); + TEST_REQUIRE((data->effective & CAP_TO_MASK(CAP_SYS_ADMIN)) > 0); + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_UCONTROL)); +} + +/** + * Assert HPAGE CAP cannot be enabled on UCONTROL VM + */ +TEST(uc_cap_hpage) +{ + int rc, kvm_fd, vm_fd, vcpu_fd; + struct kvm_enable_cap cap = { + .cap = KVM_CAP_S390_HPAGE_1M, + }; + + require_ucontrol_admin(); + + kvm_fd = open_kvm_dev_path_or_exit(); + vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, KVM_VM_S390_UCONTROL); + ASSERT_GE(vm_fd, 0); + + /* assert hpages are not supported on ucontrol vm */ + rc = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_HPAGE_1M); + EXPECT_EQ(0, rc); + + /* Test that KVM_CAP_S390_HPAGE_1M can't be enabled for a ucontrol vm */ + rc = ioctl(vm_fd, KVM_ENABLE_CAP, cap); + EXPECT_EQ(-1, rc); + EXPECT_EQ(EINVAL, errno); + + /* assert HPAGE CAP is rejected after vCPU creation */ + vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0); + ASSERT_GE(vcpu_fd, 0); + rc = ioctl(vm_fd, KVM_ENABLE_CAP, cap); + EXPECT_EQ(-1, rc); + EXPECT_EQ(EBUSY, errno); + + close(vcpu_fd); + close(vm_fd); + close(kvm_fd); +} + +TEST_HARNESS_MAIN From d4f8592f6c42a6df7f570be6c4ccf8bd40838aab Mon Sep 17 00:00:00 2001 From: Christoph Schlameuss Date: Wed, 7 Aug 2024 17:45:06 +0200 Subject: [PATCH 029/655] selftests: kvm: s390: Add test fixture and simple VM setup tests Add a uc_kvm fixture to create and destroy a ucontrol VM. * uc_sie_assertions asserts basic settings in the SIE as setup by the kernel. * uc_attr_mem_limit asserts the memory limit is max value and cannot be set (not supported). * uc_no_dirty_log asserts dirty log is not supported. Signed-off-by: Christoph Schlameuss Reviewed-by: Claudio Imbrenda Link: https://lore.kernel.org/r/20240807154512.316936-5-schlameuss@linux.ibm.com Signed-off-by: Janosch Frank Message-ID: <20240807154512.316936-5-schlameuss@linux.ibm.com> --- .../selftests/kvm/s390x/ucontrol_test.c | 131 ++++++++++++++++++ 1 file changed, 131 insertions(+) diff --git a/tools/testing/selftests/kvm/s390x/ucontrol_test.c b/tools/testing/selftests/kvm/s390x/ucontrol_test.c index cd68e7e37d35..d103a92e7495 100644 --- a/tools/testing/selftests/kvm/s390x/ucontrol_test.c +++ b/tools/testing/selftests/kvm/s390x/ucontrol_test.c @@ -9,10 +9,14 @@ */ #include "kselftest_harness.h" #include "kvm_util.h" +#include "processor.h" +#include "sie.h" #include #include +#define VM_MEM_SIZE (4 * SZ_1M) + /* so directly declare capget to check caps without libcap */ int capget(cap_user_header_t header, cap_user_data_t data); @@ -36,6 +40,133 @@ void require_ucontrol_admin(void) TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_UCONTROL)); } +FIXTURE(uc_kvm) +{ + struct kvm_s390_sie_block *sie_block; + struct kvm_run *run; + uintptr_t base_gpa; + uintptr_t code_gpa; + uintptr_t base_hva; + uintptr_t code_hva; + int kvm_run_size; + void *vm_mem; + int vcpu_fd; + int kvm_fd; + int vm_fd; +}; + +/** + * create VM with single vcpu, map kvm_run and SIE control block for easy access + */ +FIXTURE_SETUP(uc_kvm) +{ + struct kvm_s390_vm_cpu_processor info; + int rc; + + require_ucontrol_admin(); + + self->kvm_fd = open_kvm_dev_path_or_exit(); + self->vm_fd = ioctl(self->kvm_fd, KVM_CREATE_VM, KVM_VM_S390_UCONTROL); + ASSERT_GE(self->vm_fd, 0); + + kvm_device_attr_get(self->vm_fd, KVM_S390_VM_CPU_MODEL, + KVM_S390_VM_CPU_PROCESSOR, &info); + TH_LOG("create VM 0x%llx", info.cpuid); + + self->vcpu_fd = ioctl(self->vm_fd, KVM_CREATE_VCPU, 0); + ASSERT_GE(self->vcpu_fd, 0); + + self->kvm_run_size = ioctl(self->kvm_fd, KVM_GET_VCPU_MMAP_SIZE, NULL); + ASSERT_GE(self->kvm_run_size, sizeof(struct kvm_run)) + TH_LOG(KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, self->kvm_run_size)); + self->run = (struct kvm_run *)mmap(NULL, self->kvm_run_size, + PROT_READ | PROT_WRITE, MAP_SHARED, self->vcpu_fd, 0); + ASSERT_NE(self->run, MAP_FAILED); + /** + * For virtual cpus that have been created with S390 user controlled + * virtual machines, the resulting vcpu fd can be memory mapped at page + * offset KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of + * the virtual cpu's hardware control block. + */ + self->sie_block = (struct kvm_s390_sie_block *)mmap(NULL, PAGE_SIZE, + PROT_READ | PROT_WRITE, MAP_SHARED, + self->vcpu_fd, KVM_S390_SIE_PAGE_OFFSET << PAGE_SHIFT); + ASSERT_NE(self->sie_block, MAP_FAILED); + + TH_LOG("VM created %p %p", self->run, self->sie_block); + + self->base_gpa = 0; + self->code_gpa = self->base_gpa + (3 * SZ_1M); + + self->vm_mem = aligned_alloc(SZ_1M, VM_MEM_SIZE); + ASSERT_NE(NULL, self->vm_mem) TH_LOG("malloc failed %u", errno); + self->base_hva = (uintptr_t)self->vm_mem; + self->code_hva = self->base_hva - self->base_gpa + self->code_gpa; + struct kvm_s390_ucas_mapping map = { + .user_addr = self->base_hva, + .vcpu_addr = self->base_gpa, + .length = VM_MEM_SIZE, + }; + TH_LOG("ucas map %p %p 0x%llx", + (void *)map.user_addr, (void *)map.vcpu_addr, map.length); + rc = ioctl(self->vcpu_fd, KVM_S390_UCAS_MAP, &map); + ASSERT_EQ(0, rc) TH_LOG("ucas map result %d not expected, %s", + rc, strerror(errno)); + + TH_LOG("page in %p", (void *)self->base_gpa); + rc = ioctl(self->vcpu_fd, KVM_S390_VCPU_FAULT, self->base_gpa); + ASSERT_EQ(0, rc) TH_LOG("vcpu fault (%p) result %d not expected, %s", + (void *)self->base_hva, rc, strerror(errno)); + + self->sie_block->cpuflags &= ~CPUSTAT_STOPPED; +} + +FIXTURE_TEARDOWN(uc_kvm) +{ + munmap(self->sie_block, PAGE_SIZE); + munmap(self->run, self->kvm_run_size); + close(self->vcpu_fd); + close(self->vm_fd); + close(self->kvm_fd); + free(self->vm_mem); +} + +TEST_F(uc_kvm, uc_sie_assertions) +{ + /* assert interception of Code 08 (Program Interruption) is set */ + EXPECT_EQ(0, self->sie_block->ecb & ECB_SPECI); +} + +TEST_F(uc_kvm, uc_attr_mem_limit) +{ + u64 limit; + struct kvm_device_attr attr = { + .group = KVM_S390_VM_MEM_CTRL, + .attr = KVM_S390_VM_MEM_LIMIT_SIZE, + .addr = (unsigned long)&limit, + }; + int rc; + + rc = ioctl(self->vm_fd, KVM_GET_DEVICE_ATTR, &attr); + EXPECT_EQ(0, rc); + EXPECT_EQ(~0UL, limit); + + /* assert set not supported */ + rc = ioctl(self->vm_fd, KVM_SET_DEVICE_ATTR, &attr); + EXPECT_EQ(-1, rc); + EXPECT_EQ(EINVAL, errno); +} + +TEST_F(uc_kvm, uc_no_dirty_log) +{ + struct kvm_dirty_log dlog; + int rc; + + rc = ioctl(self->vm_fd, KVM_GET_DIRTY_LOG, &dlog); + EXPECT_EQ(-1, rc); + EXPECT_EQ(EINVAL, errno); +} + /** * Assert HPAGE CAP cannot be enabled on UCONTROL VM */ From 100932fc37d468bfa80b5675b653ff65f7bafba7 Mon Sep 17 00:00:00 2001 From: Christoph Schlameuss Date: Wed, 7 Aug 2024 17:45:07 +0200 Subject: [PATCH 030/655] selftests: kvm: s390: Add debug print functions Add functions to simply print some basic state information in selftests. The output can be enabled by setting: #define TH_LOG_ENABLED 1 #define DEBUG 1 * print_psw: current SIE state description and VM run state * print_hex_bytes: print memory with some counting markers * print_hex: PRINT_HEX with 512 bytes * print_run: use print_psw and print_hex to print contents of VM run state and SIE state description * print_regs: print content of general and control registers All prints use pr_debug for the output and can be configured using DEBUG. Signed-off-by: Christoph Schlameuss Acked-by: Janosch Frank Link: https://lore.kernel.org/r/20240807154512.316936-6-schlameuss@linux.ibm.com Signed-off-by: Janosch Frank Message-ID: <20240807154512.316936-6-schlameuss@linux.ibm.com> --- .../selftests/kvm/include/s390x/debug_print.h | 69 +++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 tools/testing/selftests/kvm/include/s390x/debug_print.h diff --git a/tools/testing/selftests/kvm/include/s390x/debug_print.h b/tools/testing/selftests/kvm/include/s390x/debug_print.h new file mode 100644 index 000000000000..1bf275631cc6 --- /dev/null +++ b/tools/testing/selftests/kvm/include/s390x/debug_print.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Definition for kernel virtual machines on s390x + * + * Copyright IBM Corp. 2024 + * + * Authors: + * Christoph Schlameuss + */ + +#ifndef SELFTEST_KVM_DEBUG_PRINT_H +#define SELFTEST_KVM_DEBUG_PRINT_H + +#include "asm/ptrace.h" +#include "kvm_util.h" +#include "sie.h" + +static inline void print_hex_bytes(const char *name, u64 addr, size_t len) +{ + u64 pos; + + pr_debug("%s (%p)\n", name, (void *)addr); + pr_debug(" 0/0x00---------|"); + if (len > 8) + pr_debug(" 8/0x08---------|"); + if (len > 16) + pr_debug(" 16/0x10--------|"); + if (len > 24) + pr_debug(" 24/0x18--------|"); + for (pos = 0; pos < len; pos += 8) { + if ((pos % 32) == 0) + pr_debug("\n %3lu 0x%.3lx ", pos, pos); + pr_debug(" %16lx", *((u64 *)(addr + pos))); + } + pr_debug("\n"); +} + +static inline void print_hex(const char *name, u64 addr) +{ + print_hex_bytes(name, addr, 512); +} + +static inline void print_psw(struct kvm_run *run, struct kvm_s390_sie_block *sie_block) +{ + pr_debug("flags:0x%x psw:0x%.16llx:0x%.16llx exit:%u %s\n", + run->flags, + run->psw_mask, run->psw_addr, + run->exit_reason, exit_reason_str(run->exit_reason)); + pr_debug("sie_block psw:0x%.16llx:0x%.16llx\n", + sie_block->psw_mask, sie_block->psw_addr); +} + +static inline void print_run(struct kvm_run *run, struct kvm_s390_sie_block *sie_block) +{ + print_hex_bytes("run", (u64)run, 0x150); + print_hex("sie_block", (u64)sie_block); + print_psw(run, sie_block); +} + +static inline void print_regs(struct kvm_run *run) +{ + struct kvm_sync_regs *sync_regs = &run->s.regs; + + print_hex_bytes("GPRS", (u64)sync_regs->gprs, 8 * NUM_GPRS); + print_hex_bytes("ACRS", (u64)sync_regs->acrs, 4 * NUM_ACRS); + print_hex_bytes("CRS", (u64)sync_regs->crs, 8 * NUM_CRS); +} + +#endif /* SELFTEST_KVM_DEBUG_PRINT_H */ From c7ff693fa2094ba0a9d0a20feb4ab1658eff9c33 Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Mon, 22 Jul 2024 11:06:21 +0200 Subject: [PATCH 031/655] module: Split modules_install compression and in-kernel decompression The kernel configuration allows specifying a module compression mode. If one is selected then each module gets compressed during 'make modules_install' and additionally one can also enable support for a respective direct in-kernel decompression support. This means that the decompression support cannot be enabled without the automatic compression. Some distributions, such as the (open)SUSE family, use a signer service for modules. A build runs on a worker machine but signing is done by a separate locked-down server that is in possession of the signing key. The build invokes 'make modules_install' to create a modules tree, collects information about the modules, asks the signer service for their signature, appends each signature to the respective module and compresses all modules. When using this arrangment, the 'make modules_install' step produces unsigned+uncompressed modules and the distribution's own build recipe takes care of signing and compression later. The signing support can be currently enabled without automatically signing modules during 'make modules_install'. However, the in-kernel decompression support can be selected only after first enabling automatic compression during this step. To allow only enabling the in-kernel decompression support without the automatic compression during 'make modules_install', separate the compression options similarly to the signing options, as follows: > Enable loadable module support [*] Module compression Module compression type (GZIP) ---> [*] Automatically compress all modules [ ] Support in-kernel module decompression * "Module compression" (MODULE_COMPRESS) is a new main switch for the compression/decompression support. It replaces MODULE_COMPRESS_NONE. * "Module compression type" (MODULE_COMPRESS_) chooses the compression type, one of GZ, XZ, ZSTD. * "Automatically compress all modules" (MODULE_COMPRESS_ALL) is a new option to enable module compression during 'make modules_install'. It defaults to Y. * "Support in-kernel module decompression" (MODULE_DECOMPRESS) enables in-kernel decompression. Signed-off-by: Petr Pavlu Acked-by: Masahiro Yamada Signed-off-by: Luis Chamberlain --- kernel/module/Kconfig | 61 ++++++++++++++++++++-------------------- scripts/Makefile.modinst | 2 ++ 2 files changed, 33 insertions(+), 30 deletions(-) diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig index 4047b6d48255..bb7f7930fef6 100644 --- a/kernel/module/Kconfig +++ b/kernel/module/Kconfig @@ -278,64 +278,65 @@ config MODULE_SIG_HASH default "sha3-384" if MODULE_SIG_SHA3_384 default "sha3-512" if MODULE_SIG_SHA3_512 -choice - prompt "Module compression mode" +config MODULE_COMPRESS + bool "Module compression" help - This option allows you to choose the algorithm which will be used to - compress modules when 'make modules_install' is run. (or, you can - choose to not compress modules at all.) - - External modules will also be compressed in the same way during the - installation. - - For modules inside an initrd or initramfs, it's more efficient to - compress the whole initrd or initramfs instead. - + Enable module compression to reduce on-disk size of module binaries. This is fully compatible with signed modules. - Please note that the tool used to load modules needs to support the - corresponding algorithm. module-init-tools MAY support gzip, and kmod - MAY support gzip, xz and zstd. + The tool used to work with modules needs to support the selected + compression type. kmod MAY support gzip, xz and zstd. Other tools + might have a limited selection of the supported types. - Your build system needs to provide the appropriate compression tool - to compress the modules. + Note that for modules inside an initrd or initramfs, it's more + efficient to compress the whole ramdisk instead. - If in doubt, select 'None'. + If unsure, say N. -config MODULE_COMPRESS_NONE - bool "None" +choice + prompt "Module compression type" + depends on MODULE_COMPRESS help - Do not compress modules. The installed modules are suffixed - with .ko. + Choose the supported algorithm for module compression. config MODULE_COMPRESS_GZIP bool "GZIP" help - Compress modules with GZIP. The installed modules are suffixed - with .ko.gz. + Support modules compressed with GZIP. The installed modules are + suffixed with .ko.gz. config MODULE_COMPRESS_XZ bool "XZ" help - Compress modules with XZ. The installed modules are suffixed - with .ko.xz. + Support modules compressed with XZ. The installed modules are + suffixed with .ko.xz. config MODULE_COMPRESS_ZSTD bool "ZSTD" help - Compress modules with ZSTD. The installed modules are suffixed - with .ko.zst. + Support modules compressed with ZSTD. The installed modules are + suffixed with .ko.zst. endchoice +config MODULE_COMPRESS_ALL + bool "Automatically compress all modules" + default y + depends on MODULE_COMPRESS + help + Compress all modules during 'make modules_install'. + + Your build system needs to provide the appropriate compression tool + for the selected compression type. External modules will also be + compressed in the same way during the installation. + config MODULE_DECOMPRESS bool "Support in-kernel module decompression" - depends on MODULE_COMPRESS_GZIP || MODULE_COMPRESS_XZ || MODULE_COMPRESS_ZSTD + depends on MODULE_COMPRESS select ZLIB_INFLATE if MODULE_COMPRESS_GZIP select XZ_DEC if MODULE_COMPRESS_XZ select ZSTD_DECOMPRESS if MODULE_COMPRESS_ZSTD help - Support for decompressing kernel modules by the kernel itself instead of relying on userspace to perform this task. Useful when load pinning security policy is enabled. diff --git a/scripts/Makefile.modinst b/scripts/Makefile.modinst index 0afd75472679..bce4a9adb893 100644 --- a/scripts/Makefile.modinst +++ b/scripts/Makefile.modinst @@ -51,9 +51,11 @@ $(foreach x, % :, $(if $(findstring $x, $(dst)), \ $(error module installation path cannot contain '$x'))) suffix-y := +ifdef CONFIG_MODULE_COMPRESS_ALL suffix-$(CONFIG_MODULE_COMPRESS_GZIP) := .gz suffix-$(CONFIG_MODULE_COMPRESS_XZ) := .xz suffix-$(CONFIG_MODULE_COMPRESS_ZSTD) := .zst +endif modules := $(patsubst $(extmod_prefix)%.o, $(dst)/%.ko$(suffix-y), $(modules)) install-$(CONFIG_MODULES) += $(modules) From f94ce04e54038c37bcac8ae2e4e99a81a188b777 Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Mon, 22 Jul 2024 11:06:22 +0200 Subject: [PATCH 032/655] module: Clean up the description of MODULE_SIG_ The MODULE_SIG_ config choice has an inconsistent prompt styled as a question and lengthy option names. Simplify the prompt and option names to be consistent with other module options. Signed-off-by: Petr Pavlu Signed-off-by: Luis Chamberlain --- kernel/module/Kconfig | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig index bb7f7930fef6..ccdbd1bc12aa 100644 --- a/kernel/module/Kconfig +++ b/kernel/module/Kconfig @@ -228,7 +228,7 @@ comment "Do not forget to sign required modules with scripts/sign-file" depends on MODULE_SIG_FORCE && !MODULE_SIG_ALL choice - prompt "Which hash algorithm should modules be signed with?" + prompt "Hash algorithm to sign modules" depends on MODULE_SIG || IMA_APPRAISE_MODSIG help This determines which sort of hashing algorithm will be used during @@ -238,31 +238,31 @@ choice the signature on that module. config MODULE_SIG_SHA1 - bool "Sign modules with SHA-1" + bool "SHA-1" select CRYPTO_SHA1 config MODULE_SIG_SHA256 - bool "Sign modules with SHA-256" + bool "SHA-256" select CRYPTO_SHA256 config MODULE_SIG_SHA384 - bool "Sign modules with SHA-384" + bool "SHA-384" select CRYPTO_SHA512 config MODULE_SIG_SHA512 - bool "Sign modules with SHA-512" + bool "SHA-512" select CRYPTO_SHA512 config MODULE_SIG_SHA3_256 - bool "Sign modules with SHA3-256" + bool "SHA3-256" select CRYPTO_SHA3 config MODULE_SIG_SHA3_384 - bool "Sign modules with SHA3-384" + bool "SHA3-384" select CRYPTO_SHA3 config MODULE_SIG_SHA3_512 - bool "Sign modules with SHA3-512" + bool "SHA3-512" select CRYPTO_SHA3 endchoice From c4b3c1332f55c48785e6661cebeb7269a92a45fd Mon Sep 17 00:00:00 2001 From: Liao Chen Date: Tue, 20 Aug 2024 02:20:29 +0000 Subject: [PATCH 033/655] zonefs: add support for FS_IOC_GETFSSYSFSPATH FS_IOC_GETFSSYSFSPATH ioctl expects sysfs sub-path of a filesystem, the format can be "$FSTYP/$SYSFS_IDENTIFIER" under /sys/fs, it can helps to standardizes exporting sysfs datas across filesystems. This patch wires up FS_IOC_GETFSSYSFSPATH for zonefs, it will output "zonefs/". Signed-off-by: Liao Chen Signed-off-by: Damien Le Moal --- fs/zonefs/sysfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/zonefs/sysfs.c b/fs/zonefs/sysfs.c index 8ccb65c2b419..ff9a688f1f9c 100644 --- a/fs/zonefs/sysfs.c +++ b/fs/zonefs/sysfs.c @@ -92,6 +92,7 @@ int zonefs_sysfs_register(struct super_block *sb) struct zonefs_sb_info *sbi = ZONEFS_SB(sb); int ret; + super_set_sysfs_name_id(sb); init_completion(&sbi->s_kobj_unregister); ret = kobject_init_and_add(&sbi->s_kobj, &zonefs_sb_ktype, zonefs_sysfs_root, "%s", sb->s_id); From 0808ebf2f80b962e75741a41ced372a7116f1e26 Mon Sep 17 00:00:00 2001 From: Ken Raeburn Date: Fri, 26 Jul 2024 15:08:53 -0400 Subject: [PATCH 034/655] dm vdo: don't refer to dedupe_context after releasing it Clear the dedupe_context pointer in a data_vio whenever ownership of the context is lost, so that vdo can't examine it accidentally. Signed-off-by: Ken Raeburn Signed-off-by: Matthew Sakai Signed-off-by: Mikulas Patocka --- drivers/md/dm-vdo/dedupe.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/md/dm-vdo/dedupe.c b/drivers/md/dm-vdo/dedupe.c index 39ac68614419..80628ae93fba 100644 --- a/drivers/md/dm-vdo/dedupe.c +++ b/drivers/md/dm-vdo/dedupe.c @@ -729,6 +729,7 @@ static void process_update_result(struct data_vio *agent) !change_context_state(context, DEDUPE_CONTEXT_COMPLETE, DEDUPE_CONTEXT_IDLE)) return; + agent->dedupe_context = NULL; release_context(context); } @@ -1648,6 +1649,7 @@ static void process_query_result(struct data_vio *agent) if (change_context_state(context, DEDUPE_CONTEXT_COMPLETE, DEDUPE_CONTEXT_IDLE)) { agent->is_duplicate = decode_uds_advice(context); + agent->dedupe_context = NULL; release_context(context); } } @@ -2321,6 +2323,7 @@ static void timeout_index_operations_callback(struct vdo_completion *completion) * send its requestor on its way. */ list_del_init(&context->list_entry); + context->requestor->dedupe_context = NULL; continue_data_vio(context->requestor); timed_out++; } From 3a59b2ec2400017f7fcc5d794802ebc7f4187d22 Mon Sep 17 00:00:00 2001 From: Ken Raeburn Date: Tue, 6 Aug 2024 05:13:19 -0400 Subject: [PATCH 035/655] dm vdo: remove bad check of bi_next field Remove this check to prevent spurious warning messages due to the behavior of other storage layers. This check was intended to make sure dm-vdo does not chain metadata bios together. However, vdo has no control over underlying storage layers, so the assertion is not always true. Signed-off-by: Ken Raeburn Signed-off-by: Matthew Sakai Signed-off-by: Mikulas Patocka --- drivers/md/dm-vdo/io-submitter.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/md/dm-vdo/io-submitter.c b/drivers/md/dm-vdo/io-submitter.c index 9a3716bb3c05..ab62abe18827 100644 --- a/drivers/md/dm-vdo/io-submitter.c +++ b/drivers/md/dm-vdo/io-submitter.c @@ -346,7 +346,6 @@ void __submit_metadata_vio(struct vio *vio, physical_block_number_t physical, VDO_ASSERT_LOG_ONLY(!code->quiescent, "I/O not allowed in state %s", code->name); - VDO_ASSERT_LOG_ONLY(vio->bio->bi_next == NULL, "metadata bio has no next bio"); vdo_reset_completion(completion); completion->error_handler = error_handler; From 47874c98dc0873aad65259d09f9b9029e97429e3 Mon Sep 17 00:00:00 2001 From: Bruce Johnston Date: Thu, 18 Jul 2024 14:02:38 -0400 Subject: [PATCH 036/655] dm vdo: add dmsetup message for returning configuration info Add a new dmsetup message called config, which will return useful configuration information for the vdo volume and the uds index associated with it. The output is a YAML string, and contains a version number to allow future additions to the content. Signed-off-by: Bruce Johnston Signed-off-by: Matthew Sakai Signed-off-by: Mikulas Patocka --- .../admin-guide/device-mapper/vdo.rst | 7 ++- drivers/md/dm-vdo/dm-vdo-target.c | 5 +- drivers/md/dm-vdo/message-stats.c | 48 +++++++++++++++++++ drivers/md/dm-vdo/message-stats.h | 1 + 4 files changed, 59 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/device-mapper/vdo.rst b/Documentation/admin-guide/device-mapper/vdo.rst index c69ac186863a..a14e6d3e787c 100644 --- a/Documentation/admin-guide/device-mapper/vdo.rst +++ b/Documentation/admin-guide/device-mapper/vdo.rst @@ -251,7 +251,12 @@ The messages are: by the vdostats userspace program to interpret the output buffer. - dump: + config: + Outputs useful vdo configuration information. Mostly used + by users who want to recreate a similar VDO volume and + want to know the creation configuration used. + + dump: Dumps many internal structures to the system log. This is not always safe to run, so it should only be used to debug a hung vdo. Optional parameters to specify structures to diff --git a/drivers/md/dm-vdo/dm-vdo-target.c b/drivers/md/dm-vdo/dm-vdo-target.c index dd05691e4097..4b94f5718ed7 100644 --- a/drivers/md/dm-vdo/dm-vdo-target.c +++ b/drivers/md/dm-vdo/dm-vdo-target.c @@ -1105,6 +1105,9 @@ static int vdo_message(struct dm_target *ti, unsigned int argc, char **argv, if ((argc == 1) && (strcasecmp(argv[0], "stats") == 0)) { vdo_write_stats(vdo, result_buffer, maxlen); result = 1; + } else if ((argc == 1) && (strcasecmp(argv[0], "config") == 0)) { + vdo_write_config(vdo, &result_buffer, &maxlen); + result = 1; } else { result = vdo_status_to_errno(process_vdo_message(vdo, argc, argv)); } @@ -2832,7 +2835,7 @@ static void vdo_resume(struct dm_target *ti) static struct target_type vdo_target_bio = { .features = DM_TARGET_SINGLETON, .name = "vdo", - .version = { 9, 0, 0 }, + .version = { 9, 1, 0 }, .module = THIS_MODULE, .ctr = vdo_ctr, .dtr = vdo_dtr, diff --git a/drivers/md/dm-vdo/message-stats.c b/drivers/md/dm-vdo/message-stats.c index 2802cf92922b..75dfcd7c5f63 100644 --- a/drivers/md/dm-vdo/message-stats.c +++ b/drivers/md/dm-vdo/message-stats.c @@ -4,6 +4,7 @@ */ #include "dedupe.h" +#include "indexer.h" #include "logger.h" #include "memory-alloc.h" #include "message-stats.h" @@ -430,3 +431,50 @@ int vdo_write_stats(struct vdo *vdo, char *buf, unsigned int maxlen) vdo_free(stats); return VDO_SUCCESS; } + +static void write_index_memory(u32 mem, char **buf, unsigned int *maxlen) +{ + char *prefix = "memorySize : "; + + /* Convert index memory to fractional value */ + if (mem == (u32)UDS_MEMORY_CONFIG_256MB) + write_string(prefix, "0.25, ", NULL, buf, maxlen); + else if (mem == (u32)UDS_MEMORY_CONFIG_512MB) + write_string(prefix, "0.50, ", NULL, buf, maxlen); + else if (mem == (u32)UDS_MEMORY_CONFIG_768MB) + write_string(prefix, "0.75, ", NULL, buf, maxlen); + else + write_u32(prefix, mem, ", ", buf, maxlen); +} + +static void write_index_config(struct index_config *config, char **buf, + unsigned int *maxlen) +{ + write_string("index : ", "{ ", NULL, buf, maxlen); + /* index mem size */ + write_index_memory(config->mem, buf, maxlen); + /* whether the index is sparse or not */ + write_bool("isSparse : ", config->sparse, ", ", buf, maxlen); + write_string(NULL, "}", ", ", buf, maxlen); +} + +int vdo_write_config(struct vdo *vdo, char **buf, unsigned int *maxlen) +{ + struct vdo_config *config = &vdo->states.vdo.config; + + write_string(NULL, "{ ", NULL, buf, maxlen); + /* version */ + write_u32("version : ", 1, ", ", buf, maxlen); + /* physical size */ + write_block_count_t("physicalSize : ", config->physical_blocks * VDO_BLOCK_SIZE, ", ", + buf, maxlen); + /* logical size */ + write_block_count_t("logicalSize : ", config->logical_blocks * VDO_BLOCK_SIZE, ", ", + buf, maxlen); + /* slab size */ + write_block_count_t("slabSize : ", config->slab_size, ", ", buf, maxlen); + /* index config */ + write_index_config(&vdo->geometry.index_config, buf, maxlen); + write_string(NULL, "}", NULL, buf, maxlen); + return VDO_SUCCESS; +} diff --git a/drivers/md/dm-vdo/message-stats.h b/drivers/md/dm-vdo/message-stats.h index f7fceca9acab..f9c95eff569d 100644 --- a/drivers/md/dm-vdo/message-stats.h +++ b/drivers/md/dm-vdo/message-stats.h @@ -8,6 +8,7 @@ #include "types.h" +int vdo_write_config(struct vdo *vdo, char **buf, unsigned int *maxlen); int vdo_write_stats(struct vdo *vdo, char *buf, unsigned int maxlen); #endif /* VDO_MESSAGE_STATS_H */ From f3ff668352c59db2c0ab47f2e86488a70694ed57 Mon Sep 17 00:00:00 2001 From: Susan LeGendre-McGhee Date: Fri, 19 Jul 2024 17:52:32 -0400 Subject: [PATCH 037/655] dm vdo: abort loading dirty VDO with the old recovery journal format Abort the load process with status code VDO_UNSUPPORTED_VERSION without forcing read-only mode when a journal block with the old format version is detected. Forcing the VDO volume into read-only mode and thus requiring a read-only rebuild should only be done when absolutely necessary. Signed-off-by: Susan LeGendre-McGhee Signed-off-by: Matthew Sakai Signed-off-by: Mikulas Patocka --- drivers/md/dm-vdo/dm-vdo-target.c | 24 +++++++++++++++++++++++- drivers/md/dm-vdo/repair.c | 4 +--- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-vdo/dm-vdo-target.c b/drivers/md/dm-vdo/dm-vdo-target.c index 4b94f5718ed7..0e04c2021682 100644 --- a/drivers/md/dm-vdo/dm-vdo-target.c +++ b/drivers/md/dm-vdo/dm-vdo-target.c @@ -2296,6 +2296,14 @@ static void handle_load_error(struct vdo_completion *completion) return; } + if ((completion->result == VDO_UNSUPPORTED_VERSION) && + (vdo->admin.phase == LOAD_PHASE_MAKE_DIRTY)) { + vdo_log_error("Aborting load due to unsupported version"); + vdo->admin.phase = LOAD_PHASE_FINISHED; + load_callback(completion); + return; + } + vdo_log_error_strerror(completion->result, "Entering read-only mode due to load error"); vdo->admin.phase = LOAD_PHASE_WAIT_FOR_READ_ONLY; @@ -2740,6 +2748,19 @@ static int vdo_preresume_registered(struct dm_target *ti, struct vdo *vdo) vdo_log_info("starting device '%s'", device_name); result = perform_admin_operation(vdo, LOAD_PHASE_START, load_callback, handle_load_error, "load"); + if (result == VDO_UNSUPPORTED_VERSION) { + /* + * A component version is not supported. This can happen when the + * recovery journal metadata is in an old version format. Abort the + * load without saving the state. + */ + vdo->suspend_type = VDO_ADMIN_STATE_SUSPENDING; + perform_admin_operation(vdo, SUSPEND_PHASE_START, + suspend_callback, suspend_callback, + "suspend"); + return result; + } + if ((result != VDO_SUCCESS) && (result != VDO_READ_ONLY)) { /* * Something has gone very wrong. Make sure everything has drained and @@ -2811,7 +2832,8 @@ static int vdo_preresume(struct dm_target *ti) vdo_register_thread_device_id(&instance_thread, &vdo->instance); result = vdo_preresume_registered(ti, vdo); - if ((result == VDO_PARAMETER_MISMATCH) || (result == VDO_INVALID_ADMIN_STATE)) + if ((result == VDO_PARAMETER_MISMATCH) || (result == VDO_INVALID_ADMIN_STATE) || + (result == VDO_UNSUPPORTED_VERSION)) result = -EINVAL; vdo_unregister_thread_device_id(); return vdo_status_to_errno(result); diff --git a/drivers/md/dm-vdo/repair.c b/drivers/md/dm-vdo/repair.c index 7e0009d2f67d..8aae78bb8a05 100644 --- a/drivers/md/dm-vdo/repair.c +++ b/drivers/md/dm-vdo/repair.c @@ -1575,9 +1575,7 @@ static int parse_journal_for_recovery(struct repair_completion *repair) if (header.metadata_type == VDO_METADATA_RECOVERY_JOURNAL) { /* This is an old format block, so we need to upgrade */ vdo_log_error_strerror(VDO_UNSUPPORTED_VERSION, - "Recovery journal is in the old format, a read-only rebuild is required."); - vdo_enter_read_only_mode(repair->completion.vdo, - VDO_UNSUPPORTED_VERSION); + "Recovery journal is in the old format. Downgrade and complete recovery, then upgrade with a clean volume"); return VDO_UNSUPPORTED_VERSION; } From 448c4e4eb1901543259c038ed08266c250f5b079 Mon Sep 17 00:00:00 2001 From: Susan LeGendre-McGhee Date: Tue, 30 Jul 2024 17:18:07 -0400 Subject: [PATCH 038/655] dm vdo: force read-only mode for a corrupt recovery journal Ensure the recovery journal does not attempt recovery when blocks with mismatched metadata versions are detected. This check is performed after determining that the blocks are otherwise valid so that it does not interfere with normal recovery. Signed-off-by: Susan LeGendre-McGhee Signed-off-by: Matthew Sakai Signed-off-by: Mikulas Patocka --- drivers/md/dm-vdo/repair.c | 39 ++++++++++++++++++-------------- drivers/md/dm-vdo/status-codes.c | 2 +- drivers/md/dm-vdo/status-codes.h | 2 +- 3 files changed, 24 insertions(+), 19 deletions(-) diff --git a/drivers/md/dm-vdo/repair.c b/drivers/md/dm-vdo/repair.c index 8aae78bb8a05..ffff2c999518 100644 --- a/drivers/md/dm-vdo/repair.c +++ b/drivers/md/dm-vdo/repair.c @@ -1202,17 +1202,14 @@ static bool __must_check is_valid_recovery_journal_block(const struct recovery_j * @journal: The journal to use. * @header: The unpacked block header to check. * @sequence: The expected sequence number. - * @type: The expected metadata type. * * Return: True if the block matches. */ static bool __must_check is_exact_recovery_journal_block(const struct recovery_journal *journal, const struct recovery_block_header *header, - sequence_number_t sequence, - enum vdo_metadata_type type) + sequence_number_t sequence) { - return ((header->metadata_type == type) && - (header->sequence_number == sequence) && + return ((header->sequence_number == sequence) && (is_valid_recovery_journal_block(journal, header, true))); } @@ -1371,7 +1368,8 @@ static void extract_entries_from_block(struct repair_completion *repair, get_recovery_journal_block_header(journal, repair->journal_data, sequence); - if (!is_exact_recovery_journal_block(journal, &header, sequence, format)) { + if (!is_exact_recovery_journal_block(journal, &header, sequence) || + (header.metadata_type != format)) { /* This block is invalid, so skip it. */ return; } @@ -1557,10 +1555,13 @@ static int parse_journal_for_recovery(struct repair_completion *repair) sequence_number_t i, head; bool found_entries = false; struct recovery_journal *journal = repair->completion.vdo->recovery_journal; + struct recovery_block_header header; + enum vdo_metadata_type expected_format; head = min(repair->block_map_head, repair->slab_journal_head); + header = get_recovery_journal_block_header(journal, repair->journal_data, head); + expected_format = header.metadata_type; for (i = head; i <= repair->highest_tail; i++) { - struct recovery_block_header header; journal_entry_count_t block_entries; u8 j; @@ -1572,17 +1573,15 @@ static int parse_journal_for_recovery(struct repair_completion *repair) }; header = get_recovery_journal_block_header(journal, repair->journal_data, i); - if (header.metadata_type == VDO_METADATA_RECOVERY_JOURNAL) { - /* This is an old format block, so we need to upgrade */ - vdo_log_error_strerror(VDO_UNSUPPORTED_VERSION, - "Recovery journal is in the old format. Downgrade and complete recovery, then upgrade with a clean volume"); - return VDO_UNSUPPORTED_VERSION; - } - - if (!is_exact_recovery_journal_block(journal, &header, i, - VDO_METADATA_RECOVERY_JOURNAL_2)) { + if (!is_exact_recovery_journal_block(journal, &header, i)) { /* A bad block header was found so this must be the end of the journal. */ break; + } else if (header.metadata_type != expected_format) { + /* There is a mix of old and new format blocks, so we need to rebuild. */ + vdo_log_error_strerror(VDO_CORRUPT_JOURNAL, + "Recovery journal is in an invalid format, a read-only rebuild is required."); + vdo_enter_read_only_mode(repair->completion.vdo, VDO_CORRUPT_JOURNAL); + return VDO_CORRUPT_JOURNAL; } block_entries = header.entry_count; @@ -1618,8 +1617,14 @@ static int parse_journal_for_recovery(struct repair_completion *repair) break; } - if (!found_entries) + if (!found_entries) { return validate_heads(repair); + } else if (expected_format == VDO_METADATA_RECOVERY_JOURNAL) { + /* All journal blocks have the old format, so we need to upgrade. */ + vdo_log_error_strerror(VDO_UNSUPPORTED_VERSION, + "Recovery journal is in the old format. Downgrade and complete recovery, then upgrade with a clean volume"); + return VDO_UNSUPPORTED_VERSION; + } /* Set the tail to the last valid tail block, if there is one. */ if (repair->tail_recovery_point.sector_count == 0) diff --git a/drivers/md/dm-vdo/status-codes.c b/drivers/md/dm-vdo/status-codes.c index d3493450b169..dd252d660b6d 100644 --- a/drivers/md/dm-vdo/status-codes.c +++ b/drivers/md/dm-vdo/status-codes.c @@ -28,7 +28,7 @@ const struct error_info vdo_status_list[] = { { "VDO_LOCK_ERROR", "A lock is held incorrectly" }, { "VDO_READ_ONLY", "The device is in read-only mode" }, { "VDO_SHUTTING_DOWN", "The device is shutting down" }, - { "VDO_CORRUPT_JOURNAL", "Recovery journal entries corrupted" }, + { "VDO_CORRUPT_JOURNAL", "Recovery journal corrupted" }, { "VDO_TOO_MANY_SLABS", "Exceeds maximum number of slabs supported" }, { "VDO_INVALID_FRAGMENT", "Compressed block fragment is invalid" }, { "VDO_RETRY_AFTER_REBUILD", "Retry operation after rebuilding finishes" }, diff --git a/drivers/md/dm-vdo/status-codes.h b/drivers/md/dm-vdo/status-codes.h index 72da04159f88..426dc8e2ca5d 100644 --- a/drivers/md/dm-vdo/status-codes.h +++ b/drivers/md/dm-vdo/status-codes.h @@ -52,7 +52,7 @@ enum vdo_status_codes { VDO_READ_ONLY, /* the VDO is shutting down */ VDO_SHUTTING_DOWN, - /* the recovery journal has corrupt entries */ + /* the recovery journal has corrupt entries or corrupt metadata */ VDO_CORRUPT_JOURNAL, /* exceeds maximum number of slabs supported */ VDO_TOO_MANY_SLABS, From 013f510d1fce563f37f420bf231b065885a16f21 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Thu, 1 Aug 2024 21:31:21 +0800 Subject: [PATCH 039/655] dm: Remove unused declaration dm_get_rq_mapinfo() Commit ae6ad75e5c3c ("dm: remove unused dm_get_rq_mapinfo()") removed the implementation but leave declaration. Signed-off-by: Yue Haibing Reviewed-by: Damien Le Moal Signed-off-by: Mikulas Patocka --- include/linux/device-mapper.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 53ca3a913d06..8321f65897f3 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -524,7 +524,6 @@ int dm_post_suspending(struct dm_target *ti); int dm_noflush_suspending(struct dm_target *ti); void dm_accept_partial_bio(struct bio *bio, unsigned int n_sectors); void dm_submit_bio_remap(struct bio *clone, struct bio *tgt_clone); -union map_info *dm_get_rq_mapinfo(struct request *rq); #ifdef CONFIG_BLK_DEV_ZONED struct dm_report_zones_args { From 5d3691a8266e77fd2d695064532d0926af974331 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Thu, 8 Aug 2024 13:50:36 +0200 Subject: [PATCH 040/655] dm delay: enhance kernel documentation This commit improves documentation of the dm-delay target. Signed-off-by: Heinz Mauelshagen Signed-off-by: Mikulas Patocka --- .../admin-guide/device-mapper/delay.rst | 43 ++++++++++++++----- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/Documentation/admin-guide/device-mapper/delay.rst b/Documentation/admin-guide/device-mapper/delay.rst index 917ba8c33359..4d667228e744 100644 --- a/Documentation/admin-guide/device-mapper/delay.rst +++ b/Documentation/admin-guide/device-mapper/delay.rst @@ -3,29 +3,52 @@ dm-delay ======== Device-Mapper's "delay" target delays reads and/or writes -and maps them to different devices. +and/or flushs and optionally maps them to different devices. -Parameters:: +Arguments:: [ [ ]] -With separate write parameters, the first set is only used for reads. +Table line has to either have 3, 6 or 9 arguments: + +3: apply offset and delay to read, write and flush operations on device + +6: apply offset and delay to device, also apply write_offset and write_delay + to write and flush operations on optionally different write_device with + optionally different sector offset + +9: same as 6 arguments plus define flush_offset and flush_delay explicitely + on/with optionally different flush_device/flush_offset. + Offsets are specified in sectors. + Delays are specified in milliseconds. + Example scripts =============== :: - #!/bin/sh - # Create device delaying rw operation for 500ms - echo "0 `blockdev --getsz $1` delay $1 0 500" | dmsetup create delayed + # + # Create mapped device named "delayed" delaying read, write and flush operations for 500ms. + # + dmsetup create delayed --table "0 `blockdev --getsz $1` delay $1 0 500" :: - #!/bin/sh - # Create device delaying only write operation for 500ms and - # splitting reads and writes to different devices $1 $2 - echo "0 `blockdev --getsz $1` delay $1 0 0 $2 0 500" | dmsetup create delayed + # + # Create mapped device delaying write and flush operations for 400ms and + # splitting reads to device $1 but writes and flushs to different device $2 + # to different offsets of 2048 and 4096 sectors respectively. + # + dmsetup create delayed --table "0 `blockdev --getsz $1` delay $1 2048 0 $2 4096 400" + +:: + #!/bin/sh + # + # Create mapped device delaying reads for 50ms, writes for 100ms and flushs for 333ms + # onto the same backing device at offset 0 sectors. + # + dmsetup create delayed --table "0 `blockdev --getsz $1` delay $1 0 50 $2 0 100 $1 0 333" From f3631ae11d4694e2befff9dd10dab8cd56033f6c Mon Sep 17 00:00:00 2001 From: Zhang Zekun Date: Mon, 12 Aug 2024 19:53:09 +0800 Subject: [PATCH 041/655] dm: Remove unused declaration and empty definition "dm_zone_map_bio" dm_zone_map_bio() has beed removed since commit f211268ed1f9 ("dm: Use the block layer zone append emulation"), remain the declaration unused in header files. So, let's remove this unused declaration and empty definition. Signed-off-by: Zhang Zekun Reviewed-by: Damien Le Moal Signed-off-by: Mikulas Patocka --- drivers/md/dm.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/md/dm.h b/drivers/md/dm.h index cc466ad5cb1d..8ad782249af8 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -109,7 +109,6 @@ void dm_zone_endio(struct dm_io *io, struct bio *clone); int dm_blk_report_zones(struct gendisk *disk, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); bool dm_is_zone_write(struct mapped_device *md, struct bio *bio); -int dm_zone_map_bio(struct dm_target_io *io); int dm_zone_get_reset_bitmap(struct mapped_device *md, struct dm_table *t, sector_t sector, unsigned int nr_zones, unsigned long *need_reset); @@ -119,10 +118,6 @@ static inline bool dm_is_zone_write(struct mapped_device *md, struct bio *bio) { return false; } -static inline int dm_zone_map_bio(struct dm_target_io *tio) -{ - return DM_MAPIO_KILL; -} #endif /* From 4441686b24a1d7acf9834ca95864d67e3f97666a Mon Sep 17 00:00:00 2001 From: Ingo Franzki Date: Fri, 16 Aug 2024 13:21:33 +0200 Subject: [PATCH 042/655] dm-crypt: Allow to specify the integrity key size as option For the MAC based integrity operation, the integrity key size (i.e. key_mac_size) is currently set to the digest size of the used digest. For wrapped key HMAC algorithms, the key size is independent of the cryptographic key size. So there is no known size of the mac key in such cases. The desired key size can optionally be specified as argument when the dm-crypt device is configured via 'integrity_key_size:%u'. If no integrity_key_size argument is specified, the mac key size is still set to the digest size, as before. Increase version number to 1.28.0 so that support for the new argument can be detected by user space (i.e. cryptsetup). Signed-off-by: Ingo Franzki Reviewed-by: Milan Broz Signed-off-by: Mikulas Patocka --- .../admin-guide/device-mapper/dm-crypt.rst | 4 ++++ drivers/md/dm-crypt.c | 19 +++++++++++++++---- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/Documentation/admin-guide/device-mapper/dm-crypt.rst b/Documentation/admin-guide/device-mapper/dm-crypt.rst index 552c9155165d..cde1d5594b0d 100644 --- a/Documentation/admin-guide/device-mapper/dm-crypt.rst +++ b/Documentation/admin-guide/device-mapper/dm-crypt.rst @@ -160,6 +160,10 @@ iv_large_sectors The must be multiple of (in 512 bytes units) if this flag is specified. +integrity_key_size: + Use an integrity key of size instead of using an integrity key size + of the digest size of the used HMAC algorithm. + Module parameters:: diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 348b4b26c272..d5533b43054e 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -147,6 +147,7 @@ enum cipher_flags { CRYPT_MODE_INTEGRITY_AEAD, /* Use authenticated mode for cipher */ CRYPT_IV_LARGE_SECTORS, /* Calculate IV from sector_size, not 512B sectors */ CRYPT_ENCRYPT_PREPROCESS, /* Must preprocess data for encryption (elephant) */ + CRYPT_KEY_MAC_SIZE_SET, /* The integrity_key_size option was used */ }; /* @@ -2937,7 +2938,8 @@ static int crypt_ctr_auth_cipher(struct crypt_config *cc, char *cipher_api) if (IS_ERR(mac)) return PTR_ERR(mac); - cc->key_mac_size = crypto_ahash_digestsize(mac); + if (!test_bit(CRYPT_KEY_MAC_SIZE_SET, &cc->cipher_flags)) + cc->key_mac_size = crypto_ahash_digestsize(mac); crypto_free_ahash(mac); cc->authenc_key = kmalloc(crypt_authenckey_size(cc), GFP_KERNEL); @@ -3219,6 +3221,13 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar cc->cipher_auth = kstrdup(sval, GFP_KERNEL); if (!cc->cipher_auth) return -ENOMEM; + } else if (sscanf(opt_string, "integrity_key_size:%u%c", &val, &dummy) == 1) { + if (!val) { + ti->error = "Invalid integrity_key_size argument"; + return -EINVAL; + } + cc->key_mac_size = val; + set_bit(CRYPT_KEY_MAC_SIZE_SET, &cc->cipher_flags); } else if (sscanf(opt_string, "sector_size:%hu%c", &cc->sector_size, &dummy) == 1) { if (cc->sector_size < (1 << SECTOR_SHIFT) || cc->sector_size > 4096 || @@ -3607,10 +3616,10 @@ static void crypt_status(struct dm_target *ti, status_type_t type, num_feature_args += test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags); num_feature_args += test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags); num_feature_args += test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags); + num_feature_args += !!cc->used_tag_size; num_feature_args += cc->sector_size != (1 << SECTOR_SHIFT); num_feature_args += test_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags); - if (cc->used_tag_size) - num_feature_args++; + num_feature_args += test_bit(CRYPT_KEY_MAC_SIZE_SET, &cc->cipher_flags); if (num_feature_args) { DMEMIT(" %d", num_feature_args); if (ti->num_discard_bios) @@ -3631,6 +3640,8 @@ static void crypt_status(struct dm_target *ti, status_type_t type, DMEMIT(" sector_size:%d", cc->sector_size); if (test_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags)) DMEMIT(" iv_large_sectors"); + if (test_bit(CRYPT_KEY_MAC_SIZE_SET, &cc->cipher_flags)) + DMEMIT(" integrity_key_size:%u", cc->key_mac_size); } break; @@ -3758,7 +3769,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type crypt_target = { .name = "crypt", - .version = {1, 27, 0}, + .version = {1, 28, 0}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, From d11f2a1ab8f6dcfda827f8f0c47d8483b8669165 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 21 Aug 2024 18:48:19 +0300 Subject: [PATCH 043/655] driver core: Sort headers Sort the headers in alphabetic order in order to ease the maintenance for this part. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240821154839.604259-2-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 6d3897492285..54f10a8325f8 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -9,29 +9,29 @@ */ #include +#include #include #include +#include /* for dma_default_coherent */ #include #include #include +#include #include #include -#include -#include +#include +#include #include #include #include -#include -#include #include -#include #include -#include #include +#include +#include #include #include #include -#include /* for dma_default_coherent */ #include "base.h" #include "physical_location.h" From a355a4655ec660fc68b60b909776290cb88e1124 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 21 Aug 2024 18:48:20 +0300 Subject: [PATCH 044/655] driver core: Use kasprintf() instead of fixed buffer formatting Improve readability and maintainability by replacing a hardcoded string allocation and formatting by the use of the kasprintf() helper. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240821154839.604259-3-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 70 +++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 38 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 54f10a8325f8..67f3da47f63c 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include /* for dma_default_coherent */ @@ -563,24 +564,11 @@ static struct class devlink_class = { static int devlink_add_symlinks(struct device *dev) { + char *buf_con __free(kfree) = NULL, *buf_sup __free(kfree) = NULL; int ret; - size_t len; struct device_link *link = to_devlink(dev); struct device *sup = link->supplier; struct device *con = link->consumer; - char *buf; - - len = max(strlen(dev_bus_name(sup)) + strlen(dev_name(sup)), - strlen(dev_bus_name(con)) + strlen(dev_name(con))); - len += strlen(":"); - /* - * we kzalloc() memory for symlink name of both supplier and - * consumer, so explicitly take into account both prefix. - */ - len += max(strlen("supplier:"), strlen("consumer:")) + 1; - buf = kzalloc(len, GFP_KERNEL); - if (!buf) - return -ENOMEM; ret = sysfs_create_link(&link->link_dev.kobj, &sup->kobj, "supplier"); if (ret) @@ -590,58 +578,64 @@ static int devlink_add_symlinks(struct device *dev) if (ret) goto err_con; - snprintf(buf, len, "consumer:%s:%s", dev_bus_name(con), dev_name(con)); - ret = sysfs_create_link(&sup->kobj, &link->link_dev.kobj, buf); + buf_con = kasprintf(GFP_KERNEL, "consumer:%s:%s", dev_bus_name(con), dev_name(con)); + if (!buf_con) { + ret = -ENOMEM; + goto err_con_dev; + } + + ret = sysfs_create_link(&sup->kobj, &link->link_dev.kobj, buf_con); if (ret) goto err_con_dev; - snprintf(buf, len, "supplier:%s:%s", dev_bus_name(sup), dev_name(sup)); - ret = sysfs_create_link(&con->kobj, &link->link_dev.kobj, buf); + buf_sup = kasprintf(GFP_KERNEL, "supplier:%s:%s", dev_bus_name(sup), dev_name(sup)); + if (!buf_sup) { + ret = -ENOMEM; + goto err_sup_dev; + } + + ret = sysfs_create_link(&con->kobj, &link->link_dev.kobj, buf_sup); if (ret) goto err_sup_dev; goto out; err_sup_dev: - snprintf(buf, len, "consumer:%s:%s", dev_bus_name(con), dev_name(con)); - sysfs_remove_link(&sup->kobj, buf); + sysfs_remove_link(&sup->kobj, buf_con); err_con_dev: sysfs_remove_link(&link->link_dev.kobj, "consumer"); err_con: sysfs_remove_link(&link->link_dev.kobj, "supplier"); out: - kfree(buf); return ret; } static void devlink_remove_symlinks(struct device *dev) { + char *buf_con __free(kfree) = NULL, *buf_sup __free(kfree) = NULL; struct device_link *link = to_devlink(dev); - size_t len; struct device *sup = link->supplier; struct device *con = link->consumer; - char *buf; sysfs_remove_link(&link->link_dev.kobj, "consumer"); sysfs_remove_link(&link->link_dev.kobj, "supplier"); - len = max(strlen(dev_bus_name(sup)) + strlen(dev_name(sup)), - strlen(dev_bus_name(con)) + strlen(dev_name(con))); - len += strlen(":"); - len += max(strlen("supplier:"), strlen("consumer:")) + 1; - buf = kzalloc(len, GFP_KERNEL); - if (!buf) { - WARN(1, "Unable to properly free device link symlinks!\n"); - return; + if (device_is_registered(con)) { + buf_sup = kasprintf(GFP_KERNEL, "supplier:%s:%s", dev_bus_name(sup), dev_name(sup)); + if (!buf_sup) + goto out; + sysfs_remove_link(&con->kobj, buf_sup); } - if (device_is_registered(con)) { - snprintf(buf, len, "supplier:%s:%s", dev_bus_name(sup), dev_name(sup)); - sysfs_remove_link(&con->kobj, buf); - } - snprintf(buf, len, "consumer:%s:%s", dev_bus_name(con), dev_name(con)); - sysfs_remove_link(&sup->kobj, buf); - kfree(buf); + buf_con = kasprintf(GFP_KERNEL, "consumer:%s:%s", dev_bus_name(con), dev_name(con)); + if (!buf_con) + goto out; + sysfs_remove_link(&sup->kobj, buf_con); + + return; + +out: + WARN(1, "Unable to properly free device link symlinks!\n"); } static struct class_interface devlink_class_intf = { From adcae2048df15ae9647d792b09f8742d6ebe459b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 21 Aug 2024 18:48:21 +0300 Subject: [PATCH 045/655] driver core: Use guards for simple mutex locks Guards can help to make the code more readable. So use it wherever they do so. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240821154839.604259-4-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 50 ++++++++++++++++++++------------------------- 1 file changed, 22 insertions(+), 28 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 67f3da47f63c..271a88a6311e 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -98,12 +98,9 @@ static int __fwnode_link_add(struct fwnode_handle *con, int fwnode_link_add(struct fwnode_handle *con, struct fwnode_handle *sup, u8 flags) { - int ret; + guard(mutex)(&fwnode_link_lock); - mutex_lock(&fwnode_link_lock); - ret = __fwnode_link_add(con, sup, flags); - mutex_unlock(&fwnode_link_lock); - return ret; + return __fwnode_link_add(con, sup, flags); } /** @@ -144,10 +141,10 @@ static void fwnode_links_purge_suppliers(struct fwnode_handle *fwnode) { struct fwnode_link *link, *tmp; - mutex_lock(&fwnode_link_lock); + guard(mutex)(&fwnode_link_lock); + list_for_each_entry_safe(link, tmp, &fwnode->suppliers, c_hook) __fwnode_link_del(link); - mutex_unlock(&fwnode_link_lock); } /** @@ -160,10 +157,10 @@ static void fwnode_links_purge_consumers(struct fwnode_handle *fwnode) { struct fwnode_link *link, *tmp; - mutex_lock(&fwnode_link_lock); + guard(mutex)(&fwnode_link_lock); + list_for_each_entry_safe(link, tmp, &fwnode->consumers, s_hook) __fwnode_link_del(link); - mutex_unlock(&fwnode_link_lock); } /** @@ -1059,20 +1056,16 @@ int device_links_check_suppliers(struct device *dev) * Device waiting for supplier to become available is not allowed to * probe. */ - mutex_lock(&fwnode_link_lock); - sup_fw = fwnode_links_check_suppliers(dev->fwnode); - if (sup_fw) { - if (!dev_is_best_effort(dev)) { - fwnode_ret = -EPROBE_DEFER; - dev_err_probe(dev, -EPROBE_DEFER, - "wait for supplier %pfwf\n", sup_fw); - } else { - fwnode_ret = -EAGAIN; + scoped_guard(mutex, &fwnode_link_lock) { + sup_fw = fwnode_links_check_suppliers(dev->fwnode); + if (sup_fw) { + if (dev_is_best_effort(dev)) + fwnode_ret = -EAGAIN; + else + return dev_err_probe(dev, -EPROBE_DEFER, + "wait for supplier %pfwf\n", sup_fw); } } - mutex_unlock(&fwnode_link_lock); - if (fwnode_ret == -EPROBE_DEFER) - return fwnode_ret; device_links_write_lock(); @@ -1247,9 +1240,8 @@ static ssize_t waiting_for_supplier_show(struct device *dev, bool val; device_lock(dev); - mutex_lock(&fwnode_link_lock); - val = !!fwnode_links_check_suppliers(dev->fwnode); - mutex_unlock(&fwnode_link_lock); + scoped_guard(mutex, &fwnode_link_lock) + val = !!fwnode_links_check_suppliers(dev->fwnode); device_unlock(dev); return sysfs_emit(buf, "%u\n", val); } @@ -1322,13 +1314,15 @@ void device_links_driver_bound(struct device *dev) */ if (dev->fwnode && dev->fwnode->dev == dev) { struct fwnode_handle *child; + fwnode_links_purge_suppliers(dev->fwnode); - mutex_lock(&fwnode_link_lock); + + guard(mutex)(&fwnode_link_lock); + fwnode_for_each_available_child_node(dev->fwnode, child) __fw_devlink_pickup_dangling_consumers(child, dev->fwnode); __fw_devlink_link_to_consumers(dev); - mutex_unlock(&fwnode_link_lock); } device_remove_file(dev, &dev_attr_waiting_for_supplier); @@ -2337,10 +2331,10 @@ static void fw_devlink_link_device(struct device *dev) fw_devlink_parse_fwtree(fwnode); - mutex_lock(&fwnode_link_lock); + guard(mutex)(&fwnode_link_lock); + __fw_devlink_link_to_consumers(dev); __fw_devlink_link_to_suppliers(dev, fwnode); - mutex_unlock(&fwnode_link_lock); } /* Device links support end. */ From d1363030d824b244744399ddd85a52cf615dee4c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 21 Aug 2024 18:48:22 +0300 Subject: [PATCH 046/655] driver core: Make use of returned value of dev_err_probe() Instead of assigning ret explicitly to the same value that is supplied to dev_err_probe(), make use of returned value of the latter. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240821154839.604259-5-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 271a88a6311e..980c08901cd0 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -1084,10 +1084,8 @@ int device_links_check_suppliers(struct device *dev) } device_links_missing_supplier(dev); - dev_err_probe(dev, -EPROBE_DEFER, - "supplier %s not ready\n", - dev_name(link->supplier)); - ret = -EPROBE_DEFER; + ret = dev_err_probe(dev, -EPROBE_DEFER, + "supplier %s not ready\n", dev_name(link->supplier)); break; } WRITE_ONCE(link->status, DL_STATE_CONSUMER_PROBE); From 888f67e621dda5c2804a696524e28d0ca4cf0a80 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 21 Aug 2024 18:48:23 +0300 Subject: [PATCH 047/655] driver core: Use 2-argument strscpy() Use 2-argument strscpy(), which is not only shorter but also provides an additional check that destination buffer is an array. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240821154839.604259-6-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 980c08901cd0..4bc8b88d697e 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -4867,7 +4867,7 @@ set_dev_info(const struct device *dev, struct dev_printk_info *dev_info) else return; - strscpy(dev_info->subsystem, subsys, sizeof(dev_info->subsystem)); + strscpy(dev_info->subsystem, subsys); /* * Add device identifier DEVICE=: From 02c0207ecdcc9abdf3442676154f8fd2be0f649a Mon Sep 17 00:00:00 2001 From: Yuesong Li Date: Thu, 22 Aug 2024 10:14:00 +0800 Subject: [PATCH 048/655] dm bufio: Remove NULL check of list_entry() list_entry() will never return a NULL pointer, thus remove the check. Signed-off-by: Yuesong Li Signed-off-by: Mikulas Patocka --- drivers/md/dm-bufio.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 098bf526136c..d478aafa02c9 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -529,9 +529,6 @@ static struct dm_buffer *list_to_buffer(struct list_head *l) { struct lru_entry *le = list_entry(l, struct lru_entry, list); - if (!le) - return NULL; - return le_to_buffer(le); } From 1448d4a935abb89942d0af7864480db349433079 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Wed, 14 Aug 2024 22:33:46 +0200 Subject: [PATCH 049/655] KVM: x86: Optimize local variable in start_sw_tscdeadline() Change the data type of the local variable this_tsc_khz to u32 because virtual_tsc_khz is also declared as u32. Since do_div() casts the divisor to u32 anyway, changing the data type of this_tsc_khz to u32 also removes the following Coccinelle/coccicheck warning reported by do_div.cocci: WARNING: do_div() does a 64-by-32 division, please consider using div64_ul instead Signed-off-by: Thorsten Blum Link: https://lore.kernel.org/r/20240814203345.2234-2-thorsten.blum@toblux.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/lapic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 5bb481aefcbc..d77cd3b87e85 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1944,7 +1944,7 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic) u64 ns = 0; ktime_t expire; struct kvm_vcpu *vcpu = apic->vcpu; - unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz; + u32 this_tsc_khz = vcpu->arch.virtual_tsc_khz; unsigned long flags; ktime_t now; From 1c450ffef589383f743d798666703687fc2e582b Mon Sep 17 00:00:00 2001 From: Tao Su Date: Mon, 19 Aug 2024 14:23:27 +0800 Subject: [PATCH 050/655] KVM: x86: Advertise AVX10.1 CPUID to userspace Advertise AVX10.1 related CPUIDs, i.e. report AVX10 support bit via CPUID.(EAX=07H, ECX=01H):EDX[bit 19] and new CPUID leaf 0x24H so that guest OS and applications can query the AVX10.1 CPUIDs directly. Intel AVX10 represents the first major new vector ISA since the introduction of Intel AVX512, which will establish a common, converged vector instruction set across all Intel architectures[1]. AVX10.1 is an early version of AVX10, that enumerates the Intel AVX512 instruction set at 128, 256, and 512 bits which is enabled on Granite Rapids. I.e., AVX10.1 is only a new CPUID enumeration with no new functionality. New features, e.g. Embedded Rounding and Suppress All Exceptions (SAE) will be introduced in AVX10.2. Advertising AVX10.1 is safe because there is nothing to enable for AVX10.1, i.e. it's purely a new way to enumerate support, thus there will never be anything for the kernel to enable. Note just the CPUID checking is changed when using AVX512 related instructions, e.g. if using one AVX512 instruction needs to check (AVX512 AND AVX512DQ), it can check ((AVX512 AND AVX512DQ) OR AVX10.1) after checking XCR0[7:5]. The versions of AVX10 are expected to be inclusive, e.g. version N+1 is a superset of version N. Per the spec, the version can never be 0, just advertise AVX10.1 if it's supported in hardware. Moreover, advertising AVX10_{128,256,512} needs to land in the same commit as advertising basic AVX10.1 support, otherwise KVM would advertise an impossible CPU model. E.g. a CPU with AVX512 but not AVX10.1/512 is impossible per the SDM. As more and more AVX related CPUIDs are added (it would have resulted in around 40-50 CPUID flags when developing AVX10), the versioning approach is introduced. But incrementing version numbers are bad for virtualization. E.g. if AVX10.2 has a feature that shouldn't be enumerated to guests for whatever reason, then KVM can't enumerate any "later" features either, because the only way to hide the problematic AVX10.2 feature is to set the version to AVX10.1 or lower[2]. But most AVX features are just passed through and don't have virtualization controls, so AVX10 should not be problematic in practice, so long as Intel honors their promise that future versions will be supersets of past versions. [1] https://cdrdv2.intel.com/v1/dl/getContent/784267 [2] https://lore.kernel.org/all/Zkz5Ak0PQlAN8DxK@google.com/ Suggested-by: Sean Christopherson Signed-off-by: Tao Su Link: https://lore.kernel.org/r/20240819062327.3269720-1-tao1.su@linux.intel.com [sean: minor changelog tweaks] Signed-off-by: Sean Christopherson --- arch/x86/include/asm/cpuid.h | 1 + arch/x86/kvm/cpuid.c | 30 ++++++++++++++++++++++++++++-- arch/x86/kvm/reverse_cpuid.h | 8 ++++++++ 3 files changed, 37 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpuid.h b/arch/x86/include/asm/cpuid.h index 6b122a31da06..aa21c105eef1 100644 --- a/arch/x86/include/asm/cpuid.h +++ b/arch/x86/include/asm/cpuid.h @@ -179,6 +179,7 @@ static __always_inline bool cpuid_function_is_indexed(u32 function) case 0x1d: case 0x1e: case 0x1f: + case 0x24: case 0x8000001d: return true; } diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 2617be544480..41786b834b16 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -705,7 +705,7 @@ void kvm_set_cpu_caps(void) kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX, F(AVX_VNNI_INT8) | F(AVX_NE_CONVERT) | F(PREFETCHITI) | - F(AMX_COMPLEX) + F(AMX_COMPLEX) | F(AVX10) ); kvm_cpu_cap_init_kvm_defined(CPUID_7_2_EDX, @@ -721,6 +721,10 @@ void kvm_set_cpu_caps(void) SF(SGX1) | SF(SGX2) | SF(SGX_EDECCSSA) ); + kvm_cpu_cap_init_kvm_defined(CPUID_24_0_EBX, + F(AVX10_128) | F(AVX10_256) | F(AVX10_512) + ); + kvm_cpu_cap_mask(CPUID_8000_0001_ECX, F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ | F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | @@ -949,7 +953,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) switch (function) { case 0: /* Limited to the highest leaf implemented in KVM. */ - entry->eax = min(entry->eax, 0x1fU); + entry->eax = min(entry->eax, 0x24U); break; case 1: cpuid_entry_override(entry, CPUID_1_EDX); @@ -1174,6 +1178,28 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) break; } break; + case 0x24: { + u8 avx10_version; + + if (!kvm_cpu_cap_has(X86_FEATURE_AVX10)) { + entry->eax = entry->ebx = entry->ecx = entry->edx = 0; + break; + } + + /* + * The AVX10 version is encoded in EBX[7:0]. Note, the version + * is guaranteed to be >=1 if AVX10 is supported. Note #2, the + * version needs to be captured before overriding EBX features! + */ + avx10_version = min_t(u8, entry->ebx & 0xff, 1); + cpuid_entry_override(entry, CPUID_24_0_EBX); + entry->ebx |= avx10_version; + + entry->eax = 0; + entry->ecx = 0; + entry->edx = 0; + break; + } case KVM_CPUID_SIGNATURE: { const u32 *sigptr = (const u32 *)KVM_SIGNATURE; entry->eax = KVM_CPUID_FEATURES; diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index 2f4e155080ba..0d17d6b70639 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -17,6 +17,7 @@ enum kvm_only_cpuid_leafs { CPUID_8000_0007_EDX, CPUID_8000_0022_EAX, CPUID_7_2_EDX, + CPUID_24_0_EBX, NR_KVM_CPU_CAPS, NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS, @@ -46,6 +47,7 @@ enum kvm_only_cpuid_leafs { #define X86_FEATURE_AVX_NE_CONVERT KVM_X86_FEATURE(CPUID_7_1_EDX, 5) #define X86_FEATURE_AMX_COMPLEX KVM_X86_FEATURE(CPUID_7_1_EDX, 8) #define X86_FEATURE_PREFETCHITI KVM_X86_FEATURE(CPUID_7_1_EDX, 14) +#define X86_FEATURE_AVX10 KVM_X86_FEATURE(CPUID_7_1_EDX, 19) /* Intel-defined sub-features, CPUID level 0x00000007:2 (EDX) */ #define X86_FEATURE_INTEL_PSFD KVM_X86_FEATURE(CPUID_7_2_EDX, 0) @@ -55,6 +57,11 @@ enum kvm_only_cpuid_leafs { #define KVM_X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4) #define X86_FEATURE_MCDT_NO KVM_X86_FEATURE(CPUID_7_2_EDX, 5) +/* Intel-defined sub-features, CPUID level 0x00000024:0 (EBX) */ +#define X86_FEATURE_AVX10_128 KVM_X86_FEATURE(CPUID_24_0_EBX, 16) +#define X86_FEATURE_AVX10_256 KVM_X86_FEATURE(CPUID_24_0_EBX, 17) +#define X86_FEATURE_AVX10_512 KVM_X86_FEATURE(CPUID_24_0_EBX, 18) + /* CPUID level 0x80000007 (EDX). */ #define KVM_X86_FEATURE_CONSTANT_TSC KVM_X86_FEATURE(CPUID_8000_0007_EDX, 8) @@ -90,6 +97,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_8000_0021_EAX] = {0x80000021, 0, CPUID_EAX}, [CPUID_8000_0022_EAX] = {0x80000022, 0, CPUID_EAX}, [CPUID_7_2_EDX] = { 7, 2, CPUID_EDX}, + [CPUID_24_0_EBX] = { 0x24, 0, CPUID_EBX}, }; /* From e7e80b66fb242a63cdfc3d3534cff62a5e293185 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 5 Jun 2024 16:19:09 -0700 Subject: [PATCH 051/655] x86/cpu: KVM: Add common defines for architectural memory types (PAT, MTRRs, etc.) Add defines for the architectural memory types that can be shoved into various MSRs and registers, e.g. MTRRs, PAT, VMX capabilities MSRs, EPTPs, etc. While most MSRs/registers support only a subset of all memory types, the values themselves are architectural and identical across all users. Leave the goofy MTRR_TYPE_* definitions as-is since they are in a uapi header, but add compile-time assertions to connect the dots (and sanity check that the msr-index.h values didn't get fat-fingered). Keep the VMX_EPTP_MT_* defines so that it's slightly more obvious that the EPTP holds a single memory type in 3 of its 64 bits; those bits just happen to be 2:0, i.e. don't need to be shifted. Opportunistically use X86_MEMTYPE_WB instead of an open coded '6' in setup_vmcs_config(). No functional change intended. Reviewed-by: Thomas Gleixner Acked-by: Kai Huang Reviewed-by: Xiaoyao Li Reviewed-by: Kai Huang Link: https://lore.kernel.org/r/20240605231918.2915961-2-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/include/asm/msr-index.h | 15 ++++++++++++++- arch/x86/include/asm/vmx.h | 5 +++-- arch/x86/kernel/cpu/mtrr/mtrr.c | 6 ++++++ arch/x86/kvm/vmx/nested.c | 2 +- arch/x86/kvm/vmx/vmx.c | 2 +- arch/x86/mm/pat/memtype.c | 33 ++++++++++++-------------------- 6 files changed, 37 insertions(+), 26 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 82c6a4d350e0..bb43a07616e6 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -36,6 +36,20 @@ #define EFER_FFXSR (1<<_EFER_FFXSR) #define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS) +/* + * Architectural memory types that are common to MTRRs, PAT, VMX MSRs, etc. + * Most MSRs support/allow only a subset of memory types, but the values + * themselves are common across all relevant MSRs. + */ +#define X86_MEMTYPE_UC 0ull /* Uncacheable, a.k.a. Strong Uncacheable */ +#define X86_MEMTYPE_WC 1ull /* Write Combining */ +/* RESERVED 2 */ +/* RESERVED 3 */ +#define X86_MEMTYPE_WT 4ull /* Write Through */ +#define X86_MEMTYPE_WP 5ull /* Write Protected */ +#define X86_MEMTYPE_WB 6ull /* Write Back */ +#define X86_MEMTYPE_UC_MINUS 7ull /* Weak Uncacheabled (PAT only) */ + /* FRED MSRs */ #define MSR_IA32_FRED_RSP0 0x1cc /* Level 0 stack pointer */ #define MSR_IA32_FRED_RSP1 0x1cd /* Level 1 stack pointer */ @@ -1163,7 +1177,6 @@ #define VMX_BASIC_64 0x0001000000000000LLU #define VMX_BASIC_MEM_TYPE_SHIFT 50 #define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU -#define VMX_BASIC_MEM_TYPE_WB 6LLU #define VMX_BASIC_INOUT 0x0040000000000000LLU /* Resctrl MSRs: */ diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index d77a31039f24..e531d8d80a11 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -508,9 +508,10 @@ enum vmcs_field { #define VMX_EPTP_PWL_4 0x18ull #define VMX_EPTP_PWL_5 0x20ull #define VMX_EPTP_AD_ENABLE_BIT (1ull << 6) +/* The EPTP memtype is encoded in bits 2:0, i.e. doesn't need to be shifted. */ #define VMX_EPTP_MT_MASK 0x7ull -#define VMX_EPTP_MT_WB 0x6ull -#define VMX_EPTP_MT_UC 0x0ull +#define VMX_EPTP_MT_WB X86_MEMTYPE_WB +#define VMX_EPTP_MT_UC X86_MEMTYPE_UC #define VMX_EPT_READABLE_MASK 0x1ull #define VMX_EPT_WRITABLE_MASK 0x2ull #define VMX_EPT_EXECUTABLE_MASK 0x4ull diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c index 2a2fc14955cd..989d368be04f 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.c +++ b/arch/x86/kernel/cpu/mtrr/mtrr.c @@ -55,6 +55,12 @@ #include "mtrr.h" +static_assert(X86_MEMTYPE_UC == MTRR_TYPE_UNCACHABLE); +static_assert(X86_MEMTYPE_WC == MTRR_TYPE_WRCOMB); +static_assert(X86_MEMTYPE_WT == MTRR_TYPE_WRTHROUGH); +static_assert(X86_MEMTYPE_WP == MTRR_TYPE_WRPROT); +static_assert(X86_MEMTYPE_WB == MTRR_TYPE_WRBACK); + /* arch_phys_wc_add returns an MTRR register index plus this offset. */ #define MTRR_TO_PHYS_WC_OFFSET 1000 diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 2392a7ef254d..504fe5ffd47b 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -7070,7 +7070,7 @@ static void nested_vmx_setup_basic(struct nested_vmx_msrs *msrs) VMCS12_REVISION | VMX_BASIC_TRUE_CTLS | ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) | - (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT); + (X86_MEMTYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT); if (cpu_has_vmx_basic_inout()) msrs->basic |= VMX_BASIC_INOUT; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index f18c2d8c7476..a4d077db04cf 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2747,7 +2747,7 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf, #endif /* Require Write-Back (WB) memory type for VMCS accesses. */ - if (((vmx_msr_high >> 18) & 15) != 6) + if (((vmx_msr_high >> 18) & 15) != X86_MEMTYPE_WB) return -EIO; rdmsrl(MSR_IA32_VMX_MISC, misc_msr); diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index bdc2a240c2aa..15b888ebaf17 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -176,15 +176,6 @@ static inline void set_page_memtype(struct page *pg, } #endif -enum { - PAT_UC = 0, /* uncached */ - PAT_WC = 1, /* Write combining */ - PAT_WT = 4, /* Write Through */ - PAT_WP = 5, /* Write Protected */ - PAT_WB = 6, /* Write Back (default) */ - PAT_UC_MINUS = 7, /* UC, but can be overridden by MTRR */ -}; - #define CM(c) (_PAGE_CACHE_MODE_ ## c) static enum page_cache_mode __init pat_get_cache_mode(unsigned int pat_val, @@ -194,13 +185,13 @@ static enum page_cache_mode __init pat_get_cache_mode(unsigned int pat_val, char *cache_mode; switch (pat_val) { - case PAT_UC: cache = CM(UC); cache_mode = "UC "; break; - case PAT_WC: cache = CM(WC); cache_mode = "WC "; break; - case PAT_WT: cache = CM(WT); cache_mode = "WT "; break; - case PAT_WP: cache = CM(WP); cache_mode = "WP "; break; - case PAT_WB: cache = CM(WB); cache_mode = "WB "; break; - case PAT_UC_MINUS: cache = CM(UC_MINUS); cache_mode = "UC- "; break; - default: cache = CM(WB); cache_mode = "WB "; break; + case X86_MEMTYPE_UC: cache = CM(UC); cache_mode = "UC "; break; + case X86_MEMTYPE_WC: cache = CM(WC); cache_mode = "WC "; break; + case X86_MEMTYPE_WT: cache = CM(WT); cache_mode = "WT "; break; + case X86_MEMTYPE_WP: cache = CM(WP); cache_mode = "WP "; break; + case X86_MEMTYPE_WB: cache = CM(WB); cache_mode = "WB "; break; + case X86_MEMTYPE_UC_MINUS: cache = CM(UC_MINUS); cache_mode = "UC- "; break; + default: cache = CM(WB); cache_mode = "WB "; break; } memcpy(msg, cache_mode, 4); @@ -257,11 +248,11 @@ void pat_cpu_init(void) void __init pat_bp_init(void) { struct cpuinfo_x86 *c = &boot_cpu_data; -#define PAT(p0, p1, p2, p3, p4, p5, p6, p7) \ - (((u64)PAT_ ## p0) | ((u64)PAT_ ## p1 << 8) | \ - ((u64)PAT_ ## p2 << 16) | ((u64)PAT_ ## p3 << 24) | \ - ((u64)PAT_ ## p4 << 32) | ((u64)PAT_ ## p5 << 40) | \ - ((u64)PAT_ ## p6 << 48) | ((u64)PAT_ ## p7 << 56)) +#define PAT(p0, p1, p2, p3, p4, p5, p6, p7) \ + ((X86_MEMTYPE_ ## p0) | (X86_MEMTYPE_ ## p1 << 8) | \ + (X86_MEMTYPE_ ## p2 << 16) | (X86_MEMTYPE_ ## p3 << 24) | \ + (X86_MEMTYPE_ ## p4 << 32) | (X86_MEMTYPE_ ## p5 << 40) | \ + (X86_MEMTYPE_ ## p6 << 48) | (X86_MEMTYPE_ ## p7 << 56)) if (!IS_ENABLED(CONFIG_X86_PAT)) From beb2e446046f8dd96bbeed3267b5f26bf1926ef9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 5 Jun 2024 16:19:10 -0700 Subject: [PATCH 052/655] x86/cpu: KVM: Move macro to encode PAT value to common header Move pat/memtype.c's PAT() macro to msr-index.h as PAT_VALUE(), and use it in KVM to define the default (Power-On / RESET) PAT value instead of open coding an inscrutable magic number. No functional change intended. Reviewed-by: Xiaoyao Li Reviewed-by: Kai Huang Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240605231918.2915961-3-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/include/asm/msr-index.h | 6 ++++++ arch/x86/kvm/x86.h | 3 ++- arch/x86/mm/pat/memtype.c | 13 +++---------- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index bb43a07616e6..f6eb3460e169 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -377,6 +377,12 @@ #define MSR_IA32_CR_PAT 0x00000277 +#define PAT_VALUE(p0, p1, p2, p3, p4, p5, p6, p7) \ + ((X86_MEMTYPE_ ## p0) | (X86_MEMTYPE_ ## p1 << 8) | \ + (X86_MEMTYPE_ ## p2 << 16) | (X86_MEMTYPE_ ## p3 << 24) | \ + (X86_MEMTYPE_ ## p4 << 32) | (X86_MEMTYPE_ ## p5 << 40) | \ + (X86_MEMTYPE_ ## p6 << 48) | (X86_MEMTYPE_ ## p7 << 56)) + #define MSR_IA32_DEBUGCTLMSR 0x000001d9 #define MSR_IA32_LASTBRANCHFROMIP 0x000001db #define MSR_IA32_LASTBRANCHTOIP 0x000001dc diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 50596f6f8320..5585c2ef147a 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -103,7 +103,8 @@ static inline unsigned int __shrink_ple_window(unsigned int val, return max(val, min); } -#define MSR_IA32_CR_PAT_DEFAULT 0x0007040600070406ULL +#define MSR_IA32_CR_PAT_DEFAULT \ + PAT_VALUE(WB, WT, UC_MINUS, UC, WB, WT, UC_MINUS, UC) void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu); int kvm_check_nested_events(struct kvm_vcpu *vcpu); diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index 15b888ebaf17..6c4e29457c10 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -248,12 +248,6 @@ void pat_cpu_init(void) void __init pat_bp_init(void) { struct cpuinfo_x86 *c = &boot_cpu_data; -#define PAT(p0, p1, p2, p3, p4, p5, p6, p7) \ - ((X86_MEMTYPE_ ## p0) | (X86_MEMTYPE_ ## p1 << 8) | \ - (X86_MEMTYPE_ ## p2 << 16) | (X86_MEMTYPE_ ## p3 << 24) | \ - (X86_MEMTYPE_ ## p4 << 32) | (X86_MEMTYPE_ ## p5 << 40) | \ - (X86_MEMTYPE_ ## p6 << 48) | (X86_MEMTYPE_ ## p7 << 56)) - if (!IS_ENABLED(CONFIG_X86_PAT)) pr_info_once("x86/PAT: PAT support disabled because CONFIG_X86_PAT is disabled in the kernel.\n"); @@ -284,7 +278,7 @@ void __init pat_bp_init(void) * NOTE: When WC or WP is used, it is redirected to UC- per * the default setup in __cachemode2pte_tbl[]. */ - pat_msr_val = PAT(WB, WT, UC_MINUS, UC, WB, WT, UC_MINUS, UC); + pat_msr_val = PAT_VALUE(WB, WT, UC_MINUS, UC, WB, WT, UC_MINUS, UC); } /* @@ -319,7 +313,7 @@ void __init pat_bp_init(void) * NOTE: When WT or WP is used, it is redirected to UC- per * the default setup in __cachemode2pte_tbl[]. */ - pat_msr_val = PAT(WB, WC, UC_MINUS, UC, WB, WC, UC_MINUS, UC); + pat_msr_val = PAT_VALUE(WB, WC, UC_MINUS, UC, WB, WC, UC_MINUS, UC); } else { /* * Full PAT support. We put WT in slot 7 to improve @@ -347,13 +341,12 @@ void __init pat_bp_init(void) * The reserved slots are unused, but mapped to their * corresponding types in the presence of PAT errata. */ - pat_msr_val = PAT(WB, WC, UC_MINUS, UC, WB, WP, UC_MINUS, WT); + pat_msr_val = PAT_VALUE(WB, WC, UC_MINUS, UC, WB, WP, UC_MINUS, WT); } memory_caching_control |= CACHE_PAT; init_cache_modes(pat_msr_val); -#undef PAT } static DEFINE_SPINLOCK(memtype_lock); /* protects memtype accesses */ From b6717d35d8597d19f44736f11963e3bd5b8881b4 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 5 Jun 2024 16:19:11 -0700 Subject: [PATCH 053/655] KVM: x86: Stuff vCPU's PAT with default value at RESET, not creation Move the stuffing of the vCPU's PAT to the architectural "default" value from kvm_arch_vcpu_create() to kvm_vcpu_reset(), guarded by !init_event, to better capture that the default value is the value "Following Power-up or Reset". E.g. setting PAT only during creation would break if KVM were to expose a RESET ioctl() to userspace (which is unlikely, but that's not a good reason to have unintuitive code). No functional change. Reviewed-by: Xiaoyao Li Reviewed-by: Kai Huang Reviewed-by: Jim Mattson Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20240605231918.2915961-4-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 70219e406987..3ef6299ef823 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12260,8 +12260,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); vcpu->arch.reserved_gpa_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu); - vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT; - kvm_async_pf_hash_reset(vcpu); vcpu->arch.perf_capabilities = kvm_caps.supported_perf_cap; @@ -12427,6 +12425,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) if (!init_event) { vcpu->arch.smbase = 0x30000; + vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT; + vcpu->arch.msr_misc_features_enables = 0; vcpu->arch.ia32_misc_enable_msr = MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL | MSR_IA32_MISC_ENABLE_BTS_UNAVAIL; From d7bfc9ffd58037ff86f9fd0c3cef77cccb555da3 Mon Sep 17 00:00:00 2001 From: Xin Li Date: Wed, 5 Jun 2024 16:19:12 -0700 Subject: [PATCH 054/655] KVM: VMX: Move MSR_IA32_VMX_BASIC bit defines to asm/vmx.h Move the bit defines for MSR_IA32_VMX_BASIC from msr-index.h to vmx.h so that they are colocated with other VMX MSR bit defines, and with the helpers that extract specific information from an MSR_IA32_VMX_BASIC value. Opportunistically use BIT_ULL() instead of open coding hex values. Opportunistically rename VMX_BASIC_64 to VMX_BASIC_32BIT_PHYS_ADDR_ONLY, as "VMX_BASIC_64" is widly misleading. The flag enumerates that addresses are limited to 32 bits, not that 64-bit addresses are allowed. Last but not least, opportunistically #define DUAL_MONITOR_TREATMENT so that all known single-bit feature flags are defined (this will allow replacing open-coded literals in the future). Cc: Shan Kang Cc: Kai Huang Signed-off-by: Xin Li [sean: split to separate patch, write changelog] Reviewed-by: Zhao Liu Reviewed-by: Kai Huang Reviewed-by: Xiaoyao Li Link: https://lore.kernel.org/r/20240605231918.2915961-5-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/include/asm/msr-index.h | 8 -------- arch/x86/include/asm/vmx.h | 7 +++++++ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index f6eb3460e169..ad0b579d1c01 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -1177,14 +1177,6 @@ #define MSR_IA32_VMX_VMFUNC 0x00000491 #define MSR_IA32_VMX_PROCBASED_CTLS3 0x00000492 -/* VMX_BASIC bits and bitmasks */ -#define VMX_BASIC_VMCS_SIZE_SHIFT 32 -#define VMX_BASIC_TRUE_CTLS (1ULL << 55) -#define VMX_BASIC_64 0x0001000000000000LLU -#define VMX_BASIC_MEM_TYPE_SHIFT 50 -#define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU -#define VMX_BASIC_INOUT 0x0040000000000000LLU - /* Resctrl MSRs: */ /* - Intel: */ #define MSR_IA32_L3_QOS_CFG 0xc81 diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index e531d8d80a11..81b986e501a9 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -135,6 +135,13 @@ #define VMX_VMFUNC_EPTP_SWITCHING VMFUNC_CONTROL_BIT(EPTP_SWITCHING) #define VMFUNC_EPTP_ENTRIES 512 +#define VMX_BASIC_VMCS_SIZE_SHIFT 32 +#define VMX_BASIC_32BIT_PHYS_ADDR_ONLY BIT_ULL(48) +#define VMX_BASIC_DUAL_MONITOR_TREATMENT BIT_ULL(49) +#define VMX_BASIC_MEM_TYPE_SHIFT 50 +#define VMX_BASIC_INOUT BIT_ULL(54) +#define VMX_BASIC_TRUE_CTLS BIT_ULL(55) + static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic) { return vmx_basic & GENMASK_ULL(30, 0); From 9df398ff7d2ab4fa2ecf6131044431dc94c5bdf6 Mon Sep 17 00:00:00 2001 From: Xin Li Date: Wed, 5 Jun 2024 16:19:13 -0700 Subject: [PATCH 055/655] KVM: VMX: Track CPU's MSR_IA32_VMX_BASIC as a single 64-bit value Track the "basic" capabilities VMX MSR as a single u64 in vmcs_config instead of splitting it across three fields, that obviously don't combine into a single 64-bit value, so that KVM can use the macros that define MSR bits using their absolute position. Replace all open coded shifts and masks, many of which are relative to the "high" half, with the appropriate macro. Opportunistically use VMX_BASIC_32BIT_PHYS_ADDR_ONLY instead of an open coded equivalent, and clean up the related comment to not reference a specific SDM section (to the surprise of no one, the comment is stale). No functional change intended (though obviously the code generation will be quite different). Cc: Shan Kang Cc: Kai Huang Signed-off-by: Xin Li [sean: split to separate patch, write changelog] Reviewed-by: Xiaoyao Li Reviewed-by: Kai Huang Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20240605231918.2915961-6-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/include/asm/vmx.h | 5 +++++ arch/x86/kvm/vmx/capabilities.h | 6 ++---- arch/x86/kvm/vmx/vmx.c | 28 ++++++++++++++-------------- 3 files changed, 21 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 81b986e501a9..90963b14afaa 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -152,6 +152,11 @@ static inline u32 vmx_basic_vmcs_size(u64 vmx_basic) return (vmx_basic & GENMASK_ULL(44, 32)) >> 32; } +static inline u32 vmx_basic_vmcs_mem_type(u64 vmx_basic) +{ + return (vmx_basic & GENMASK_ULL(53, 50)) >> 50; +} + static inline int vmx_misc_preemption_timer_rate(u64 vmx_misc) { return vmx_misc & VMX_MISC_PREEMPTION_TIMER_RATE_MASK; diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 41a4533f9989..86ce8bb96bed 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -54,9 +54,7 @@ struct nested_vmx_msrs { }; struct vmcs_config { - int size; - u32 basic_cap; - u32 revision_id; + u64 basic; u32 pin_based_exec_ctrl; u32 cpu_based_exec_ctrl; u32 cpu_based_2nd_exec_ctrl; @@ -76,7 +74,7 @@ extern struct vmx_capability vmx_capability __ro_after_init; static inline bool cpu_has_vmx_basic_inout(void) { - return (((u64)vmcs_config.basic_cap << 32) & VMX_BASIC_INOUT); + return vmcs_config.basic & VMX_BASIC_INOUT; } static inline bool cpu_has_virtual_nmis(void) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index a4d077db04cf..5cbd86c4386b 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2605,13 +2605,13 @@ static u64 adjust_vmx_controls64(u64 ctl_opt, u32 msr) static int setup_vmcs_config(struct vmcs_config *vmcs_conf, struct vmx_capability *vmx_cap) { - u32 vmx_msr_low, vmx_msr_high; u32 _pin_based_exec_control = 0; u32 _cpu_based_exec_control = 0; u32 _cpu_based_2nd_exec_control = 0; u64 _cpu_based_3rd_exec_control = 0; u32 _vmexit_control = 0; u32 _vmentry_control = 0; + u64 basic_msr; u64 misc_msr; int i; @@ -2734,29 +2734,29 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf, _vmexit_control &= ~x_ctrl; } - rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high); + rdmsrl(MSR_IA32_VMX_BASIC, basic_msr); /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */ - if ((vmx_msr_high & 0x1fff) > PAGE_SIZE) + if (vmx_basic_vmcs_size(basic_msr) > PAGE_SIZE) return -EIO; #ifdef CONFIG_X86_64 - /* IA-32 SDM Vol 3B: 64-bit CPUs always have VMX_BASIC_MSR[48]==0. */ - if (vmx_msr_high & (1u<<16)) + /* + * KVM expects to be able to shove all legal physical addresses into + * VMCS fields for 64-bit kernels, and per the SDM, "This bit is always + * 0 for processors that support Intel 64 architecture". + */ + if (basic_msr & VMX_BASIC_32BIT_PHYS_ADDR_ONLY) return -EIO; #endif /* Require Write-Back (WB) memory type for VMCS accesses. */ - if (((vmx_msr_high >> 18) & 15) != X86_MEMTYPE_WB) + if (vmx_basic_vmcs_mem_type(basic_msr) != X86_MEMTYPE_WB) return -EIO; rdmsrl(MSR_IA32_VMX_MISC, misc_msr); - vmcs_conf->size = vmx_msr_high & 0x1fff; - vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff; - - vmcs_conf->revision_id = vmx_msr_low; - + vmcs_conf->basic = basic_msr; vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control; vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control; vmcs_conf->cpu_based_2nd_exec_ctrl = _cpu_based_2nd_exec_control; @@ -2903,13 +2903,13 @@ struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags) if (!pages) return NULL; vmcs = page_address(pages); - memset(vmcs, 0, vmcs_config.size); + memset(vmcs, 0, vmx_basic_vmcs_size(vmcs_config.basic)); /* KVM supports Enlightened VMCS v1 only */ if (kvm_is_using_evmcs()) vmcs->hdr.revision_id = KVM_EVMCS_VERSION; else - vmcs->hdr.revision_id = vmcs_config.revision_id; + vmcs->hdr.revision_id = vmx_basic_vmcs_revision_id(vmcs_config.basic); if (shadow) vmcs->hdr.shadow_vmcs = 1; @@ -3002,7 +3002,7 @@ static __init int alloc_kvm_area(void) * physical CPU. */ if (kvm_is_using_evmcs()) - vmcs->hdr.revision_id = vmcs_config.revision_id; + vmcs->hdr.revision_id = vmx_basic_vmcs_revision_id(vmcs_config.basic); per_cpu(vmxarea, cpu) = vmcs; } From c97b106fa8aa500823695abfda7c9bdefb42a648 Mon Sep 17 00:00:00 2001 From: Xin Li Date: Wed, 5 Jun 2024 16:19:14 -0700 Subject: [PATCH 056/655] KVM: nVMX: Use macros and #defines in vmx_restore_vmx_basic() Use macros in vmx_restore_vmx_basic() instead of open coding everything using BIT_ULL() and GENMASK_ULL(). Opportunistically split feature bits and reserved bits into separate variables, and add a comment explaining the subset logic (it's not immediately obvious that the set of feature bits is NOT the set of _supported_ feature bits). Cc: Shan Kang Cc: Kai Huang Signed-off-by: Xin Li [sean: split to separate patch, write changelog, drop #defines] Reviewed-by: Zhao Liu Reviewed-by: Xiaoyao Li Reviewed-by: Kai Huang Link: https://lore.kernel.org/r/20240605231918.2915961-7-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/vmx/nested.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 504fe5ffd47b..eba5e94a3c7c 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -1251,21 +1251,32 @@ static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask) static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data) { - const u64 feature_and_reserved = - /* feature (except bit 48; see below) */ - BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) | - /* reserved */ - BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56); + const u64 feature_bits = VMX_BASIC_DUAL_MONITOR_TREATMENT | + VMX_BASIC_INOUT | + VMX_BASIC_TRUE_CTLS; + + const u64 reserved_bits = GENMASK_ULL(63, 56) | + GENMASK_ULL(47, 45) | + BIT_ULL(31); + u64 vmx_basic = vmcs_config.nested.basic; - if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved)) + BUILD_BUG_ON(feature_bits & reserved_bits); + + /* + * Except for 32BIT_PHYS_ADDR_ONLY, which is an anti-feature bit (has + * inverted polarity), the incoming value must not set feature bits or + * reserved bits that aren't allowed/supported by KVM. Fields, i.e. + * multi-bit values, are explicitly checked below. + */ + if (!is_bitwise_subset(vmx_basic, data, feature_bits | reserved_bits)) return -EINVAL; /* * KVM does not emulate a version of VMX that constrains physical * addresses of VMX structures (e.g. VMCS) to 32-bits. */ - if (data & BIT_ULL(48)) + if (data & VMX_BASIC_32BIT_PHYS_ADDR_ONLY) return -EINVAL; if (vmx_basic_vmcs_revision_id(vmx_basic) != From 92e648042c237c3bba223ab7f7e1a76f41ab3ef7 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 5 Jun 2024 16:19:15 -0700 Subject: [PATCH 057/655] KVM: nVMX: Add a helper to encode VMCS info in MSR_IA32_VMX_BASIC Add a helper to encode the VMCS revision, size, and supported memory types in MSR_IA32_VMX_BASIC, i.e. when synthesizing KVM's supported BASIC MSR value, and delete the now unused VMCS size and memtype shift macros. For a variety of reasons, KVM has shifted (pun intended) to using helpers to *get* information from the VMX MSRs, as opposed to defined MASK and SHIFT macros for direct use. Provide a similar helper for the nested VMX code, which needs to *set* information, so that KVM isn't left with a mix of SHIFT macros and dedicated helpers. Reported-by: Xiaoyao Li Reviewed-by: Xiaoyao Li Reviewed-by: Kai Huang Link: https://lore.kernel.org/r/20240605231918.2915961-8-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/include/asm/vmx.h | 7 +++++-- arch/x86/kvm/vmx/nested.c | 8 +++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 90963b14afaa..65aaf0577265 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -135,10 +135,8 @@ #define VMX_VMFUNC_EPTP_SWITCHING VMFUNC_CONTROL_BIT(EPTP_SWITCHING) #define VMFUNC_EPTP_ENTRIES 512 -#define VMX_BASIC_VMCS_SIZE_SHIFT 32 #define VMX_BASIC_32BIT_PHYS_ADDR_ONLY BIT_ULL(48) #define VMX_BASIC_DUAL_MONITOR_TREATMENT BIT_ULL(49) -#define VMX_BASIC_MEM_TYPE_SHIFT 50 #define VMX_BASIC_INOUT BIT_ULL(54) #define VMX_BASIC_TRUE_CTLS BIT_ULL(55) @@ -157,6 +155,11 @@ static inline u32 vmx_basic_vmcs_mem_type(u64 vmx_basic) return (vmx_basic & GENMASK_ULL(53, 50)) >> 50; } +static inline u64 vmx_basic_encode_vmcs_info(u32 revision, u16 size, u8 memtype) +{ + return revision | ((u64)size << 32) | ((u64)memtype << 50); +} + static inline int vmx_misc_preemption_timer_rate(u64 vmx_misc) { return vmx_misc & VMX_MISC_PREEMPTION_TIMER_RATE_MASK; diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index eba5e94a3c7c..335f1a650497 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -7077,12 +7077,10 @@ static void nested_vmx_setup_basic(struct nested_vmx_msrs *msrs) * guest, and the VMCS structure we give it - not about the * VMX support of the underlying hardware. */ - msrs->basic = - VMCS12_REVISION | - VMX_BASIC_TRUE_CTLS | - ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) | - (X86_MEMTYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT); + msrs->basic = vmx_basic_encode_vmcs_info(VMCS12_REVISION, VMCS12_SIZE, + X86_MEMTYPE_WB); + msrs->basic |= VMX_BASIC_TRUE_CTLS; if (cpu_has_vmx_basic_inout()) msrs->basic |= VMX_BASIC_INOUT; } From dc1e67f70f6d4e336da06cb61bf6669d6fa39869 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 5 Jun 2024 16:19:16 -0700 Subject: [PATCH 058/655] KVM VMX: Move MSR_IA32_VMX_MISC bit defines to asm/vmx.h Move the handful of MSR_IA32_VMX_MISC bit defines that are currently in msr-indx.h to vmx.h so that all of the VMX_MISC defines and wrappers can be found in a single location. Opportunistically use BIT_ULL() instead of open coding hex values, add defines for feature bits that are architecturally defined, and move the defines down in the file so that they are colocated with the helpers for getting fields from VMX_MISC. No functional change intended. Cc: Shan Kang Cc: Kai Huang Signed-off-by: Xin Li [sean: split to separate patch, write changelog] Reviewed-by: Zhao Liu Reviewed-by: Kai Huang Reviewed-by: Xiaoyao Li Link: https://lore.kernel.org/r/20240605231918.2915961-9-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/include/asm/msr-index.h | 5 ----- arch/x86/include/asm/vmx.h | 19 ++++++++++++------- arch/x86/kvm/vmx/capabilities.h | 4 ++-- arch/x86/kvm/vmx/nested.c | 2 +- arch/x86/kvm/vmx/nested.h | 2 +- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index ad0b579d1c01..72c2c0ecb62c 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -1194,11 +1194,6 @@ #define MSR_IA32_SMBA_BW_BASE 0xc0000280 #define MSR_IA32_EVT_CFG_BASE 0xc0000400 -/* MSR_IA32_VMX_MISC bits */ -#define MSR_IA32_VMX_MISC_INTEL_PT (1ULL << 14) -#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) -#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F - /* AMD-V MSRs */ #define MSR_VM_CR 0xc0010114 #define MSR_VM_IGNNE 0xc0010115 diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 65aaf0577265..400819ccb42c 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -122,13 +122,6 @@ #define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff -#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f -#define VMX_MISC_SAVE_EFER_LMA 0x00000020 -#define VMX_MISC_ACTIVITY_HLT 0x00000040 -#define VMX_MISC_ACTIVITY_WAIT_SIPI 0x00000100 -#define VMX_MISC_ZERO_LEN_INS 0x40000000 -#define VMX_MISC_MSR_LIST_MULTIPLIER 512 - /* VMFUNC functions */ #define VMFUNC_CONTROL_BIT(x) BIT((VMX_FEATURE_##x & 0x1f) - 28) @@ -160,6 +153,18 @@ static inline u64 vmx_basic_encode_vmcs_info(u32 revision, u16 size, u8 memtype) return revision | ((u64)size << 32) | ((u64)memtype << 50); } +#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK GENMASK_ULL(4, 0) +#define VMX_MISC_SAVE_EFER_LMA BIT_ULL(5) +#define VMX_MISC_ACTIVITY_HLT BIT_ULL(6) +#define VMX_MISC_ACTIVITY_SHUTDOWN BIT_ULL(7) +#define VMX_MISC_ACTIVITY_WAIT_SIPI BIT_ULL(8) +#define VMX_MISC_INTEL_PT BIT_ULL(14) +#define VMX_MISC_RDMSR_IN_SMM BIT_ULL(15) +#define VMX_MISC_VMXOFF_BLOCK_SMI BIT_ULL(28) +#define VMX_MISC_VMWRITE_SHADOW_RO_FIELDS BIT_ULL(29) +#define VMX_MISC_ZERO_LEN_INS BIT_ULL(30) +#define VMX_MISC_MSR_LIST_MULTIPLIER 512 + static inline int vmx_misc_preemption_timer_rate(u64 vmx_misc) { return vmx_misc & VMX_MISC_PREEMPTION_TIMER_RATE_MASK; diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 86ce8bb96bed..cb6588238f46 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -223,7 +223,7 @@ static inline bool cpu_has_vmx_vmfunc(void) static inline bool cpu_has_vmx_shadow_vmcs(void) { /* check if the cpu supports writing r/o exit information fields */ - if (!(vmcs_config.misc & MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS)) + if (!(vmcs_config.misc & VMX_MISC_VMWRITE_SHADOW_RO_FIELDS)) return false; return vmcs_config.cpu_based_2nd_exec_ctrl & @@ -365,7 +365,7 @@ static inline bool cpu_has_vmx_invvpid_global(void) static inline bool cpu_has_vmx_intel_pt(void) { - return (vmcs_config.misc & MSR_IA32_VMX_MISC_INTEL_PT) && + return (vmcs_config.misc & VMX_MISC_INTEL_PT) && (vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_PT_USE_GPA) && (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_RTIT_CTL); } diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 335f1a650497..72db3718d8b2 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -7062,7 +7062,7 @@ static void nested_vmx_setup_misc_data(struct vmcs_config *vmcs_conf, { msrs->misc_low = (u32)vmcs_conf->misc & VMX_MISC_SAVE_EFER_LMA; msrs->misc_low |= - MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS | + VMX_MISC_VMWRITE_SHADOW_RO_FIELDS | VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE | VMX_MISC_ACTIVITY_HLT | VMX_MISC_ACTIVITY_WAIT_SIPI; diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h index cce4e2aa30fb..0782fe599757 100644 --- a/arch/x86/kvm/vmx/nested.h +++ b/arch/x86/kvm/vmx/nested.h @@ -109,7 +109,7 @@ static inline unsigned nested_cpu_vmx_misc_cr3_count(struct kvm_vcpu *vcpu) static inline bool nested_cpu_has_vmwrite_any_field(struct kvm_vcpu *vcpu) { return to_vmx(vcpu)->nested.msrs.misc_low & - MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS; + VMX_MISC_VMWRITE_SHADOW_RO_FIELDS; } static inline bool nested_cpu_has_zero_length_injection(struct kvm_vcpu *vcpu) From 8f56b14e9fa0f91daf4d3be05988abf336255dc6 Mon Sep 17 00:00:00 2001 From: Xin Li Date: Wed, 5 Jun 2024 16:19:17 -0700 Subject: [PATCH 059/655] KVM: VMX: Open code VMX preemption timer rate mask in its accessor Use vmx_misc_preemption_timer_rate() to get the rate in hardware_setup(), and open code the rate's bitmask in vmx_misc_preemption_timer_rate() so that the function looks like all the helpers that grab values from VMX_BASIC and VMX_MISC MSR values. No functional change intended. Cc: Shan Kang Cc: Kai Huang Signed-off-by: Xin Li [sean: split to separate patch, write changelog] Reviewed-by: Kai Huang Reviewed-by: Xiaoyao Li Link: https://lore.kernel.org/r/20240605231918.2915961-10-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/include/asm/vmx.h | 3 +-- arch/x86/kvm/vmx/vmx.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 400819ccb42c..f7fd4369b821 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -153,7 +153,6 @@ static inline u64 vmx_basic_encode_vmcs_info(u32 revision, u16 size, u8 memtype) return revision | ((u64)size << 32) | ((u64)memtype << 50); } -#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK GENMASK_ULL(4, 0) #define VMX_MISC_SAVE_EFER_LMA BIT_ULL(5) #define VMX_MISC_ACTIVITY_HLT BIT_ULL(6) #define VMX_MISC_ACTIVITY_SHUTDOWN BIT_ULL(7) @@ -167,7 +166,7 @@ static inline u64 vmx_basic_encode_vmcs_info(u32 revision, u16 size, u8 memtype) static inline int vmx_misc_preemption_timer_rate(u64 vmx_misc) { - return vmx_misc & VMX_MISC_PREEMPTION_TIMER_RATE_MASK; + return vmx_misc & GENMASK_ULL(4, 0); } static inline int vmx_misc_cr3_count(u64 vmx_misc) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 5cbd86c4386b..39a26ecf87ea 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -8517,7 +8517,7 @@ __init int vmx_hardware_setup(void) u64 use_timer_freq = 5000ULL * 1000 * 1000; cpu_preemption_timer_multi = - vmcs_config.misc & VMX_MISC_PREEMPTION_TIMER_RATE_MASK; + vmx_misc_preemption_timer_rate(vmcs_config.misc); if (tsc_khz) use_timer_freq = (u64)tsc_khz * 1000; From 566975f6ecd85247bd8989884d7b909d5a456da1 Mon Sep 17 00:00:00 2001 From: Xin Li Date: Wed, 5 Jun 2024 16:19:18 -0700 Subject: [PATCH 060/655] KVM: nVMX: Use macros and #defines in vmx_restore_vmx_misc() Use macros in vmx_restore_vmx_misc() instead of open coding everything using BIT_ULL() and GENMASK_ULL(). Opportunistically split feature bits and reserved bits into separate variables, and add a comment explaining the subset logic (it's not immediately obvious that the set of feature bits is NOT the set of _supported_ feature bits). Cc: Shan Kang Cc: Kai Huang Signed-off-by: Xin Li [sean: split to separate patch, write changelog, drop #defines] Reviewed-by: Xiaoyao Li Reviewed-by: Kai Huang Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20240605231918.2915961-11-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/vmx/nested.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 72db3718d8b2..97ecc2722c8f 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -1345,16 +1345,29 @@ vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data) static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data) { - const u64 feature_and_reserved_bits = - /* feature */ - BIT_ULL(5) | GENMASK_ULL(8, 6) | BIT_ULL(14) | BIT_ULL(15) | - BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30) | - /* reserved */ - GENMASK_ULL(13, 9) | BIT_ULL(31); + const u64 feature_bits = VMX_MISC_SAVE_EFER_LMA | + VMX_MISC_ACTIVITY_HLT | + VMX_MISC_ACTIVITY_SHUTDOWN | + VMX_MISC_ACTIVITY_WAIT_SIPI | + VMX_MISC_INTEL_PT | + VMX_MISC_RDMSR_IN_SMM | + VMX_MISC_VMWRITE_SHADOW_RO_FIELDS | + VMX_MISC_VMXOFF_BLOCK_SMI | + VMX_MISC_ZERO_LEN_INS; + + const u64 reserved_bits = BIT_ULL(31) | GENMASK_ULL(13, 9); + u64 vmx_misc = vmx_control_msr(vmcs_config.nested.misc_low, vmcs_config.nested.misc_high); - if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits)) + BUILD_BUG_ON(feature_bits & reserved_bits); + + /* + * The incoming value must not set feature bits or reserved bits that + * aren't allowed/supported by KVM. Fields, i.e. multi-bit values, are + * explicitly checked below. + */ + if (!is_bitwise_subset(vmx_misc, data, feature_bits | reserved_bits)) return -EINVAL; if ((vmx->nested.msrs.pinbased_ctls_high & From c501062bb22ba325b7b77c91433d79574b4a3dcc Mon Sep 17 00:00:00 2001 From: Yongqiang Liu Date: Wed, 21 Aug 2024 19:27:37 +0800 Subject: [PATCH 061/655] KVM: SVM: Remove unnecessary GFP_KERNEL_ACCOUNT in svm_set_nested_state() The fixed size temporary variables vmcb_control_area and vmcb_save_area allocated in svm_set_nested_state() are released when the function exits. Meanwhile, svm_set_nested_state() also have vcpu mutex held to avoid massive concurrency allocation, so we don't need to set GFP_KERNEL_ACCOUNT. Signed-off-by: Yongqiang Liu Link: https://lore.kernel.org/r/20240821112737.3649937-1-liuyongqiang13@huawei.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/nested.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 6f704c1037e5..d5314cb7dff4 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -1693,8 +1693,8 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, return -EINVAL; ret = -ENOMEM; - ctl = kzalloc(sizeof(*ctl), GFP_KERNEL_ACCOUNT); - save = kzalloc(sizeof(*save), GFP_KERNEL_ACCOUNT); + ctl = kzalloc(sizeof(*ctl), GFP_KERNEL); + save = kzalloc(sizeof(*save), GFP_KERNEL); if (!ctl || !save) goto out_free; From caf22c6dd31294d136e527d0548f8697c7e72f37 Mon Sep 17 00:00:00 2001 From: Qiang Liu Date: Tue, 2 Jul 2024 06:46:09 +0000 Subject: [PATCH 062/655] KVM: VMX: Modify the BUILD_BUG_ON_MSG of the 32-bit field in the vmcs_check16 function According to the SDM, the meaning of field bit 0 is: Access type (0 = full; 1 = high); must be full for 16-bit, 32-bit, and natural-width fields. So there is no 32-bit high field here, it should be a 32-bit field instead. Signed-off-by: Qiang Liu Link: https://lore.kernel.org/r/20240702064609.52487-1-liuq131@chinatelecom.cn Signed-off-by: Sean Christopherson --- arch/x86/kvm/vmx/vmx_ops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h index 8060e5fc6dbd..93e020dc88f6 100644 --- a/arch/x86/kvm/vmx/vmx_ops.h +++ b/arch/x86/kvm/vmx/vmx_ops.h @@ -47,7 +47,7 @@ static __always_inline void vmcs_check16(unsigned long field) BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001, "16-bit accessor invalid for 64-bit high field"); BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000, - "16-bit accessor invalid for 32-bit high field"); + "16-bit accessor invalid for 32-bit field"); BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000, "16-bit accessor invalid for natural width field"); } From d9aa56edad3536f8b24c4695d51725e8d33b3c46 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Mon, 15 Jul 2024 22:12:24 +1200 Subject: [PATCH 063/655] KVM: VMX: Do not account for temporary memory allocation in ECREATE emulation In handle_encls_ecreate(), a page is allocated to store a copy of SECS structure used by the ENCLS[ECREATE] leaf from the guest. This page is only used temporarily and is freed after use in handle_encls_ecreate(). Don't account for the memory allocation of this page per [1]. Link: https://lore.kernel.org/kvm/b999afeb588eb75d990891855bc6d58861968f23.camel@intel.com/T/#mb81987afc3ab308bbb5861681aa9a20f2aece7fd [1] Signed-off-by: Kai Huang Link: https://lore.kernel.org/r/20240715101224.90958-1-kai.huang@intel.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/vmx/sgx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/sgx.c b/arch/x86/kvm/vmx/sgx.c index 6fef01e0536e..a3c3d2a51f47 100644 --- a/arch/x86/kvm/vmx/sgx.c +++ b/arch/x86/kvm/vmx/sgx.c @@ -274,7 +274,7 @@ static int handle_encls_ecreate(struct kvm_vcpu *vcpu) * simultaneously set SGX_ATTR_PROVISIONKEY to bypass the check to * enforce restriction of access to the PROVISIONKEY. */ - contents = (struct sgx_secs *)__get_free_page(GFP_KERNEL_ACCOUNT); + contents = (struct sgx_secs *)__get_free_page(GFP_KERNEL); if (!contents) return -ENOMEM; From 653ea4489e6989f14a87abf8653f77c089097326 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 22 Jul 2024 16:59:22 -0700 Subject: [PATCH 064/655] KVM: nVMX: Honor userspace MSR filter lists for nested VM-Enter/VM-Exit Synthesize a consistency check VM-Exit (VM-Enter) or VM-Abort (VM-Exit) if L1 attempts to load/store an MSR via the VMCS MSR lists that userspace has disallowed access to via an MSR filter. Intel already disallows including a handful of "special" MSRs in the VMCS lists, so denying access isn't completely without precedent. More importantly, the behavior is well-defined _and_ can be communicated the end user, e.g. to the customer that owns a VM running as L1 on top of KVM. On the other hand, ignoring userspace MSR filters is all but guaranteed to result in unexpected behavior as the access will hit KVM's internal state, which is likely not up-to-date. Unlike KVM-internal accesses, instruction emulation, and dedicated VMCS fields, the MSRs in the VMCS load/store lists are 100% guest controlled, thus making it all but impossible to reason about the correctness of ignoring the MSR filter. And if userspace *really* wants to deny access to MSRs via the aforementioned scenarios, userspace can hide the associated feature from the guest, e.g. by disabling the PMU to prevent accessing PERF_GLOBAL_CTRL via its VMCS field. But for the MSR lists, KVM is blindly processing MSRs; the MSR filters are the _only_ way for userspace to deny access. This partially reverts commit ac8d6cad3c7b ("KVM: x86: Only do MSR filtering when access MSR by rdmsr/wrmsr"). Cc: Hou Wenlong Cc: Jim Mattson Link: https://lore.kernel.org/r/20240722235922.3351122-1-seanjc@google.com Signed-off-by: Sean Christopherson --- Documentation/virt/kvm/api.rst | 23 +++++++++++++++++++---- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/vmx/nested.c | 12 ++++++------ arch/x86/kvm/x86.c | 6 ++++-- 4 files changed, 31 insertions(+), 12 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index b3be87489108..a4b7dc4a9dda 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -4214,7 +4214,9 @@ whether or not KVM_CAP_X86_USER_SPACE_MSR's KVM_MSR_EXIT_REASON_FILTER is enabled. If KVM_MSR_EXIT_REASON_FILTER is enabled, KVM will exit to userspace on denied accesses, i.e. userspace effectively intercepts the MSR access. If KVM_MSR_EXIT_REASON_FILTER is not enabled, KVM will inject a #GP into the guest -on denied accesses. +on denied accesses. Note, if an MSR access is denied during emulation of MSR +load/stores during VMX transitions, KVM ignores KVM_MSR_EXIT_REASON_FILTER. +See the below warning for full details. If an MSR access is allowed by userspace, KVM will emulate and/or virtualize the access in accordance with the vCPU model. Note, KVM may still ultimately @@ -4229,9 +4231,22 @@ filtering. In that mode, ``KVM_MSR_FILTER_DEFAULT_DENY`` is invalid and causes an error. .. warning:: - MSR accesses as part of nested VM-Enter/VM-Exit are not filtered. - This includes both writes to individual VMCS fields and reads/writes - through the MSR lists pointed to by the VMCS. + MSR accesses that are side effects of instruction execution (emulated or + native) are not filtered as hardware does not honor MSR bitmaps outside of + RDMSR and WRMSR, and KVM mimics that behavior when emulating instructions + to avoid pointless divergence from hardware. E.g. RDPID reads MSR_TSC_AUX, + SYSENTER reads the SYSENTER MSRs, etc. + + MSRs that are loaded/stored via dedicated VMCS fields are not filtered as + part of VM-Enter/VM-Exit emulation. + + MSRs that are loaded/store via VMX's load/store lists _are_ filtered as part + of VM-Enter/VM-Exit emulation. If an MSR access is denied on VM-Enter, KVM + synthesizes a consistency check VM-Exit(EXIT_REASON_MSR_LOAD_FAIL). If an + MSR access is denied on VM-Exit, KVM synthesizes a VM-Abort. In short, KVM + extends Intel's architectural list of MSRs that cannot be loaded/saved via + the VM-Enter/VM-Exit MSR list. It is platform owner's responsibility to + to communicate any such restrictions to their end users. x2APIC MSR accesses cannot be filtered (KVM silently ignores filters that cover any x2APIC MSRs). diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4a68cb3eba78..4a93ac1b9be9 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -2060,6 +2060,8 @@ void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu); void kvm_enable_efer_bits(u64); bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer); +int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data); +int kvm_set_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 data); int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, bool host_initiated); int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data); int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data); diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 2392a7ef254d..674f7089cc44 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -981,7 +981,7 @@ static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) __func__, i, e.index, e.reserved); goto fail; } - if (kvm_set_msr(vcpu, e.index, e.value)) { + if (kvm_set_msr_with_filter(vcpu, e.index, e.value)) { pr_debug_ratelimited( "%s cannot write MSR (%u, 0x%x, 0x%llx)\n", __func__, i, e.index, e.value); @@ -1017,7 +1017,7 @@ static bool nested_vmx_get_vmexit_msr_value(struct kvm_vcpu *vcpu, } } - if (kvm_get_msr(vcpu, msr_index, data)) { + if (kvm_get_msr_with_filter(vcpu, msr_index, data)) { pr_debug_ratelimited("%s cannot read MSR (0x%x)\n", __func__, msr_index); return false; @@ -1112,9 +1112,9 @@ static void prepare_vmx_msr_autostore_list(struct kvm_vcpu *vcpu, /* * Emulated VMEntry does not fail here. Instead a less * accurate value will be returned by - * nested_vmx_get_vmexit_msr_value() using kvm_get_msr() - * instead of reading the value from the vmcs02 VMExit - * MSR-store area. + * nested_vmx_get_vmexit_msr_value() by reading KVM's + * internal MSR state instead of reading the value from + * the vmcs02 VMExit MSR-store area. */ pr_warn_ratelimited( "Not enough msr entries in msr_autostore. Can't add msr %x\n", @@ -4806,7 +4806,7 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) goto vmabort; } - if (kvm_set_msr(vcpu, h.index, h.value)) { + if (kvm_set_msr_with_filter(vcpu, h.index, h.value)) { pr_debug_ratelimited( "%s WRMSR failed (%u, 0x%x, 0x%llx)\n", __func__, j, h.index, h.value); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 70219e406987..34b52b49f5e6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1940,19 +1940,21 @@ static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu, return ret; } -static int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data) +int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data) { if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ)) return KVM_MSR_RET_FILTERED; return kvm_get_msr_ignored_check(vcpu, index, data, false); } +EXPORT_SYMBOL_GPL(kvm_get_msr_with_filter); -static int kvm_set_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 data) +int kvm_set_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 data) { if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_WRITE)) return KVM_MSR_RET_FILTERED; return kvm_set_msr_ignored_check(vcpu, index, data, false); } +EXPORT_SYMBOL_GPL(kvm_set_msr_with_filter); int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data) { From 41ab0d59faa9532bbd37c91b03a8e9fb0215d67c Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 25 Jul 2024 13:52:31 -0400 Subject: [PATCH 065/655] KVM: nVMX: Use vmx_segment_cache_clear() instead of open coded equivalent In prepare_vmcs02_rare(), call vmx_segment_cache_clear() instead of setting segment_cache.bitmask directly. Using the helper minimizes the chances of prepare_vmcs02_rare() doing the wrong thing in the future, e.g. if KVM ends up doing more than just zero the bitmask when purging the cache. No functional change intended. Signed-off-by: Maxim Levitsky Link: https://lore.kernel.org/r/20240725175232.337266-2-mlevitsk@redhat.com [sean: massage changelog] Signed-off-by: Sean Christopherson --- arch/x86/kvm/vmx/nested.c | 3 ++- arch/x86/kvm/vmx/vmx.c | 4 ---- arch/x86/kvm/vmx/vmx.h | 5 +++++ 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 674f7089cc44..867de342df33 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2470,6 +2470,7 @@ static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) if (!hv_evmcs || !(hv_evmcs->hv_clean_fields & HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2)) { + vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector); vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector); @@ -2507,7 +2508,7 @@ static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base); vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base); - vmx->segment_cache.bitmask = 0; + vmx_segment_cache_clear(vmx); } if (!hv_evmcs || !(hv_evmcs->hv_clean_fields & diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index f18c2d8c7476..594db9afbc0f 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -525,10 +525,6 @@ static const struct kvm_vmx_segment_field { VMX_SEGMENT_FIELD(LDTR), }; -static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx) -{ - vmx->segment_cache.bitmask = 0; -} static unsigned long host_idt_base; diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 42498fa63abb..11b1b70faef2 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -756,4 +756,9 @@ static inline bool vmx_can_use_ipiv(struct kvm_vcpu *vcpu) return lapic_in_kernel(vcpu) && enable_ipiv; } +static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx) +{ + vmx->segment_cache.bitmask = 0; +} + #endif /* __KVM_X86_VMX_H */ From 2ab637df5f68d4e0cfa9becd10f43400aee785b3 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 16 Aug 2024 15:01:24 +0200 Subject: [PATCH 066/655] KVM: VMX: hyper-v: Prevent impossible NULL pointer dereference in evmcs_load() GCC 12.3.0 complains about a potential NULL pointer dereference in evmcs_load() as hv_get_vp_assist_page() can return NULL. In fact, this cannot happen because KVM verifies (hv_init_evmcs()) that every CPU has a valid VP assist page and aborts enabling the feature otherwise. CPU onlining path is also checked in vmx_hardware_enable(). To make the compiler happy and to future proof the code, add a KVM_BUG_ON() sentinel. It doesn't seem to be possible (and logical) to observe evmcs_load() happening without an active vCPU so it is presumed that kvm_get_running_vcpu() can't return NULL. No functional change intended. Reported-by: Mirsad Todorovac Suggested-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Link: https://lore.kernel.org/r/20240816130124.286226-1-vkuznets@redhat.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/vmx/vmx_onhyperv.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kvm/vmx/vmx_onhyperv.h b/arch/x86/kvm/vmx/vmx_onhyperv.h index eb48153bfd73..bba24ed99ee6 100644 --- a/arch/x86/kvm/vmx/vmx_onhyperv.h +++ b/arch/x86/kvm/vmx/vmx_onhyperv.h @@ -104,6 +104,14 @@ static inline void evmcs_load(u64 phys_addr) struct hv_vp_assist_page *vp_ap = hv_get_vp_assist_page(smp_processor_id()); + /* + * When enabling eVMCS, KVM verifies that every CPU has a valid hv_vp_assist_page() + * and aborts enabling the feature otherwise. CPU onlining path is also checked in + * vmx_hardware_enable(). + */ + if (KVM_BUG_ON(!vp_ap, kvm_get_running_vcpu()->kvm)) + return; + if (current_evmcs->hv_enlightenments_control.nested_flush_hypercall) vp_ap->nested_control.features.directhypercall = 1; vp_ap->current_nested_vmcs = phys_addr; From e0183a42e3bcd4c30eb95bb046c016023fdc01ce Mon Sep 17 00:00:00 2001 From: Li Chen Date: Mon, 19 Aug 2024 13:59:27 +0800 Subject: [PATCH 067/655] KVM: x86: Use this_cpu_ptr() in kvm_user_return_msr_cpu_online Use this_cpu_ptr() instead of open coding the equivalent in kvm_user_return_msr_cpu_online. Signed-off-by: Li Chen Link: https://lore.kernel.org/r/87zfp96ojk.wl-me@linux.beauty Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 70219e406987..ffdf251bfef5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -413,8 +413,7 @@ EXPORT_SYMBOL_GPL(kvm_find_user_return_msr); static void kvm_user_return_msr_cpu_online(void) { - unsigned int cpu = smp_processor_id(); - struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu); + struct kvm_user_return_msrs *msrs = this_cpu_ptr(user_return_msrs); u64 value; int i; From acf2923271ef3611ceab7004fc8798d3ba31b739 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Aug 2024 13:20:06 -0700 Subject: [PATCH 068/655] KVM: x86/mmu: Clean up function comments for dirty logging APIs Rework the function comment for kvm_arch_mmu_enable_log_dirty_pt_masked() into the body of the function, as it has gotten a bit stale, is harder to read without the code context, and is the last source of warnings for W=1 builds in KVM x86 due to using a kernel-doc comment without documenting all parameters. Opportunistically subsume the functions comments for kvm_mmu_write_protect_pt_masked() and kvm_mmu_clear_dirty_pt_masked(), as there is no value in regurgitating similar information at a higher level, and capturing the differences between write-protection and PML-based dirty logging is best done in a common location. No functional change intended. Cc: David Matlack Reviewed-by: Kai Huang Reviewed-by: Pankaj Gupta Link: https://lore.kernel.org/r/20240802202006.340854-1-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/mmu/mmu.c | 48 +++++++++++++----------------------------- 1 file changed, 15 insertions(+), 33 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 928cf84778b0..5226bb055d99 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1307,15 +1307,6 @@ static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head, return flush; } -/** - * kvm_mmu_write_protect_pt_masked - write protect selected PT level pages - * @kvm: kvm instance - * @slot: slot to protect - * @gfn_offset: start of the BITS_PER_LONG pages we care about - * @mask: indicates which pages we should protect - * - * Used when we do not need to care about huge page mappings. - */ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask) @@ -1339,16 +1330,6 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, } } -/** - * kvm_mmu_clear_dirty_pt_masked - clear MMU D-bit for PT level pages, or write - * protect the page if the D-bit isn't supported. - * @kvm: kvm instance - * @slot: slot to clear D-bit - * @gfn_offset: start of the BITS_PER_LONG pages we care about - * @mask: indicates which pages we should clear D-bit - * - * Used for PML to re-log the dirty GPAs after userspace querying dirty_bitmap. - */ static void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask) @@ -1372,24 +1353,16 @@ static void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm, } } -/** - * kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected - * PT level pages. - * - * It calls kvm_mmu_write_protect_pt_masked to write protect selected pages to - * enable dirty logging for them. - * - * We need to care about huge page mappings: e.g. during dirty logging we may - * have such mappings. - */ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask) { /* - * Huge pages are NOT write protected when we start dirty logging in - * initially-all-set mode; must write protect them here so that they - * are split to 4K on the first write. + * If the slot was assumed to be "initially all dirty", write-protect + * huge pages to ensure they are split to 4KiB on the first write (KVM + * dirty logs at 4KiB granularity). If eager page splitting is enabled, + * immediately try to split huge pages, e.g. so that vCPUs don't get + * saddled with the cost of splitting. * * The gfn_offset is guaranteed to be aligned to 64, but the base_gfn * of memslot has no such restriction, so the range can cross two large @@ -1411,7 +1384,16 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, PG_LEVEL_2M); } - /* Now handle 4K PTEs. */ + /* + * (Re)Enable dirty logging for all 4KiB SPTEs that map the GFNs in + * mask. If PML is enabled and the GFN doesn't need to be write- + * protected for other reasons, e.g. shadow paging, clear the Dirty bit. + * Otherwise clear the Writable bit. + * + * Note that kvm_mmu_clear_dirty_pt_masked() is called whenever PML is + * enabled but it chooses between clearing the Dirty bit and Writeable + * bit based on the context. + */ if (kvm_x86_ops.cpu_dirty_log_size) kvm_mmu_clear_dirty_pt_masked(kvm, slot, gfn_offset, mask); else From 74a0e79df68a8042fb84fd7207e57b70722cf825 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Aug 2024 11:19:26 -0700 Subject: [PATCH 069/655] KVM: SVM: Disallow guest from changing userspace's MSR_AMD64_DE_CFG value Inject a #GP if the guest attempts to change MSR_AMD64_DE_CFG from its *current* value, not if the guest attempts to write a value other than KVM's set of supported bits. As per the comment and the changelog of the original code, the intent is to effectively make MSR_AMD64_DE_CFG read- only for the guest. Opportunistically use a more conventional equality check instead of an exclusive-OR check to detect attempts to change bits. Fixes: d1d93fa90f1a ("KVM: SVM: Add MSR-based feature support for serializing LFENCE") Cc: Tom Lendacky Reviewed-by: Tom Lendacky Link: https://lore.kernel.org/r/20240802181935.292540-2-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/svm.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index d6f252555ab3..eaa41a6a00ee 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3189,8 +3189,13 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) if (data & ~msr_entry.data) return 1; - /* Don't allow the guest to change a bit, #GP */ - if (!msr->host_initiated && (data ^ msr_entry.data)) + /* + * Don't let the guest change the host-programmed value. The + * MSR is very model specific, i.e. contains multiple bits that + * are completely unknown to KVM, and the one bit known to KVM + * is simply a reflection of hardware capabilities. + */ + if (!msr->host_initiated && data != svm->msr_decfg) return 1; svm->msr_decfg = data; From b58b808cbe93e8abe936b285ae534c9927789242 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Aug 2024 11:19:27 -0700 Subject: [PATCH 070/655] KVM: x86: Move MSR_TYPE_{R,W,RW} values from VMX to x86, as enums Move VMX's MSR_TYPE_{R,W,RW} #defines to x86.h, as enums, so that they can be used by common x86 code, e.g. instead of doing "bool write". Opportunistically tweak the definitions to make it more obvious that the values are bitmasks, not arbitrary ascending values. No functional change intended. Link: https://lore.kernel.org/r/20240802181935.292540-3-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/vmx/vmx.h | 4 ---- arch/x86/kvm/x86.h | 6 ++++++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 42498fa63abb..3839afb921e2 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -17,10 +17,6 @@ #include "run_flags.h" #include "../mmu.h" -#define MSR_TYPE_R 1 -#define MSR_TYPE_W 2 -#define MSR_TYPE_RW 3 - #define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4)) #ifdef CONFIG_X86_64 diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 50596f6f8320..499adef96038 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -504,6 +504,12 @@ int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r, int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva); bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type); +enum kvm_msr_access { + MSR_TYPE_R = BIT(0), + MSR_TYPE_W = BIT(1), + MSR_TYPE_RW = MSR_TYPE_R | MSR_TYPE_W, +}; + /* * Internal error codes that are used to indicate that MSR emulation encountered * an error that should result in #GP in the guest, unless userspace From aaecae7b6a2b19a874a7df0d474f44f3a5b5a74e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Aug 2024 11:19:28 -0700 Subject: [PATCH 071/655] KVM: x86: Rename KVM_MSR_RET_INVALID to KVM_MSR_RET_UNSUPPORTED Rename the "INVALID" internal MSR error return code to "UNSUPPORTED" to try and make it more clear that access was denied because the MSR itself is unsupported/unknown. "INVALID" is too ambiguous, as it could just as easily mean the value for WRMSR as invalid. Avoid UNKNOWN and UNIMPLEMENTED, as the error code is used for MSRs that _are_ actually implemented by KVM, e.g. if the MSR is unsupported because an associated feature flag is not present in guest CPUID. Opportunistically beef up the comments for the internal MSR error codes. Link: https://lore.kernel.org/r/20240802181935.292540-4-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/svm.c | 2 +- arch/x86/kvm/vmx/vmx.c | 2 +- arch/x86/kvm/x86.c | 12 ++++++------ arch/x86/kvm/x86.h | 15 +++++++++++---- 4 files changed, 19 insertions(+), 12 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index eaa41a6a00ee..a265f6e621fc 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2835,7 +2835,7 @@ static int svm_get_msr_feature(struct kvm_msr_entry *msr) msr->data |= MSR_AMD64_DE_CFG_LFENCE_SERIALIZE; break; default: - return KVM_MSR_RET_INVALID; + return KVM_MSR_RET_UNSUPPORTED; } return 0; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index f18c2d8c7476..e5b253e4d421 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2006,7 +2006,7 @@ int vmx_get_msr_feature(struct kvm_msr_entry *msr) return 1; return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data); default: - return KVM_MSR_RET_INVALID; + return KVM_MSR_RET_UNSUPPORTED; } } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ffdf251bfef5..ea2f3ff93957 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1687,7 +1687,7 @@ static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data) msr.index = index; r = kvm_get_msr_feature(&msr); - if (r == KVM_MSR_RET_INVALID && kvm_msr_ignored_check(index, 0, false)) + if (r == KVM_MSR_RET_UNSUPPORTED && kvm_msr_ignored_check(index, 0, false)) r = 0; *data = msr.data; @@ -1884,7 +1884,7 @@ static int kvm_set_msr_ignored_check(struct kvm_vcpu *vcpu, { int ret = __kvm_set_msr(vcpu, index, data, host_initiated); - if (ret == KVM_MSR_RET_INVALID) + if (ret == KVM_MSR_RET_UNSUPPORTED) if (kvm_msr_ignored_check(index, data, true)) ret = 0; @@ -1929,7 +1929,7 @@ static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu, { int ret = __kvm_get_msr(vcpu, index, data, host_initiated); - if (ret == KVM_MSR_RET_INVALID) { + if (ret == KVM_MSR_RET_UNSUPPORTED) { /* Unconditionally clear *data for simplicity */ *data = 0; if (kvm_msr_ignored_check(index, 0, false)) @@ -1998,7 +1998,7 @@ static int complete_fast_rdmsr(struct kvm_vcpu *vcpu) static u64 kvm_msr_reason(int r) { switch (r) { - case KVM_MSR_RET_INVALID: + case KVM_MSR_RET_UNSUPPORTED: return KVM_MSR_EXIT_REASON_UNKNOWN; case KVM_MSR_RET_FILTERED: return KVM_MSR_EXIT_REASON_FILTER; @@ -4146,7 +4146,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) kvm_is_msr_to_save(msr)) break; - return KVM_MSR_RET_INVALID; + return KVM_MSR_RET_UNSUPPORTED; } return 0; } @@ -4507,7 +4507,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; } - return KVM_MSR_RET_INVALID; + return KVM_MSR_RET_UNSUPPORTED; } return 0; } diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 499adef96038..f47b9905ba78 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -512,11 +512,18 @@ enum kvm_msr_access { /* * Internal error codes that are used to indicate that MSR emulation encountered - * an error that should result in #GP in the guest, unless userspace - * handles it. + * an error that should result in #GP in the guest, unless userspace handles it. + * Note, '1', '0', and negative numbers are off limits, as they are used by KVM + * as part of KVM's lightly documented internal KVM_RUN return codes. + * + * UNSUPPORTED - The MSR isn't supported, either because it is completely + * unknown to KVM, or because the MSR should not exist according + * to the vCPU model. + * + * FILTERED - Access to the MSR is denied by a userspace MSR filter. */ -#define KVM_MSR_RET_INVALID 2 /* in-kernel MSR emulation #GP condition */ -#define KVM_MSR_RET_FILTERED 3 /* #GP due to userspace MSR filter */ +#define KVM_MSR_RET_UNSUPPORTED 2 +#define KVM_MSR_RET_FILTERED 3 #define __cr4_reserved_bits(__cpu_has, __c) \ ({ \ From 74c6c98a598a1fa650f9f8dfb095d66e987ed9cf Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Aug 2024 11:19:29 -0700 Subject: [PATCH 072/655] KVM: x86: Refactor kvm_x86_ops.get_msr_feature() to avoid kvm_msr_entry Refactor get_msr_feature() to take the index and data pointer as distinct parameters in anticipation of eliminating "struct kvm_msr_entry" usage further up the primary callchain. No functional change intended. Link: https://lore.kernel.org/r/20240802181935.292540-5-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/svm/svm.c | 16 +++++++--------- arch/x86/kvm/vmx/vmx.c | 6 +++--- arch/x86/kvm/vmx/x86_ops.h | 2 +- arch/x86/kvm/x86.c | 2 +- 5 files changed, 13 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4a68cb3eba78..fe61cff1f49d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1806,7 +1806,7 @@ struct kvm_x86_ops { int (*vm_move_enc_context_from)(struct kvm *kvm, unsigned int source_fd); void (*guest_memory_reclaimed)(struct kvm *kvm); - int (*get_msr_feature)(struct kvm_msr_entry *entry); + int (*get_msr_feature)(u32 msr, u64 *data); int (*check_emulate_instruction)(struct kvm_vcpu *vcpu, int emul_type, void *insn, int insn_len); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index a265f6e621fc..314dd4aacfe9 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2825,14 +2825,14 @@ static int efer_trap(struct kvm_vcpu *vcpu) return kvm_complete_insn_gp(vcpu, ret); } -static int svm_get_msr_feature(struct kvm_msr_entry *msr) +static int svm_get_msr_feature(u32 msr, u64 *data) { - msr->data = 0; + *data = 0; - switch (msr->index) { + switch (msr) { case MSR_AMD64_DE_CFG: if (cpu_feature_enabled(X86_FEATURE_LFENCE_RDTSC)) - msr->data |= MSR_AMD64_DE_CFG_LFENCE_SERIALIZE; + *data |= MSR_AMD64_DE_CFG_LFENCE_SERIALIZE; break; default: return KVM_MSR_RET_UNSUPPORTED; @@ -3179,14 +3179,12 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) kvm_pr_unimpl_wrmsr(vcpu, ecx, data); break; case MSR_AMD64_DE_CFG: { - struct kvm_msr_entry msr_entry; + u64 supported_de_cfg; - msr_entry.index = msr->index; - if (svm_get_msr_feature(&msr_entry)) + if (svm_get_msr_feature(ecx, &supported_de_cfg)) return 1; - /* Check the supported bits */ - if (data & ~msr_entry.data) + if (data & ~supported_de_cfg) return 1; /* diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index e5b253e4d421..3d24eb4aeca2 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1998,13 +1998,13 @@ static inline bool is_vmx_feature_control_msr_valid(struct vcpu_vmx *vmx, return !(msr->data & ~valid_bits); } -int vmx_get_msr_feature(struct kvm_msr_entry *msr) +int vmx_get_msr_feature(u32 msr, u64 *data) { - switch (msr->index) { + switch (msr) { case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR: if (!nested) return 1; - return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data); + return vmx_get_vmx_msr(&vmcs_config.nested, msr, data); default: return KVM_MSR_RET_UNSUPPORTED; } diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h index ce3221cd1d01..9a0304eb847b 100644 --- a/arch/x86/kvm/vmx/x86_ops.h +++ b/arch/x86/kvm/vmx/x86_ops.h @@ -56,7 +56,7 @@ bool vmx_has_emulated_msr(struct kvm *kvm, u32 index); void vmx_msr_filter_changed(struct kvm_vcpu *vcpu); void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu); void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu); -int vmx_get_msr_feature(struct kvm_msr_entry *msr); +int vmx_get_msr_feature(u32 msr, u64 *data); int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info); u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg); void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ea2f3ff93957..29d1205f62d3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1672,7 +1672,7 @@ static int kvm_get_msr_feature(struct kvm_msr_entry *msr) rdmsrl_safe(msr->index, &msr->data); break; default: - return kvm_x86_call(get_msr_feature)(msr); + return kvm_x86_call(get_msr_feature)(msr->index, &msr->data); } return 0; } From b848f24bd74a699745c1145e8cb707884d80694e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Aug 2024 11:19:30 -0700 Subject: [PATCH 073/655] KVM: x86: Rename get_msr_feature() APIs to get_feature_msr() Rename all APIs related to feature MSRs from get_msr_feature() to get_feature_msr(). The APIs get "feature MSRs", not "MSR features". And unlike kvm_{g,s}et_msr_common(), the "feature" adjective doesn't describe the helper itself. No functional change intended. Link: https://lore.kernel.org/r/20240802181935.292540-6-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/include/asm/kvm-x86-ops.h | 2 +- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/svm/svm.c | 6 +++--- arch/x86/kvm/vmx/main.c | 2 +- arch/x86/kvm/vmx/vmx.c | 2 +- arch/x86/kvm/vmx/x86_ops.h | 2 +- arch/x86/kvm/x86.c | 12 ++++++------ 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index 68ad4f923664..9afbf8bcb521 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -125,7 +125,7 @@ KVM_X86_OP_OPTIONAL(mem_enc_unregister_region) KVM_X86_OP_OPTIONAL(vm_copy_enc_context_from) KVM_X86_OP_OPTIONAL(vm_move_enc_context_from) KVM_X86_OP_OPTIONAL(guest_memory_reclaimed) -KVM_X86_OP(get_msr_feature) +KVM_X86_OP(get_feature_msr) KVM_X86_OP(check_emulate_instruction) KVM_X86_OP(apic_init_signal_blocked) KVM_X86_OP_OPTIONAL(enable_l2_tlb_flush) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index fe61cff1f49d..95396e4cb3da 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1806,7 +1806,7 @@ struct kvm_x86_ops { int (*vm_move_enc_context_from)(struct kvm *kvm, unsigned int source_fd); void (*guest_memory_reclaimed)(struct kvm *kvm); - int (*get_msr_feature)(u32 msr, u64 *data); + int (*get_feature_msr)(u32 msr, u64 *data); int (*check_emulate_instruction)(struct kvm_vcpu *vcpu, int emul_type, void *insn, int insn_len); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 314dd4aacfe9..d8cfe8f23327 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2825,7 +2825,7 @@ static int efer_trap(struct kvm_vcpu *vcpu) return kvm_complete_insn_gp(vcpu, ret); } -static int svm_get_msr_feature(u32 msr, u64 *data) +static int svm_get_feature_msr(u32 msr, u64 *data) { *data = 0; @@ -3181,7 +3181,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) case MSR_AMD64_DE_CFG: { u64 supported_de_cfg; - if (svm_get_msr_feature(ecx, &supported_de_cfg)) + if (svm_get_feature_msr(ecx, &supported_de_cfg)) return 1; if (data & ~supported_de_cfg) @@ -5002,7 +5002,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .vcpu_unblocking = avic_vcpu_unblocking, .update_exception_bitmap = svm_update_exception_bitmap, - .get_msr_feature = svm_get_msr_feature, + .get_feature_msr = svm_get_feature_msr, .get_msr = svm_get_msr, .set_msr = svm_set_msr, .get_segment_base = svm_get_segment_base, diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c index 0bf35ebe8a1b..4f6023a0deb3 100644 --- a/arch/x86/kvm/vmx/main.c +++ b/arch/x86/kvm/vmx/main.c @@ -41,7 +41,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = { .vcpu_put = vmx_vcpu_put, .update_exception_bitmap = vmx_update_exception_bitmap, - .get_msr_feature = vmx_get_msr_feature, + .get_feature_msr = vmx_get_feature_msr, .get_msr = vmx_get_msr, .set_msr = vmx_set_msr, .get_segment_base = vmx_get_segment_base, diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 3d24eb4aeca2..cf85f8d50ccb 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1998,7 +1998,7 @@ static inline bool is_vmx_feature_control_msr_valid(struct vcpu_vmx *vmx, return !(msr->data & ~valid_bits); } -int vmx_get_msr_feature(u32 msr, u64 *data) +int vmx_get_feature_msr(u32 msr, u64 *data) { switch (msr) { case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR: diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h index 9a0304eb847b..eeafd121fb08 100644 --- a/arch/x86/kvm/vmx/x86_ops.h +++ b/arch/x86/kvm/vmx/x86_ops.h @@ -56,7 +56,7 @@ bool vmx_has_emulated_msr(struct kvm *kvm, u32 index); void vmx_msr_filter_changed(struct kvm_vcpu *vcpu); void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu); void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu); -int vmx_get_msr_feature(u32 msr, u64 *data); +int vmx_get_feature_msr(u32 msr, u64 *data); int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info); u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg); void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 29d1205f62d3..3efe086bea4c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1659,7 +1659,7 @@ static u64 kvm_get_arch_capabilities(void) return data; } -static int kvm_get_msr_feature(struct kvm_msr_entry *msr) +static int kvm_get_feature_msr(struct kvm_msr_entry *msr) { switch (msr->index) { case MSR_IA32_ARCH_CAPABILITIES: @@ -1672,12 +1672,12 @@ static int kvm_get_msr_feature(struct kvm_msr_entry *msr) rdmsrl_safe(msr->index, &msr->data); break; default: - return kvm_x86_call(get_msr_feature)(msr->index, &msr->data); + return kvm_x86_call(get_feature_msr)(msr->index, &msr->data); } return 0; } -static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data) +static int do_get_feature_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) { struct kvm_msr_entry msr; int r; @@ -1685,7 +1685,7 @@ static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data) /* Unconditionally clear the output for simplicity */ msr.data = 0; msr.index = index; - r = kvm_get_msr_feature(&msr); + r = kvm_get_feature_msr(&msr); if (r == KVM_MSR_RET_UNSUPPORTED && kvm_msr_ignored_check(index, 0, false)) r = 0; @@ -4943,7 +4943,7 @@ long kvm_arch_dev_ioctl(struct file *filp, break; } case KVM_GET_MSRS: - r = msr_io(NULL, argp, do_get_msr_feature, 1); + r = msr_io(NULL, argp, do_get_feature_msr, 1); break; #ifdef CONFIG_KVM_HYPERV case KVM_GET_SUPPORTED_HV_CPUID: @@ -7382,7 +7382,7 @@ static void kvm_probe_feature_msr(u32 msr_index) .index = msr_index, }; - if (kvm_get_msr_feature(&msr)) + if (kvm_get_feature_msr(&msr)) return; msr_based_features[num_msr_based_features++] = msr_index; From 7075f163615072a74bae7c5344210adec5f9ea2a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Aug 2024 11:19:31 -0700 Subject: [PATCH 074/655] KVM: x86: Refactor kvm_get_feature_msr() to avoid struct kvm_msr_entry Refactor kvm_get_feature_msr() to take the components of kvm_msr_entry as separate parameters, along with a vCPU pointer, i.e. to give it the same prototype as kvm_{g,s}et_msr_ignored_check(). This will allow using a common inner helper for handling accesses to "regular" and feature MSRs. No functional change intended. Link: https://lore.kernel.org/r/20240802181935.292540-7-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3efe086bea4c..0c2fa6c590a2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1659,39 +1659,38 @@ static u64 kvm_get_arch_capabilities(void) return data; } -static int kvm_get_feature_msr(struct kvm_msr_entry *msr) +static int kvm_get_feature_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, + bool host_initiated) { - switch (msr->index) { + WARN_ON_ONCE(!host_initiated); + + switch (index) { case MSR_IA32_ARCH_CAPABILITIES: - msr->data = kvm_get_arch_capabilities(); + *data = kvm_get_arch_capabilities(); break; case MSR_IA32_PERF_CAPABILITIES: - msr->data = kvm_caps.supported_perf_cap; + *data = kvm_caps.supported_perf_cap; break; case MSR_IA32_UCODE_REV: - rdmsrl_safe(msr->index, &msr->data); + rdmsrl_safe(index, data); break; default: - return kvm_x86_call(get_feature_msr)(msr->index, &msr->data); + return kvm_x86_call(get_feature_msr)(index, data); } return 0; } static int do_get_feature_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) { - struct kvm_msr_entry msr; int r; /* Unconditionally clear the output for simplicity */ - msr.data = 0; - msr.index = index; - r = kvm_get_feature_msr(&msr); + *data = 0; + r = kvm_get_feature_msr(vcpu, index, data, true); if (r == KVM_MSR_RET_UNSUPPORTED && kvm_msr_ignored_check(index, 0, false)) r = 0; - *data = msr.data; - return r; } @@ -7378,11 +7377,9 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) static void kvm_probe_feature_msr(u32 msr_index) { - struct kvm_msr_entry msr = { - .index = msr_index, - }; + u64 data; - if (kvm_get_feature_msr(&msr)) + if (kvm_get_feature_msr(NULL, msr_index, &data, true)) return; msr_based_features[num_msr_based_features++] = msr_index; From 1cec2034980ad03ebf8ce0f187a8f3101c33e611 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Aug 2024 11:19:32 -0700 Subject: [PATCH 075/655] KVM: x86: Funnel all fancy MSR return value handling into a common helper Add a common helper, kvm_do_msr_access(), to invoke the "leaf" APIs that are type and access specific, and more importantly to handle errors that are returned from the leaf APIs. I.e. turn kvm_msr_ignored_check() from a a helper that is called on an error, into a trampoline that detects errors *and* applies relevant side effects, e.g. logging unimplemented accesses. Because the leaf APIs are used for guest accesses, userspace accesses, and KVM accesses, and because KVM supports restricting access to MSRs from userspace via filters, the error handling is subtly non-trivial. E.g. KVM has had at least one bug escape due to making each "outer" function handle errors. See commit 3376ca3f1a20 ("KVM: x86: Fix KVM_GET_MSRS stack info leak"). Link: https://lore.kernel.org/r/20240802181935.292540-8-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 86 +++++++++++++++++++++++----------------------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0c2fa6c590a2..faf1ba786a34 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -304,25 +304,40 @@ const struct kvm_stats_header kvm_vcpu_stats_header = { static struct kmem_cache *x86_emulator_cache; -/* - * When called, it means the previous get/set msr reached an invalid msr. - * Return true if we want to ignore/silent this failed msr access. - */ -static bool kvm_msr_ignored_check(u32 msr, u64 data, bool write) -{ - const char *op = write ? "wrmsr" : "rdmsr"; +typedef int (*msr_access_t)(struct kvm_vcpu *vcpu, u32 index, u64 *data, + bool host_initiated); - if (ignore_msrs) { - if (report_ignored_msrs) - kvm_pr_unimpl("ignored %s: 0x%x data 0x%llx\n", - op, msr, data); - /* Mask the error */ - return true; - } else { +static __always_inline int kvm_do_msr_access(struct kvm_vcpu *vcpu, u32 msr, + u64 *data, bool host_initiated, + enum kvm_msr_access rw, + msr_access_t msr_access_fn) +{ + const char *op = rw == MSR_TYPE_W ? "wrmsr" : "rdmsr"; + int ret; + + BUILD_BUG_ON(rw != MSR_TYPE_R && rw != MSR_TYPE_W); + + /* + * Zero the data on read failures to avoid leaking stack data to the + * guest and/or userspace, e.g. if the failure is ignored below. + */ + ret = msr_access_fn(vcpu, msr, data, host_initiated); + if (ret && rw == MSR_TYPE_R) + *data = 0; + + if (ret != KVM_MSR_RET_UNSUPPORTED) + return ret; + + if (!ignore_msrs) { kvm_debug_ratelimited("unhandled %s: 0x%x data 0x%llx\n", - op, msr, data); - return false; + op, msr, *data); + return ret; } + + if (report_ignored_msrs) + kvm_pr_unimpl("ignored %s: 0x%x data 0x%llx\n", op, msr, *data); + + return 0; } static struct kmem_cache *kvm_alloc_emulator_cache(void) @@ -1682,16 +1697,8 @@ static int kvm_get_feature_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, static int do_get_feature_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) { - int r; - - /* Unconditionally clear the output for simplicity */ - *data = 0; - r = kvm_get_feature_msr(vcpu, index, data, true); - - if (r == KVM_MSR_RET_UNSUPPORTED && kvm_msr_ignored_check(index, 0, false)) - r = 0; - - return r; + return kvm_do_msr_access(vcpu, index, data, true, MSR_TYPE_R, + kvm_get_feature_msr); } static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) @@ -1878,16 +1885,17 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data, return kvm_x86_call(set_msr)(vcpu, &msr); } +static int _kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, + bool host_initiated) +{ + return __kvm_set_msr(vcpu, index, *data, host_initiated); +} + static int kvm_set_msr_ignored_check(struct kvm_vcpu *vcpu, u32 index, u64 data, bool host_initiated) { - int ret = __kvm_set_msr(vcpu, index, data, host_initiated); - - if (ret == KVM_MSR_RET_UNSUPPORTED) - if (kvm_msr_ignored_check(index, data, true)) - ret = 0; - - return ret; + return kvm_do_msr_access(vcpu, index, &data, host_initiated, MSR_TYPE_W, + _kvm_set_msr); } /* @@ -1926,16 +1934,8 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu, u32 index, u64 *data, bool host_initiated) { - int ret = __kvm_get_msr(vcpu, index, data, host_initiated); - - if (ret == KVM_MSR_RET_UNSUPPORTED) { - /* Unconditionally clear *data for simplicity */ - *data = 0; - if (kvm_msr_ignored_check(index, 0, false)) - ret = 0; - } - - return ret; + return kvm_do_msr_access(vcpu, index, data, host_initiated, MSR_TYPE_R, + __kvm_get_msr); } static int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data) From 3adef9034596a8ba6415e6e460209cd9fc524e81 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Aug 2024 11:19:33 -0700 Subject: [PATCH 076/655] KVM: x86: Hoist x86.c's global msr_* variables up above kvm_do_msr_access() Move the definitions of the various MSR arrays above kvm_do_msr_access() so that kvm_do_msr_access() can query the arrays when handling failures, e.g. to squash errors if userspace tries to read an MSR that isn't fully supported, but that KVM advertised as being an MSR-to-save. No functional change intended. Link: https://lore.kernel.org/r/20240802181935.292540-9-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 368 ++++++++++++++++++++++----------------------- 1 file changed, 184 insertions(+), 184 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index faf1ba786a34..92cb4aa7efd7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -304,6 +304,190 @@ const struct kvm_stats_header kvm_vcpu_stats_header = { static struct kmem_cache *x86_emulator_cache; +/* + * The three MSR lists(msrs_to_save, emulated_msrs, msr_based_features) track + * the set of MSRs that KVM exposes to userspace through KVM_GET_MSRS, + * KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. msrs_to_save holds MSRs that + * require host support, i.e. should be probed via RDMSR. emulated_msrs holds + * MSRs that KVM emulates without strictly requiring host support. + * msr_based_features holds MSRs that enumerate features, i.e. are effectively + * CPUID leafs. Note, msr_based_features isn't mutually exclusive with + * msrs_to_save and emulated_msrs. + */ + +static const u32 msrs_to_save_base[] = { + MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, + MSR_STAR, +#ifdef CONFIG_X86_64 + MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, +#endif + MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, + MSR_IA32_FEAT_CTL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, + MSR_IA32_SPEC_CTRL, MSR_IA32_TSX_CTRL, + MSR_IA32_RTIT_CTL, MSR_IA32_RTIT_STATUS, MSR_IA32_RTIT_CR3_MATCH, + MSR_IA32_RTIT_OUTPUT_BASE, MSR_IA32_RTIT_OUTPUT_MASK, + MSR_IA32_RTIT_ADDR0_A, MSR_IA32_RTIT_ADDR0_B, + MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B, + MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B, + MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B, + MSR_IA32_UMWAIT_CONTROL, + + MSR_IA32_XFD, MSR_IA32_XFD_ERR, +}; + +static const u32 msrs_to_save_pmu[] = { + MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1, + MSR_ARCH_PERFMON_FIXED_CTR0 + 2, + MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS, + MSR_CORE_PERF_GLOBAL_CTRL, + MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG, + + /* This part of MSRs should match KVM_MAX_NR_INTEL_GP_COUNTERS. */ + MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1, + MSR_ARCH_PERFMON_PERFCTR0 + 2, MSR_ARCH_PERFMON_PERFCTR0 + 3, + MSR_ARCH_PERFMON_PERFCTR0 + 4, MSR_ARCH_PERFMON_PERFCTR0 + 5, + MSR_ARCH_PERFMON_PERFCTR0 + 6, MSR_ARCH_PERFMON_PERFCTR0 + 7, + MSR_ARCH_PERFMON_EVENTSEL0, MSR_ARCH_PERFMON_EVENTSEL1, + MSR_ARCH_PERFMON_EVENTSEL0 + 2, MSR_ARCH_PERFMON_EVENTSEL0 + 3, + MSR_ARCH_PERFMON_EVENTSEL0 + 4, MSR_ARCH_PERFMON_EVENTSEL0 + 5, + MSR_ARCH_PERFMON_EVENTSEL0 + 6, MSR_ARCH_PERFMON_EVENTSEL0 + 7, + + MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3, + MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3, + + /* This part of MSRs should match KVM_MAX_NR_AMD_GP_COUNTERS. */ + MSR_F15H_PERF_CTL0, MSR_F15H_PERF_CTL1, MSR_F15H_PERF_CTL2, + MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5, + MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2, + MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5, + + MSR_AMD64_PERF_CNTR_GLOBAL_CTL, + MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, + MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, +}; + +static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_base) + + ARRAY_SIZE(msrs_to_save_pmu)]; +static unsigned num_msrs_to_save; + +static const u32 emulated_msrs_all[] = { + MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, + MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, + +#ifdef CONFIG_KVM_HYPERV + HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, + HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC, + HV_X64_MSR_TSC_FREQUENCY, HV_X64_MSR_APIC_FREQUENCY, + HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2, + HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL, + HV_X64_MSR_RESET, + HV_X64_MSR_VP_INDEX, + HV_X64_MSR_VP_RUNTIME, + HV_X64_MSR_SCONTROL, + HV_X64_MSR_STIMER0_CONFIG, + HV_X64_MSR_VP_ASSIST_PAGE, + HV_X64_MSR_REENLIGHTENMENT_CONTROL, HV_X64_MSR_TSC_EMULATION_CONTROL, + HV_X64_MSR_TSC_EMULATION_STATUS, HV_X64_MSR_TSC_INVARIANT_CONTROL, + HV_X64_MSR_SYNDBG_OPTIONS, + HV_X64_MSR_SYNDBG_CONTROL, HV_X64_MSR_SYNDBG_STATUS, + HV_X64_MSR_SYNDBG_SEND_BUFFER, HV_X64_MSR_SYNDBG_RECV_BUFFER, + HV_X64_MSR_SYNDBG_PENDING_BUFFER, +#endif + + MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, + MSR_KVM_PV_EOI_EN, MSR_KVM_ASYNC_PF_INT, MSR_KVM_ASYNC_PF_ACK, + + MSR_IA32_TSC_ADJUST, + MSR_IA32_TSC_DEADLINE, + MSR_IA32_ARCH_CAPABILITIES, + MSR_IA32_PERF_CAPABILITIES, + MSR_IA32_MISC_ENABLE, + MSR_IA32_MCG_STATUS, + MSR_IA32_MCG_CTL, + MSR_IA32_MCG_EXT_CTL, + MSR_IA32_SMBASE, + MSR_SMI_COUNT, + MSR_PLATFORM_INFO, + MSR_MISC_FEATURES_ENABLES, + MSR_AMD64_VIRT_SPEC_CTRL, + MSR_AMD64_TSC_RATIO, + MSR_IA32_POWER_CTL, + MSR_IA32_UCODE_REV, + + /* + * KVM always supports the "true" VMX control MSRs, even if the host + * does not. The VMX MSRs as a whole are considered "emulated" as KVM + * doesn't strictly require them to exist in the host (ignoring that + * KVM would refuse to load in the first place if the core set of MSRs + * aren't supported). + */ + MSR_IA32_VMX_BASIC, + MSR_IA32_VMX_TRUE_PINBASED_CTLS, + MSR_IA32_VMX_TRUE_PROCBASED_CTLS, + MSR_IA32_VMX_TRUE_EXIT_CTLS, + MSR_IA32_VMX_TRUE_ENTRY_CTLS, + MSR_IA32_VMX_MISC, + MSR_IA32_VMX_CR0_FIXED0, + MSR_IA32_VMX_CR4_FIXED0, + MSR_IA32_VMX_VMCS_ENUM, + MSR_IA32_VMX_PROCBASED_CTLS2, + MSR_IA32_VMX_EPT_VPID_CAP, + MSR_IA32_VMX_VMFUNC, + + MSR_K7_HWCR, + MSR_KVM_POLL_CONTROL, +}; + +static u32 emulated_msrs[ARRAY_SIZE(emulated_msrs_all)]; +static unsigned num_emulated_msrs; + +/* + * List of MSRs that control the existence of MSR-based features, i.e. MSRs + * that are effectively CPUID leafs. VMX MSRs are also included in the set of + * feature MSRs, but are handled separately to allow expedited lookups. + */ +static const u32 msr_based_features_all_except_vmx[] = { + MSR_AMD64_DE_CFG, + MSR_IA32_UCODE_REV, + MSR_IA32_ARCH_CAPABILITIES, + MSR_IA32_PERF_CAPABILITIES, +}; + +static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all_except_vmx) + + (KVM_LAST_EMULATED_VMX_MSR - KVM_FIRST_EMULATED_VMX_MSR + 1)]; +static unsigned int num_msr_based_features; + +/* + * All feature MSRs except uCode revID, which tracks the currently loaded uCode + * patch, are immutable once the vCPU model is defined. + */ +static bool kvm_is_immutable_feature_msr(u32 msr) +{ + int i; + + if (msr >= KVM_FIRST_EMULATED_VMX_MSR && msr <= KVM_LAST_EMULATED_VMX_MSR) + return true; + + for (i = 0; i < ARRAY_SIZE(msr_based_features_all_except_vmx); i++) { + if (msr == msr_based_features_all_except_vmx[i]) + return msr != MSR_IA32_UCODE_REV; + } + + return false; +} + +static bool kvm_is_msr_to_save(u32 msr_index) +{ + unsigned int i; + + for (i = 0; i < num_msrs_to_save; i++) { + if (msrs_to_save[i] == msr_index) + return true; + } + + return false; +} + typedef int (*msr_access_t)(struct kvm_vcpu *vcpu, u32 index, u64 *data, bool host_initiated); @@ -1425,178 +1609,6 @@ int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_emulate_rdpmc); -/* - * The three MSR lists(msrs_to_save, emulated_msrs, msr_based_features) track - * the set of MSRs that KVM exposes to userspace through KVM_GET_MSRS, - * KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. msrs_to_save holds MSRs that - * require host support, i.e. should be probed via RDMSR. emulated_msrs holds - * MSRs that KVM emulates without strictly requiring host support. - * msr_based_features holds MSRs that enumerate features, i.e. are effectively - * CPUID leafs. Note, msr_based_features isn't mutually exclusive with - * msrs_to_save and emulated_msrs. - */ - -static const u32 msrs_to_save_base[] = { - MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, - MSR_STAR, -#ifdef CONFIG_X86_64 - MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, -#endif - MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, - MSR_IA32_FEAT_CTL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, - MSR_IA32_SPEC_CTRL, MSR_IA32_TSX_CTRL, - MSR_IA32_RTIT_CTL, MSR_IA32_RTIT_STATUS, MSR_IA32_RTIT_CR3_MATCH, - MSR_IA32_RTIT_OUTPUT_BASE, MSR_IA32_RTIT_OUTPUT_MASK, - MSR_IA32_RTIT_ADDR0_A, MSR_IA32_RTIT_ADDR0_B, - MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B, - MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B, - MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B, - MSR_IA32_UMWAIT_CONTROL, - - MSR_IA32_XFD, MSR_IA32_XFD_ERR, -}; - -static const u32 msrs_to_save_pmu[] = { - MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1, - MSR_ARCH_PERFMON_FIXED_CTR0 + 2, - MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS, - MSR_CORE_PERF_GLOBAL_CTRL, - MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG, - - /* This part of MSRs should match KVM_MAX_NR_INTEL_GP_COUNTERS. */ - MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1, - MSR_ARCH_PERFMON_PERFCTR0 + 2, MSR_ARCH_PERFMON_PERFCTR0 + 3, - MSR_ARCH_PERFMON_PERFCTR0 + 4, MSR_ARCH_PERFMON_PERFCTR0 + 5, - MSR_ARCH_PERFMON_PERFCTR0 + 6, MSR_ARCH_PERFMON_PERFCTR0 + 7, - MSR_ARCH_PERFMON_EVENTSEL0, MSR_ARCH_PERFMON_EVENTSEL1, - MSR_ARCH_PERFMON_EVENTSEL0 + 2, MSR_ARCH_PERFMON_EVENTSEL0 + 3, - MSR_ARCH_PERFMON_EVENTSEL0 + 4, MSR_ARCH_PERFMON_EVENTSEL0 + 5, - MSR_ARCH_PERFMON_EVENTSEL0 + 6, MSR_ARCH_PERFMON_EVENTSEL0 + 7, - - MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3, - MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3, - - /* This part of MSRs should match KVM_MAX_NR_AMD_GP_COUNTERS. */ - MSR_F15H_PERF_CTL0, MSR_F15H_PERF_CTL1, MSR_F15H_PERF_CTL2, - MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5, - MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2, - MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5, - - MSR_AMD64_PERF_CNTR_GLOBAL_CTL, - MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, - MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, -}; - -static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_base) + - ARRAY_SIZE(msrs_to_save_pmu)]; -static unsigned num_msrs_to_save; - -static const u32 emulated_msrs_all[] = { - MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, - MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, - -#ifdef CONFIG_KVM_HYPERV - HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, - HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC, - HV_X64_MSR_TSC_FREQUENCY, HV_X64_MSR_APIC_FREQUENCY, - HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2, - HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL, - HV_X64_MSR_RESET, - HV_X64_MSR_VP_INDEX, - HV_X64_MSR_VP_RUNTIME, - HV_X64_MSR_SCONTROL, - HV_X64_MSR_STIMER0_CONFIG, - HV_X64_MSR_VP_ASSIST_PAGE, - HV_X64_MSR_REENLIGHTENMENT_CONTROL, HV_X64_MSR_TSC_EMULATION_CONTROL, - HV_X64_MSR_TSC_EMULATION_STATUS, HV_X64_MSR_TSC_INVARIANT_CONTROL, - HV_X64_MSR_SYNDBG_OPTIONS, - HV_X64_MSR_SYNDBG_CONTROL, HV_X64_MSR_SYNDBG_STATUS, - HV_X64_MSR_SYNDBG_SEND_BUFFER, HV_X64_MSR_SYNDBG_RECV_BUFFER, - HV_X64_MSR_SYNDBG_PENDING_BUFFER, -#endif - - MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, - MSR_KVM_PV_EOI_EN, MSR_KVM_ASYNC_PF_INT, MSR_KVM_ASYNC_PF_ACK, - - MSR_IA32_TSC_ADJUST, - MSR_IA32_TSC_DEADLINE, - MSR_IA32_ARCH_CAPABILITIES, - MSR_IA32_PERF_CAPABILITIES, - MSR_IA32_MISC_ENABLE, - MSR_IA32_MCG_STATUS, - MSR_IA32_MCG_CTL, - MSR_IA32_MCG_EXT_CTL, - MSR_IA32_SMBASE, - MSR_SMI_COUNT, - MSR_PLATFORM_INFO, - MSR_MISC_FEATURES_ENABLES, - MSR_AMD64_VIRT_SPEC_CTRL, - MSR_AMD64_TSC_RATIO, - MSR_IA32_POWER_CTL, - MSR_IA32_UCODE_REV, - - /* - * KVM always supports the "true" VMX control MSRs, even if the host - * does not. The VMX MSRs as a whole are considered "emulated" as KVM - * doesn't strictly require them to exist in the host (ignoring that - * KVM would refuse to load in the first place if the core set of MSRs - * aren't supported). - */ - MSR_IA32_VMX_BASIC, - MSR_IA32_VMX_TRUE_PINBASED_CTLS, - MSR_IA32_VMX_TRUE_PROCBASED_CTLS, - MSR_IA32_VMX_TRUE_EXIT_CTLS, - MSR_IA32_VMX_TRUE_ENTRY_CTLS, - MSR_IA32_VMX_MISC, - MSR_IA32_VMX_CR0_FIXED0, - MSR_IA32_VMX_CR4_FIXED0, - MSR_IA32_VMX_VMCS_ENUM, - MSR_IA32_VMX_PROCBASED_CTLS2, - MSR_IA32_VMX_EPT_VPID_CAP, - MSR_IA32_VMX_VMFUNC, - - MSR_K7_HWCR, - MSR_KVM_POLL_CONTROL, -}; - -static u32 emulated_msrs[ARRAY_SIZE(emulated_msrs_all)]; -static unsigned num_emulated_msrs; - -/* - * List of MSRs that control the existence of MSR-based features, i.e. MSRs - * that are effectively CPUID leafs. VMX MSRs are also included in the set of - * feature MSRs, but are handled separately to allow expedited lookups. - */ -static const u32 msr_based_features_all_except_vmx[] = { - MSR_AMD64_DE_CFG, - MSR_IA32_UCODE_REV, - MSR_IA32_ARCH_CAPABILITIES, - MSR_IA32_PERF_CAPABILITIES, -}; - -static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all_except_vmx) + - (KVM_LAST_EMULATED_VMX_MSR - KVM_FIRST_EMULATED_VMX_MSR + 1)]; -static unsigned int num_msr_based_features; - -/* - * All feature MSRs except uCode revID, which tracks the currently loaded uCode - * patch, are immutable once the vCPU model is defined. - */ -static bool kvm_is_immutable_feature_msr(u32 msr) -{ - int i; - - if (msr >= KVM_FIRST_EMULATED_VMX_MSR && msr <= KVM_LAST_EMULATED_VMX_MSR) - return true; - - for (i = 0; i < ARRAY_SIZE(msr_based_features_all_except_vmx); i++) { - if (msr == msr_based_features_all_except_vmx[i]) - return msr != MSR_IA32_UCODE_REV; - } - - return false; -} - /* * Some IA32_ARCH_CAPABILITIES bits have dependencies on MSRs that KVM * does not yet virtualize. These include: @@ -3744,18 +3756,6 @@ static void record_steal_time(struct kvm_vcpu *vcpu) mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa)); } -static bool kvm_is_msr_to_save(u32 msr_index) -{ - unsigned int i; - - for (i = 0; i < num_msrs_to_save; i++) { - if (msrs_to_save[i] == msr_index) - return true; - } - - return false; -} - int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { u32 msr = msr_info->index; From 64a5d7a1091ff6ee70d2155b9dccfe8107d35ffa Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Aug 2024 11:19:34 -0700 Subject: [PATCH 077/655] KVM: x86: Suppress failures on userspace access to advertised, unsupported MSRs Extend KVM's suppression of failures due to a userspace access to an unsupported, but advertised as a "to save" MSR to all MSRs, not just those that happen to reach the default case statements in kvm_get_msr_common() and kvm_set_msr_common(). KVM's soon-to-be-established ABI is that if an MSR is advertised to userspace, then userspace is allowed to read the MSR, and write back the value that was read, i.e. why an MSR is unsupported doesn't change KVM's ABI. Practically speaking, this is very nearly a nop, as the only other paths that return KVM_MSR_RET_UNSUPPORTED are {svm,vmx}_get_feature_msr(), and it's unlikely, though not impossible, that userspace is using KVM_GET_MSRS on unsupported MSRs. The primary goal of moving the suppression to common code is to allow returning KVM_MSR_RET_UNSUPPORTED as appropriate throughout KVM, without having to manually handle the "is userspace accessing an advertised" waiver. I.e. this will allow formalizing KVM's ABI without incurring a high maintenance cost. Link: https://lore.kernel.org/r/20240802181935.292540-10-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 92cb4aa7efd7..1762bde488a2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -512,6 +512,15 @@ static __always_inline int kvm_do_msr_access(struct kvm_vcpu *vcpu, u32 msr, if (ret != KVM_MSR_RET_UNSUPPORTED) return ret; + /* + * Userspace is allowed to read MSRs, and write '0' to MSRs, that KVM + * reports as to-be-saved, even if an MSR isn't fully supported. + * Simply check that @data is '0', which covers both the write '0' case + * and all reads (in which case @data is zeroed on failure; see above). + */ + if (host_initiated && !*data && kvm_is_msr_to_save(msr)) + return 0; + if (!ignore_msrs) { kvm_debug_ratelimited("unhandled %s: 0x%x data 0x%llx\n", op, msr, *data); @@ -4137,14 +4146,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (kvm_pmu_is_valid_msr(vcpu, msr)) return kvm_pmu_set_msr(vcpu, msr_info); - /* - * Userspace is allowed to write '0' to MSRs that KVM reports - * as to-be-saved, even if an MSRs isn't fully supported. - */ - if (msr_info->host_initiated && !data && - kvm_is_msr_to_save(msr)) - break; - return KVM_MSR_RET_UNSUPPORTED; } return 0; @@ -4496,16 +4497,6 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (kvm_pmu_is_valid_msr(vcpu, msr_info->index)) return kvm_pmu_get_msr(vcpu, msr_info); - /* - * Userspace is allowed to read MSRs that KVM reports as - * to-be-saved, even if an MSR isn't fully supported. - */ - if (msr_info->host_initiated && - kvm_is_msr_to_save(msr_info->index)) { - msr_info->data = 0; - break; - } - return KVM_MSR_RET_UNSUPPORTED; } return 0; From 44dd0f5732b466a6e4d2a9b3aad1678f43f061af Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Aug 2024 11:19:35 -0700 Subject: [PATCH 078/655] KVM: x86: Suppress userspace access failures on unsupported, "emulated" MSRs Extend KVM's suppression of userspace MSR access failures to MSRs that KVM reports as emulated, but are ultimately unsupported, e.g. if the VMX MSRs are emulated by KVM, but are unsupported given the vCPU model. Suggested-by: Weijiang Yang Reviewed-by: Weijiang Yang Link: https://lore.kernel.org/r/20240802181935.292540-11-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1762bde488a2..00e792725052 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -476,7 +476,7 @@ static bool kvm_is_immutable_feature_msr(u32 msr) return false; } -static bool kvm_is_msr_to_save(u32 msr_index) +static bool kvm_is_advertised_msr(u32 msr_index) { unsigned int i; @@ -485,6 +485,11 @@ static bool kvm_is_msr_to_save(u32 msr_index) return true; } + for (i = 0; i < num_emulated_msrs; i++) { + if (emulated_msrs[i] == msr_index) + return true; + } + return false; } @@ -514,11 +519,11 @@ static __always_inline int kvm_do_msr_access(struct kvm_vcpu *vcpu, u32 msr, /* * Userspace is allowed to read MSRs, and write '0' to MSRs, that KVM - * reports as to-be-saved, even if an MSR isn't fully supported. + * advertises to userspace, even if an MSR isn't fully supported. * Simply check that @data is '0', which covers both the write '0' case * and all reads (in which case @data is zeroed on failure; see above). */ - if (host_initiated && !*data && kvm_is_msr_to_save(msr)) + if (host_initiated && !*data && kvm_is_advertised_msr(msr)) return 0; if (!ignore_msrs) { From 24a7e944966cc8a285e6581dcc98edebeee76c97 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 16 Aug 2024 15:01:38 +0200 Subject: [PATCH 079/655] KVM: selftests: Move Hyper-V specific functions out of processor.c Since there is 'hyperv.c' for Hyper-V specific functions already, move Hyper-V specific functions out of processor.c there. No functional change intended. Signed-off-by: Vitaly Kuznetsov Link: https://lore.kernel.org/r/20240816130139.286246-2-vkuznets@redhat.com Signed-off-by: Sean Christopherson --- .../selftests/kvm/include/x86_64/hyperv.h | 4 ++ .../selftests/kvm/include/x86_64/processor.h | 7 ++- .../testing/selftests/kvm/lib/x86_64/hyperv.c | 59 ++++++++++++++++++ .../selftests/kvm/lib/x86_64/processor.c | 61 ------------------- .../selftests/kvm/x86_64/xen_vmcall_test.c | 1 + 5 files changed, 68 insertions(+), 64 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h index fa65b908b13e..a2e7cf7ee0ad 100644 --- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h +++ b/tools/testing/selftests/kvm/include/x86_64/hyperv.h @@ -343,4 +343,8 @@ struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm, /* HV_X64_MSR_TSC_INVARIANT_CONTROL bits */ #define HV_INVARIANT_TSC_EXPOSED BIT_ULL(0) +const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void); +const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu); +void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu); + #endif /* !SELFTEST_KVM_HYPERV_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index a0c1440017bb..e247f99e0473 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -25,6 +25,10 @@ extern bool host_cpu_is_intel; extern bool host_cpu_is_amd; extern uint64_t guest_tsc_khz; +#ifndef MAX_NR_CPUID_ENTRIES +#define MAX_NR_CPUID_ENTRIES 100 +#endif + /* Forced emulation prefix, used to invoke the emulator unconditionally. */ #define KVM_FEP "ud2; .byte 'k', 'v', 'm';" @@ -908,8 +912,6 @@ static inline void vcpu_xcrs_set(struct kvm_vcpu *vcpu, struct kvm_xcrs *xcrs) const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid, uint32_t function, uint32_t index); const struct kvm_cpuid2 *kvm_get_supported_cpuid(void); -const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void); -const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu); static inline uint32_t kvm_cpu_fms(void) { @@ -1009,7 +1011,6 @@ static inline struct kvm_cpuid2 *allocate_kvm_cpuid2(int nr_entries) } void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid); -void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu); static inline struct kvm_cpuid_entry2 *__vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function, diff --git a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c b/tools/testing/selftests/kvm/lib/x86_64/hyperv.c index efb7e7a1354d..b4a5e4ad7105 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c +++ b/tools/testing/selftests/kvm/lib/x86_64/hyperv.c @@ -8,6 +8,65 @@ #include "processor.h" #include "hyperv.h" +const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void) +{ + static struct kvm_cpuid2 *cpuid; + int kvm_fd; + + if (cpuid) + return cpuid; + + cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); + kvm_fd = open_kvm_dev_path_or_exit(); + + kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid); + + close(kvm_fd); + return cpuid; +} + +void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu) +{ + static struct kvm_cpuid2 *cpuid_full; + const struct kvm_cpuid2 *cpuid_sys, *cpuid_hv; + int i, nent = 0; + + if (!cpuid_full) { + cpuid_sys = kvm_get_supported_cpuid(); + cpuid_hv = kvm_get_supported_hv_cpuid(); + + cpuid_full = allocate_kvm_cpuid2(cpuid_sys->nent + cpuid_hv->nent); + if (!cpuid_full) { + perror("malloc"); + abort(); + } + + /* Need to skip KVM CPUID leaves 0x400000xx */ + for (i = 0; i < cpuid_sys->nent; i++) { + if (cpuid_sys->entries[i].function >= 0x40000000 && + cpuid_sys->entries[i].function < 0x40000100) + continue; + cpuid_full->entries[nent] = cpuid_sys->entries[i]; + nent++; + } + + memcpy(&cpuid_full->entries[nent], cpuid_hv->entries, + cpuid_hv->nent * sizeof(struct kvm_cpuid_entry2)); + cpuid_full->nent = nent + cpuid_hv->nent; + } + + vcpu_init_cpuid(vcpu, cpuid_full); +} + +const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); + + vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, cpuid); + + return cpuid; +} + struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm, vm_vaddr_t *p_hv_pages_gva) { diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 153739f2e201..7876f052ca39 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -19,8 +19,6 @@ #define KERNEL_DS 0x10 #define KERNEL_TSS 0x18 -#define MAX_NR_CPUID_ENTRIES 100 - vm_vaddr_t exception_handlers; bool host_cpu_is_amd; bool host_cpu_is_intel; @@ -1195,65 +1193,6 @@ void xen_hypercall(uint64_t nr, uint64_t a0, void *a1) GUEST_ASSERT(!__xen_hypercall(nr, a0, a1)); } -const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void) -{ - static struct kvm_cpuid2 *cpuid; - int kvm_fd; - - if (cpuid) - return cpuid; - - cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); - kvm_fd = open_kvm_dev_path_or_exit(); - - kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid); - - close(kvm_fd); - return cpuid; -} - -void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu) -{ - static struct kvm_cpuid2 *cpuid_full; - const struct kvm_cpuid2 *cpuid_sys, *cpuid_hv; - int i, nent = 0; - - if (!cpuid_full) { - cpuid_sys = kvm_get_supported_cpuid(); - cpuid_hv = kvm_get_supported_hv_cpuid(); - - cpuid_full = allocate_kvm_cpuid2(cpuid_sys->nent + cpuid_hv->nent); - if (!cpuid_full) { - perror("malloc"); - abort(); - } - - /* Need to skip KVM CPUID leaves 0x400000xx */ - for (i = 0; i < cpuid_sys->nent; i++) { - if (cpuid_sys->entries[i].function >= 0x40000000 && - cpuid_sys->entries[i].function < 0x40000100) - continue; - cpuid_full->entries[nent] = cpuid_sys->entries[i]; - nent++; - } - - memcpy(&cpuid_full->entries[nent], cpuid_hv->entries, - cpuid_hv->nent * sizeof(struct kvm_cpuid_entry2)); - cpuid_full->nent = nent + cpuid_hv->nent; - } - - vcpu_init_cpuid(vcpu, cpuid_full); -} - -const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); - - vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, cpuid); - - return cpuid; -} - unsigned long vm_compute_max_gfn(struct kvm_vm *vm) { const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */ diff --git a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c index e149d0574961..2585087cdf5c 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c @@ -10,6 +10,7 @@ #include "test_util.h" #include "kvm_util.h" #include "processor.h" +#include "hyperv.h" #define HCALL_REGION_GPA 0xc0000000ULL #define HCALL_REGION_SLOT 10 From d8414067cc17bd2070e6667763124754e2932251 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 16 Aug 2024 15:01:39 +0200 Subject: [PATCH 080/655] KVM: selftests: Re-enable hyperv_evmcs/hyperv_svm_test on bare metal KVM_CAP_HYPERV_DIRECT_TLBFLUSH is only reported when KVM runs on top of Hyper-V and hyperv_evmcs/hyperv_svm_test don't need that, these tests check that the feature is properly emulated for Hyper-V on KVM guests. There's no corresponding CAP for that, the feature is reported in KVM_GET_SUPPORTED_HV_CPUID. Hyper-V specific CPUIDs are not reported by KVM_GET_SUPPORTED_CPUID, implement dedicated kvm_hv_cpu_has() helper to do the job. Fixes: 6dac1195181c ("KVM: selftests: Make Hyper-V tests explicitly require KVM Hyper-V support") Signed-off-by: Vitaly Kuznetsov Link: https://lore.kernel.org/r/20240816130139.286246-3-vkuznets@redhat.com Signed-off-by: Sean Christopherson --- .../testing/selftests/kvm/include/x86_64/hyperv.h | 14 ++++++++++++++ tools/testing/selftests/kvm/lib/x86_64/hyperv.c | 8 ++++++++ tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c | 2 +- .../testing/selftests/kvm/x86_64/hyperv_svm_test.c | 2 +- 4 files changed, 24 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h index a2e7cf7ee0ad..6849e2552f1b 100644 --- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h +++ b/tools/testing/selftests/kvm/include/x86_64/hyperv.h @@ -186,6 +186,18 @@ #define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED \ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 14) +/* HYPERV_CPUID_NESTED_FEATURES.EAX */ +#define HV_X64_NESTED_DIRECT_FLUSH \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 17) +#define HV_X64_NESTED_GUEST_MAPPING_FLUSH \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 18) +#define HV_X64_NESTED_MSR_BITMAP \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 19) + +/* HYPERV_CPUID_NESTED_FEATURES.EBX */ +#define HV_X64_NESTED_EVMCS1_PERF_GLOBAL_CTRL \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EBX, 0) + /* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */ #define HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING \ KVM_X86_CPU_FEATURE(HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES, 0, EAX, 1) @@ -347,4 +359,6 @@ const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void); const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu); void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu); +bool kvm_hv_cpu_has(struct kvm_x86_cpu_feature feature); + #endif /* !SELFTEST_KVM_HYPERV_H */ diff --git a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c b/tools/testing/selftests/kvm/lib/x86_64/hyperv.c index b4a5e4ad7105..15bc8cd583aa 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c +++ b/tools/testing/selftests/kvm/lib/x86_64/hyperv.c @@ -67,6 +67,14 @@ const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu) return cpuid; } +bool kvm_hv_cpu_has(struct kvm_x86_cpu_feature feature) +{ + if (!kvm_has_cap(KVM_CAP_SYS_HYPERV_CPUID)) + return false; + + return kvm_cpuid_has(kvm_get_supported_hv_cpuid(), feature); +} + struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm, vm_vaddr_t *p_hv_pages_gva) { diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c b/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c index e192720bfe14..74cf19661309 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c @@ -242,7 +242,7 @@ int main(int argc, char *argv[]) TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)); - TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_DIRECT_TLBFLUSH)); + TEST_REQUIRE(kvm_hv_cpu_has(HV_X64_NESTED_DIRECT_FLUSH)); vm = vm_create_with_one_vcpu(&vcpu, guest_code); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c index b987a3d79715..0ddb63229bcb 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c @@ -157,7 +157,7 @@ int main(int argc, char *argv[]) int stage; TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); - TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_DIRECT_TLBFLUSH)); + TEST_REQUIRE(kvm_hv_cpu_has(HV_X64_NESTED_DIRECT_FLUSH)); /* Create VM */ vm = vm_create_with_one_vcpu(&vcpu, guest_code); From 92f6d4130497f2bedfaf86a4e46e890cf8983307 Mon Sep 17 00:00:00 2001 From: Ilias Stamatis Date: Thu, 18 Jul 2024 20:35:38 +0100 Subject: [PATCH 081/655] KVM: Fix coalesced_mmio_has_room() to avoid premature userspace exit The following calculation used in coalesced_mmio_has_room() to check whether the ring buffer is full is wrong and results in premature exits if the start of the valid entries is in the first half of the ring buffer. avail = (ring->first - last - 1) % KVM_COALESCED_MMIO_MAX; if (avail == 0) /* full */ Because negative values are handled using two's complement, and KVM computes the result as an unsigned value, the above will get a false positive if "first < last" and the ring is half-full. The above might have worked as expected in python for example: >>> (-86) % 170 84 However it doesn't work the same way in C. printf("avail: %d\n", (-86) % 170); printf("avail: %u\n", (-86) % 170); printf("avail: %u\n", (-86u) % 170u); Using gcc-11 these print: avail: -86 avail: 4294967210 avail: 0 For illustration purposes, given a 4-bit integer and a ring size of 0xA (unsigned), 0xA == 0x1010 == -6, and thus (-6u % 0xA) == 0. Fix the calculation and allow all but one entries in the buffer to be used as originally intended. Note, KVM's behavior is self-healing to some extent, as KVM will allow the entire buffer to be used if ring->first is beyond the halfway point. In other words, in the unlikely scenario that a use case benefits from being able to coalesce more than 86 entries at once, KVM will still provide such behavior, sometimes. Note #2, the % operator in C is not the modulo operator but the remainder operator. Modulo and remainder operators differ with respect to negative values. But, the relevant values in KVM are all unsigned, so it's a moot point in this case anyway. Note #3, this is almost a pure revert of the buggy commit, plus a READ_ONCE() to provide additional safety. Thue buggy commit justified the change with "it paves the way for making this function lockless", but it's not at all clear what was intended, nor is there any evidence that the buggy code was somehow safer. (a) the fields in question were already accessed locklessly, from the perspective that they could be modified by userspace at any time, and (b) the lock guarding the ring itself was changed, but never dropped, i.e. whatever lockless scheme (SRCU?) was planned never landed. Fixes: 105f8d40a737 ("KVM: Calculate available entries in coalesced mmio ring") Signed-off-by: Ilias Stamatis Reviewed-by: Paul Durrant Link: https://lore.kernel.org/r/20240718193543.624039-2-ilstam@amazon.com [sean: rework changelog to clarify behavior, call out weirdness of buggy commit] Signed-off-by: Sean Christopherson --- virt/kvm/coalesced_mmio.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c index 1b90acb6e3fe..184c5c40c9c1 100644 --- a/virt/kvm/coalesced_mmio.c +++ b/virt/kvm/coalesced_mmio.c @@ -43,7 +43,6 @@ static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev, static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev, u32 last) { struct kvm_coalesced_mmio_ring *ring; - unsigned avail; /* Are we able to batch it ? */ @@ -52,8 +51,7 @@ static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev, u32 last) * there is always one unused entry in the buffer */ ring = dev->kvm->coalesced_mmio_ring; - avail = (ring->first - last - 1) % KVM_COALESCED_MMIO_MAX; - if (avail == 0) { + if ((last + 1) % KVM_COALESCED_MMIO_MAX == READ_ONCE(ring->first)) { /* full */ return 0; } From 9a948c0c8e741776ee6d938b49d34d22034fbb7d Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Wed, 14 Aug 2024 11:34:15 +0800 Subject: [PATCH 082/655] ceph: Remove unused declarations These functions is never implemented and used. Signed-off-by: Yue Haibing Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.h | 3 --- fs/ceph/super.h | 2 -- include/linux/ceph/osd_client.h | 2 -- 3 files changed, 7 deletions(-) diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 9bcc7f181bfe..585ab5a6d87d 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -559,9 +559,6 @@ extern struct ceph_mds_session * ceph_get_mds_session(struct ceph_mds_session *s); extern void ceph_put_mds_session(struct ceph_mds_session *s); -extern int ceph_send_msg_mds(struct ceph_mds_client *mdsc, - struct ceph_msg *msg, int mds); - extern int ceph_mdsc_init(struct ceph_fs_client *fsc); extern void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc); extern void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 6e817bf1337c..c88bf53f68e9 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -1056,8 +1056,6 @@ extern int ceph_fill_trace(struct super_block *sb, extern int ceph_readdir_prepopulate(struct ceph_mds_request *req, struct ceph_mds_session *session); -extern int ceph_inode_holds_cap(struct inode *inode, int mask); - extern bool ceph_inode_set_size(struct inode *inode, loff_t size); extern void __ceph_do_pending_vmtruncate(struct inode *inode); diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index f66f6aac74f6..d7941478158c 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -449,8 +449,6 @@ extern int ceph_osdc_init(struct ceph_osd_client *osdc, extern void ceph_osdc_stop(struct ceph_osd_client *osdc); extern void ceph_osdc_reopen_osds(struct ceph_osd_client *osdc); -extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc, - struct ceph_msg *msg); extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg); void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb); From 2015716adbd9cac8505ed461f5f330e832be0c84 Mon Sep 17 00:00:00 2001 From: Chen Yufan Date: Thu, 22 Aug 2024 17:55:41 +0800 Subject: [PATCH 083/655] ceph: Convert to use jiffies macro Use time_after_eq macro instead of using jiffies directly to handle wraparound. [ xiubli: adjust the header files order ] Signed-off-by: Chen Yufan Reviewed-by: Xiubo Li Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 808c9c048276..6561a6cd94de 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "super.h" #include "mds_client.h" @@ -4659,7 +4660,7 @@ unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc) * slowness doesn't block mdsc delayed work, * preventing send_renew_caps() from running. */ - if (jiffies - loop_start >= 5 * HZ) + if (time_after_eq(jiffies, loop_start + 5 * HZ)) break; } spin_unlock(&mdsc->cap_delay_lock); From ede0b1d30b82829d6bc7924be18c7ae09cb1eb33 Mon Sep 17 00:00:00 2001 From: Li Zetao Date: Thu, 22 Aug 2024 21:39:04 +0800 Subject: [PATCH 084/655] libceph: use min() to simplify code in ceph_dns_resolve_name() When resolving name in ceph_dns_resolve_name(), the end address of name is determined by the minimum value of delim_p and colon_p. So using min() here is more in line with the context. Signed-off-by: Li Zetao Reviewed-by: Simon Horman Signed-off-by: Ilya Dryomov --- net/ceph/messenger.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 3c8b78d9c4d1..d1b5705dc0c6 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1254,7 +1254,7 @@ static int ceph_dns_resolve_name(const char *name, size_t namelen, colon_p = memchr(name, ':', namelen); if (delim_p && colon_p) - end = delim_p < colon_p ? delim_p : colon_p; + end = min(delim_p, colon_p); else if (!delim_p && colon_p) end = colon_p; else { From 21b91d40575fb64f3f280f6c3af586e32a704a92 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Thu, 8 Aug 2024 22:05:54 +0800 Subject: [PATCH 085/655] efi: Remove unused declaration efi_initialize_iomem_resources() Since commit cf8e8658100d ("arch: Remove Itanium (IA-64) architecture"), this is not used anymore. Signed-off-by: Yue Haibing Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/efi.h b/include/linux/efi.h index 6bf3c4fe8511..e28d88066033 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -764,8 +764,6 @@ extern int efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md); extern int __efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md); extern void efi_mem_reserve(phys_addr_t addr, u64 size); extern int efi_mem_reserve_persistent(phys_addr_t addr, u64 size); -extern void efi_initialize_iomem_resources(struct resource *code_resource, - struct resource *data_resource, struct resource *bss_resource); extern u64 efi_get_fdt_params(struct efi_memory_map_data *data); extern struct kobject *efi_kobj; From d7171eb494353e03f3cde1a6f665e19c243c98e8 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 22 Aug 2024 19:24:22 -0500 Subject: [PATCH 086/655] efi/cper: Print correctable AER information Currently, cper_print_pcie() only logs Uncorrectable Error Status, Mask and Severity registers along with the TLP header. If a correctable error is received immediately preceding or following an Uncorrectable Fatal Error, its information is lost since Correctable Error Status and Mask registers are not logged. As such, to avoid skipping any possible error information, Correctable Error Status and Mask registers should also be logged. Additionally, ensure that AER information is also available through cper_print_pcie() for Correctable and Uncorrectable Non-Fatal Errors. Signed-off-by: Yazen Ghannam Tested-by: Avadhut Naik Signed-off-by: Avadhut Naik Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/cper.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index 7d2cdd9e2227..b69e68ef3f02 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -434,12 +434,17 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie, "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n", pfx, pcie->bridge.secondary_status, pcie->bridge.control); - /* Fatal errors call __ghes_panic() before AER handler prints this */ - if ((pcie->validation_bits & CPER_PCIE_VALID_AER_INFO) && - (gdata->error_severity & CPER_SEV_FATAL)) { + /* + * Print all valid AER info. Record may be from BERT (boot-time) or GHES (run-time). + * + * Fatal errors call __ghes_panic() before AER handler prints this. + */ + if (pcie->validation_bits & CPER_PCIE_VALID_AER_INFO) { struct aer_capability_regs *aer; aer = (struct aer_capability_regs *)pcie->aer_info; + printk("%saer_cor_status: 0x%08x, aer_cor_mask: 0x%08x\n", + pfx, aer->cor_status, aer->cor_mask); printk("%saer_uncor_status: 0x%08x, aer_uncor_mask: 0x%08x\n", pfx, aer->uncor_status, aer->uncor_mask); printk("%saer_uncor_severity: 0x%08x\n", From 947697c6f0f75f9866f2f891a102dece7a09a064 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 22 Aug 2024 10:18:52 +0530 Subject: [PATCH 087/655] uapi: Define GENMASK_U128 This adds GENMASK_U128() and __GENMASK_U128() macros using __BITS_PER_U128 and __int128 data types. These macros will be used in providing support for generating 128 bit masks. The macros wouldn't work in all assembler flavors for reasons described in the comments on top of declarations. Enforce it for more by adding !__ASSEMBLY__ guard. Cc: Yury Norov Cc: Rasmus Villemoes Cc: Arnd Bergmann > Cc: linux-kernel@vger.kernel.org Cc: linux-arch@vger.kernel.org Reviewed-by: Arnd Bergmann Signed-off-by: Anshuman Khandual Signed-off-by: Yury Norov --- include/linux/bits.h | 15 +++++++++++++++ include/uapi/linux/bits.h | 3 +++ include/uapi/linux/const.h | 17 +++++++++++++++++ 3 files changed, 35 insertions(+) diff --git a/include/linux/bits.h b/include/linux/bits.h index 0eb24d21aac2..60044b608817 100644 --- a/include/linux/bits.h +++ b/include/linux/bits.h @@ -36,4 +36,19 @@ #define GENMASK_ULL(h, l) \ (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) +#if !defined(__ASSEMBLY__) +/* + * Missing asm support + * + * __GENMASK_U128() depends on _BIT128() which would not work + * in the asm code, as it shifts an 'unsigned __init128' data + * type instead of direct representation of 128 bit constants + * such as long and unsigned long. The fundamental problem is + * that a 128 bit constant will get silently truncated by the + * gcc compiler. + */ +#define GENMASK_U128(h, l) \ + (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U128(h, l)) +#endif + #endif /* __LINUX_BITS_H */ diff --git a/include/uapi/linux/bits.h b/include/uapi/linux/bits.h index 3c2a101986a3..5ee30f882736 100644 --- a/include/uapi/linux/bits.h +++ b/include/uapi/linux/bits.h @@ -12,4 +12,7 @@ (((~_ULL(0)) - (_ULL(1) << (l)) + 1) & \ (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) +#define __GENMASK_U128(h, l) \ + ((_BIT128((h)) << 1) - (_BIT128(l))) + #endif /* _UAPI_LINUX_BITS_H */ diff --git a/include/uapi/linux/const.h b/include/uapi/linux/const.h index a429381e7ca5..e16be0d37746 100644 --- a/include/uapi/linux/const.h +++ b/include/uapi/linux/const.h @@ -28,6 +28,23 @@ #define _BITUL(x) (_UL(1) << (x)) #define _BITULL(x) (_ULL(1) << (x)) +#if !defined(__ASSEMBLY__) +/* + * Missing asm support + * + * __BIT128() would not work in the asm code, as it shifts an + * 'unsigned __init128' data type as direct representation of + * 128 bit constants is not supported in the gcc compiler, as + * they get silently truncated. + * + * TODO: Please revisit this implementation when gcc compiler + * starts representing 128 bit constants directly like long + * and unsigned long etc. Subsequently drop the comment for + * GENMASK_U128() which would then start supporting asm code. + */ +#define _BIT128(x) ((unsigned __int128)(1) << (x)) +#endif + #define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (__typeof__(x))(a) - 1) #define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) From d7bcc37436c7d373a715d48a7cd74cd3b1724a68 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 22 Aug 2024 10:18:53 +0530 Subject: [PATCH 088/655] lib/test_bits.c: Add tests for GENMASK_U128() This adds GENMASK_U128() tests although currently only 64 bit wide masks are being tested. Cc: Andrew Morton Cc: linux-kernel@vger.kernel.org Reviewed-by: Arnd Bergmann Signed-off-by: Anshuman Khandual Signed-off-by: Yury Norov --- lib/test_bits.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/lib/test_bits.c b/lib/test_bits.c index 01313980f175..c7b38d91e1f1 100644 --- a/lib/test_bits.c +++ b/lib/test_bits.c @@ -39,6 +39,36 @@ static void genmask_ull_test(struct kunit *test) #endif } +static void genmask_u128_test(struct kunit *test) +{ +#ifdef CONFIG_ARCH_SUPPORTS_INT128 + /* Below 64 bit masks */ + KUNIT_EXPECT_EQ(test, 0x0000000000000001ull, GENMASK_U128(0, 0)); + KUNIT_EXPECT_EQ(test, 0x0000000000000003ull, GENMASK_U128(1, 0)); + KUNIT_EXPECT_EQ(test, 0x0000000000000006ull, GENMASK_U128(2, 1)); + KUNIT_EXPECT_EQ(test, 0x00000000ffffffffull, GENMASK_U128(31, 0)); + KUNIT_EXPECT_EQ(test, 0x000000ffffe00000ull, GENMASK_U128(39, 21)); + KUNIT_EXPECT_EQ(test, 0xffffffffffffffffull, GENMASK_U128(63, 0)); + + /* Above 64 bit masks - only 64 bit portion can be validated once */ + KUNIT_EXPECT_EQ(test, 0xffffffffffffffffull, GENMASK_U128(64, 0) >> 1); + KUNIT_EXPECT_EQ(test, 0x00000000ffffffffull, GENMASK_U128(81, 50) >> 50); + KUNIT_EXPECT_EQ(test, 0x0000000000ffffffull, GENMASK_U128(87, 64) >> 64); + KUNIT_EXPECT_EQ(test, 0x0000000000ff0000ull, GENMASK_U128(87, 80) >> 64); + + KUNIT_EXPECT_EQ(test, 0xffffffffffffffffull, GENMASK_U128(127, 0) >> 64); + KUNIT_EXPECT_EQ(test, 0xffffffffffffffffull, (u64)GENMASK_U128(127, 0)); + KUNIT_EXPECT_EQ(test, 0x0000000000000003ull, GENMASK_U128(127, 126) >> 126); + KUNIT_EXPECT_EQ(test, 0x0000000000000001ull, GENMASK_U128(127, 127) >> 127); +#ifdef TEST_GENMASK_FAILURES + /* these should fail compilation */ + GENMASK_U128(0, 1); + GENMASK_U128(0, 10); + GENMASK_U128(9, 10); +#endif /* TEST_GENMASK_FAILURES */ +#endif /* CONFIG_ARCH_SUPPORTS_INT128 */ +} + static void genmask_input_check_test(struct kunit *test) { unsigned int x, y; @@ -56,12 +86,16 @@ static void genmask_input_check_test(struct kunit *test) /* Valid input */ KUNIT_EXPECT_EQ(test, 0, GENMASK_INPUT_CHECK(1, 1)); KUNIT_EXPECT_EQ(test, 0, GENMASK_INPUT_CHECK(39, 21)); + KUNIT_EXPECT_EQ(test, 0, GENMASK_INPUT_CHECK(100, 80)); + KUNIT_EXPECT_EQ(test, 0, GENMASK_INPUT_CHECK(110, 65)); + KUNIT_EXPECT_EQ(test, 0, GENMASK_INPUT_CHECK(127, 0)); } static struct kunit_case bits_test_cases[] = { KUNIT_CASE(genmask_test), KUNIT_CASE(genmask_ull_test), + KUNIT_CASE(genmask_u128_test), KUNIT_CASE(genmask_input_check_test), {} }; From 907fa79d787f6f762c0afb67b8b02be8ed1f7bd2 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Wed, 21 Aug 2024 10:40:21 -0700 Subject: [PATCH 089/655] MAINTAINERS: scale modules with more reviewers We're looking to add Rust module support, and I don't speak Rust yet. The compromise was reached that in order to scale we'd get volunteers committed from the Rust community willing to review both Rust and C code for modules so we can ensure we get proper reviews for both parts of the code and so that we can scale. Add those who have stepped up to help. Acked-by: Sami Tolvanen Acked-by: Petr Pavlu Acked-by: Daniel Gomez Signed-off-by: Luis Chamberlain --- MAINTAINERS | 3 +++ 1 file changed, 3 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index f328373463b0..7e2cf251427d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15454,6 +15454,9 @@ F: include/dt-bindings/clock/mobileye,eyeq5-clk.h MODULE SUPPORT M: Luis Chamberlain +R: Petr Pavlu +R: Sami Tolvanen +R: Daniel Gomez L: linux-modules@vger.kernel.org L: linux-kernel@vger.kernel.org S: Maintained From 71bf395a276f0578d19e0ae137a7d1d816d08e0e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 19 Jul 2024 16:50:58 -0700 Subject: [PATCH 090/655] KVM: x86: Enforce x2APIC's must-be-zero reserved ICR bits Inject a #GP on a WRMSR(ICR) that attempts to set any reserved bits that are must-be-zero on both Intel and AMD, i.e. any reserved bits other than the BUSY bit, which Intel ignores and basically says is undefined. KVM's xapic_state_test selftest has been fudging the bug since commit 4b88b1a518b3 ("KVM: selftests: Enhance handling WRMSR ICR register in x2APIC mode"), which essentially removed the testcase instead of fixing the bug. WARN if the nodecode path triggers a #GP, as the CPU is supposed to check reserved bits for ICR when it's partially virtualized. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240719235107.3023592-2-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/lapic.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index d77cd3b87e85..c51c9bf6bd54 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2470,7 +2470,7 @@ void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset) * maybe-unecessary write, and both are in the noise anyways. */ if (apic_x2apic_mode(apic) && offset == APIC_ICR) - kvm_x2apic_icr_write(apic, kvm_lapic_get_reg64(apic, APIC_ICR)); + WARN_ON_ONCE(kvm_x2apic_icr_write(apic, kvm_lapic_get_reg64(apic, APIC_ICR))); else kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset)); } @@ -3194,8 +3194,21 @@ int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr) return 0; } +#define X2APIC_ICR_RESERVED_BITS (GENMASK_ULL(31, 20) | GENMASK_ULL(17, 16) | BIT(13)) + int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data) { + if (data & X2APIC_ICR_RESERVED_BITS) + return 1; + + /* + * The BUSY bit is reserved on both Intel and AMD in x2APIC mode, but + * only AMD requires it to be zero, Intel essentially just ignores the + * bit. And if IPI virtualization (Intel) or x2AVIC (AMD) is enabled, + * the CPU performs the reserved bits checks, i.e. the underlying CPU + * behavior will "win". Arbitrarily clear the BUSY bit, as there is no + * sane way to provide consistent behavior with respect to hardware. + */ data &= ~APIC_ICR_BUSY; kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32)); From d33234342f8b468e719e05649fd26549fb37ef8a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 19 Jul 2024 16:50:59 -0700 Subject: [PATCH 091/655] KVM: x86: Move x2APIC ICR helper above kvm_apic_write_nodecode() Hoist kvm_x2apic_icr_write() above kvm_apic_write_nodecode() so that a local helper to _read_ the x2APIC ICR can be added and used in the nodecode path without needing a forward declaration. No functional change intended. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240719235107.3023592-3-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/lapic.c | 46 ++++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index c51c9bf6bd54..63be07d7c782 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2453,6 +2453,29 @@ void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); +#define X2APIC_ICR_RESERVED_BITS (GENMASK_ULL(31, 20) | GENMASK_ULL(17, 16) | BIT(13)) + +int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data) +{ + if (data & X2APIC_ICR_RESERVED_BITS) + return 1; + + /* + * The BUSY bit is reserved on both Intel and AMD in x2APIC mode, but + * only AMD requires it to be zero, Intel essentially just ignores the + * bit. And if IPI virtualization (Intel) or x2AVIC (AMD) is enabled, + * the CPU performs the reserved bits checks, i.e. the underlying CPU + * behavior will "win". Arbitrarily clear the BUSY bit, as there is no + * sane way to provide consistent behavior with respect to hardware. + */ + data &= ~APIC_ICR_BUSY; + + kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32)); + kvm_lapic_set_reg64(apic, APIC_ICR, data); + trace_kvm_apic_write(APIC_ICR, data); + return 0; +} + /* emulate APIC access in a trap manner */ void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset) { @@ -3194,29 +3217,6 @@ int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr) return 0; } -#define X2APIC_ICR_RESERVED_BITS (GENMASK_ULL(31, 20) | GENMASK_ULL(17, 16) | BIT(13)) - -int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data) -{ - if (data & X2APIC_ICR_RESERVED_BITS) - return 1; - - /* - * The BUSY bit is reserved on both Intel and AMD in x2APIC mode, but - * only AMD requires it to be zero, Intel essentially just ignores the - * bit. And if IPI virtualization (Intel) or x2AVIC (AMD) is enabled, - * the CPU performs the reserved bits checks, i.e. the underlying CPU - * behavior will "win". Arbitrarily clear the BUSY bit, as there is no - * sane way to provide consistent behavior with respect to hardware. - */ - data &= ~APIC_ICR_BUSY; - - kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32)); - kvm_lapic_set_reg64(apic, APIC_ICR, data); - trace_kvm_apic_write(APIC_ICR, data); - return 0; -} - static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data) { u32 low; From 73b42dc69be8564d4951a14d00f827929fe5ef79 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 19 Jul 2024 16:51:00 -0700 Subject: [PATCH 092/655] KVM: x86: Re-split x2APIC ICR into ICR+ICR2 for AMD (x2AVIC) Re-introduce the "split" x2APIC ICR storage that KVM used prior to Intel's IPI virtualization support, but only for AMD. While not stated anywhere in the APM, despite stating the ICR is a single 64-bit register, AMD CPUs store the 64-bit ICR as two separate 32-bit values in ICR and ICR2. When IPI virtualization (IPIv on Intel, all AVIC flavors on AMD) is enabled, KVM needs to match CPU behavior as some ICR ICR writes will be handled by the CPU, not by KVM. Add a kvm_x86_ops knob to control the underlying format used by the CPU to store the x2APIC ICR, and tune it to AMD vs. Intel regardless of whether or not x2AVIC is enabled. If KVM is handling all ICR writes, the storage format for x2APIC mode doesn't matter, and having the behavior follow AMD versus Intel will provide better test coverage and ease debugging. Fixes: 4d1d7942e36a ("KVM: SVM: Introduce logic to (de)activate x2AVIC mode") Cc: stable@vger.kernel.org Cc: Maxim Levitsky Cc: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20240719235107.3023592-4-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/lapic.c | 42 +++++++++++++++++++++++---------- arch/x86/kvm/svm/svm.c | 2 ++ arch/x86/kvm/vmx/main.c | 2 ++ 4 files changed, 36 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 95396e4cb3da..f9dfb2d62053 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1727,6 +1727,8 @@ struct kvm_x86_ops { void (*enable_nmi_window)(struct kvm_vcpu *vcpu); void (*enable_irq_window)(struct kvm_vcpu *vcpu); void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); + + const bool x2apic_icr_is_split; const unsigned long required_apicv_inhibits; bool allow_apicv_in_x2apic_without_x2apic_virtualization; void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 63be07d7c782..c7180cb5f464 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2471,11 +2471,25 @@ int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data) data &= ~APIC_ICR_BUSY; kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32)); - kvm_lapic_set_reg64(apic, APIC_ICR, data); + if (kvm_x86_ops.x2apic_icr_is_split) { + kvm_lapic_set_reg(apic, APIC_ICR, data); + kvm_lapic_set_reg(apic, APIC_ICR2, data >> 32); + } else { + kvm_lapic_set_reg64(apic, APIC_ICR, data); + } trace_kvm_apic_write(APIC_ICR, data); return 0; } +static u64 kvm_x2apic_icr_read(struct kvm_lapic *apic) +{ + if (kvm_x86_ops.x2apic_icr_is_split) + return (u64)kvm_lapic_get_reg(apic, APIC_ICR) | + (u64)kvm_lapic_get_reg(apic, APIC_ICR2) << 32; + + return kvm_lapic_get_reg64(apic, APIC_ICR); +} + /* emulate APIC access in a trap manner */ void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset) { @@ -2493,7 +2507,7 @@ void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset) * maybe-unecessary write, and both are in the noise anyways. */ if (apic_x2apic_mode(apic) && offset == APIC_ICR) - WARN_ON_ONCE(kvm_x2apic_icr_write(apic, kvm_lapic_get_reg64(apic, APIC_ICR))); + WARN_ON_ONCE(kvm_x2apic_icr_write(apic, kvm_x2apic_icr_read(apic))); else kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset)); } @@ -3013,18 +3027,22 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, /* * In x2APIC mode, the LDR is fixed and based on the id. And - * ICR is internally a single 64-bit register, but needs to be - * split to ICR+ICR2 in userspace for backwards compatibility. + * if the ICR is _not_ split, ICR is internally a single 64-bit + * register, but needs to be split to ICR+ICR2 in userspace for + * backwards compatibility. */ - if (set) { + if (set) *ldr = kvm_apic_calc_x2apic_ldr(x2apic_id); - icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) | - (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32; - __kvm_lapic_set_reg64(s->regs, APIC_ICR, icr); - } else { - icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR); - __kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32); + if (!kvm_x86_ops.x2apic_icr_is_split) { + if (set) { + icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) | + (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32; + __kvm_lapic_set_reg64(s->regs, APIC_ICR, icr); + } else { + icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR); + __kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32); + } } } @@ -3222,7 +3240,7 @@ static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data) u32 low; if (reg == APIC_ICR) { - *data = kvm_lapic_get_reg64(apic, APIC_ICR); + *data = kvm_x2apic_icr_read(apic); return 0; } diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index d8cfe8f23327..eb3de01602b9 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -5053,6 +5053,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .enable_nmi_window = svm_enable_nmi_window, .enable_irq_window = svm_enable_irq_window, .update_cr8_intercept = svm_update_cr8_intercept, + + .x2apic_icr_is_split = true, .set_virtual_apic_mode = avic_refresh_virtual_apic_mode, .refresh_apicv_exec_ctrl = avic_refresh_apicv_exec_ctrl, .apicv_post_state_restore = avic_apicv_post_state_restore, diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c index 4f6023a0deb3..0a094ebad4b1 100644 --- a/arch/x86/kvm/vmx/main.c +++ b/arch/x86/kvm/vmx/main.c @@ -89,6 +89,8 @@ struct kvm_x86_ops vt_x86_ops __initdata = { .enable_nmi_window = vmx_enable_nmi_window, .enable_irq_window = vmx_enable_irq_window, .update_cr8_intercept = vmx_update_cr8_intercept, + + .x2apic_icr_is_split = false, .set_virtual_apic_mode = vmx_set_virtual_apic_mode, .set_apic_access_page_addr = vmx_set_apic_access_page_addr, .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl, From d1c2cdca5a08f422b791670c11f9d4e3ed0a5518 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 19 Jul 2024 16:51:01 -0700 Subject: [PATCH 093/655] KVM: selftests: Open code vcpu_run() equivalent in guest_printf test Open code a version of vcpu_run() in the guest_printf test in anticipation of adding UCALL_ABORT handling to _vcpu_run(). The guest_printf test intentionally generates asserts to verify the output, and thus needs to bypass common assert handling. Open code a helper in the guest_printf test, as it's not expected that any other test would want to skip _only_ the UCALL_ABORT handling. Link: https://lore.kernel.org/r/20240719235107.3023592-5-seanjc@google.com Signed-off-by: Sean Christopherson --- .../testing/selftests/kvm/guest_print_test.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/guest_print_test.c b/tools/testing/selftests/kvm/guest_print_test.c index 8092c2d0f5d6..bcf582852db9 100644 --- a/tools/testing/selftests/kvm/guest_print_test.c +++ b/tools/testing/selftests/kvm/guest_print_test.c @@ -107,6 +107,21 @@ static void ucall_abort(const char *assert_msg, const char *expected_assert_msg) expected_assert_msg, &assert_msg[offset]); } +/* + * Open code vcpu_run(), sans the UCALL_ABORT handling, so that intentional + * guest asserts guest can be verified instead of being reported as failures. + */ +static void do_vcpu_run(struct kvm_vcpu *vcpu) +{ + int r; + + do { + r = __vcpu_run(vcpu); + } while (r == -1 && errno == EINTR); + + TEST_ASSERT(!r, KVM_IOCTL_ERROR(KVM_RUN, r)); +} + static void run_test(struct kvm_vcpu *vcpu, const char *expected_printf, const char *expected_assert) { @@ -114,7 +129,7 @@ static void run_test(struct kvm_vcpu *vcpu, const char *expected_printf, struct ucall uc; while (1) { - vcpu_run(vcpu); + do_vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON, "Unexpected exit reason: %u (%s),", @@ -159,7 +174,7 @@ static void test_limits(void) vm = vm_create_with_one_vcpu(&vcpu, guest_code_limits); run = vcpu->run; - vcpu_run(vcpu); + do_vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON, "Unexpected exit reason: %u (%s),", From ed24ba6c2c3450c96dcd804f81893d11e52463cc Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 19 Jul 2024 16:51:02 -0700 Subject: [PATCH 094/655] KVM: selftests: Report unhandled exceptions on x86 as regular guest asserts Now that selftests support printf() in the guest, report unexpected exceptions via the regular assertion framework. Exceptions were special cased purely to provide a better error message. Convert only x86 for now, as it's low-hanging fruit (already formats the assertion in the guest), and converting x86 will allow adding asserts in x86 library code without needing to update multiple tests. Once all other architectures are converted, this will allow moving the reporting to common code, which will in turn allow adding asserts in common library code, and will also allow removing UCALL_UNHANDLED. Link: https://lore.kernel.org/r/20240719235107.3023592-6-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/lib/x86_64/processor.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 153739f2e201..814a604c0891 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -566,10 +566,8 @@ void route_exception(struct ex_regs *regs) if (kvm_fixup_exception(regs)) return; - ucall_assert(UCALL_UNHANDLED, - "Unhandled exception in guest", __FILE__, __LINE__, - "Unhandled exception '0x%lx' at guest RIP '0x%lx'", - regs->vector, regs->rip); + GUEST_FAIL("Unhandled exception '0x%lx' at guest RIP '0x%lx'", + regs->vector, regs->rip); } static void vm_init_descriptor_tables(struct kvm_vm *vm) @@ -611,7 +609,7 @@ void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) { struct ucall uc; - if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED) + if (get_ucall(vcpu, &uc) == UCALL_ABORT) REPORT_GUEST_ASSERT(uc); } From f2e91e874179a27d4c29b3f31706b37e1e6bcf54 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 19 Jul 2024 16:51:03 -0700 Subject: [PATCH 095/655] KVM: selftests: Add x86 helpers to play nice with x2APIC MSR #GPs Add helpers to allow and expect #GP on x2APIC MSRs, and opportunistically have the existing helper spit out a more useful error message if an unexpected exception occurs. Link: https://lore.kernel.org/r/20240719235107.3023592-7-seanjc@google.com Signed-off-by: Sean Christopherson --- .../selftests/kvm/include/x86_64/apic.h | 23 +++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/apic.h b/tools/testing/selftests/kvm/include/x86_64/apic.h index 0f268b55fa06..51990094effd 100644 --- a/tools/testing/selftests/kvm/include/x86_64/apic.h +++ b/tools/testing/selftests/kvm/include/x86_64/apic.h @@ -11,6 +11,7 @@ #include #include "processor.h" +#include "ucall_common.h" #define APIC_DEFAULT_GPA 0xfee00000ULL @@ -93,9 +94,27 @@ static inline uint64_t x2apic_read_reg(unsigned int reg) return rdmsr(APIC_BASE_MSR + (reg >> 4)); } -static inline void x2apic_write_reg(unsigned int reg, uint64_t value) +static inline uint8_t x2apic_write_reg_safe(unsigned int reg, uint64_t value) { - wrmsr(APIC_BASE_MSR + (reg >> 4), value); + return wrmsr_safe(APIC_BASE_MSR + (reg >> 4), value); } +static inline void x2apic_write_reg(unsigned int reg, uint64_t value) +{ + uint8_t fault = x2apic_write_reg_safe(reg, value); + + __GUEST_ASSERT(!fault, "Unexpected fault 0x%x on WRMSR(%x) = %lx\n", + fault, APIC_BASE_MSR + (reg >> 4), value); +} + +static inline void x2apic_write_reg_fault(unsigned int reg, uint64_t value) +{ + uint8_t fault = x2apic_write_reg_safe(reg, value); + + __GUEST_ASSERT(fault == GP_VECTOR, + "Wanted #GP on WRMSR(%x) = %lx, got 0x%x\n", + APIC_BASE_MSR + (reg >> 4), value, fault); +} + + #endif /* SELFTEST_KVM_APIC_H */ From faf06a238254cec0c8c9bc0876caede63fcfeb24 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 19 Jul 2024 16:51:04 -0700 Subject: [PATCH 096/655] KVM: selftests: Skip ICR.BUSY test in xapic_state_test if x2APIC is enabled Don't test the ICR BUSY bit when x2APIC is enabled as AMD and Intel have different behavior (AMD #GPs, Intel ignores), and the fact that the CPU performs the reserved bit checks when IPI virtualization is enabled makes it impossible for KVM to precisely emulate one or the other. Link: https://lore.kernel.org/r/20240719235107.3023592-8-seanjc@google.com Signed-off-by: Sean Christopherson --- .../selftests/kvm/x86_64/xapic_state_test.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c index 618cd2442390..d5a7adaa9502 100644 --- a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c +++ b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c @@ -70,12 +70,10 @@ static void ____test_icr(struct xapic_vcpu *x, uint64_t val) vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic); icr = (u64)(*((u32 *)&xapic.regs[APIC_ICR])) | (u64)(*((u32 *)&xapic.regs[APIC_ICR2])) << 32; - if (!x->is_x2apic) { + if (!x->is_x2apic) val &= (-1u | (0xffull << (32 + 24))); - TEST_ASSERT_EQ(icr, val & ~APIC_ICR_BUSY); - } else { - TEST_ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY); - } + + TEST_ASSERT_EQ(icr, val & ~APIC_ICR_BUSY); } #define X2APIC_RSVED_BITS_MASK (GENMASK_ULL(31,20) | \ @@ -91,7 +89,15 @@ static void __test_icr(struct xapic_vcpu *x, uint64_t val) */ val &= ~X2APIC_RSVED_BITS_MASK; } - ____test_icr(x, val | APIC_ICR_BUSY); + + /* + * The BUSY bit is reserved on both AMD and Intel, but only AMD treats + * it is as _must_ be zero. Intel simply ignores the bit. Don't test + * the BUSY bit for x2APIC, as there is no single correct behavior. + */ + if (!x->is_x2apic) + ____test_icr(x, val | APIC_ICR_BUSY); + ____test_icr(x, val & ~(u64)APIC_ICR_BUSY); } From 3426cb48adb4dc00b75e89c95d257d699f4d75ce Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 19 Jul 2024 16:51:05 -0700 Subject: [PATCH 097/655] KVM: selftests: Test x2APIC ICR reserved bits Actually test x2APIC ICR reserved bits instead of deliberately skipping them. The behavior that is observed when IPI virtualization is enabled is the architecturally correct behavior, KVM is the one who was wrong, i.e. KVM was missing reserved bit checks. Fixes: 4b88b1a518b3 ("KVM: selftests: Enhance handling WRMSR ICR register in x2APIC mode") Link: https://lore.kernel.org/r/20240719235107.3023592-9-seanjc@google.com Signed-off-by: Sean Christopherson --- .../selftests/kvm/x86_64/xapic_state_test.c | 23 ++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c index d5a7adaa9502..a17b75fb2506 100644 --- a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c +++ b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c @@ -31,6 +31,10 @@ static void xapic_guest_code(void) } } +#define X2APIC_RSVD_BITS_MASK (GENMASK_ULL(31, 20) | \ + GENMASK_ULL(17, 16) | \ + GENMASK_ULL(13, 13)) + static void x2apic_guest_code(void) { asm volatile("cli"); @@ -41,7 +45,10 @@ static void x2apic_guest_code(void) uint64_t val = x2apic_read_reg(APIC_IRR) | x2apic_read_reg(APIC_IRR + 0x10) << 32; - x2apic_write_reg(APIC_ICR, val); + if (val & X2APIC_RSVD_BITS_MASK) + x2apic_write_reg_fault(APIC_ICR, val); + else + x2apic_write_reg(APIC_ICR, val); GUEST_SYNC(val); } while (1); } @@ -72,24 +79,14 @@ static void ____test_icr(struct xapic_vcpu *x, uint64_t val) (u64)(*((u32 *)&xapic.regs[APIC_ICR2])) << 32; if (!x->is_x2apic) val &= (-1u | (0xffull << (32 + 24))); + else if (val & X2APIC_RSVD_BITS_MASK) + return; TEST_ASSERT_EQ(icr, val & ~APIC_ICR_BUSY); } -#define X2APIC_RSVED_BITS_MASK (GENMASK_ULL(31,20) | \ - GENMASK_ULL(17,16) | \ - GENMASK_ULL(13,13)) - static void __test_icr(struct xapic_vcpu *x, uint64_t val) { - if (x->is_x2apic) { - /* Hardware writing vICR register requires reserved bits 31:20, - * 17:16 and 13 kept as zero to avoid #GP exception. Data value - * written to vICR should mask out those bits above. - */ - val &= ~X2APIC_RSVED_BITS_MASK; - } - /* * The BUSY bit is reserved on both AMD and Intel, but only AMD treats * it is as _must_ be zero. Intel simply ignores the bit. Don't test From 0cb26ec320851f685280ff061f84855d0e97bf86 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 19 Jul 2024 16:51:06 -0700 Subject: [PATCH 098/655] KVM: selftests: Verify the guest can read back the x2APIC ICR it wrote Now that the BUSY bit mess is gone (for x2APIC), verify that the *guest* can read back the ICR value that it wrote. Due to the divergent behavior between AMD and Intel with respect to the backing storage of the ICR in the vAPIC page, emulating a seemingly simple MSR write is quite complex. Link: https://lore.kernel.org/r/20240719235107.3023592-10-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/x86_64/xapic_state_test.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c index a17b75fb2506..dbd4f23ce92e 100644 --- a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c +++ b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c @@ -45,10 +45,12 @@ static void x2apic_guest_code(void) uint64_t val = x2apic_read_reg(APIC_IRR) | x2apic_read_reg(APIC_IRR + 0x10) << 32; - if (val & X2APIC_RSVD_BITS_MASK) + if (val & X2APIC_RSVD_BITS_MASK) { x2apic_write_reg_fault(APIC_ICR, val); - else + } else { x2apic_write_reg(APIC_ICR, val); + GUEST_ASSERT_EQ(x2apic_read_reg(APIC_ICR), val); + } GUEST_SYNC(val); } while (1); } From 5a7c7d148e488f43cf9c8e64fa5e1bd715ae0485 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 19 Jul 2024 16:51:07 -0700 Subject: [PATCH 099/655] KVM: selftests: Play nice with AMD's AVIC errata When AVIC, and thus IPI virtualization on AMD, is enabled, the CPU will virtualize ICR writes. Unfortunately, the CPU doesn't do a very good job, as it fails to clear the BUSY bit and also allows writing ICR2[23:0], despite them being "RESERVED MBZ". Account for the quirky behavior in the xapic_state test to avoid failures in a configuration that likely has no hope of ever being enabled in production. Link: https://lore.kernel.org/r/20240719235107.3023592-11-seanjc@google.com Signed-off-by: Sean Christopherson --- .../selftests/kvm/x86_64/xapic_state_test.c | 23 +++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c index dbd4f23ce92e..88bcca188799 100644 --- a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c +++ b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c @@ -13,6 +13,7 @@ struct xapic_vcpu { struct kvm_vcpu *vcpu; bool is_x2apic; + bool has_xavic_errata; }; static void xapic_guest_code(void) @@ -79,12 +80,17 @@ static void ____test_icr(struct xapic_vcpu *x, uint64_t val) vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic); icr = (u64)(*((u32 *)&xapic.regs[APIC_ICR])) | (u64)(*((u32 *)&xapic.regs[APIC_ICR2])) << 32; - if (!x->is_x2apic) - val &= (-1u | (0xffull << (32 + 24))); - else if (val & X2APIC_RSVD_BITS_MASK) + if (!x->is_x2apic) { + if (!x->has_xavic_errata) + val &= (-1u | (0xffull << (32 + 24))); + } else if (val & X2APIC_RSVD_BITS_MASK) { return; + } - TEST_ASSERT_EQ(icr, val & ~APIC_ICR_BUSY); + if (x->has_xavic_errata) + TEST_ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY); + else + TEST_ASSERT_EQ(icr, val & ~APIC_ICR_BUSY); } static void __test_icr(struct xapic_vcpu *x, uint64_t val) @@ -236,6 +242,15 @@ int main(int argc, char *argv[]) vm = vm_create_with_one_vcpu(&x.vcpu, xapic_guest_code); x.is_x2apic = false; + /* + * AMD's AVIC implementation is buggy (fails to clear the ICR BUSY bit), + * and also diverges from KVM with respect to ICR2[23:0] (KVM and Intel + * drops writes, AMD does not). Account for the errata when checking + * that KVM reads back what was written. + */ + x.has_xavic_errata = host_cpu_is_amd && + get_kvm_amd_param_bool("avic"); + vcpu_clear_cpuid_feature(x.vcpu, X86_FEATURE_X2APIC); virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); From ce3b90bd0a165c0473e462b9182e30e0659f99cf Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Aug 2024 13:08:53 -0700 Subject: [PATCH 100/655] KVM: selftests: Remove unused kvm_memcmp_hva_gva() Remove sefltests' kvm_memcmp_hva_gva(), which has literally never had a single user since it was introduced by commit 783e9e51266eb ("kvm: selftests: add API testing infrastructure"). Link: https://lore.kernel.org/r/20240802200853.336512-1-seanjc@google.com Signed-off-by: Sean Christopherson --- .../testing/selftests/kvm/include/kvm_util.h | 2 - tools/testing/selftests/kvm/lib/kvm_util.c | 70 ------------------- 2 files changed, 72 deletions(-) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 63c2aaae51f3..acd2db809e83 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -428,8 +428,6 @@ const char *vm_guest_mode_string(uint32_t i); void kvm_vm_free(struct kvm_vm *vmp); void kvm_vm_restart(struct kvm_vm *vmp); void kvm_vm_release(struct kvm_vm *vmp); -int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva, - size_t len); void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename); int kvm_memfd_alloc(size_t size, bool hugepages); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 56b170b725b3..f7b7185dff10 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -794,76 +794,6 @@ int kvm_memfd_alloc(size_t size, bool hugepages) return fd; } -/* - * Memory Compare, host virtual to guest virtual - * - * Input Args: - * hva - Starting host virtual address - * vm - Virtual Machine - * gva - Starting guest virtual address - * len - number of bytes to compare - * - * Output Args: None - * - * Input/Output Args: None - * - * Return: - * Returns 0 if the bytes starting at hva for a length of len - * are equal the guest virtual bytes starting at gva. Returns - * a value < 0, if bytes at hva are less than those at gva. - * Otherwise a value > 0 is returned. - * - * Compares the bytes starting at the host virtual address hva, for - * a length of len, to the guest bytes starting at the guest virtual - * address given by gva. - */ -int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len) -{ - size_t amt; - - /* - * Compare a batch of bytes until either a match is found - * or all the bytes have been compared. - */ - for (uintptr_t offset = 0; offset < len; offset += amt) { - uintptr_t ptr1 = (uintptr_t)hva + offset; - - /* - * Determine host address for guest virtual address - * at offset. - */ - uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset); - - /* - * Determine amount to compare on this pass. - * Don't allow the comparsion to cross a page boundary. - */ - amt = len - offset; - if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift)) - amt = vm->page_size - (ptr1 % vm->page_size); - if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift)) - amt = vm->page_size - (ptr2 % vm->page_size); - - assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift)); - assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift)); - - /* - * Perform the comparison. If there is a difference - * return that result to the caller, otherwise need - * to continue on looking for a mismatch. - */ - int ret = memcmp((void *)ptr1, (void *)ptr2, amt); - if (ret != 0) - return ret; - } - - /* - * No mismatch found. Let the caller know the two memory - * areas are equal. - */ - return 0; -} - static void vm_userspace_mem_region_gpa_insert(struct rb_root *gpa_tree, struct userspace_mem_region *region) { From c0d1a39d1d20e5e770bad72bbe1e9d4fa1367e28 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Aug 2024 13:14:29 -0700 Subject: [PATCH 101/655] KVM: selftests: Always unlink memory regions when deleting (VM free) Unlink memory regions when freeing a VM, even though it's not strictly necessary since all tracking structures are freed soon after. The time spent deleting entries is negligible, and not unlinking entries is confusing, e.g. it's easy to overlook that the tree structures are freed by the caller. Link: https://lore.kernel.org/r/20240802201429.338412-1-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/lib/kvm_util.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index f7b7185dff10..a2b7df5f1d39 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -712,16 +712,13 @@ void kvm_vm_release(struct kvm_vm *vmp) } static void __vm_mem_region_delete(struct kvm_vm *vm, - struct userspace_mem_region *region, - bool unlink) + struct userspace_mem_region *region) { int ret; - if (unlink) { - rb_erase(®ion->gpa_node, &vm->regions.gpa_tree); - rb_erase(®ion->hva_node, &vm->regions.hva_tree); - hash_del(®ion->slot_node); - } + rb_erase(®ion->gpa_node, &vm->regions.gpa_tree); + rb_erase(®ion->hva_node, &vm->regions.hva_tree); + hash_del(®ion->slot_node); region->region.memory_size = 0; vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, ®ion->region); @@ -762,7 +759,7 @@ void kvm_vm_free(struct kvm_vm *vmp) /* Free userspace_mem_regions. */ hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node) - __vm_mem_region_delete(vmp, region, false); + __vm_mem_region_delete(vmp, region); /* Free sparsebit arrays. */ sparsebit_free(&vmp->vpages_valid); @@ -1200,7 +1197,7 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa) */ void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot) { - __vm_mem_region_delete(vm, memslot2region(vm, slot), true); + __vm_mem_region_delete(vm, memslot2region(vm, slot)); } void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t base, uint64_t size, From 174b6e4a25ea80c2432cedd8e2760e152a6d7f82 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Aug 2024 13:38:58 -0700 Subject: [PATCH 102/655] KVM: x86/mmu: Decrease indentation in logic to sync new indirect shadow page Combine the back-to-back if-statements for synchronizing children when linking a new indirect shadow page in order to decrease the indentation, and to make it easier to "see" the logic in its entirety. No functional change intended. Link: https://lore.kernel.org/r/20240802203900.348808-2-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/mmu/paging_tmpl.h | 40 ++++++++++++++++------------------ 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 69941cebb3a8..0e97e080a997 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -674,27 +674,25 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, sp = kvm_mmu_get_child_sp(vcpu, it.sptep, table_gfn, false, access); - if (sp != ERR_PTR(-EEXIST)) { - /* - * We must synchronize the pagetable before linking it - * because the guest doesn't need to flush tlb when - * the gpte is changed from non-present to present. - * Otherwise, the guest may use the wrong mapping. - * - * For PG_LEVEL_4K, kvm_mmu_get_page() has already - * synchronized it transiently via kvm_sync_page(). - * - * For higher level pagetable, we synchronize it via - * the slower mmu_sync_children(). If it needs to - * break, some progress has been made; return - * RET_PF_RETRY and retry on the next #PF. - * KVM_REQ_MMU_SYNC is not necessary but it - * expedites the process. - */ - if (sp->unsync_children && - mmu_sync_children(vcpu, sp, false)) - return RET_PF_RETRY; - } + /* + * Synchronize the new page before linking it, as the CPU (KVM) + * is architecturally disallowed from inserting non-present + * entries into the TLB, i.e. the guest isn't required to flush + * the TLB when changing the gPTE from non-present to present. + * + * For PG_LEVEL_4K, kvm_mmu_find_shadow_page() has already + * synchronized the page via kvm_sync_page(). + * + * For higher level pages, which cannot be unsync themselves + * but can have unsync children, synchronize via the slower + * mmu_sync_children(). If KVM needs to drop mmu_lock due to + * contention or to reschedule, instruct the caller to retry + * the #PF (mmu_sync_children() ensures forward progress will + * be made). + */ + if (sp != ERR_PTR(-EEXIST) && sp->unsync_children && + mmu_sync_children(vcpu, sp, false)) + return RET_PF_RETRY; /* * Verify that the gpte in the page we've just write From 7d67b03e6fff3934913a38674f269f55964bdd65 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Aug 2024 13:38:59 -0700 Subject: [PATCH 103/655] KVM: x86/mmu: Drop pointless "return" wrapper label in FNAME(fetch) Drop the pointless and poorly named "out_gpte_changed" label, in FNAME(fetch), and instead return RET_PF_RETRY directly. No functional change intended. Link: https://lore.kernel.org/r/20240802203900.348808-3-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/mmu/paging_tmpl.h | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 0e97e080a997..480c54122991 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -646,10 +646,10 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, * really care if it changes underneath us after this point). */ if (FNAME(gpte_changed)(vcpu, gw, top_level)) - goto out_gpte_changed; + return RET_PF_RETRY; if (WARN_ON_ONCE(!VALID_PAGE(vcpu->arch.mmu->root.hpa))) - goto out_gpte_changed; + return RET_PF_RETRY; /* * Load a new root and retry the faulting instruction in the extremely @@ -659,7 +659,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, */ if (unlikely(kvm_mmu_is_dummy_root(vcpu->arch.mmu->root.hpa))) { kvm_make_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu); - goto out_gpte_changed; + return RET_PF_RETRY; } for_each_shadow_entry(vcpu, fault->addr, it) { @@ -699,7 +699,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, * protected is still there. */ if (FNAME(gpte_changed)(vcpu, gw, it.level - 1)) - goto out_gpte_changed; + return RET_PF_RETRY; if (sp != ERR_PTR(-EEXIST)) link_shadow_page(vcpu, it.sptep, sp); @@ -753,9 +753,6 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, FNAME(pte_prefetch)(vcpu, gw, it.sptep); return ret; - -out_gpte_changed: - return RET_PF_RETRY; } /* From 1dc9cc1c4c230fbc6bf6322f150d4b81f712cfb9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Aug 2024 13:39:00 -0700 Subject: [PATCH 104/655] KVM: x86/mmu: Reword a misleading comment about checking gpte_changed() Rewrite the comment in FNAME(fetch) to explain why KVM needs to check that the gPTE is still fresh before continuing the shadow page walk, even if KVM already has a linked shadow page for the gPTE in question. No functional change intended. Link: https://lore.kernel.org/r/20240802203900.348808-4-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/mmu/paging_tmpl.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 480c54122991..405bd7ceee2a 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -695,8 +695,14 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, return RET_PF_RETRY; /* - * Verify that the gpte in the page we've just write - * protected is still there. + * Verify that the gpte in the page, which is now either + * write-protected or unsync, wasn't modified between the fault + * and acquiring mmu_lock. This needs to be done even when + * reusing an existing shadow page to ensure the information + * gathered by the walker matches the information stored in the + * shadow page (which could have been modified by a different + * vCPU even if the page was already linked). Holding mmu_lock + * prevents the shadow page from changing after this point. */ if (FNAME(gpte_changed)(vcpu, gw, it.level - 1)) return RET_PF_RETRY; From 48547fe75ea7d5bf1ff9425a0a5d4d32b3a77777 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Aug 2024 13:45:09 -0700 Subject: [PATCH 105/655] KVM: SVM: Add a helper to convert a SME-aware PA back to a struct page Add __sme_pa_to_page() to pair with __sme_page_pa() and use it to replace open coded equivalents, including for "iopm_base", which previously avoided having to do __sme_clr() by storing the raw PA in the global variable. Opportunistically convert __sme_page_pa() to a helper to provide type safety. No functional change intended. Link: https://lore.kernel.org/r/20240802204511.352017-2-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/svm.c | 9 ++++----- arch/x86/kvm/svm/svm.h | 16 +++++++++++++++- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index d6f252555ab3..dd1cfee3e38f 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1124,8 +1124,7 @@ static void svm_hardware_unsetup(void) for_each_possible_cpu(cpu) svm_cpu_uninit(cpu); - __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), - get_order(IOPM_SIZE)); + __free_pages(__sme_pa_to_page(iopm_base), get_order(IOPM_SIZE)); iopm_base = 0; } @@ -1301,7 +1300,7 @@ static void init_vmcb(struct kvm_vcpu *vcpu) if (!kvm_hlt_in_guest(vcpu->kvm)) svm_set_intercept(svm, INTERCEPT_HLT); - control->iopm_base_pa = __sme_set(iopm_base); + control->iopm_base_pa = iopm_base; control->msrpm_base_pa = __sme_set(__pa(svm->msrpm)); control->int_ctl = V_INTR_MASKING_MASK; @@ -1503,7 +1502,7 @@ static void svm_vcpu_free(struct kvm_vcpu *vcpu) sev_free_vcpu(vcpu); - __free_page(pfn_to_page(__sme_clr(svm->vmcb01.pa) >> PAGE_SHIFT)); + __free_page(__sme_pa_to_page(svm->vmcb01.pa)); __free_pages(virt_to_page(svm->msrpm), get_order(MSRPM_SIZE)); } @@ -5251,7 +5250,7 @@ static __init int svm_hardware_setup(void) iopm_va = page_address(iopm_pages); memset(iopm_va, 0xff, PAGE_SIZE * (1 << order)); - iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT; + iopm_base = __sme_page_pa(iopm_pages); init_msrpm_offsets(); diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 76107c7d0595..2b095acdb97f 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -25,7 +25,21 @@ #include "cpuid.h" #include "kvm_cache_regs.h" -#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT) +/* + * Helpers to convert to/from physical addresses for pages whose address is + * consumed directly by hardware. Even though it's a physical address, SVM + * often restricts the address to the natural width, hence 'unsigned long' + * instead of 'hpa_t'. + */ +static inline unsigned long __sme_page_pa(struct page *page) +{ + return __sme_set(page_to_pfn(page) << PAGE_SHIFT); +} + +static inline struct page *__sme_pa_to_page(unsigned long pa) +{ + return pfn_to_page(__sme_clr(pa) >> PAGE_SHIFT); +} #define IOPM_SIZE PAGE_SIZE * 3 #define MSRPM_SIZE PAGE_SIZE * 2 From 1b5ef14dc656a25280d56795b73cf90dad64ad44 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Aug 2024 13:45:10 -0700 Subject: [PATCH 106/655] KVM: SVM: Add host SEV-ES save area structure into VMCB via a union Incorporate the _host_ SEV-ES save area into the VMCB as a union with the legacy save area. The SEV-ES variant used to save/load host state is larger than the legacy save area, but resides at the same offset. Prefix the field with "host" to make it as obvious as possible that the SEV-ES variant in the VMCB is only ever used for host state. Guest state for SEV-ES VMs is stored in a completely separate page (VMSA), albeit with the same layout as the host state. Add a compile-time assert to ensure the VMCB layout is correct, i.e. that KVM's layout matches the architectural definitions. No functional change intended. Link: https://lore.kernel.org/r/20240802204511.352017-3-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/include/asm/svm.h | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index f0dea3750ca9..2b59b9951c90 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -516,6 +516,20 @@ struct ghcb { u32 ghcb_usage; } __packed; +struct vmcb { + struct vmcb_control_area control; + union { + struct vmcb_save_area save; + + /* + * For SEV-ES VMs, the save area in the VMCB is used only to + * save/load host state. Guest state resides in a separate + * page, the aptly named VM Save Area (VMSA), that is encrypted + * with the guest's private key. + */ + struct sev_es_save_area host_sev_es_save; + }; +} __packed; #define EXPECTED_VMCB_SAVE_AREA_SIZE 744 #define EXPECTED_GHCB_SAVE_AREA_SIZE 1032 @@ -532,6 +546,7 @@ static inline void __unused_size_checks(void) BUILD_BUG_ON(sizeof(struct ghcb_save_area) != EXPECTED_GHCB_SAVE_AREA_SIZE); BUILD_BUG_ON(sizeof(struct sev_es_save_area) != EXPECTED_SEV_ES_SAVE_AREA_SIZE); BUILD_BUG_ON(sizeof(struct vmcb_control_area) != EXPECTED_VMCB_CONTROL_AREA_SIZE); + BUILD_BUG_ON(offsetof(struct vmcb, save) != EXPECTED_VMCB_CONTROL_AREA_SIZE); BUILD_BUG_ON(sizeof(struct ghcb) != EXPECTED_GHCB_SIZE); /* Check offsets of reserved fields */ @@ -568,11 +583,6 @@ static inline void __unused_size_checks(void) BUILD_BUG_RESERVED_OFFSET(ghcb, 0xff0); } -struct vmcb { - struct vmcb_control_area control; - struct vmcb_save_area save; -} __packed; - #define SVM_CPUID_FUNC 0x8000000a #define SVM_SELECTOR_S_SHIFT 4 From 32071fa355e73495e509a28511a81b4baab51ff6 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Aug 2024 13:45:11 -0700 Subject: [PATCH 107/655] KVM: SVM: Track the per-CPU host save area as a VMCB pointer The host save area is a VMCB, track it as such to help readers follow along, but mostly to cleanup/simplify the retrieval of the SEV-ES host save area. Note, the compile-time assertion that offsetof(struct vmcb, save) == EXPECTED_VMCB_CONTROL_AREA_SIZE ensures that the SEV-ES save area is indeed at offset 0x400 (whoever added the expected/architectural VMCB offsets apparently likes decimal). No functional change intended. Link: https://lore.kernel.org/r/20240802204511.352017-4-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/svm.c | 15 ++++++++------- arch/x86/kvm/svm/svm.h | 2 +- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index dd1cfee3e38f..cac9e36960ca 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -573,7 +573,7 @@ static void __svm_write_tsc_multiplier(u64 multiplier) static __always_inline struct sev_es_save_area *sev_es_host_save_area(struct svm_cpu_data *sd) { - return page_address(sd->save_area) + 0x400; + return &sd->save_area->host_sev_es_save; } static inline void kvm_cpu_svm_disable(void) @@ -696,7 +696,7 @@ static void svm_cpu_uninit(int cpu) return; kfree(sd->sev_vmcbs); - __free_page(sd->save_area); + __free_page(__sme_pa_to_page(sd->save_area_pa)); sd->save_area_pa = 0; sd->save_area = NULL; } @@ -704,23 +704,24 @@ static void svm_cpu_uninit(int cpu) static int svm_cpu_init(int cpu) { struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu); + struct page *save_area_page; int ret = -ENOMEM; memset(sd, 0, sizeof(struct svm_cpu_data)); - sd->save_area = snp_safe_alloc_page_node(cpu_to_node(cpu), GFP_KERNEL); - if (!sd->save_area) + save_area_page = snp_safe_alloc_page_node(cpu_to_node(cpu), GFP_KERNEL); + if (!save_area_page) return ret; ret = sev_cpu_init(sd); if (ret) goto free_save_area; - sd->save_area_pa = __sme_page_pa(sd->save_area); + sd->save_area = page_address(save_area_page); + sd->save_area_pa = __sme_page_pa(save_area_page); return 0; free_save_area: - __free_page(sd->save_area); - sd->save_area = NULL; + __free_page(save_area_page); return ret; } diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 2b095acdb97f..43fa6a16eb19 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -335,7 +335,7 @@ struct svm_cpu_data { u32 next_asid; u32 min_asid; - struct page *save_area; + struct vmcb *save_area; unsigned long save_area_pa; struct vmcb *current_vmcb; From 2f6fcfa1f4264c1f035ddd092ebd046499f7cbea Mon Sep 17 00:00:00 2001 From: Peter Gonda Date: Tue, 9 Jul 2024 11:29:36 -0700 Subject: [PATCH 108/655] KVM: selftests: Add SEV-ES shutdown test Regression test for ae20eef5 ("KVM: SVM: Update SEV-ES shutdown intercepts with more metadata"). Test confirms userspace is correctly indicated of a guest shutdown not previous behavior of an EINVAL from KVM_RUN. Cc: Paolo Bonzini Cc: Sean Christopherson Cc: Alper Gun Cc: Tom Lendacky Cc: Michael Roth Cc: kvm@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Signed-off-by: Peter Gonda Tested-by: Pratik R. Sampat Link: https://lore.kernel.org/r/20240709182936.146487-1-pgonda@google.com [sean: clobber IDT to ensure #UD leads to SHUTDOWN] Signed-off-by: Sean Christopherson --- .../selftests/kvm/x86_64/sev_smoke_test.c | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c b/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c index 7c70c0da4fb7..2e9197eb1652 100644 --- a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c +++ b/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c @@ -160,6 +160,36 @@ static void test_sev(void *guest_code, uint64_t policy) kvm_vm_free(vm); } +static void guest_shutdown_code(void) +{ + struct desc_ptr idt; + + /* Clobber the IDT so that #UD is guaranteed to trigger SHUTDOWN. */ + memset(&idt, 0, sizeof(idt)); + __asm__ __volatile__("lidt %0" :: "m"(idt)); + + __asm__ __volatile__("ud2"); +} + +static void test_sev_es_shutdown(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + uint32_t type = KVM_X86_SEV_ES_VM; + + vm = vm_sev_create_with_one_vcpu(type, guest_shutdown_code, &vcpu); + + vm_sev_launch(vm, SEV_POLICY_ES, NULL); + + vcpu_run(vcpu); + TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SHUTDOWN, + "Wanted SHUTDOWN, got %s", + exit_reason_str(vcpu->run->exit_reason)); + + kvm_vm_free(vm); +} + int main(int argc, char *argv[]) { TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV)); @@ -171,6 +201,8 @@ int main(int argc, char *argv[]) test_sev(guest_sev_es_code, SEV_POLICY_ES | SEV_POLICY_NO_DBG); test_sev(guest_sev_es_code, SEV_POLICY_ES); + test_sev_es_shutdown(); + if (kvm_has_cap(KVM_CAP_XCRS) && (xgetbv(0) & XFEATURE_MASK_X87_AVX) == XFEATURE_MASK_X87_AVX) { test_sync_vmsa(0); From 215b3cb7a84f8d97b81fe8536cec05a11496da51 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 28 Aug 2024 11:14:45 -0700 Subject: [PATCH 109/655] KVM: selftests: Add a test for coalesced MMIO (and PIO on x86) Add a test to verify that KVM correctly exits (or not) when a vCPU's coalesced I/O ring is full (or isn't). Iterate over all legal starting points in the ring (with an empty ring), and verify that KVM doesn't exit until the ring is full. Opportunistically verify that KVM exits immediately on non-coalesced I/O, either because the MMIO/PIO region was never registered, or because a previous region was unregistered. This is a regression test for a KVM bug where KVM would prematurely exit due to bad math resulting in a false positive if the first entry in the ring was before the halfway mark. See commit 92f6d4130497 ("KVM: Fix coalesced_mmio_has_room() to avoid premature userspace exit"). Enable the test for x86, arm64, and risc-v, i.e. all architectures except s390, which doesn't have MMIO. On x86, which has both MMIO and PIO, interleave MMIO and PIO into the same ring, as KVM shouldn't exit until a non-coalesced I/O is encountered, regardless of whether the ring is filled with MMIO, PIO, or both. Lastly, wrap the coalesced I/O ring in a structure to prepare for a potential future where KVM supports multiple ring buffers beyond KVM's "default" built-in buffer. Link: https://lore.kernel.org/all/20240820133333.1724191-1-ilstam@amazon.com Cc: Ilias Stamatis Cc: Marc Zyngier Cc: Oliver Upton Cc: Anup Patel Link: https://lore.kernel.org/r/20240828181446.652474-2-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/Makefile | 3 + .../testing/selftests/kvm/coalesced_io_test.c | 236 ++++++++++++++++++ .../testing/selftests/kvm/include/kvm_util.h | 26 ++ 3 files changed, 265 insertions(+) create mode 100644 tools/testing/selftests/kvm/coalesced_io_test.c diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 48d32c5aa3eb..45cb70c048bb 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -130,6 +130,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/max_vcpuid_cap_test TEST_GEN_PROGS_x86_64 += x86_64/triple_fault_event_test TEST_GEN_PROGS_x86_64 += x86_64/recalc_apic_map_test TEST_GEN_PROGS_x86_64 += access_tracking_perf_test +TEST_GEN_PROGS_x86_64 += coalesced_io_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test TEST_GEN_PROGS_x86_64 += dirty_log_perf_test @@ -165,6 +166,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/vgic_lpi_stress TEST_GEN_PROGS_aarch64 += aarch64/vpmu_counter_access TEST_GEN_PROGS_aarch64 += access_tracking_perf_test TEST_GEN_PROGS_aarch64 += arch_timer +TEST_GEN_PROGS_aarch64 += coalesced_io_test TEST_GEN_PROGS_aarch64 += demand_paging_test TEST_GEN_PROGS_aarch64 += dirty_log_test TEST_GEN_PROGS_aarch64 += dirty_log_perf_test @@ -198,6 +200,7 @@ TEST_GEN_PROGS_s390x += kvm_binary_stats_test TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test TEST_GEN_PROGS_riscv += riscv/ebreak_test TEST_GEN_PROGS_riscv += arch_timer +TEST_GEN_PROGS_riscv += coalesced_io_test TEST_GEN_PROGS_riscv += demand_paging_test TEST_GEN_PROGS_riscv += dirty_log_test TEST_GEN_PROGS_riscv += get-reg-list diff --git a/tools/testing/selftests/kvm/coalesced_io_test.c b/tools/testing/selftests/kvm/coalesced_io_test.c new file mode 100644 index 000000000000..60cb25454899 --- /dev/null +++ b/tools/testing/selftests/kvm/coalesced_io_test.c @@ -0,0 +1,236 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include + +#include + +#include +#include + +#include "ucall_common.h" + +struct kvm_coalesced_io { + struct kvm_coalesced_mmio_ring *ring; + uint32_t ring_size; + uint64_t mmio_gpa; + uint64_t *mmio; + + /* + * x86-only, but define pio_port for all architectures to minimize the + * amount of #ifdeffery and complexity, without having to sacrifice + * verbose error messages. + */ + uint8_t pio_port; +}; + +static struct kvm_coalesced_io kvm_builtin_io_ring; + +#ifdef __x86_64__ +static const int has_pio = 1; +#else +static const int has_pio = 0; +#endif + +static void guest_code(struct kvm_coalesced_io *io) +{ + int i, j; + + for (;;) { + for (j = 0; j < 1 + has_pio; j++) { + /* + * KVM always leaves one free entry, i.e. exits to + * userspace before the last entry is filled. + */ + for (i = 0; i < io->ring_size - 1; i++) { +#ifdef __x86_64__ + if (i & 1) + outl(io->pio_port, io->pio_port + i); + else +#endif + WRITE_ONCE(*io->mmio, io->mmio_gpa + i); + } +#ifdef __x86_64__ + if (j & 1) + outl(io->pio_port, io->pio_port + i); + else +#endif + WRITE_ONCE(*io->mmio, io->mmio_gpa + i); + } + GUEST_SYNC(0); + + WRITE_ONCE(*io->mmio, io->mmio_gpa + i); +#ifdef __x86_64__ + outl(io->pio_port, io->pio_port + i); +#endif + } +} + +static void vcpu_run_and_verify_io_exit(struct kvm_vcpu *vcpu, + struct kvm_coalesced_io *io, + uint32_t ring_start, + uint32_t expected_exit) +{ + const bool want_pio = expected_exit == KVM_EXIT_IO; + struct kvm_coalesced_mmio_ring *ring = io->ring; + struct kvm_run *run = vcpu->run; + uint32_t pio_value; + + WRITE_ONCE(ring->first, ring_start); + WRITE_ONCE(ring->last, ring_start); + + vcpu_run(vcpu); + + /* + * Annoyingly, reading PIO data is safe only for PIO exits, otherwise + * data_offset is garbage, e.g. an MMIO gpa. + */ + if (run->exit_reason == KVM_EXIT_IO) + pio_value = *(uint32_t *)((void *)run + run->io.data_offset); + else + pio_value = 0; + + TEST_ASSERT((!want_pio && (run->exit_reason == KVM_EXIT_MMIO && run->mmio.is_write && + run->mmio.phys_addr == io->mmio_gpa && run->mmio.len == 8 && + *(uint64_t *)run->mmio.data == io->mmio_gpa + io->ring_size - 1)) || + (want_pio && (run->exit_reason == KVM_EXIT_IO && run->io.port == io->pio_port && + run->io.direction == KVM_EXIT_IO_OUT && run->io.count == 1 && + pio_value == io->pio_port + io->ring_size - 1)), + "For start = %u, expected exit on %u-byte %s write 0x%llx = %lx, got exit_reason = %u (%s)\n " + "(MMIO addr = 0x%llx, write = %u, len = %u, data = %lx)\n " + "(PIO port = 0x%x, write = %u, len = %u, count = %u, data = %x", + ring_start, want_pio ? 4 : 8, want_pio ? "PIO" : "MMIO", + want_pio ? (unsigned long long)io->pio_port : io->mmio_gpa, + (want_pio ? io->pio_port : io->mmio_gpa) + io->ring_size - 1, run->exit_reason, + run->exit_reason == KVM_EXIT_MMIO ? "MMIO" : run->exit_reason == KVM_EXIT_IO ? "PIO" : "other", + run->mmio.phys_addr, run->mmio.is_write, run->mmio.len, *(uint64_t *)run->mmio.data, + run->io.port, run->io.direction, run->io.size, run->io.count, pio_value); +} + +static void vcpu_run_and_verify_coalesced_io(struct kvm_vcpu *vcpu, + struct kvm_coalesced_io *io, + uint32_t ring_start, + uint32_t expected_exit) +{ + struct kvm_coalesced_mmio_ring *ring = io->ring; + int i; + + vcpu_run_and_verify_io_exit(vcpu, io, ring_start, expected_exit); + + TEST_ASSERT((ring->last + 1) % io->ring_size == ring->first, + "Expected ring to be full (minus 1), first = %u, last = %u, max = %u, start = %u", + ring->first, ring->last, io->ring_size, ring_start); + + for (i = 0; i < io->ring_size - 1; i++) { + uint32_t idx = (ring->first + i) % io->ring_size; + struct kvm_coalesced_mmio *entry = &ring->coalesced_mmio[idx]; + +#ifdef __x86_64__ + if (i & 1) + TEST_ASSERT(entry->phys_addr == io->pio_port && + entry->len == 4 && entry->pio && + *(uint32_t *)entry->data == io->pio_port + i, + "Wanted 4-byte port I/O 0x%x = 0x%x in entry %u, got %u-byte %s 0x%llx = 0x%x", + io->pio_port, io->pio_port + i, i, + entry->len, entry->pio ? "PIO" : "MMIO", + entry->phys_addr, *(uint32_t *)entry->data); + else +#endif + TEST_ASSERT(entry->phys_addr == io->mmio_gpa && + entry->len == 8 && !entry->pio, + "Wanted 8-byte MMIO to 0x%lx = %lx in entry %u, got %u-byte %s 0x%llx = 0x%lx", + io->mmio_gpa, io->mmio_gpa + i, i, + entry->len, entry->pio ? "PIO" : "MMIO", + entry->phys_addr, *(uint64_t *)entry->data); + } +} + +static void test_coalesced_io(struct kvm_vcpu *vcpu, + struct kvm_coalesced_io *io, uint32_t ring_start) +{ + struct kvm_coalesced_mmio_ring *ring = io->ring; + + kvm_vm_register_coalesced_io(vcpu->vm, io->mmio_gpa, 8, false /* pio */); +#ifdef __x86_64__ + kvm_vm_register_coalesced_io(vcpu->vm, io->pio_port, 8, true /* pio */); +#endif + + vcpu_run_and_verify_coalesced_io(vcpu, io, ring_start, KVM_EXIT_MMIO); +#ifdef __x86_64__ + vcpu_run_and_verify_coalesced_io(vcpu, io, ring_start, KVM_EXIT_IO); +#endif + + /* + * Verify ucall, which may use non-coalesced MMIO or PIO, generates an + * immediate exit. + */ + WRITE_ONCE(ring->first, ring_start); + WRITE_ONCE(ring->last, ring_start); + vcpu_run(vcpu); + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC); + TEST_ASSERT_EQ(ring->first, ring_start); + TEST_ASSERT_EQ(ring->last, ring_start); + + /* Verify that non-coalesced MMIO/PIO generates an exit to userspace. */ + kvm_vm_unregister_coalesced_io(vcpu->vm, io->mmio_gpa, 8, false /* pio */); + vcpu_run_and_verify_io_exit(vcpu, io, ring_start, KVM_EXIT_MMIO); + +#ifdef __x86_64__ + kvm_vm_unregister_coalesced_io(vcpu->vm, io->pio_port, 8, true /* pio */); + vcpu_run_and_verify_io_exit(vcpu, io, ring_start, KVM_EXIT_IO); +#endif +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int i; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_COALESCED_MMIO)); + +#ifdef __x86_64__ + TEST_REQUIRE(kvm_has_cap(KVM_CAP_COALESCED_PIO)); +#endif + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + kvm_builtin_io_ring = (struct kvm_coalesced_io) { + /* + * The I/O ring is a kernel-allocated page whose address is + * relative to each vCPU's run page, with the page offset + * provided by KVM in the return of KVM_CAP_COALESCED_MMIO. + */ + .ring = (void *)vcpu->run + + (kvm_check_cap(KVM_CAP_COALESCED_MMIO) * getpagesize()), + + /* + * The size of the I/O ring is fixed, but KVM defines the sized + * based on the kernel's PAGE_SIZE. Thus, userspace must query + * the host's page size at runtime to compute the ring size. + */ + .ring_size = (getpagesize() - sizeof(struct kvm_coalesced_mmio_ring)) / + sizeof(struct kvm_coalesced_mmio), + + /* + * Arbitrary address+port (MMIO mustn't overlap memslots), with + * the MMIO GPA identity mapped in the guest. + */ + .mmio_gpa = 4ull * SZ_1G, + .mmio = (uint64_t *)(4ull * SZ_1G), + .pio_port = 0x80, + }; + + virt_map(vm, (uint64_t)kvm_builtin_io_ring.mmio, kvm_builtin_io_ring.mmio_gpa, 1); + + sync_global_to_guest(vm, kvm_builtin_io_ring); + vcpu_args_set(vcpu, 1, &kvm_builtin_io_ring); + + for (i = 0; i < kvm_builtin_io_ring.ring_size; i++) + test_coalesced_io(vcpu, &kvm_builtin_io_ring, i); + + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 63c2aaae51f3..b297a84f7be5 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -460,6 +460,32 @@ static inline uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm) return __vm_ioctl(vm, KVM_RESET_DIRTY_RINGS, NULL); } +static inline void kvm_vm_register_coalesced_io(struct kvm_vm *vm, + uint64_t address, + uint64_t size, bool pio) +{ + struct kvm_coalesced_mmio_zone zone = { + .addr = address, + .size = size, + .pio = pio, + }; + + vm_ioctl(vm, KVM_REGISTER_COALESCED_MMIO, &zone); +} + +static inline void kvm_vm_unregister_coalesced_io(struct kvm_vm *vm, + uint64_t address, + uint64_t size, bool pio) +{ + struct kvm_coalesced_mmio_zone zone = { + .addr = address, + .size = size, + .pio = pio, + }; + + vm_ioctl(vm, KVM_UNREGISTER_COALESCED_MMIO, &zone); +} + static inline int vm_get_stats_fd(struct kvm_vm *vm) { int fd = __vm_ioctl(vm, KVM_GET_STATS_FD, NULL); From e027ba1b83ad017a56c108eea2f42eb9f8ae5204 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 28 Aug 2024 11:14:46 -0700 Subject: [PATCH 110/655] KVM: Clean up coalesced MMIO ring full check Fold coalesced_mmio_has_room() into its sole caller, coalesced_mmio_write(), as it's really just a single line of code, has a goofy return value, and is unnecessarily brittle. E.g. if coalesced_mmio_has_room() were to check ring->last directly, or the caller failed to use READ_ONCE(), KVM would be susceptible to TOCTOU attacks from userspace. Opportunistically add a comment explaining why on earth KVM leaves one entry free, which may not be obvious to readers that aren't familiar with ring buffers. No functional change intended. Reviewed-by: Ilias Stamatis Cc: Paul Durrant Link: https://lore.kernel.org/r/20240828181446.652474-3-seanjc@google.com Signed-off-by: Sean Christopherson --- virt/kvm/coalesced_mmio.c | 29 ++++++++--------------------- 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c index 184c5c40c9c1..375d6285475e 100644 --- a/virt/kvm/coalesced_mmio.c +++ b/virt/kvm/coalesced_mmio.c @@ -40,25 +40,6 @@ static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev, return 1; } -static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev, u32 last) -{ - struct kvm_coalesced_mmio_ring *ring; - - /* Are we able to batch it ? */ - - /* last is the first free entry - * check if we don't meet the first used entry - * there is always one unused entry in the buffer - */ - ring = dev->kvm->coalesced_mmio_ring; - if ((last + 1) % KVM_COALESCED_MMIO_MAX == READ_ONCE(ring->first)) { - /* full */ - return 0; - } - - return 1; -} - static int coalesced_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr, int len, const void *val) @@ -72,9 +53,15 @@ static int coalesced_mmio_write(struct kvm_vcpu *vcpu, spin_lock(&dev->kvm->ring_lock); + /* + * last is the index of the entry to fill. Verify userspace hasn't + * set last to be out of range, and that there is room in the ring. + * Leave one entry free in the ring so that userspace can differentiate + * between an empty ring and a full ring. + */ insert = READ_ONCE(ring->last); - if (!coalesced_mmio_has_room(dev, insert) || - insert >= KVM_COALESCED_MMIO_MAX) { + if (insert >= KVM_COALESCED_MMIO_MAX || + (insert + 1) % KVM_COALESCED_MMIO_MAX == READ_ONCE(ring->first)) { spin_unlock(&dev->kvm->ring_lock); return -EOPNOTSUPP; } From 9d15171f39f0996fcfaec1788d3522eb581af347 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 28 Aug 2024 14:58:00 -0700 Subject: [PATCH 111/655] KVM: selftests: Explicitly include committed one-off assets in .gitignore Add KVM selftests' one-off assets, e.g. the Makefile, to the .gitignore so that they are explicitly included. The justification for omitting the one-offs was that including them wouldn't help prevent mistakes: Deliberately do not include the one-off assets, e.g. config, settings, .gitignore itself, etc as Git doesn't ignore files that are already in the repository. Adding the one-off assets won't prevent mistakes where developers forget to --force add files that don't match the "allowed". Turns out that's not the case, as W=1 will generate warnings, and the amazing-as-always kernel test bot reports new warnings: tools/testing/selftests/kvm/.gitignore: warning: ignored by one of the .gitignore files tools/testing/selftests/kvm/Makefile: warning: ignored by one of the .gitignore files >> tools/testing/selftests/kvm/Makefile.kvm: warning: ignored by one of the .gitignore files tools/testing/selftests/kvm/config: warning: ignored by one of the .gitignore files tools/testing/selftests/kvm/settings: warning: ignored by one of the .gitignore files Fixes: 43e96957e8b8 ("KVM: selftests: Use pattern matching in .gitignore") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202408211818.85zIkDEK-lkp@intel.com Link: https://lore.kernel.org/r/20240828215800.737042-1-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/.gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 6d9381d60172..7f57abf936e7 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -5,3 +5,7 @@ !*.h !*.S !*.sh +!.gitignore +!config +!settings +!Makefile From 0dd45f2cd8ccf150c6fe7a528e9a5282026ed30c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Aug 2024 12:51:16 -0700 Subject: [PATCH 112/655] KVM: x86: Re-enter guest if WRMSR(X2APIC_ICR) fastpath is successful Re-enter the guest in the fastpath if WRMSR emulation for x2APIC's ICR is successful, as no additional work is needed, i.e. there is no code unique for WRMSR exits between the fastpath and the "!= EXIT_FASTPATH_NONE" check in __vmx_handle_exit(). Link: https://lore.kernel.org/r/20240802195120.325560-2-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 00e792725052..0c97dd5a0197 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2195,7 +2195,7 @@ fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu) data = kvm_read_edx_eax(vcpu); if (!handle_fastpath_set_x2apic_icr_irqoff(vcpu, data)) { kvm_skip_emulated_instruction(vcpu); - ret = EXIT_FASTPATH_EXIT_HANDLED; + ret = EXIT_FASTPATH_REENTER_GUEST; } break; case MSR_IA32_TSC_DEADLINE: From ea60229af7fbdcdd06d798f8340a7a9b40770c53 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Aug 2024 12:51:17 -0700 Subject: [PATCH 113/655] KVM: x86: Dedup fastpath MSR post-handling logic Now that the WRMSR fastpath for x2APIC_ICR and TSC_DEADLINE are identical, ignoring the backend MSR handling, consolidate the common bits of skipping the instruction and setting the return value. No functional change intended. Link: https://lore.kernel.org/r/20240802195120.325560-3-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0c97dd5a0197..d392e9097d63 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2186,31 +2186,32 @@ fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu) { u32 msr = kvm_rcx_read(vcpu); u64 data; - fastpath_t ret = EXIT_FASTPATH_NONE; + fastpath_t ret; + bool handled; kvm_vcpu_srcu_read_lock(vcpu); switch (msr) { case APIC_BASE_MSR + (APIC_ICR >> 4): data = kvm_read_edx_eax(vcpu); - if (!handle_fastpath_set_x2apic_icr_irqoff(vcpu, data)) { - kvm_skip_emulated_instruction(vcpu); - ret = EXIT_FASTPATH_REENTER_GUEST; - } + handled = !handle_fastpath_set_x2apic_icr_irqoff(vcpu, data); break; case MSR_IA32_TSC_DEADLINE: data = kvm_read_edx_eax(vcpu); - if (!handle_fastpath_set_tscdeadline(vcpu, data)) { - kvm_skip_emulated_instruction(vcpu); - ret = EXIT_FASTPATH_REENTER_GUEST; - } + handled = !handle_fastpath_set_tscdeadline(vcpu, data); break; default: + handled = false; break; } - if (ret != EXIT_FASTPATH_NONE) + if (handled) { + kvm_skip_emulated_instruction(vcpu); + ret = EXIT_FASTPATH_REENTER_GUEST; trace_kvm_msr_write(msr, data); + } else { + ret = EXIT_FASTPATH_NONE; + } kvm_vcpu_srcu_read_unlock(vcpu); From f7f39c50edb9d336274371953275e0d3503b9b75 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Aug 2024 12:51:18 -0700 Subject: [PATCH 114/655] KVM: x86: Exit to userspace if fastpath triggers one on instruction skip Exit to userspace if a fastpath handler triggers such an exit, which can happen when skipping the instruction, e.g. due to userspace single-stepping the guest via KVM_GUESTDBG_SINGLESTEP or because of an emulation failure. Fixes: 404d5d7bff0d ("KVM: X86: Introduce more exit_fastpath_completion enum values") Link: https://lore.kernel.org/r/20240802195120.325560-4-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/x86.c | 9 +++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f9dfb2d62053..430a0d369322 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -211,6 +211,7 @@ enum exit_fastpath_completion { EXIT_FASTPATH_NONE, EXIT_FASTPATH_REENTER_GUEST, EXIT_FASTPATH_EXIT_HANDLED, + EXIT_FASTPATH_EXIT_USERSPACE, }; typedef enum exit_fastpath_completion fastpath_t; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d392e9097d63..d6c81d59d487 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2206,8 +2206,10 @@ fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu) } if (handled) { - kvm_skip_emulated_instruction(vcpu); - ret = EXIT_FASTPATH_REENTER_GUEST; + if (!kvm_skip_emulated_instruction(vcpu)) + ret = EXIT_FASTPATH_EXIT_USERSPACE; + else + ret = EXIT_FASTPATH_REENTER_GUEST; trace_kvm_msr_write(msr, data); } else { ret = EXIT_FASTPATH_NONE; @@ -11196,6 +11198,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (vcpu->arch.apic_attention) kvm_lapic_sync_from_vapic(vcpu); + if (unlikely(exit_fastpath == EXIT_FASTPATH_EXIT_USERSPACE)) + return 0; + r = kvm_x86_call(handle_exit)(vcpu, exit_fastpath); return r; From 70cdd2385106a91675ee0ba58facde0254597416 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Aug 2024 12:51:19 -0700 Subject: [PATCH 115/655] KVM: x86: Reorganize code in x86.c to co-locate vCPU blocking/running helpers Shuffle code around in x86.c so that the various helpers related to vCPU blocking/running logic are (a) located near each other and (b) ordered so that HLT emulation can use kvm_vcpu_has_events() in a future path. No functional change intended. Link: https://lore.kernel.org/r/20240802195120.325560-5-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 264 ++++++++++++++++++++++----------------------- 1 file changed, 132 insertions(+), 132 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d6c81d59d487..c15eb8e7d3c3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9917,51 +9917,6 @@ void kvm_x86_vendor_exit(void) } EXPORT_SYMBOL_GPL(kvm_x86_vendor_exit); -static int __kvm_emulate_halt(struct kvm_vcpu *vcpu, int state, int reason) -{ - /* - * The vCPU has halted, e.g. executed HLT. Update the run state if the - * local APIC is in-kernel, the run loop will detect the non-runnable - * state and halt the vCPU. Exit to userspace if the local APIC is - * managed by userspace, in which case userspace is responsible for - * handling wake events. - */ - ++vcpu->stat.halt_exits; - if (lapic_in_kernel(vcpu)) { - vcpu->arch.mp_state = state; - return 1; - } else { - vcpu->run->exit_reason = reason; - return 0; - } -} - -int kvm_emulate_halt_noskip(struct kvm_vcpu *vcpu) -{ - return __kvm_emulate_halt(vcpu, KVM_MP_STATE_HALTED, KVM_EXIT_HLT); -} -EXPORT_SYMBOL_GPL(kvm_emulate_halt_noskip); - -int kvm_emulate_halt(struct kvm_vcpu *vcpu) -{ - int ret = kvm_skip_emulated_instruction(vcpu); - /* - * TODO: we might be squashing a GUESTDBG_SINGLESTEP-triggered - * KVM_EXIT_DEBUG here. - */ - return kvm_emulate_halt_noskip(vcpu) && ret; -} -EXPORT_SYMBOL_GPL(kvm_emulate_halt); - -int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu) -{ - int ret = kvm_skip_emulated_instruction(vcpu); - - return __kvm_emulate_halt(vcpu, KVM_MP_STATE_AP_RESET_HOLD, - KVM_EXIT_AP_RESET_HOLD) && ret; -} -EXPORT_SYMBOL_GPL(kvm_emulate_ap_reset_hold); - #ifdef CONFIG_X86_64 static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, unsigned long clock_type) @@ -11214,6 +11169,67 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) return r; } +static bool kvm_vcpu_running(struct kvm_vcpu *vcpu) +{ + return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && + !vcpu->arch.apf.halted); +} + +static bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) +{ + if (!list_empty_careful(&vcpu->async_pf.done)) + return true; + + if (kvm_apic_has_pending_init_or_sipi(vcpu) && + kvm_apic_init_sipi_allowed(vcpu)) + return true; + + if (vcpu->arch.pv.pv_unhalted) + return true; + + if (kvm_is_exception_pending(vcpu)) + return true; + + if (kvm_test_request(KVM_REQ_NMI, vcpu) || + (vcpu->arch.nmi_pending && + kvm_x86_call(nmi_allowed)(vcpu, false))) + return true; + +#ifdef CONFIG_KVM_SMM + if (kvm_test_request(KVM_REQ_SMI, vcpu) || + (vcpu->arch.smi_pending && + kvm_x86_call(smi_allowed)(vcpu, false))) + return true; +#endif + + if (kvm_test_request(KVM_REQ_PMI, vcpu)) + return true; + + if (kvm_test_request(KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, vcpu)) + return true; + + if (kvm_arch_interrupt_allowed(vcpu) && kvm_cpu_has_interrupt(vcpu)) + return true; + + if (kvm_hv_has_stimer_pending(vcpu)) + return true; + + if (is_guest_mode(vcpu) && + kvm_x86_ops.nested_ops->has_events && + kvm_x86_ops.nested_ops->has_events(vcpu, false)) + return true; + + if (kvm_xen_has_pending_events(vcpu)) + return true; + + return false; +} + +int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu); +} + /* Called within kvm->srcu read side. */ static inline int vcpu_block(struct kvm_vcpu *vcpu) { @@ -11285,12 +11301,6 @@ static inline int vcpu_block(struct kvm_vcpu *vcpu) return 1; } -static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu) -{ - return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && - !vcpu->arch.apf.halted); -} - /* Called within kvm->srcu read side. */ static int vcpu_run(struct kvm_vcpu *vcpu) { @@ -11342,6 +11352,77 @@ static int vcpu_run(struct kvm_vcpu *vcpu) return r; } +static int __kvm_emulate_halt(struct kvm_vcpu *vcpu, int state, int reason) +{ + /* + * The vCPU has halted, e.g. executed HLT. Update the run state if the + * local APIC is in-kernel, the run loop will detect the non-runnable + * state and halt the vCPU. Exit to userspace if the local APIC is + * managed by userspace, in which case userspace is responsible for + * handling wake events. + */ + ++vcpu->stat.halt_exits; + if (lapic_in_kernel(vcpu)) { + vcpu->arch.mp_state = state; + return 1; + } else { + vcpu->run->exit_reason = reason; + return 0; + } +} + +int kvm_emulate_halt_noskip(struct kvm_vcpu *vcpu) +{ + return __kvm_emulate_halt(vcpu, KVM_MP_STATE_HALTED, KVM_EXIT_HLT); +} +EXPORT_SYMBOL_GPL(kvm_emulate_halt_noskip); + +int kvm_emulate_halt(struct kvm_vcpu *vcpu) +{ + int ret = kvm_skip_emulated_instruction(vcpu); + /* + * TODO: we might be squashing a GUESTDBG_SINGLESTEP-triggered + * KVM_EXIT_DEBUG here. + */ + return kvm_emulate_halt_noskip(vcpu) && ret; +} +EXPORT_SYMBOL_GPL(kvm_emulate_halt); + +int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu) +{ + int ret = kvm_skip_emulated_instruction(vcpu); + + return __kvm_emulate_halt(vcpu, KVM_MP_STATE_AP_RESET_HOLD, + KVM_EXIT_AP_RESET_HOLD) && ret; +} +EXPORT_SYMBOL_GPL(kvm_emulate_ap_reset_hold); + +bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_apicv_active(vcpu) && + kvm_x86_call(dy_apicv_has_pending_interrupt)(vcpu); +} + +bool kvm_arch_vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.preempted_in_kernel; +} + +bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu) +{ + if (READ_ONCE(vcpu->arch.pv.pv_unhalted)) + return true; + + if (kvm_test_request(KVM_REQ_NMI, vcpu) || +#ifdef CONFIG_KVM_SMM + kvm_test_request(KVM_REQ_SMI, vcpu) || +#endif + kvm_test_request(KVM_REQ_EVENT, vcpu)) + return true; + + return kvm_arch_dy_has_pending_interrupt(vcpu); +} + static inline int complete_emulated_io(struct kvm_vcpu *vcpu) { return kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE); @@ -13156,87 +13237,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, kvm_arch_free_memslot(kvm, old); } -static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) -{ - if (!list_empty_careful(&vcpu->async_pf.done)) - return true; - - if (kvm_apic_has_pending_init_or_sipi(vcpu) && - kvm_apic_init_sipi_allowed(vcpu)) - return true; - - if (vcpu->arch.pv.pv_unhalted) - return true; - - if (kvm_is_exception_pending(vcpu)) - return true; - - if (kvm_test_request(KVM_REQ_NMI, vcpu) || - (vcpu->arch.nmi_pending && - kvm_x86_call(nmi_allowed)(vcpu, false))) - return true; - -#ifdef CONFIG_KVM_SMM - if (kvm_test_request(KVM_REQ_SMI, vcpu) || - (vcpu->arch.smi_pending && - kvm_x86_call(smi_allowed)(vcpu, false))) - return true; -#endif - - if (kvm_test_request(KVM_REQ_PMI, vcpu)) - return true; - - if (kvm_test_request(KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, vcpu)) - return true; - - if (kvm_arch_interrupt_allowed(vcpu) && kvm_cpu_has_interrupt(vcpu)) - return true; - - if (kvm_hv_has_stimer_pending(vcpu)) - return true; - - if (is_guest_mode(vcpu) && - kvm_x86_ops.nested_ops->has_events && - kvm_x86_ops.nested_ops->has_events(vcpu, false)) - return true; - - if (kvm_xen_has_pending_events(vcpu)) - return true; - - return false; -} - -int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) -{ - return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu); -} - -bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu) -{ - return kvm_vcpu_apicv_active(vcpu) && - kvm_x86_call(dy_apicv_has_pending_interrupt)(vcpu); -} - -bool kvm_arch_vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu) -{ - return vcpu->arch.preempted_in_kernel; -} - -bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu) -{ - if (READ_ONCE(vcpu->arch.pv.pv_unhalted)) - return true; - - if (kvm_test_request(KVM_REQ_NMI, vcpu) || -#ifdef CONFIG_KVM_SMM - kvm_test_request(KVM_REQ_SMI, vcpu) || -#endif - kvm_test_request(KVM_REQ_EVENT, vcpu)) - return true; - - return kvm_arch_dy_has_pending_interrupt(vcpu); -} - bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) { if (vcpu->arch.guest_state_protected) From 1876dd69dfe8c29e249070b0e4dc941fc15ac1e4 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Aug 2024 12:51:20 -0700 Subject: [PATCH 116/655] KVM: x86: Add fastpath handling of HLT VM-Exits Add a fastpath for HLT VM-Exits by immediately re-entering the guest if it has a pending wake event. When virtual interrupt delivery is enabled, i.e. when KVM doesn't need to manually inject interrupts, this allows KVM to stay in the fastpath run loop when a vIRQ arrives between the guest doing CLI and STI;HLT. Without AMD's Idle HLT-intercept support, the CPU generates a HLT VM-Exit even though KVM will immediately resume the guest. Note, on bare metal, it's relatively uncommon for a modern guest kernel to actually trigger this scenario, as the window between the guest checking for a wake event and committing to HLT is quite small. But in a nested environment, the timings change significantly, e.g. rudimentary testing showed that ~50% of HLT exits where HLT-polling was successful would be serviced by this fastpath, i.e. ~50% of the time that a nested vCPU gets a wake event before KVM schedules out the vCPU, the wake event was pending even before the VM-Exit. Link: https://lore.kernel.org/all/20240528041926.3989-3-manali.shukla@amd.com Link: https://lore.kernel.org/r/20240802195120.325560-6-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/svm.c | 13 +++++++++++-- arch/x86/kvm/vmx/vmx.c | 2 ++ arch/x86/kvm/x86.c | 23 ++++++++++++++++++++++- arch/x86/kvm/x86.h | 1 + 4 files changed, 36 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index eb3de01602b9..eb64976590fb 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4147,12 +4147,21 @@ static int svm_vcpu_pre_run(struct kvm_vcpu *vcpu) static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu) { + struct vcpu_svm *svm = to_svm(vcpu); + if (is_guest_mode(vcpu)) return EXIT_FASTPATH_NONE; - if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR && - to_svm(vcpu)->vmcb->control.exit_info_1) + switch (svm->vmcb->control.exit_code) { + case SVM_EXIT_MSR: + if (!svm->vmcb->control.exit_info_1) + break; return handle_fastpath_set_msr_irqoff(vcpu); + case SVM_EXIT_HLT: + return handle_fastpath_hlt(vcpu); + default: + break; + } return EXIT_FASTPATH_NONE; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index cf85f8d50ccb..9cac0ffb8553 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7265,6 +7265,8 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu, return handle_fastpath_set_msr_irqoff(vcpu); case EXIT_REASON_PREEMPTION_TIMER: return handle_fastpath_preemption_timer(vcpu, force_immediate_exit); + case EXIT_REASON_HLT: + return handle_fastpath_hlt(vcpu); default: return EXIT_FASTPATH_NONE; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c15eb8e7d3c3..fa455a60b557 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -11363,7 +11363,10 @@ static int __kvm_emulate_halt(struct kvm_vcpu *vcpu, int state, int reason) */ ++vcpu->stat.halt_exits; if (lapic_in_kernel(vcpu)) { - vcpu->arch.mp_state = state; + if (kvm_vcpu_has_events(vcpu)) + vcpu->arch.pv.pv_unhalted = false; + else + vcpu->arch.mp_state = state; return 1; } else { vcpu->run->exit_reason = reason; @@ -11388,6 +11391,24 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_emulate_halt); +fastpath_t handle_fastpath_hlt(struct kvm_vcpu *vcpu) +{ + int ret; + + kvm_vcpu_srcu_read_lock(vcpu); + ret = kvm_emulate_halt(vcpu); + kvm_vcpu_srcu_read_unlock(vcpu); + + if (!ret) + return EXIT_FASTPATH_EXIT_USERSPACE; + + if (kvm_vcpu_running(vcpu)) + return EXIT_FASTPATH_REENTER_GUEST; + + return EXIT_FASTPATH_EXIT_HANDLED; +} +EXPORT_SYMBOL_GPL(handle_fastpath_hlt); + int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu) { int ret = kvm_skip_emulated_instruction(vcpu); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index f47b9905ba78..516eb9e28752 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -334,6 +334,7 @@ int x86_decode_emulated_instruction(struct kvm_vcpu *vcpu, int emulation_type, int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int emulation_type, void *insn, int insn_len); fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu); +fastpath_t handle_fastpath_hlt(struct kvm_vcpu *vcpu); extern struct kvm_caps kvm_caps; extern struct kvm_host_values kvm_host; From 00565cff01262c888512bffe9d41e4831a6f2cc7 Mon Sep 17 00:00:00 2001 From: Yuesong Li Date: Fri, 30 Aug 2024 11:16:42 +0800 Subject: [PATCH 117/655] dm: Convert to use ERR_CAST() Use ERR_CAST() as it is designed for casting an error pointer to another type. This macro utilizes the __force and __must_check modifiers, which instruct the compiler to verify for errors at the locations where it is employed. Signed-off-by: Yuesong Li Signed-off-by: Mikulas Patocka --- drivers/md/dm-thin.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index a0c1620e90c8..89632ce97760 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -2948,7 +2948,7 @@ static struct pool *pool_create(struct mapped_device *pool_md, pmd = dm_pool_metadata_open(metadata_dev, block_size, format_device); if (IS_ERR(pmd)) { *error = "Error creating metadata object"; - return (struct pool *)pmd; + return ERR_CAST(pmd); } pool = kzalloc(sizeof(*pool), GFP_KERNEL); From 35c9f09b5691d6dff1f3e62fe1825fa10b644b45 Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Sat, 31 Aug 2024 17:48:01 +0800 Subject: [PATCH 118/655] dm integrity: Remove extra unlikely helper In IS_ERR, the unlikely is used for the input parameter, so these is no need to use it again outside. Signed-off-by: Hongbo Li Signed-off-by: Kunwu Chan Signed-off-by: Mikulas Patocka --- drivers/md/dm-integrity.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 51e6964c1305..8306f8511078 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -2447,7 +2447,7 @@ static int dm_integrity_map_inline(struct dm_integrity_io *dio) bio->bi_iter.bi_sector += ic->start + SB_SECTORS; bip = bio_integrity_alloc(bio, GFP_NOIO, 1); - if (unlikely(IS_ERR(bip))) { + if (IS_ERR(bip)) { bio->bi_status = errno_to_blk_status(PTR_ERR(bip)); bio_endio(bio); return DM_MAPIO_SUBMITTED; @@ -2520,7 +2520,7 @@ static void dm_integrity_inline_recheck(struct work_struct *w) } bip = bio_integrity_alloc(outgoing_bio, GFP_NOIO, 1); - if (unlikely(IS_ERR(bip))) { + if (IS_ERR(bip)) { bio_put(outgoing_bio); bio->bi_status = errno_to_blk_status(PTR_ERR(bip)); bio_endio(bio); From 26207c6332e83583a74228da9e5278d4fe5d26cf Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Mon, 2 Sep 2024 21:11:23 +0800 Subject: [PATCH 119/655] dm: Make use of __assign_bit() API We have for some time the __assign_bit() API to replace open coded if (foo) __set_bit(n, bar); else __clear_bit(n, bar); Use this API to simplify the code. No functional change intended. Signed-off-by: Hongbo Li Signed-off-by: Mikulas Patocka --- drivers/md/dm-clone-metadata.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/md/dm-clone-metadata.c b/drivers/md/dm-clone-metadata.c index 2db84cd2202b..14c5c28d938b 100644 --- a/drivers/md/dm-clone-metadata.c +++ b/drivers/md/dm-clone-metadata.c @@ -530,10 +530,7 @@ static int __load_bitset_in_core(struct dm_clone_metadata *cmd) return r; for (i = 0; ; i++) { - if (dm_bitset_cursor_get_value(&c)) - __set_bit(i, cmd->region_map); - else - __clear_bit(i, cmd->region_map); + __assign_bit(i, cmd->region_map, dm_bitset_cursor_get_value(&c)); if (i >= (cmd->nr_regions - 1)) break; From a8fa6483b40943a6c8feea803a2dc8e9982cc766 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 3 Sep 2024 11:50:11 +0200 Subject: [PATCH 120/655] dm integrity: fix gcc 5 warning This commit fixes gcc 5 warning "logical not is only applied to the left hand side of comparison" Reported-by: Geert Uytterhoeven Fixes: fb0987682c62 ("dm-integrity: introduce the Inline mode") Signed-off-by: Mikulas Patocka --- drivers/md/dm-integrity.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 8306f8511078..b3489d3fe7db 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -4715,13 +4715,18 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv ti->error = "Block size doesn't match the information in superblock"; goto bad; } - if (!le32_to_cpu(ic->sb->journal_sections) != (ic->mode == 'I')) { - r = -EINVAL; - if (ic->mode != 'I') + if (ic->mode != 'I') { + if (!le32_to_cpu(ic->sb->journal_sections)) { + r = -EINVAL; ti->error = "Corrupted superblock, journal_sections is 0"; - else + goto bad; + } + } else { + if (le32_to_cpu(ic->sb->journal_sections)) { + r = -EINVAL; ti->error = "Corrupted superblock, journal_sections is not 0"; - goto bad; + goto bad; + } } /* make sure that ti->max_io_len doesn't overflow */ if (!ic->meta_dev) { From f0e5311aa8022107d63c54e2f03684ec097d1394 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 28 Aug 2024 01:45:48 +0200 Subject: [PATCH 121/655] firmware_loader: Block path traversal Most firmware names are hardcoded strings, or are constructed from fairly constrained format strings where the dynamic parts are just some hex numbers or such. However, there are a couple codepaths in the kernel where firmware file names contain string components that are passed through from a device or semi-privileged userspace; the ones I could find (not counting interfaces that require root privileges) are: - lpfc_sli4_request_firmware_update() seems to construct the firmware filename from "ModelName", a string that was previously parsed out of some descriptor ("Vital Product Data") in lpfc_fill_vpd() - nfp_net_fw_find() seems to construct a firmware filename from a model name coming from nfp_hwinfo_lookup(pf->hwinfo, "nffw.partno"), which I think parses some descriptor that was read from the device. (But this case likely isn't exploitable because the format string looks like "netronome/nic_%s", and there shouldn't be any *folders* starting with "netronome/nic_". The previous case was different because there, the "%s" is *at the start* of the format string.) - module_flash_fw_schedule() is reachable from the ETHTOOL_MSG_MODULE_FW_FLASH_ACT netlink command, which is marked as GENL_UNS_ADMIN_PERM (meaning CAP_NET_ADMIN inside a user namespace is enough to pass the privilege check), and takes a userspace-provided firmware name. (But I think to reach this case, you need to have CAP_NET_ADMIN over a network namespace that a special kind of ethernet device is mapped into, so I think this is not a viable attack path in practice.) Fix it by rejecting any firmware names containing ".." path components. For what it's worth, I went looking and haven't found any USB device drivers that use the firmware loader dangerously. Cc: stable@vger.kernel.org Reviewed-by: Danilo Krummrich Fixes: abb139e75c2c ("firmware: teach the kernel to load firmware files directly from the filesystem") Signed-off-by: Jann Horn Acked-by: Luis Chamberlain Link: https://lore.kernel.org/r/20240828-firmware-traversal-v3-1-c76529c63b5f@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/firmware_loader/main.c | 30 +++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c index a03ee4b11134..324a9a3c087a 100644 --- a/drivers/base/firmware_loader/main.c +++ b/drivers/base/firmware_loader/main.c @@ -849,6 +849,26 @@ static void fw_log_firmware_info(const struct firmware *fw, const char *name, {} #endif +/* + * Reject firmware file names with ".." path components. + * There are drivers that construct firmware file names from device-supplied + * strings, and we don't want some device to be able to tell us "I would like to + * be sent my firmware from ../../../etc/shadow, please". + * + * Search for ".." surrounded by either '/' or start/end of string. + * + * This intentionally only looks at the firmware name, not at the firmware base + * directory or at symlink contents. + */ +static bool name_contains_dotdot(const char *name) +{ + size_t name_len = strlen(name); + + return strcmp(name, "..") == 0 || strncmp(name, "../", 3) == 0 || + strstr(name, "/../") != NULL || + (name_len >= 3 && strcmp(name+name_len-3, "/..") == 0); +} + /* called from request_firmware() and request_firmware_work_func() */ static int _request_firmware(const struct firmware **firmware_p, const char *name, @@ -869,6 +889,14 @@ _request_firmware(const struct firmware **firmware_p, const char *name, goto out; } + if (name_contains_dotdot(name)) { + dev_warn(device, + "Firmware load for '%s' refused, path contains '..' component\n", + name); + ret = -EINVAL; + goto out; + } + ret = _request_firmware_prepare(&fw, name, device, buf, size, offset, opt_flags); if (ret <= 0) /* error or already assigned */ @@ -946,6 +974,8 @@ _request_firmware(const struct firmware **firmware_p, const char *name, * @name will be used as $FIRMWARE in the uevent environment and * should be distinctive enough not to be confused with any other * firmware image for this or any other device. + * It must not contain any ".." path components - "foo/bar..bin" is + * allowed, but "foo/../bar.bin" is not. * * Caller must hold the reference count of @device. * From b45ed06f46737f8c2ee65698f4305409f2386674 Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Tue, 13 Aug 2024 22:19:32 +0800 Subject: [PATCH 122/655] drivers/base: Introduce device_match_t for device finding APIs There are several drivers/base APIs for finding a specific device, and they currently use the following good type for the @match parameter: int (*match)(struct device *dev, const void *data) Since these operations do not modify the caller-provided @*data, this type is worthy of a dedicated typedef: typedef int (*device_match_t)(struct device *dev, const void *data) Advantages of using device_match_t: - Shorter API declarations and definitions - Prevent further APIs from using a bad type for @match So introduce device_match_t and apply it to the existing (bus|class|driver|auxiliary)_find_device() APIs. Signed-off-by: Zijun Hu Link: https://lore.kernel.org/r/20240813-dev_match_api-v3-1-6c6878a99b9f@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/auxiliary.c | 2 +- drivers/base/bus.c | 2 +- drivers/base/class.c | 3 +-- drivers/base/driver.c | 2 +- include/linux/auxiliary_bus.h | 2 +- include/linux/device/bus.h | 6 ++++-- include/linux/device/class.h | 2 +- include/linux/device/driver.h | 2 +- 8 files changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/base/auxiliary.c b/drivers/base/auxiliary.c index 54b92839e05c..7823888af4f6 100644 --- a/drivers/base/auxiliary.c +++ b/drivers/base/auxiliary.c @@ -352,7 +352,7 @@ EXPORT_SYMBOL_GPL(__auxiliary_device_add); */ struct auxiliary_device *auxiliary_find_device(struct device *start, const void *data, - int (*match)(struct device *dev, const void *data)) + device_match_t match) { struct device *dev; diff --git a/drivers/base/bus.c b/drivers/base/bus.c index abf090ace833..657c93c38b0d 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -391,7 +391,7 @@ EXPORT_SYMBOL_GPL(bus_for_each_dev); */ struct device *bus_find_device(const struct bus_type *bus, struct device *start, const void *data, - int (*match)(struct device *dev, const void *data)) + device_match_t match) { struct subsys_private *sp = bus_to_subsys(bus); struct klist_iter i; diff --git a/drivers/base/class.c b/drivers/base/class.c index 7b38fdf8e1d7..ae22fa992c04 100644 --- a/drivers/base/class.c +++ b/drivers/base/class.c @@ -433,8 +433,7 @@ EXPORT_SYMBOL_GPL(class_for_each_device); * code. There's no locking restriction. */ struct device *class_find_device(const struct class *class, const struct device *start, - const void *data, - int (*match)(struct device *, const void *)) + const void *data, device_match_t match) { struct subsys_private *sp = class_to_subsys(class); struct class_dev_iter iter; diff --git a/drivers/base/driver.c b/drivers/base/driver.c index 88c6fd1f1992..b4eb5b89c4ee 100644 --- a/drivers/base/driver.c +++ b/drivers/base/driver.c @@ -150,7 +150,7 @@ EXPORT_SYMBOL_GPL(driver_for_each_device); */ struct device *driver_find_device(const struct device_driver *drv, struct device *start, const void *data, - int (*match)(struct device *dev, const void *data)) + device_match_t match) { struct klist_iter i; struct device *dev; diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h index 662b8ae54b6a..31762324bcc9 100644 --- a/include/linux/auxiliary_bus.h +++ b/include/linux/auxiliary_bus.h @@ -271,6 +271,6 @@ void auxiliary_driver_unregister(struct auxiliary_driver *auxdrv); struct auxiliary_device *auxiliary_find_device(struct device *start, const void *data, - int (*match)(struct device *dev, const void *data)); + device_match_t match); #endif /* _AUXILIARY_BUS_H_ */ diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index 807831d6bf0f..cdc4757217f9 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -126,6 +126,9 @@ struct bus_attribute { int __must_check bus_create_file(const struct bus_type *bus, struct bus_attribute *attr); void bus_remove_file(const struct bus_type *bus, struct bus_attribute *attr); +/* Matching function type for drivers/base APIs to find a specific device */ +typedef int (*device_match_t)(struct device *dev, const void *data); + /* Generic device matching functions that all busses can use to match with */ int device_match_name(struct device *dev, const void *name); int device_match_of_node(struct device *dev, const void *np); @@ -139,8 +142,7 @@ int device_match_any(struct device *dev, const void *unused); int bus_for_each_dev(const struct bus_type *bus, struct device *start, void *data, int (*fn)(struct device *dev, void *data)); struct device *bus_find_device(const struct bus_type *bus, struct device *start, - const void *data, - int (*match)(struct device *dev, const void *data)); + const void *data, device_match_t match); /** * bus_find_device_by_name - device iterator for locating a particular device * of a specific name. diff --git a/include/linux/device/class.h b/include/linux/device/class.h index c576b49c55c2..518c9c83d64b 100644 --- a/include/linux/device/class.h +++ b/include/linux/device/class.h @@ -95,7 +95,7 @@ void class_dev_iter_exit(struct class_dev_iter *iter); int class_for_each_device(const struct class *class, const struct device *start, void *data, int (*fn)(struct device *dev, void *data)); struct device *class_find_device(const struct class *class, const struct device *start, - const void *data, int (*match)(struct device *, const void *)); + const void *data, device_match_t match); /** * class_find_device_by_name - device iterator for locating a particular device diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h index 1fc8b68786de..5c04b8e3833b 100644 --- a/include/linux/device/driver.h +++ b/include/linux/device/driver.h @@ -157,7 +157,7 @@ int __must_check driver_for_each_device(struct device_driver *drv, struct device void *data, int (*fn)(struct device *dev, void *)); struct device *driver_find_device(const struct device_driver *drv, struct device *start, const void *data, - int (*match)(struct device *dev, const void *data)); + device_match_t match); /** * driver_find_device_by_name - device iterator for locating a particular device From 4a74f22386ccb1ddd06eb242bdd02548a7199bda Mon Sep 17 00:00:00 2001 From: Yuesong Li Date: Wed, 21 Aug 2024 12:04:32 +0800 Subject: [PATCH 123/655] driver:base:core: Adding a "Return:" line in comment for device_link_add() The original document doesn't explain the return value directly which leads to confusing in error checking. You can find the reason here: Link: https://lore.kernel.org/all/1d4c39e109bcf288d5900670e024a315.sboyd@kernel.org/ Signed-off-by: Yuesong Li Link: https://lore.kernel.org/r/20240821040432.4049183-1-liyuesong@vivo.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/base/core.c b/drivers/base/core.c index 4bc8b88d697e..ec2197aec0b7 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -673,6 +673,9 @@ postcore_initcall(devlink_class_init); * @supplier: Supplier end of the link. * @flags: Link flags. * + * Return: On success, a device_link struct will be returned. + * On error or invalid flag settings, NULL will be returned. + * * The caller is responsible for the proper synchronization of the link creation * with runtime PM. First, setting the DL_FLAG_PM_RUNTIME flag will cause the * runtime PM framework to take the link into account. Second, if the From a169a663bfa8198f33a5c1002634cc89e5128025 Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Thu, 22 Aug 2024 20:38:35 +0800 Subject: [PATCH 124/655] driver core: class: Check namespace relevant parameters in class_register() Device class has two namespace relevant fields which are usually associated by the following usage: struct class { ... const struct kobj_ns_type_operations *ns_type; const void *(*namespace)(const struct device *dev); ... } if (dev->class && dev->class->ns_type) dev->class->namespace(dev); (1) The usage looks weird since it checks @ns_type but calls namespace() (2) The usage implies both fields have dependency but their dependency is not currently enforced yet. It is found for all existing class definitions that the other filed is also assigned once one is assigned in current kernel tree. Fixed by enforcing above existing dependency that both fields are required for a device class to support namespace via parameter checks. Signed-off-by: Zijun Hu Link: https://lore.kernel.org/r/20240822-class_fix-v1-1-2a6d38ba913a@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/class.c | 11 +++++++++++ drivers/base/core.c | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/base/class.c b/drivers/base/class.c index ae22fa992c04..cb5359235c70 100644 --- a/drivers/base/class.c +++ b/drivers/base/class.c @@ -183,6 +183,17 @@ int class_register(const struct class *cls) pr_debug("device class '%s': registering\n", cls->name); + if (cls->ns_type && !cls->namespace) { + pr_err("%s: class '%s' does not have namespace\n", + __func__, cls->name); + return -EINVAL; + } + if (!cls->ns_type && cls->namespace) { + pr_err("%s: class '%s' does not have ns_type\n", + __func__, cls->name); + return -EINVAL; + } + cp = kzalloc(sizeof(*cp), GFP_KERNEL); if (!cp) return -ENOMEM; diff --git a/drivers/base/core.c b/drivers/base/core.c index ec2197aec0b7..30408a4a4927 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2584,7 +2584,7 @@ static const void *device_namespace(const struct kobject *kobj) const struct device *dev = kobj_to_dev(kobj); const void *ns = NULL; - if (dev->class && dev->class->ns_type) + if (dev->class && dev->class->namespace) ns = dev->class->namespace(dev); return ns; From 24e041e1e48d06f25a12caaf73728a4ec2e511fe Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Fri, 23 Aug 2024 15:55:44 +0800 Subject: [PATCH 125/655] platform: Make platform_bus_type constant Since commit d492cc2573a0 ("driver core: device.h: make struct bus_type a const *"), the driver core can properly handle constant struct bus_type, move the platform_bus_type variable to be a constant structure as well, placing it into read-only memory which can not be modified at runtime. Cc: Greg Kroah-Hartman Suggested-by: Greg Kroah-Hartman Signed-off-by: Kunwu Chan Link: https://lore.kernel.org/r/20240823075544.144426-1-kunwu.chan@linux.dev Signed-off-by: Greg Kroah-Hartman --- drivers/base/platform.c | 2 +- include/linux/platform_device.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 4c3ee6521ba5..6f2a33722c52 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -1474,7 +1474,7 @@ static const struct dev_pm_ops platform_dev_pm_ops = { USE_PLATFORM_PM_SLEEP_OPS }; -struct bus_type platform_bus_type = { +const struct bus_type platform_bus_type = { .name = "platform", .dev_groups = platform_dev_groups, .match = platform_match, diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index d422db6eec63..7132623e4658 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -52,7 +52,7 @@ struct platform_device { extern int platform_device_register(struct platform_device *); extern void platform_device_unregister(struct platform_device *); -extern struct bus_type platform_bus_type; +extern const struct bus_type platform_bus_type; extern struct device platform_bus; extern struct resource *platform_get_resource(struct platform_device *, From ba6353748e71bd1d7e422fec2b5c2e2dfc2e3bd9 Mon Sep 17 00:00:00 2001 From: Stuart Hayes Date: Thu, 22 Aug 2024 15:28:02 -0500 Subject: [PATCH 126/655] driver core: don't always lock parent in shutdown Don't lock a parent device unless it is needed in device_shutdown. This is in preparation for making device shutdown asynchronous, when it will be needed to allow children of a common parent to shut down simultaneously. Signed-off-by: Stuart Hayes Signed-off-by: David Jeffery Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Reviewed-by: Keith Busch Tested-by: Keith Busch Link: https://lore.kernel.org/r/20240822202805.6379-2-stuart.w.hayes@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 30408a4a4927..988aff79ee7f 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -4816,7 +4816,7 @@ void device_shutdown(void) spin_unlock(&devices_kset->list_lock); /* hold lock to avoid race with probe/release */ - if (parent) + if (parent && dev->bus && dev->bus->need_parent_lock) device_lock(parent); device_lock(dev); @@ -4840,7 +4840,7 @@ void device_shutdown(void) } device_unlock(dev); - if (parent) + if (parent && dev->bus && dev->bus->need_parent_lock) device_unlock(parent); put_device(dev); From 95dc7565253a8564911190ebd1e4ffceb4de208a Mon Sep 17 00:00:00 2001 From: Stuart Hayes Date: Thu, 22 Aug 2024 15:28:03 -0500 Subject: [PATCH 127/655] driver core: separate function to shutdown one device Make a separate function for the part of device_shutdown() that does the shutown for a single device. This is in preparation for making device shutdown asynchronous. Signed-off-by: Stuart Hayes Signed-off-by: David Jeffery Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Keith Busch Tested-by: Keith Busch Link: https://lore.kernel.org/r/20240822202805.6379-3-stuart.w.hayes@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 66 ++++++++++++++++++++++++--------------------- 1 file changed, 36 insertions(+), 30 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 988aff79ee7f..8598421cbb7f 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -4779,6 +4779,41 @@ int device_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid) } EXPORT_SYMBOL_GPL(device_change_owner); +static void shutdown_one_device(struct device *dev) +{ + /* hold lock to avoid race with probe/release */ + if (dev->parent && dev->bus && dev->bus->need_parent_lock) + device_lock(dev->parent); + device_lock(dev); + + /* Don't allow any more runtime suspends */ + pm_runtime_get_noresume(dev); + pm_runtime_barrier(dev); + + if (dev->class && dev->class->shutdown_pre) { + if (initcall_debug) + dev_info(dev, "shutdown_pre\n"); + dev->class->shutdown_pre(dev); + } + if (dev->bus && dev->bus->shutdown) { + if (initcall_debug) + dev_info(dev, "shutdown\n"); + dev->bus->shutdown(dev); + } else if (dev->driver && dev->driver->shutdown) { + if (initcall_debug) + dev_info(dev, "shutdown\n"); + dev->driver->shutdown(dev); + } + + device_unlock(dev); + if (dev->parent && dev->bus && dev->bus->need_parent_lock) + device_unlock(dev->parent); + + put_device(dev); + if (dev->parent) + put_device(dev->parent); +} + /** * device_shutdown - call ->shutdown() on each device to shutdown. */ @@ -4815,36 +4850,7 @@ void device_shutdown(void) list_del_init(&dev->kobj.entry); spin_unlock(&devices_kset->list_lock); - /* hold lock to avoid race with probe/release */ - if (parent && dev->bus && dev->bus->need_parent_lock) - device_lock(parent); - device_lock(dev); - - /* Don't allow any more runtime suspends */ - pm_runtime_get_noresume(dev); - pm_runtime_barrier(dev); - - if (dev->class && dev->class->shutdown_pre) { - if (initcall_debug) - dev_info(dev, "shutdown_pre\n"); - dev->class->shutdown_pre(dev); - } - if (dev->bus && dev->bus->shutdown) { - if (initcall_debug) - dev_info(dev, "shutdown\n"); - dev->bus->shutdown(dev); - } else if (dev->driver && dev->driver->shutdown) { - if (initcall_debug) - dev_info(dev, "shutdown\n"); - dev->driver->shutdown(dev); - } - - device_unlock(dev); - if (parent && dev->bus && dev->bus->need_parent_lock) - device_unlock(parent); - - put_device(dev); - put_device(parent); + shutdown_one_device(dev); spin_lock(&devices_kset->list_lock); } From 8064952c65045f05ee2671fe437770e50c151776 Mon Sep 17 00:00:00 2001 From: Stuart Hayes Date: Thu, 22 Aug 2024 15:28:04 -0500 Subject: [PATCH 128/655] driver core: shut down devices asynchronously Add code to allow asynchronous shutdown of devices, ensuring that each device is shut down before its parents & suppliers. Only devices with drivers that have async_shutdown_enable enabled will be shut down asynchronously. This can dramatically reduce system shutdown/reboot time on systems that have multiple devices that take many seconds to shut down (like certain NVMe drives). On one system tested, the shutdown time went from 11 minutes without this patch to 55 seconds with the patch. Signed-off-by: Stuart Hayes Signed-off-by: David Jeffery Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Keith Busch Tested-by: Keith Busch Link: https://lore.kernel.org/r/20240822202805.6379-4-stuart.w.hayes@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/base.h | 4 +++ drivers/base/core.c | 54 ++++++++++++++++++++++++++++++++++- include/linux/device/driver.h | 2 ++ 3 files changed, 59 insertions(+), 1 deletion(-) diff --git a/drivers/base/base.h b/drivers/base/base.h index 8cf04a557bdb..ea18aa70f151 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -10,6 +10,7 @@ * shared outside of the drivers/base/ directory. * */ +#include #include /** @@ -97,6 +98,8 @@ struct driver_private { * the device; typically because it depends on another driver getting * probed first. * @async_driver - pointer to device driver awaiting probe via async_probe + * @shutdown_after - used during device shutdown to ensure correct shutdown + * ordering. * @device - pointer back to the struct device that this structure is * associated with. * @dead - This device is currently either in the process of or has been @@ -114,6 +117,7 @@ struct device_private { struct list_head deferred_probe; const struct device_driver *async_driver; char *deferred_probe_reason; + async_cookie_t shutdown_after; struct device *device; u8 dead:1; }; diff --git a/drivers/base/core.c b/drivers/base/core.c index 8598421cbb7f..6113bc1092ae 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -9,6 +9,7 @@ */ #include +#include #include #include #include @@ -3524,6 +3525,7 @@ static int device_private_init(struct device *dev) klist_init(&dev->p->klist_children, klist_children_get, klist_children_put); INIT_LIST_HEAD(&dev->p->deferred_probe); + dev->p->shutdown_after = 0; return 0; } @@ -4779,6 +4781,8 @@ int device_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid) } EXPORT_SYMBOL_GPL(device_change_owner); +static ASYNC_DOMAIN(sd_domain); + static void shutdown_one_device(struct device *dev) { /* hold lock to avoid race with probe/release */ @@ -4814,12 +4818,34 @@ static void shutdown_one_device(struct device *dev) put_device(dev->parent); } +/** + * shutdown_one_device_async + * @data: the pointer to the struct device to be shutdown + * @cookie: not used + * + * Shuts down one device, after waiting for shutdown_after to complete. + * shutdown_after should be set to the cookie of the last child or consumer + * of this device to be shutdown (if any), or to the cookie of the previous + * device to be shut down for devices that don't enable asynchronous shutdown. + */ +static void shutdown_one_device_async(void *data, async_cookie_t cookie) +{ + struct device *dev = data; + + async_synchronize_cookie_domain(dev->p->shutdown_after + 1, &sd_domain); + + shutdown_one_device(dev); +} + /** * device_shutdown - call ->shutdown() on each device to shutdown. */ void device_shutdown(void) { struct device *dev, *parent; + async_cookie_t cookie = 0; + struct device_link *link; + int idx; wait_for_device_probe(); device_block_probing(); @@ -4850,11 +4876,37 @@ void device_shutdown(void) list_del_init(&dev->kobj.entry); spin_unlock(&devices_kset->list_lock); - shutdown_one_device(dev); + + /* + * Set cookie for devices that will be shut down synchronously + */ + if (!dev->driver || !dev->driver->async_shutdown_enable) + dev->p->shutdown_after = cookie; + + get_device(dev); + get_device(parent); + + cookie = async_schedule_domain(shutdown_one_device_async, + dev, &sd_domain); + /* + * Ensure parent & suppliers wait for this device to shut down + */ + if (parent) { + parent->p->shutdown_after = cookie; + put_device(parent); + } + + idx = device_links_read_lock(); + list_for_each_entry_rcu(link, &dev->links.suppliers, c_node, + device_links_read_lock_held()) + link->supplier->p->shutdown_after = cookie; + device_links_read_unlock(idx); + put_device(dev); spin_lock(&devices_kset->list_lock); } spin_unlock(&devices_kset->list_lock); + async_synchronize_full_domain(&sd_domain); } /* diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h index 5c04b8e3833b..14c9211b82d6 100644 --- a/include/linux/device/driver.h +++ b/include/linux/device/driver.h @@ -56,6 +56,7 @@ enum probe_type { * @mod_name: Used for built-in modules. * @suppress_bind_attrs: Disables bind/unbind via sysfs. * @probe_type: Type of the probe (synchronous or asynchronous) to use. + * @async_shutdown_enable: Enables devices to be shutdown asynchronously. * @of_match_table: The open firmware table. * @acpi_match_table: The ACPI match table. * @probe: Called to query the existence of a specific device, @@ -102,6 +103,7 @@ struct device_driver { bool suppress_bind_attrs; /* disables bind/unbind via sysfs */ enum probe_type probe_type; + bool async_shutdown_enable; const struct of_device_id *of_match_table; const struct acpi_device_id *acpi_match_table; From ba82e10c3c6b5b5d2c8279a8bd0dae5c2abaacfc Mon Sep 17 00:00:00 2001 From: Stuart Hayes Date: Thu, 22 Aug 2024 15:28:05 -0500 Subject: [PATCH 129/655] nvme-pci: Make driver prefer asynchronous shutdown Set the driver default to enable asynchronous shutdown. Signed-off-by: Stuart Hayes Signed-off-by: David Jeffery Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Reviewed-by: Keith Busch Tested-by: Keith Busch Link: https://lore.kernel.org/r/20240822202805.6379-5-stuart.w.hayes@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 6cd9395ba9ec..58d0d517fead 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3580,6 +3580,7 @@ static struct pci_driver nvme_driver = { .shutdown = nvme_shutdown, .driver = { .probe_type = PROBE_PREFER_ASYNCHRONOUS, + .async_shutdown_enable = true, #ifdef CONFIG_PM_SLEEP .pm = &nvme_dev_pm_ops, #endif From 8ab0f4605d5ce3c0d386b3828b07719f1e8e0505 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Fri, 23 Aug 2024 14:24:40 +0800 Subject: [PATCH 130/655] bus: fsl-mc: make fsl_mc_bus_type const Since commit d492cc2573a0 ("driver core: device.h: make struct bus_type a const *"), the driver core can properly handle constant struct bus_type, move the fsl_mc_bus_type variable to be a constant structure as well, placing it into read-only memory which can not be modified at runtime. Cc: Greg Kroah-Hartman Suggested-by: Greg Kroah-Hartman Signed-off-by: Kunwu Chan Acked-by: Christophe Leroy # for Link: https://lore.kernel.org/r/20240823062440.113628-1-kunwu.chan@linux.dev Signed-off-by: Greg Kroah-Hartman --- drivers/bus/fsl-mc/fsl-mc-bus.c | 2 +- include/linux/fsl/mc.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c index dd68b8191a0a..930d8a3ba722 100644 --- a/drivers/bus/fsl-mc/fsl-mc-bus.c +++ b/drivers/bus/fsl-mc/fsl-mc-bus.c @@ -309,7 +309,7 @@ static struct attribute *fsl_mc_bus_attrs[] = { ATTRIBUTE_GROUPS(fsl_mc_bus); -struct bus_type fsl_mc_bus_type = { +const struct bus_type fsl_mc_bus_type = { .name = "fsl-mc", .match = fsl_mc_bus_match, .uevent = fsl_mc_bus_uevent, diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h index 083c860fd28e..c90ec889bfc2 100644 --- a/include/linux/fsl/mc.h +++ b/include/linux/fsl/mc.h @@ -436,7 +436,7 @@ void fsl_mc_free_irqs(struct fsl_mc_device *mc_dev); struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev, u16 if_id); -extern struct bus_type fsl_mc_bus_type; +extern const struct bus_type fsl_mc_bus_type; extern struct device_type fsl_mc_bus_dprc_type; extern struct device_type fsl_mc_bus_dpni_type; From 903c44939abc02e2f3d6f2ad65fa090f7e5df5b6 Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Sat, 24 Aug 2024 17:07:43 +0800 Subject: [PATCH 131/655] driver core: Make parameter check consistent for API cluster device_(for_each|find)_child() The following API cluster takes the same type parameter list, but do not have consistent parameter check as shown below. device_for_each_child(struct device *parent, ...) // check (!parent->p) device_for_each_child_reverse(struct device *parent, ...) // same as above device_find_child(struct device *parent, ...) // check (!parent) Fixed by using consistent check (!parent || !parent->p) which covers both existing checks for the cluster. Signed-off-by: Zijun Hu Link: https://lore.kernel.org/r/20240824-const_dfc_prepare-v3-1-32127ea32bba@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 6113bc1092ae..b69b82da8837 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -3998,7 +3998,7 @@ int device_for_each_child(struct device *parent, void *data, struct device *child; int error = 0; - if (!parent->p) + if (!parent || !parent->p) return 0; klist_iter_init(&parent->p->klist_children, &i); @@ -4028,7 +4028,7 @@ int device_for_each_child_reverse(struct device *parent, void *data, struct device *child; int error = 0; - if (!parent->p) + if (!parent || !parent->p) return 0; klist_iter_init(&parent->p->klist_children, &i); @@ -4062,7 +4062,7 @@ struct device *device_find_child(struct device *parent, void *data, struct klist_iter i; struct device *child; - if (!parent) + if (!parent || !parent->p) return NULL; klist_iter_init(&parent->p->klist_children, &i); From fea64fa04c31426eae512751e0c5342345c5741c Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Fri, 30 Aug 2024 10:33:52 +0200 Subject: [PATCH 132/655] devres: Correclty strip percpu address space of devm_free_percpu() argument MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit devm_free_percpu() calls devres_release() with a pointer in percpu address space. devres_release() expects pointers in the generic address space, so address space needs to be stripped from the argument. When strict percpu address space checks are enabled, then the current direct cast from the percpu address space to the generic address space fails the compilation on x86_64 with: devres.c:1234:32: error: cast to generic address space pointer from disjoint ‘__seg_gs’ address space pointer Add intermediate casts to unsigned long to remove address space of the pointer before casting it to the generic AS, as advised in [1] and [2]. Side note: sparse still requires __force, although the documentation [2] allows casts to unsigned long without __force attribute. Found by GCC's named address space checks. There were no changes in the resulting object file. [1] https://gcc.gnu.org/onlinedocs/gcc/Named-Address-Spaces.html#x86-Named-Address-Spaces [2] https://sparse.docs.kernel.org/en/latest/annotations.html#address-space-name Signed-off-by: Uros Bizjak Cc: Greg Kroah-Hartman Cc: Rafael J. Wysocki Link: https://lore.kernel.org/r/20240830083406.9695-1-ubizjak@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/devres.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/devres.c b/drivers/base/devres.c index a2ce0ead06a6..2152eec0c135 100644 --- a/drivers/base/devres.c +++ b/drivers/base/devres.c @@ -1231,6 +1231,6 @@ void devm_free_percpu(struct device *dev, void __percpu *pdata) * devm_free_pages() does. */ WARN_ON(devres_release(dev, devm_percpu_release, devm_percpu_match, - (__force void *)pdata)); + (void *)(__force unsigned long)pdata)); } EXPORT_SYMBOL_GPL(devm_free_percpu); From 1f9651bfc51326fbed10df820c4e80f885f921d8 Mon Sep 17 00:00:00 2001 From: Yuesong Li Date: Thu, 29 Aug 2024 20:52:35 +0800 Subject: [PATCH 133/655] cxl/port: Convert to use ERR_CAST() Use ERR_CAST() as it is designed for casting an error pointer to another type. This macro utilizes the __force and __must_check modifiers, which instruct the compiler to verify for errors at the locations where it is employed. Signed-off-by: Yuesong Li Reviewed-by: Jonathan Cameron Reviewed-by: Alison Schofield Link: https://patch.msgid.link/20240829125235.3266865-1-liyuesong@vivo.com Signed-off-by: Dave Jiang --- drivers/cxl/core/port.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 1d5007e3795a..bdd3275ad417 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -941,7 +941,7 @@ struct cxl_root *devm_cxl_add_root(struct device *host, port = devm_cxl_add_port(host, host, CXL_RESOURCE_NONE, NULL); if (IS_ERR(port)) - return (struct cxl_root *)port; + return ERR_CAST(port); cxl_root = to_cxl_root(port); cxl_root->ops = ops; From 1c1d348b08ab7c106dd96bafb120f0888827174b Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Tue, 27 Aug 2024 17:51:23 +0800 Subject: [PATCH 134/655] tools/testing/cxl: Use dev_is_platform() Use dev_is_platform() instead of checking bus type directly. Signed-off-by: Kunwu Chan Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20240827095123.168696-1-kunwu.chan@linux.dev Signed-off-by: Dave Jiang --- tools/testing/cxl/mock_acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/cxl/mock_acpi.c b/tools/testing/cxl/mock_acpi.c index 55813de26d46..8da94378ccec 100644 --- a/tools/testing/cxl/mock_acpi.c +++ b/tools/testing/cxl/mock_acpi.c @@ -18,7 +18,7 @@ struct acpi_device *to_cxl_host_bridge(struct device *host, struct device *dev) goto out; } - if (dev->bus == &platform_bus_type) + if (dev_is_platform(dev)) goto out; adev = to_acpi_device(dev); From fa724cd747cdc347cde0d2ef6ba4ee34344f72cb Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Fri, 30 Aug 2024 16:00:16 +0800 Subject: [PATCH 135/655] cxl: Remove duplicate included header file core.h The header file core.h is included twice. Remove the last one. The compilation test has passed. Signed-off-by: Hongbo Li Acked-by: Jonathan Cameron Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Link: https://patch.msgid.link/20240830080016.3542184-1-lihongbo22@huawei.com Signed-off-by: Dave Jiang --- drivers/cxl/core/cdat.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index bb83867d9fec..e2e1ccda88fa 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -9,7 +9,6 @@ #include "cxlmem.h" #include "core.h" #include "cxl.h" -#include "core.h" struct dsmas_entry { struct range dpa_range; From dd2617ebd2a69c012001a29274557199409eff39 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Fri, 30 Aug 2024 01:31:36 +0000 Subject: [PATCH 136/655] cxl/port: Use __free() to drop put_device() for cxl_port Using scope-based resource management __free() marco with a new helper called put_cxl_port() to drop open coded the put_device() used to dereference the 'struct device' in cxl_port. Suggested-by: Dan Williams Signed-off-by: Li Ming Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20240830013138.2256244-1-ming4.li@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/pci.c | 6 ++---- drivers/cxl/core/port.c | 30 +++++++++++++----------------- drivers/cxl/cxl.h | 1 + drivers/cxl/mem.c | 5 ++--- drivers/cxl/pci.c | 7 ++----- 5 files changed, 20 insertions(+), 29 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 51132a575b27..4725e37d90fb 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -915,15 +915,13 @@ static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) struct pci_dev *pdev = to_pci_dev(cxlds->dev); struct aer_capability_regs aer_regs; struct cxl_dport *dport; - struct cxl_port *port; int severity; - port = cxl_pci_find_port(pdev, &dport); + struct cxl_port *port __free(put_cxl_port) = + cxl_pci_find_port(pdev, &dport); if (!port) return; - put_device(&port->dev); - if (!cxl_rch_get_aer_info(dport->regs.dport_aer, &aer_regs)) return; diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index bdd3275ad417..e1e762076476 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -1477,12 +1477,11 @@ static void cxl_detach_ep(void *data) .cxlmd = cxlmd, .depth = i, }; - struct device *dev; struct cxl_ep *ep; bool died = false; - dev = bus_find_device(&cxl_bus_type, NULL, &ctx, - port_has_memdev); + struct device *dev __free(put_device) = + bus_find_device(&cxl_bus_type, NULL, &ctx, port_has_memdev); if (!dev) continue; port = to_cxl_port(dev); @@ -1512,7 +1511,6 @@ static void cxl_detach_ep(void *data) dev_name(&port->dev)); delete_switch_port(port); } - put_device(&port->dev); device_unlock(&parent_port->dev); } } @@ -1540,7 +1538,6 @@ static int add_port_attach_ep(struct cxl_memdev *cxlmd, struct device *dport_dev) { struct device *dparent = grandparent(dport_dev); - struct cxl_port *port, *parent_port = NULL; struct cxl_dport *dport, *parent_dport; resource_size_t component_reg_phys; int rc; @@ -1556,12 +1553,18 @@ static int add_port_attach_ep(struct cxl_memdev *cxlmd, return -ENXIO; } - parent_port = find_cxl_port(dparent, &parent_dport); + struct cxl_port *parent_port __free(put_cxl_port) = + find_cxl_port(dparent, &parent_dport); if (!parent_port) { /* iterate to create this parent_port */ return -EAGAIN; } + /* + * Definition with __free() here to keep the sequence of + * dereferencing the device of the port before the parent_port releasing. + */ + struct cxl_port *port __free(put_cxl_port) = NULL; device_lock(&parent_port->dev); if (!parent_port->dev.driver) { dev_warn(&cxlmd->dev, @@ -1596,10 +1599,8 @@ static int add_port_attach_ep(struct cxl_memdev *cxlmd, */ rc = -ENXIO; } - put_device(&port->dev); } - put_device(&parent_port->dev); return rc; } @@ -1630,7 +1631,6 @@ int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd) struct device *dport_dev = grandparent(iter); struct device *uport_dev; struct cxl_dport *dport; - struct cxl_port *port; /* * The terminal "grandparent" in PCI is NULL and @platform_bus @@ -1649,7 +1649,8 @@ int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd) dev_dbg(dev, "scan: iter: %s dport_dev: %s parent: %s\n", dev_name(iter), dev_name(dport_dev), dev_name(uport_dev)); - port = find_cxl_port(dport_dev, &dport); + struct cxl_port *port __free(put_cxl_port) = + find_cxl_port(dport_dev, &dport); if (port) { dev_dbg(&cxlmd->dev, "found already registered port %s:%s\n", @@ -1664,18 +1665,13 @@ int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd) * the parent_port lock as the current port may be being * reaped. */ - if (rc && rc != -EBUSY) { - put_device(&port->dev); + if (rc && rc != -EBUSY) return rc; - } /* Any more ports to add between this one and the root? */ - if (!dev_is_cxl_root_child(&port->dev)) { - put_device(&port->dev); + if (!dev_is_cxl_root_child(&port->dev)) continue; - } - put_device(&port->dev); return 0; } diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 9afb407d438f..84cf3b4d60a1 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -744,6 +744,7 @@ struct cxl_root *find_cxl_root(struct cxl_port *port); void put_cxl_root(struct cxl_root *cxl_root); DEFINE_FREE(put_cxl_root, struct cxl_root *, if (_T) put_cxl_root(_T)) +DEFINE_FREE(put_cxl_port, struct cxl_port *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->dev)) int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd); void cxl_bus_rescan(void); void cxl_bus_drain(void); diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 7de232eaeb17..ab9b8ab8df44 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -109,7 +109,6 @@ static int cxl_mem_probe(struct device *dev) struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); struct cxl_dev_state *cxlds = cxlmd->cxlds; struct device *endpoint_parent; - struct cxl_port *parent_port; struct cxl_dport *dport; struct dentry *dentry; int rc; @@ -146,7 +145,8 @@ static int cxl_mem_probe(struct device *dev) if (rc) return rc; - parent_port = cxl_mem_find_port(cxlmd, &dport); + struct cxl_port *parent_port __free(put_cxl_port) = + cxl_mem_find_port(cxlmd, &dport); if (!parent_port) { dev_err(dev, "CXL port topology not found\n"); return -ENXIO; @@ -179,7 +179,6 @@ static int cxl_mem_probe(struct device *dev) rc = devm_cxl_add_endpoint(endpoint_parent, cxlmd, dport); unlock: device_unlock(endpoint_parent); - put_device(&parent_port->dev); if (rc) return rc; diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 4be35dc22202..26e75499abdd 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -473,7 +473,6 @@ static bool is_cxl_restricted(struct pci_dev *pdev) static int cxl_rcrb_get_comp_regs(struct pci_dev *pdev, struct cxl_register_map *map) { - struct cxl_port *port; struct cxl_dport *dport; resource_size_t component_reg_phys; @@ -482,14 +481,12 @@ static int cxl_rcrb_get_comp_regs(struct pci_dev *pdev, .resource = CXL_RESOURCE_NONE, }; - port = cxl_pci_find_port(pdev, &dport); + struct cxl_port *port __free(put_cxl_port) = + cxl_pci_find_port(pdev, &dport); if (!port) return -EPROBE_DEFER; component_reg_phys = cxl_rcd_component_reg_phys(&pdev->dev, dport); - - put_device(&port->dev); - if (component_reg_phys == CXL_RESOURCE_NONE) return -ENXIO; From 7f569e917b7866b6760e2cfc3a9549cabc890071 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Fri, 30 Aug 2024 01:31:37 +0000 Subject: [PATCH 137/655] cxl/port: Use scoped_guard()/guard() to drop device_lock() for cxl_port A device_lock() and device_unlock() pair can be replaced by a cleanup helper scoped_guard() or guard(), that can enhance code readability. In CXL subsystem, still use device_lock() and device_unlock() pairs for cxl port resource protection, most of them can be replaced by a scoped_guard() or a guard() simply. Suggested-by: Dan Williams Signed-off-by: Li Ming Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20240830013138.2256244-2-ming4.li@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/mbox.c | 3 +- drivers/cxl/core/port.c | 99 +++++++++++++++++---------------------- drivers/cxl/core/region.c | 25 +++++----- drivers/cxl/mem.c | 22 ++++----- drivers/cxl/pmem.c | 16 +++---- 5 files changed, 75 insertions(+), 90 deletions(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index e5cdeafdf76e..0a913b30c7fc 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -1214,7 +1214,7 @@ int cxl_mem_sanitize(struct cxl_memdev *cxlmd, u16 cmd) int rc; /* synchronize with cxl_mem_probe() and decoder write operations */ - device_lock(&cxlmd->dev); + guard(device)(&cxlmd->dev); endpoint = cxlmd->endpoint; down_read(&cxl_region_rwsem); /* @@ -1226,7 +1226,6 @@ int cxl_mem_sanitize(struct cxl_memdev *cxlmd, u16 cmd) else rc = -EBUSY; up_read(&cxl_region_rwsem); - device_unlock(&cxlmd->dev); return rc; } diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index e1e762076476..ebd7ddadbb54 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -1258,18 +1258,13 @@ EXPORT_SYMBOL_NS_GPL(devm_cxl_add_rch_dport, CXL); static int add_ep(struct cxl_ep *new) { struct cxl_port *port = new->dport->port; - int rc; - device_lock(&port->dev); - if (port->dead) { - device_unlock(&port->dev); + guard(device)(&port->dev); + if (port->dead) return -ENXIO; - } - rc = xa_insert(&port->endpoints, (unsigned long)new->ep, new, - GFP_KERNEL); - device_unlock(&port->dev); - return rc; + return xa_insert(&port->endpoints, (unsigned long)new->ep, + new, GFP_KERNEL); } /** @@ -1393,14 +1388,14 @@ static void delete_endpoint(void *data) struct cxl_port *endpoint = cxlmd->endpoint; struct device *host = endpoint_host(endpoint); - device_lock(host); - if (host->driver && !endpoint->dead) { - devm_release_action(host, cxl_unlink_parent_dport, endpoint); - devm_release_action(host, cxl_unlink_uport, endpoint); - devm_release_action(host, unregister_port, endpoint); + scoped_guard(device, host) { + if (host->driver && !endpoint->dead) { + devm_release_action(host, cxl_unlink_parent_dport, endpoint); + devm_release_action(host, cxl_unlink_uport, endpoint); + devm_release_action(host, unregister_port, endpoint); + } + cxlmd->endpoint = NULL; } - cxlmd->endpoint = NULL; - device_unlock(host); put_device(&endpoint->dev); put_device(host); } @@ -1565,40 +1560,38 @@ static int add_port_attach_ep(struct cxl_memdev *cxlmd, * dereferencing the device of the port before the parent_port releasing. */ struct cxl_port *port __free(put_cxl_port) = NULL; - device_lock(&parent_port->dev); - if (!parent_port->dev.driver) { - dev_warn(&cxlmd->dev, - "port %s:%s disabled, failed to enumerate CXL.mem\n", - dev_name(&parent_port->dev), dev_name(uport_dev)); - port = ERR_PTR(-ENXIO); - goto out; - } - - port = find_cxl_port_at(parent_port, dport_dev, &dport); - if (!port) { - component_reg_phys = find_component_registers(uport_dev); - port = devm_cxl_add_port(&parent_port->dev, uport_dev, - component_reg_phys, parent_dport); - /* retry find to pick up the new dport information */ - if (!IS_ERR(port)) - port = find_cxl_port_at(parent_port, dport_dev, &dport); - } -out: - device_unlock(&parent_port->dev); - - if (IS_ERR(port)) - rc = PTR_ERR(port); - else { - dev_dbg(&cxlmd->dev, "add to new port %s:%s\n", - dev_name(&port->dev), dev_name(port->uport_dev)); - rc = cxl_add_ep(dport, &cxlmd->dev); - if (rc == -EBUSY) { - /* - * "can't" happen, but this error code means - * something to the caller, so translate it. - */ - rc = -ENXIO; + scoped_guard(device, &parent_port->dev) { + if (!parent_port->dev.driver) { + dev_warn(&cxlmd->dev, + "port %s:%s disabled, failed to enumerate CXL.mem\n", + dev_name(&parent_port->dev), dev_name(uport_dev)); + return -ENXIO; } + + port = find_cxl_port_at(parent_port, dport_dev, &dport); + if (!port) { + component_reg_phys = find_component_registers(uport_dev); + port = devm_cxl_add_port(&parent_port->dev, uport_dev, + component_reg_phys, parent_dport); + if (IS_ERR(port)) + return PTR_ERR(port); + + /* retry find to pick up the new dport information */ + port = find_cxl_port_at(parent_port, dport_dev, &dport); + if (!port) + return -ENXIO; + } + } + + dev_dbg(&cxlmd->dev, "add to new port %s:%s\n", + dev_name(&port->dev), dev_name(port->uport_dev)); + rc = cxl_add_ep(dport, &cxlmd->dev); + if (rc == -EBUSY) { + /* + * "can't" happen, but this error code means + * something to the caller, so translate it. + */ + rc = -ENXIO; } return rc; @@ -1979,7 +1972,6 @@ EXPORT_SYMBOL_NS_GPL(cxl_decoder_add_locked, CXL); int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map) { struct cxl_port *port; - int rc; if (WARN_ON_ONCE(!cxld)) return -EINVAL; @@ -1989,11 +1981,8 @@ int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map) port = to_cxl_port(cxld->dev.parent); - device_lock(&port->dev); - rc = cxl_decoder_add_locked(cxld, target_map); - device_unlock(&port->dev); - - return rc; + guard(device)(&port->dev); + return cxl_decoder_add_locked(cxld, target_map); } EXPORT_SYMBOL_NS_GPL(cxl_decoder_add, CXL); diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 21ad5f242875..9a7c001eff1e 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -3094,11 +3094,11 @@ static void cxlr_release_nvdimm(void *_cxlr) struct cxl_region *cxlr = _cxlr; struct cxl_nvdimm_bridge *cxl_nvb = cxlr->cxl_nvb; - device_lock(&cxl_nvb->dev); - if (cxlr->cxlr_pmem) - devm_release_action(&cxl_nvb->dev, cxlr_pmem_unregister, - cxlr->cxlr_pmem); - device_unlock(&cxl_nvb->dev); + scoped_guard(device, &cxl_nvb->dev) { + if (cxlr->cxlr_pmem) + devm_release_action(&cxl_nvb->dev, cxlr_pmem_unregister, + cxlr->cxlr_pmem); + } cxlr->cxl_nvb = NULL; put_device(&cxl_nvb->dev); } @@ -3134,13 +3134,14 @@ static int devm_cxl_add_pmem_region(struct cxl_region *cxlr) dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent), dev_name(dev)); - device_lock(&cxl_nvb->dev); - if (cxl_nvb->dev.driver) - rc = devm_add_action_or_reset(&cxl_nvb->dev, - cxlr_pmem_unregister, cxlr_pmem); - else - rc = -ENXIO; - device_unlock(&cxl_nvb->dev); + scoped_guard(device, &cxl_nvb->dev) { + if (cxl_nvb->dev.driver) + rc = devm_add_action_or_reset(&cxl_nvb->dev, + cxlr_pmem_unregister, + cxlr_pmem); + else + rc = -ENXIO; + } if (rc) goto err_bridge; diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index ab9b8ab8df44..ae94018a01bd 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -168,19 +168,17 @@ static int cxl_mem_probe(struct device *dev) cxl_setup_parent_dport(dev, dport); - device_lock(endpoint_parent); - if (!endpoint_parent->driver) { - dev_err(dev, "CXL port topology %s not enabled\n", - dev_name(endpoint_parent)); - rc = -ENXIO; - goto unlock; - } + scoped_guard(device, endpoint_parent) { + if (!endpoint_parent->driver) { + dev_err(dev, "CXL port topology %s not enabled\n", + dev_name(endpoint_parent)); + return -ENXIO; + } - rc = devm_cxl_add_endpoint(endpoint_parent, cxlmd, dport); -unlock: - device_unlock(endpoint_parent); - if (rc) - return rc; + rc = devm_cxl_add_endpoint(endpoint_parent, cxlmd, dport); + if (rc) + return rc; + } /* * The kernel may be operating out of CXL memory on this device, diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index 4ef93da22335..647c7e25ef3a 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -233,15 +233,13 @@ static int detach_nvdimm(struct device *dev, void *data) if (!is_cxl_nvdimm(dev)) return 0; - device_lock(dev); - if (!dev->driver) - goto out; - - cxl_nvd = to_cxl_nvdimm(dev); - if (cxl_nvd->cxlmd && cxl_nvd->cxlmd->cxl_nvb == data) - release = true; -out: - device_unlock(dev); + scoped_guard(device, dev) { + if (dev->driver) { + cxl_nvd = to_cxl_nvdimm(dev); + if (cxl_nvd->cxlmd && cxl_nvd->cxlmd->cxl_nvb == data) + release = true; + } + } if (release) device_release_driver(dev); return 0; From 91c0e9d6a205ab0e318dc0dc6e72cbbdb21f8094 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Fri, 30 Aug 2024 01:31:38 +0000 Subject: [PATCH 138/655] cxl/port: Refactor __devm_cxl_add_port() to drop goto pattern In __devm_cxl_add_port(), there is a 'goto' to call put_device() for the error cases between device_initialize() and device_add() to dereference the 'struct device' of a new cxl_port. The 'goto' pattern in the case can be removed by refactoring. Introducing a new function called cxl_port_add() which is used to add the 'struct device' of a new cxl_port to device hierarchy, moving the functions needing the help of the 'goto' into cxl_port_add(), and using a scoped-based resource management __free() to drop the open coded put_device() and the 'goto' for the error cases. Suggested-by: Dan Williams Signed-off-by: Li Ming Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20240830013138.2256244-3-ming4.li@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/port.c | 59 ++++++++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 24 deletions(-) diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index ebd7ddadbb54..7ca57285a7b7 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -828,27 +828,20 @@ static void cxl_debugfs_create_dport_dir(struct cxl_dport *dport) &cxl_einj_inject_fops); } -static struct cxl_port *__devm_cxl_add_port(struct device *host, - struct device *uport_dev, - resource_size_t component_reg_phys, - struct cxl_dport *parent_dport) +static int cxl_port_add(struct cxl_port *port, + resource_size_t component_reg_phys, + struct cxl_dport *parent_dport) { - struct cxl_port *port; - struct device *dev; + struct device *dev __free(put_device) = &port->dev; int rc; - port = cxl_port_alloc(uport_dev, parent_dport); - if (IS_ERR(port)) - return port; - - dev = &port->dev; - if (is_cxl_memdev(uport_dev)) { - struct cxl_memdev *cxlmd = to_cxl_memdev(uport_dev); + if (is_cxl_memdev(port->uport_dev)) { + struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev); struct cxl_dev_state *cxlds = cxlmd->cxlds; rc = dev_set_name(dev, "endpoint%d", port->id); if (rc) - goto err; + return rc; /* * The endpoint driver already enumerated the component and RAS @@ -861,19 +854,41 @@ static struct cxl_port *__devm_cxl_add_port(struct device *host, } else if (parent_dport) { rc = dev_set_name(dev, "port%d", port->id); if (rc) - goto err; + return rc; rc = cxl_port_setup_regs(port, component_reg_phys); if (rc) - goto err; - } else + return rc; + } else { rc = dev_set_name(dev, "root%d", port->id); - if (rc) - goto err; + if (rc) + return rc; + } rc = device_add(dev); if (rc) - goto err; + return rc; + + /* Inhibit the cleanup function invoked */ + dev = NULL; + return 0; +} + +static struct cxl_port *__devm_cxl_add_port(struct device *host, + struct device *uport_dev, + resource_size_t component_reg_phys, + struct cxl_dport *parent_dport) +{ + struct cxl_port *port; + int rc; + + port = cxl_port_alloc(uport_dev, parent_dport); + if (IS_ERR(port)) + return port; + + rc = cxl_port_add(port, component_reg_phys, parent_dport); + if (rc) + return ERR_PTR(rc); rc = devm_add_action_or_reset(host, unregister_port, port); if (rc) @@ -891,10 +906,6 @@ static struct cxl_port *__devm_cxl_add_port(struct device *host, port->pci_latency = cxl_pci_get_latency(to_pci_dev(uport_dev)); return port; - -err: - put_device(dev); - return ERR_PTR(rc); } /** From 577a67662ff529f617981fe9692ff277b5756402 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Fri, 30 Aug 2024 06:13:06 +0000 Subject: [PATCH 139/655] cxl/pci: Rename cxl_setup_parent_dport() and cxl_dport_map_regs() The name of cxl_setup_parent_dport() function is not clear, the function is used to initialize AER and RAS capabilities on a dport, therefore, rename the function to cxl_dport_init_ras_reporting(), it is easier for user to understand what the function does. Besides, adjust the order of the function parameters, the subject of cxl_dport_init_ras_reporting() is a cxl dport, so a struct cxl_dport as the first parameter of the function should be better. cxl_dport_map_regs() is used to map CXL RAS capability on a cxl dport, using cxl_dport_map_ras() as the function name. Signed-off-by: Li Ming Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20240830061308.2327065-1-ming4.li@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/pci.c | 13 +++++++++---- drivers/cxl/cxl.h | 5 +++-- drivers/cxl/mem.c | 2 +- tools/testing/cxl/Kbuild | 2 +- tools/testing/cxl/test/mock.c | 6 +++--- 5 files changed, 17 insertions(+), 11 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 4725e37d90fb..cca078068a05 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -787,7 +787,7 @@ static void cxl_dport_map_rch_aer(struct cxl_dport *dport) dport->regs.dport_aer = dport_aer; } -static void cxl_dport_map_regs(struct cxl_dport *dport) +static void cxl_dport_map_ras(struct cxl_dport *dport) { struct cxl_register_map *map = &dport->reg_map; struct device *dev = dport->dport_dev; @@ -831,7 +831,12 @@ static void cxl_disable_rch_root_ints(struct cxl_dport *dport) } } -void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport) +/** + * cxl_dport_init_ras_reporting - Setup CXL RAS report on this dport + * @dport: the cxl_dport that needs to be initialized + * @host: host device for devm operations + */ +void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host) { struct device *dport_dev = dport->dport_dev; @@ -843,12 +848,12 @@ void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport) } dport->reg_map.host = host; - cxl_dport_map_regs(dport); + cxl_dport_map_ras(dport); if (dport->rch) cxl_disable_rch_root_ints(dport); } -EXPORT_SYMBOL_NS_GPL(cxl_setup_parent_dport, CXL); +EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, CXL); static void cxl_handle_rdport_cor_ras(struct cxl_dev_state *cxlds, struct cxl_dport *dport) diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 84cf3b4d60a1..2890f7e2aad5 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -763,9 +763,10 @@ struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port, #ifdef CONFIG_PCIEAER_CXL void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport); +void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host); #else -static inline void cxl_setup_parent_dport(struct device *host, - struct cxl_dport *dport) { } +static inline void cxl_dport_init_ras_reporting(struct cxl_dport *dport, + struct device *host) { } #endif struct cxl_decoder *to_cxl_decoder(struct device *dev); diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index ae94018a01bd..a9fd5cd5a0d2 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -166,7 +166,7 @@ static int cxl_mem_probe(struct device *dev) else endpoint_parent = &parent_port->dev; - cxl_setup_parent_dport(dev, dport); + cxl_dport_init_ras_reporting(dport, dev); scoped_guard(device, endpoint_parent) { if (!endpoint_parent->driver) { diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 3d1ca9e38b1f..b1256fee3567 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -14,7 +14,7 @@ ldflags-y += --wrap=cxl_dvsec_rr_decode ldflags-y += --wrap=devm_cxl_add_rch_dport ldflags-y += --wrap=cxl_rcd_component_reg_phys ldflags-y += --wrap=cxl_endpoint_parse_cdat -ldflags-y += --wrap=cxl_setup_parent_dport +ldflags-y += --wrap=cxl_dport_init_ras_reporting DRIVERS := ../../../drivers CXL_SRC := $(DRIVERS)/cxl diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index d619672faa49..bbd7d938156d 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -299,17 +299,17 @@ void __wrap_cxl_endpoint_parse_cdat(struct cxl_port *port) } EXPORT_SYMBOL_NS_GPL(__wrap_cxl_endpoint_parse_cdat, CXL); -void __wrap_cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport) +void __wrap_cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host) { int index; struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); if (!ops || !ops->is_mock_port(dport->dport_dev)) - cxl_setup_parent_dport(host, dport); + cxl_dport_init_ras_reporting(dport, host); put_cxl_mock_ops(index); } -EXPORT_SYMBOL_NS_GPL(__wrap_cxl_setup_parent_dport, CXL); +EXPORT_SYMBOL_NS_GPL(__wrap_cxl_dport_init_ras_reporting, CXL); MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(ACPI); From c8706cc15a5814becacff778017bbcc5c031490a Mon Sep 17 00:00:00 2001 From: Li Ming Date: Fri, 30 Aug 2024 06:13:07 +0000 Subject: [PATCH 140/655] cxl/pci: cxl_dport_map_rch_aer() cleanup cxl_dport_map_ras() is used to map CXL RAS capability, the RCH AER capability should not be mapped in the function but should mapped in cxl_dport_init_ras_reporting(). Moving cxl_dport_map_ras() out of cxl_dport_map_ras() and into cxl_dport_init_ras_reporting(). In cxl_dport_init_ras_reporting(), the AER capability position in RCRB will be located but the position is only used in cxl_dport_map_rch_aer(), getting the position in cxl_dport_map_rch_aer() rather than cxl_dport_init_ras_reporting() is more reasonable and makes the code clearer. Besides, some local variables in cxl_dport_map_rch_aer() are unnecessary, remove them to make the function more concise. Signed-off-by: Li Ming Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20240830061308.2327065-2-ming4.li@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/pci.c | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index cca078068a05..ccd46843e1e7 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -772,19 +772,17 @@ static bool cxl_handle_endpoint_ras(struct cxl_dev_state *cxlds) static void cxl_dport_map_rch_aer(struct cxl_dport *dport) { - struct cxl_rcrb_info *ri = &dport->rcrb; - void __iomem *dport_aer = NULL; resource_size_t aer_phys; struct device *host; + u16 aer_cap; - if (dport->rch && ri->aer_cap) { + aer_cap = cxl_rcrb_to_aer(dport->dport_dev, dport->rcrb.base); + if (aer_cap) { host = dport->reg_map.host; - aer_phys = ri->aer_cap + ri->base; - dport_aer = devm_cxl_iomap_block(host, aer_phys, - sizeof(struct aer_capability_regs)); + aer_phys = aer_cap + dport->rcrb.base; + dport->regs.dport_aer = devm_cxl_iomap_block(host, aer_phys, + sizeof(struct aer_capability_regs)); } - - dport->regs.dport_aer = dport_aer; } static void cxl_dport_map_ras(struct cxl_dport *dport) @@ -797,9 +795,6 @@ static void cxl_dport_map_ras(struct cxl_dport *dport) else if (cxl_map_component_regs(map, &dport->regs.component, BIT(CXL_CM_CAP_CAP_ID_RAS))) dev_dbg(dev, "Failed to map RAS capability.\n"); - - if (dport->rch) - cxl_dport_map_rch_aer(dport); } static void cxl_disable_rch_root_ints(struct cxl_dport *dport) @@ -838,20 +833,18 @@ static void cxl_disable_rch_root_ints(struct cxl_dport *dport) */ void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host) { - struct device *dport_dev = dport->dport_dev; - - if (dport->rch) { - struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport_dev); - - if (host_bridge->native_aer) - dport->rcrb.aer_cap = cxl_rcrb_to_aer(dport_dev, dport->rcrb.base); - } - dport->reg_map.host = host; cxl_dport_map_ras(dport); - if (dport->rch) + if (dport->rch) { + struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport->dport_dev); + + if (!host_bridge->native_aer) + return; + + cxl_dport_map_rch_aer(dport); cxl_disable_rch_root_ints(dport); + } } EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, CXL); From d75ccd4f2ea2aa2679e6c807d76953dcd3f9d56d Mon Sep 17 00:00:00 2001 From: Li Ming Date: Fri, 30 Aug 2024 06:13:08 +0000 Subject: [PATCH 141/655] cxl/pci: Remove duplicate host_bridge->native_aer checking cxl_dport_init_ras_reporting() already checks host_bridge->native_aer before invoking cxl_disable_rch_root_ints(), so cxl_disable_rch_root_ints() does not need to check it again. Signed-off-by: Li Ming Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20240830061308.2327065-3-ming4.li@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/pci.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index ccd46843e1e7..5e5c24eed545 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -800,14 +800,11 @@ static void cxl_dport_map_ras(struct cxl_dport *dport) static void cxl_disable_rch_root_ints(struct cxl_dport *dport) { void __iomem *aer_base = dport->regs.dport_aer; - struct pci_host_bridge *bridge; u32 aer_cmd_mask, aer_cmd; if (!aer_base) return; - bridge = to_pci_host_bridge(dport->dport_dev); - /* * Disable RCH root port command interrupts. * CXL 3.0 12.2.1.1 - RCH Downstream Port-detected Errors @@ -816,14 +813,12 @@ static void cxl_disable_rch_root_ints(struct cxl_dport *dport) * the root cmd register's interrupts is required. But, PCI spec * shows these are disabled by default on reset. */ - if (bridge->native_aer) { - aer_cmd_mask = (PCI_ERR_ROOT_CMD_COR_EN | - PCI_ERR_ROOT_CMD_NONFATAL_EN | - PCI_ERR_ROOT_CMD_FATAL_EN); - aer_cmd = readl(aer_base + PCI_ERR_ROOT_COMMAND); - aer_cmd &= ~aer_cmd_mask; - writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND); - } + aer_cmd_mask = (PCI_ERR_ROOT_CMD_COR_EN | + PCI_ERR_ROOT_CMD_NONFATAL_EN | + PCI_ERR_ROOT_CMD_FATAL_EN); + aer_cmd = readl(aer_base + PCI_ERR_ROOT_COMMAND); + aer_cmd &= ~aer_cmd_mask; + writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND); } /** From 44d17459626052a2390457e550a12cb973506b2f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 29 Aug 2024 21:35:51 -0700 Subject: [PATCH 142/655] KVM: Use dedicated mutex to protect kvm_usage_count to avoid deadlock Use a dedicated mutex to guard kvm_usage_count to fix a potential deadlock on x86 due to a chain of locks and SRCU synchronizations. Translating the below lockdep splat, CPU1 #6 will wait on CPU0 #1, CPU0 #8 will wait on CPU2 #3, and CPU2 #7 will wait on CPU1 #4 (if there's a writer, due to the fairness of r/w semaphores). CPU0 CPU1 CPU2 1 lock(&kvm->slots_lock); 2 lock(&vcpu->mutex); 3 lock(&kvm->srcu); 4 lock(cpu_hotplug_lock); 5 lock(kvm_lock); 6 lock(&kvm->slots_lock); 7 lock(cpu_hotplug_lock); 8 sync(&kvm->srcu); Note, there are likely more potential deadlocks in KVM x86, e.g. the same pattern of taking cpu_hotplug_lock outside of kvm_lock likely exists with __kvmclock_cpufreq_notifier(): cpuhp_cpufreq_online() | -> cpufreq_online() | -> cpufreq_gov_performance_limits() | -> __cpufreq_driver_target() | -> __target_index() | -> cpufreq_freq_transition_begin() | -> cpufreq_notify_transition() | -> ... __kvmclock_cpufreq_notifier() But, actually triggering such deadlocks is beyond rare due to the combination of dependencies and timings involved. E.g. the cpufreq notifier is only used on older CPUs without a constant TSC, mucking with the NX hugepage mitigation while VMs are running is very uncommon, and doing so while also onlining/offlining a CPU (necessary to generate contention on cpu_hotplug_lock) would be even more unusual. The most robust solution to the general cpu_hotplug_lock issue is likely to switch vm_list to be an RCU-protected list, e.g. so that x86's cpufreq notifier doesn't to take kvm_lock. For now, settle for fixing the most blatant deadlock, as switching to an RCU-protected list is a much more involved change, but add a comment in locking.rst to call out that care needs to be taken when walking holding kvm_lock and walking vm_list. ====================================================== WARNING: possible circular locking dependency detected 6.10.0-smp--c257535a0c9d-pip #330 Tainted: G S O ------------------------------------------------------ tee/35048 is trying to acquire lock: ff6a80eced71e0a8 (&kvm->slots_lock){+.+.}-{3:3}, at: set_nx_huge_pages+0x179/0x1e0 [kvm] but task is already holding lock: ffffffffc07abb08 (kvm_lock){+.+.}-{3:3}, at: set_nx_huge_pages+0x14a/0x1e0 [kvm] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #3 (kvm_lock){+.+.}-{3:3}: __mutex_lock+0x6a/0xb40 mutex_lock_nested+0x1f/0x30 kvm_dev_ioctl+0x4fb/0xe50 [kvm] __se_sys_ioctl+0x7b/0xd0 __x64_sys_ioctl+0x21/0x30 x64_sys_call+0x15d0/0x2e60 do_syscall_64+0x83/0x160 entry_SYSCALL_64_after_hwframe+0x76/0x7e -> #2 (cpu_hotplug_lock){++++}-{0:0}: cpus_read_lock+0x2e/0xb0 static_key_slow_inc+0x16/0x30 kvm_lapic_set_base+0x6a/0x1c0 [kvm] kvm_set_apic_base+0x8f/0xe0 [kvm] kvm_set_msr_common+0x9ae/0xf80 [kvm] vmx_set_msr+0xa54/0xbe0 [kvm_intel] __kvm_set_msr+0xb6/0x1a0 [kvm] kvm_arch_vcpu_ioctl+0xeca/0x10c0 [kvm] kvm_vcpu_ioctl+0x485/0x5b0 [kvm] __se_sys_ioctl+0x7b/0xd0 __x64_sys_ioctl+0x21/0x30 x64_sys_call+0x15d0/0x2e60 do_syscall_64+0x83/0x160 entry_SYSCALL_64_after_hwframe+0x76/0x7e -> #1 (&kvm->srcu){.+.+}-{0:0}: __synchronize_srcu+0x44/0x1a0 synchronize_srcu_expedited+0x21/0x30 kvm_swap_active_memslots+0x110/0x1c0 [kvm] kvm_set_memslot+0x360/0x620 [kvm] __kvm_set_memory_region+0x27b/0x300 [kvm] kvm_vm_ioctl_set_memory_region+0x43/0x60 [kvm] kvm_vm_ioctl+0x295/0x650 [kvm] __se_sys_ioctl+0x7b/0xd0 __x64_sys_ioctl+0x21/0x30 x64_sys_call+0x15d0/0x2e60 do_syscall_64+0x83/0x160 entry_SYSCALL_64_after_hwframe+0x76/0x7e -> #0 (&kvm->slots_lock){+.+.}-{3:3}: __lock_acquire+0x15ef/0x2e30 lock_acquire+0xe0/0x260 __mutex_lock+0x6a/0xb40 mutex_lock_nested+0x1f/0x30 set_nx_huge_pages+0x179/0x1e0 [kvm] param_attr_store+0x93/0x100 module_attr_store+0x22/0x40 sysfs_kf_write+0x81/0xb0 kernfs_fop_write_iter+0x133/0x1d0 vfs_write+0x28d/0x380 ksys_write+0x70/0xe0 __x64_sys_write+0x1f/0x30 x64_sys_call+0x281b/0x2e60 do_syscall_64+0x83/0x160 entry_SYSCALL_64_after_hwframe+0x76/0x7e Cc: Chao Gao Fixes: 0bf50497f03b ("KVM: Drop kvm_count_lock and instead protect kvm_usage_count with kvm_lock") Cc: stable@vger.kernel.org Reviewed-by: Kai Huang Acked-by: Kai Huang Tested-by: Farrah Chen Signed-off-by: Sean Christopherson Message-ID: <20240830043600.127750-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/locking.rst | 32 +++++++++++++++++++++--------- virt/kvm/kvm_main.c | 31 +++++++++++++++-------------- 2 files changed, 39 insertions(+), 24 deletions(-) diff --git a/Documentation/virt/kvm/locking.rst b/Documentation/virt/kvm/locking.rst index 02880d5552d5..c0cb5ce51c1e 100644 --- a/Documentation/virt/kvm/locking.rst +++ b/Documentation/virt/kvm/locking.rst @@ -9,7 +9,7 @@ KVM Lock Overview The acquisition orders for mutexes are as follows: -- cpus_read_lock() is taken outside kvm_lock +- cpus_read_lock() is taken outside kvm_lock and kvm_usage_lock - kvm->lock is taken outside vcpu->mutex @@ -24,6 +24,12 @@ The acquisition orders for mutexes are as follows: are taken on the waiting side when modifying memslots, so MMU notifiers must not take either kvm->slots_lock or kvm->slots_arch_lock. +cpus_read_lock() vs kvm_lock: +- Taking cpus_read_lock() outside of kvm_lock is problematic, despite that + being the official ordering, as it is quite easy to unknowingly trigger + cpus_read_lock() while holding kvm_lock. Use caution when walking vm_list, + e.g. avoid complex operations when possible. + For SRCU: - ``synchronize_srcu(&kvm->srcu)`` is called inside critical sections @@ -227,10 +233,17 @@ time it will be set using the Dirty tracking mechanism described above. :Type: mutex :Arch: any :Protects: - vm_list - - kvm_usage_count + +``kvm_usage_lock`` +^^^^^^^^^^^^^^^^^^ + +:Type: mutex +:Arch: any +:Protects: - kvm_usage_count - hardware virtualization enable/disable -:Comment: KVM also disables CPU hotplug via cpus_read_lock() during - enable/disable. +:Comment: Exists because using kvm_lock leads to deadlock (see earlier comment + on cpus_read_lock() vs kvm_lock). Note, KVM also disables CPU hotplug via + cpus_read_lock() when enabling/disabling virtualization. ``kvm->mn_invalidate_lock`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -290,11 +303,12 @@ time it will be set using the Dirty tracking mechanism described above. wakeup. ``vendor_module_lock`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^ :Type: mutex :Arch: x86 :Protects: loading a vendor module (kvm_amd or kvm_intel) -:Comment: Exists because using kvm_lock leads to deadlock. cpu_hotplug_lock is - taken outside of kvm_lock, e.g. in KVM's CPU online/offline callbacks, and - many operations need to take cpu_hotplug_lock when loading a vendor module, - e.g. updating static calls. +:Comment: Exists because using kvm_lock leads to deadlock. kvm_lock is taken + in notifiers, e.g. __kvmclock_cpufreq_notifier(), that may be invoked while + cpu_hotplug_lock is held, e.g. from cpufreq_boost_trigger_state(), and many + operations need to take cpu_hotplug_lock when loading a vendor module, e.g. + updating static calls. diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index cb2b78e92910..7164a9ece208 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -5575,6 +5575,7 @@ __visible bool kvm_rebooting; EXPORT_SYMBOL_GPL(kvm_rebooting); static DEFINE_PER_CPU(bool, hardware_enabled); +static DEFINE_MUTEX(kvm_usage_lock); static int kvm_usage_count; static int __hardware_enable_nolock(void) @@ -5607,10 +5608,10 @@ static int kvm_online_cpu(unsigned int cpu) * be enabled. Otherwise running VMs would encounter unrecoverable * errors when scheduled to this CPU. */ - mutex_lock(&kvm_lock); + mutex_lock(&kvm_usage_lock); if (kvm_usage_count) ret = __hardware_enable_nolock(); - mutex_unlock(&kvm_lock); + mutex_unlock(&kvm_usage_lock); return ret; } @@ -5630,10 +5631,10 @@ static void hardware_disable_nolock(void *junk) static int kvm_offline_cpu(unsigned int cpu) { - mutex_lock(&kvm_lock); + mutex_lock(&kvm_usage_lock); if (kvm_usage_count) hardware_disable_nolock(NULL); - mutex_unlock(&kvm_lock); + mutex_unlock(&kvm_usage_lock); return 0; } @@ -5649,9 +5650,9 @@ static void hardware_disable_all_nolock(void) static void hardware_disable_all(void) { cpus_read_lock(); - mutex_lock(&kvm_lock); + mutex_lock(&kvm_usage_lock); hardware_disable_all_nolock(); - mutex_unlock(&kvm_lock); + mutex_unlock(&kvm_usage_lock); cpus_read_unlock(); } @@ -5682,7 +5683,7 @@ static int hardware_enable_all(void) * enable hardware multiple times. */ cpus_read_lock(); - mutex_lock(&kvm_lock); + mutex_lock(&kvm_usage_lock); r = 0; @@ -5696,7 +5697,7 @@ static int hardware_enable_all(void) } } - mutex_unlock(&kvm_lock); + mutex_unlock(&kvm_usage_lock); cpus_read_unlock(); return r; @@ -5724,13 +5725,13 @@ static int kvm_suspend(void) { /* * Secondary CPUs and CPU hotplug are disabled across the suspend/resume - * callbacks, i.e. no need to acquire kvm_lock to ensure the usage count - * is stable. Assert that kvm_lock is not held to ensure the system - * isn't suspended while KVM is enabling hardware. Hardware enabling - * can be preempted, but the task cannot be frozen until it has dropped - * all locks (userspace tasks are frozen via a fake signal). + * callbacks, i.e. no need to acquire kvm_usage_lock to ensure the usage + * count is stable. Assert that kvm_usage_lock is not held to ensure + * the system isn't suspended while KVM is enabling hardware. Hardware + * enabling can be preempted, but the task cannot be frozen until it has + * dropped all locks (userspace tasks are frozen via a fake signal). */ - lockdep_assert_not_held(&kvm_lock); + lockdep_assert_not_held(&kvm_usage_lock); lockdep_assert_irqs_disabled(); if (kvm_usage_count) @@ -5740,7 +5741,7 @@ static int kvm_suspend(void) static void kvm_resume(void) { - lockdep_assert_not_held(&kvm_lock); + lockdep_assert_not_held(&kvm_usage_lock); lockdep_assert_irqs_disabled(); if (kvm_usage_count) From 9a798b1337afe4fdcce53efa77953b068d8614f5 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 29 Aug 2024 21:35:52 -0700 Subject: [PATCH 143/655] KVM: Register cpuhp and syscore callbacks when enabling hardware Register KVM's cpuhp and syscore callback when enabling virtualization in hardware instead of registering the callbacks during initialization, and let the CPU up/down framework invoke the inner enable/disable functions. Registering the callbacks during initialization makes things more complex than they need to be, as KVM needs to be very careful about handling races between enabling CPUs being onlined/offlined and hardware being enabled/disabled. Intel TDX support will require KVM to enable virtualization during KVM initialization, i.e. will add another wrinkle to things, at which point sorting out the potential races with kvm_usage_count would become even more complex. Note, using the cpuhp framework has a subtle behavioral change: enabling will be done serially across all CPUs, whereas KVM currently sends an IPI to all CPUs in parallel. While serializing virtualization enabling could create undesirable latency, the issue is limited to the 0=>1 transition of VM creation. And even that can be mitigated, e.g. by letting userspace force virtualization to be enabled when KVM is initialized. Cc: Chao Gao Reviewed-by: Kai Huang Acked-by: Kai Huang Tested-by: Farrah Chen Signed-off-by: Sean Christopherson Message-ID: <20240830043600.127750-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/locking.rst | 9 +- virt/kvm/kvm_main.c | 174 ++++++++++------------------- 2 files changed, 66 insertions(+), 117 deletions(-) diff --git a/Documentation/virt/kvm/locking.rst b/Documentation/virt/kvm/locking.rst index c0cb5ce51c1e..be3c323888b1 100644 --- a/Documentation/virt/kvm/locking.rst +++ b/Documentation/virt/kvm/locking.rst @@ -9,7 +9,9 @@ KVM Lock Overview The acquisition orders for mutexes are as follows: -- cpus_read_lock() is taken outside kvm_lock and kvm_usage_lock +- cpus_read_lock() is taken outside kvm_lock + +- kvm_usage_lock is taken outside cpus_read_lock() - kvm->lock is taken outside vcpu->mutex @@ -241,9 +243,8 @@ time it will be set using the Dirty tracking mechanism described above. :Arch: any :Protects: - kvm_usage_count - hardware virtualization enable/disable -:Comment: Exists because using kvm_lock leads to deadlock (see earlier comment - on cpus_read_lock() vs kvm_lock). Note, KVM also disables CPU hotplug via - cpus_read_lock() when enabling/disabling virtualization. +:Comment: Exists to allow taking cpus_read_lock() while kvm_usage_count is + protected, which simplifies the virtualization enabling logic. ``kvm->mn_invalidate_lock`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7164a9ece208..9ad8903b575a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -5578,7 +5578,7 @@ static DEFINE_PER_CPU(bool, hardware_enabled); static DEFINE_MUTEX(kvm_usage_lock); static int kvm_usage_count; -static int __hardware_enable_nolock(void) +static int hardware_enable_nolock(void) { if (__this_cpu_read(hardware_enabled)) return 0; @@ -5593,34 +5593,18 @@ static int __hardware_enable_nolock(void) return 0; } -static void hardware_enable_nolock(void *failed) -{ - if (__hardware_enable_nolock()) - atomic_inc(failed); -} - static int kvm_online_cpu(unsigned int cpu) { - int ret = 0; - /* * Abort the CPU online process if hardware virtualization cannot * be enabled. Otherwise running VMs would encounter unrecoverable * errors when scheduled to this CPU. */ - mutex_lock(&kvm_usage_lock); - if (kvm_usage_count) - ret = __hardware_enable_nolock(); - mutex_unlock(&kvm_usage_lock); - return ret; + return hardware_enable_nolock(); } static void hardware_disable_nolock(void *junk) { - /* - * Note, hardware_disable_all_nolock() tells all online CPUs to disable - * hardware, not just CPUs that successfully enabled hardware! - */ if (!__this_cpu_read(hardware_enabled)) return; @@ -5631,78 +5615,10 @@ static void hardware_disable_nolock(void *junk) static int kvm_offline_cpu(unsigned int cpu) { - mutex_lock(&kvm_usage_lock); - if (kvm_usage_count) - hardware_disable_nolock(NULL); - mutex_unlock(&kvm_usage_lock); + hardware_disable_nolock(NULL); return 0; } -static void hardware_disable_all_nolock(void) -{ - BUG_ON(!kvm_usage_count); - - kvm_usage_count--; - if (!kvm_usage_count) - on_each_cpu(hardware_disable_nolock, NULL, 1); -} - -static void hardware_disable_all(void) -{ - cpus_read_lock(); - mutex_lock(&kvm_usage_lock); - hardware_disable_all_nolock(); - mutex_unlock(&kvm_usage_lock); - cpus_read_unlock(); -} - -static int hardware_enable_all(void) -{ - atomic_t failed = ATOMIC_INIT(0); - int r; - - /* - * Do not enable hardware virtualization if the system is going down. - * If userspace initiated a forced reboot, e.g. reboot -f, then it's - * possible for an in-flight KVM_CREATE_VM to trigger hardware enabling - * after kvm_reboot() is called. Note, this relies on system_state - * being set _before_ kvm_reboot(), which is why KVM uses a syscore ops - * hook instead of registering a dedicated reboot notifier (the latter - * runs before system_state is updated). - */ - if (system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF || - system_state == SYSTEM_RESTART) - return -EBUSY; - - /* - * When onlining a CPU, cpu_online_mask is set before kvm_online_cpu() - * is called, and so on_each_cpu() between them includes the CPU that - * is being onlined. As a result, hardware_enable_nolock() may get - * invoked before kvm_online_cpu(), which also enables hardware if the - * usage count is non-zero. Disable CPU hotplug to avoid attempting to - * enable hardware multiple times. - */ - cpus_read_lock(); - mutex_lock(&kvm_usage_lock); - - r = 0; - - kvm_usage_count++; - if (kvm_usage_count == 1) { - on_each_cpu(hardware_enable_nolock, &failed, 1); - - if (atomic_read(&failed)) { - hardware_disable_all_nolock(); - r = -EBUSY; - } - } - - mutex_unlock(&kvm_usage_lock); - cpus_read_unlock(); - - return r; -} - static void kvm_shutdown(void) { /* @@ -5734,8 +5650,7 @@ static int kvm_suspend(void) lockdep_assert_not_held(&kvm_usage_lock); lockdep_assert_irqs_disabled(); - if (kvm_usage_count) - hardware_disable_nolock(NULL); + hardware_disable_nolock(NULL); return 0; } @@ -5744,8 +5659,7 @@ static void kvm_resume(void) lockdep_assert_not_held(&kvm_usage_lock); lockdep_assert_irqs_disabled(); - if (kvm_usage_count) - WARN_ON_ONCE(__hardware_enable_nolock()); + WARN_ON_ONCE(hardware_enable_nolock()); } static struct syscore_ops kvm_syscore_ops = { @@ -5753,6 +5667,60 @@ static struct syscore_ops kvm_syscore_ops = { .resume = kvm_resume, .shutdown = kvm_shutdown, }; + +static int hardware_enable_all(void) +{ + int r; + + guard(mutex)(&kvm_usage_lock); + + if (kvm_usage_count++) + return 0; + + r = cpuhp_setup_state(CPUHP_AP_KVM_ONLINE, "kvm/cpu:online", + kvm_online_cpu, kvm_offline_cpu); + if (r) + goto err_cpuhp; + + register_syscore_ops(&kvm_syscore_ops); + + /* + * Undo virtualization enabling and bail if the system is going down. + * If userspace initiated a forced reboot, e.g. reboot -f, then it's + * possible for an in-flight operation to enable virtualization after + * syscore_shutdown() is called, i.e. without kvm_shutdown() being + * invoked. Note, this relies on system_state being set _before_ + * kvm_shutdown(), e.g. to ensure either kvm_shutdown() is invoked + * or this CPU observes the impending shutdown. Which is why KVM uses + * a syscore ops hook instead of registering a dedicated reboot + * notifier (the latter runs before system_state is updated). + */ + if (system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF || + system_state == SYSTEM_RESTART) { + r = -EBUSY; + goto err_rebooting; + } + + return 0; + +err_rebooting: + unregister_syscore_ops(&kvm_syscore_ops); + cpuhp_remove_state(CPUHP_AP_KVM_ONLINE); +err_cpuhp: + --kvm_usage_count; + return r; +} + +static void hardware_disable_all(void) +{ + guard(mutex)(&kvm_usage_lock); + + if (--kvm_usage_count) + return; + + unregister_syscore_ops(&kvm_syscore_ops); + cpuhp_remove_state(CPUHP_AP_KVM_ONLINE); +} #else /* CONFIG_KVM_GENERIC_HARDWARE_ENABLING */ static int hardware_enable_all(void) { @@ -6461,15 +6429,6 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module) int r; int cpu; -#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING - r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_ONLINE, "kvm/cpu:online", - kvm_online_cpu, kvm_offline_cpu); - if (r) - return r; - - register_syscore_ops(&kvm_syscore_ops); -#endif - /* A kmem cache lets us meet the alignment requirements of fx_save. */ if (!vcpu_align) vcpu_align = __alignof__(struct kvm_vcpu); @@ -6480,10 +6439,8 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module) offsetofend(struct kvm_vcpu, stats_id) - offsetof(struct kvm_vcpu, arch), NULL); - if (!kvm_vcpu_cache) { - r = -ENOMEM; - goto err_vcpu_cache; - } + if (!kvm_vcpu_cache) + return -ENOMEM; for_each_possible_cpu(cpu) { if (!alloc_cpumask_var_node(&per_cpu(cpu_kick_mask, cpu), @@ -6540,11 +6497,6 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module) for_each_possible_cpu(cpu) free_cpumask_var(per_cpu(cpu_kick_mask, cpu)); kmem_cache_destroy(kvm_vcpu_cache); -err_vcpu_cache: -#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING - unregister_syscore_ops(&kvm_syscore_ops); - cpuhp_remove_state_nocalls(CPUHP_AP_KVM_ONLINE); -#endif return r; } EXPORT_SYMBOL_GPL(kvm_init); @@ -6566,10 +6518,6 @@ void kvm_exit(void) kmem_cache_destroy(kvm_vcpu_cache); kvm_vfio_ops_exit(); kvm_async_pf_deinit(); -#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING - unregister_syscore_ops(&kvm_syscore_ops); - cpuhp_remove_state_nocalls(CPUHP_AP_KVM_ONLINE); -#endif kvm_irqfd_exit(); } EXPORT_SYMBOL_GPL(kvm_exit); From 70c0194337d38dd29533e63e3cb07620f8c5eae1 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 29 Aug 2024 21:35:53 -0700 Subject: [PATCH 144/655] KVM: Rename symbols related to enabling virtualization hardware Rename the various functions (and a variable) that enable virtualization to prepare for upcoming changes, and to clean up artifacts of KVM's previous behavior, which required manually juggling locks around kvm_usage_count. Drop the "nolock" qualifier from per-CPU functions now that there are no "nolock" implementations of the "all" variants, i.e. now that calling a non-nolock function from a nolock function isn't confusing (unlike this sentence). Drop "all" from the outer helpers as they no longer manually iterate over all CPUs, and because it might not be obvious what "all" refers to. In lieu of the above qualifiers, append "_cpu" to the end of the functions that are per-CPU helpers for the outer APIs. Opportunistically prepend "kvm" to all functions to help make it clear that they are KVM helpers, but mostly because there's no reason not to. Lastly, use "virtualization" instead of "hardware", because while the functions do enable virtualization in hardware, there are a _lot_ of things that KVM enables in hardware. Defer renaming the arch hooks to future patches, purely to reduce the amount of churn in a single commit. Reviewed-by: Chao Gao Reviewed-by: Kai Huang Acked-by: Kai Huang Tested-by: Farrah Chen Signed-off-by: Sean Christopherson Message-ID: <20240830043600.127750-4-seanjc@google.com> Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 9ad8903b575a..4efbf9f84149 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -136,8 +136,8 @@ static int kvm_no_compat_open(struct inode *inode, struct file *file) #define KVM_COMPAT(c) .compat_ioctl = kvm_no_compat_ioctl, \ .open = kvm_no_compat_open #endif -static int hardware_enable_all(void); -static void hardware_disable_all(void); +static int kvm_enable_virtualization(void); +static void kvm_disable_virtualization(void); static void kvm_io_bus_destroy(struct kvm_io_bus *bus); @@ -1220,7 +1220,7 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname) if (r) goto out_err_no_arch_destroy_vm; - r = hardware_enable_all(); + r = kvm_enable_virtualization(); if (r) goto out_err_no_disable; @@ -1263,7 +1263,7 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname) mmu_notifier_unregister(&kvm->mmu_notifier, current->mm); #endif out_err_no_mmu_notifier: - hardware_disable_all(); + kvm_disable_virtualization(); out_err_no_disable: kvm_arch_destroy_vm(kvm); out_err_no_arch_destroy_vm: @@ -1360,7 +1360,7 @@ static void kvm_destroy_vm(struct kvm *kvm) #endif kvm_arch_free_vm(kvm); preempt_notifier_dec(); - hardware_disable_all(); + kvm_disable_virtualization(); mmdrop(mm); } @@ -5574,13 +5574,13 @@ static struct miscdevice kvm_dev = { __visible bool kvm_rebooting; EXPORT_SYMBOL_GPL(kvm_rebooting); -static DEFINE_PER_CPU(bool, hardware_enabled); +static DEFINE_PER_CPU(bool, virtualization_enabled); static DEFINE_MUTEX(kvm_usage_lock); static int kvm_usage_count; -static int hardware_enable_nolock(void) +static int kvm_enable_virtualization_cpu(void) { - if (__this_cpu_read(hardware_enabled)) + if (__this_cpu_read(virtualization_enabled)) return 0; if (kvm_arch_hardware_enable()) { @@ -5589,7 +5589,7 @@ static int hardware_enable_nolock(void) return -EIO; } - __this_cpu_write(hardware_enabled, true); + __this_cpu_write(virtualization_enabled, true); return 0; } @@ -5600,22 +5600,22 @@ static int kvm_online_cpu(unsigned int cpu) * be enabled. Otherwise running VMs would encounter unrecoverable * errors when scheduled to this CPU. */ - return hardware_enable_nolock(); + return kvm_enable_virtualization_cpu(); } -static void hardware_disable_nolock(void *junk) +static void kvm_disable_virtualization_cpu(void *ign) { - if (!__this_cpu_read(hardware_enabled)) + if (!__this_cpu_read(virtualization_enabled)) return; kvm_arch_hardware_disable(); - __this_cpu_write(hardware_enabled, false); + __this_cpu_write(virtualization_enabled, false); } static int kvm_offline_cpu(unsigned int cpu) { - hardware_disable_nolock(NULL); + kvm_disable_virtualization_cpu(NULL); return 0; } @@ -5634,7 +5634,7 @@ static void kvm_shutdown(void) */ pr_info("kvm: exiting hardware virtualization\n"); kvm_rebooting = true; - on_each_cpu(hardware_disable_nolock, NULL, 1); + on_each_cpu(kvm_disable_virtualization_cpu, NULL, 1); } static int kvm_suspend(void) @@ -5650,7 +5650,7 @@ static int kvm_suspend(void) lockdep_assert_not_held(&kvm_usage_lock); lockdep_assert_irqs_disabled(); - hardware_disable_nolock(NULL); + kvm_disable_virtualization_cpu(NULL); return 0; } @@ -5659,7 +5659,7 @@ static void kvm_resume(void) lockdep_assert_not_held(&kvm_usage_lock); lockdep_assert_irqs_disabled(); - WARN_ON_ONCE(hardware_enable_nolock()); + WARN_ON_ONCE(kvm_enable_virtualization_cpu()); } static struct syscore_ops kvm_syscore_ops = { @@ -5668,7 +5668,7 @@ static struct syscore_ops kvm_syscore_ops = { .shutdown = kvm_shutdown, }; -static int hardware_enable_all(void) +static int kvm_enable_virtualization(void) { int r; @@ -5711,7 +5711,7 @@ static int hardware_enable_all(void) return r; } -static void hardware_disable_all(void) +static void kvm_disable_virtualization(void) { guard(mutex)(&kvm_usage_lock); @@ -5722,12 +5722,12 @@ static void hardware_disable_all(void) cpuhp_remove_state(CPUHP_AP_KVM_ONLINE); } #else /* CONFIG_KVM_GENERIC_HARDWARE_ENABLING */ -static int hardware_enable_all(void) +static int kvm_enable_virtualization(void) { return 0; } -static void hardware_disable_all(void) +static void kvm_disable_virtualization(void) { } From 071f24ad28cdcdfa544fdb79b1b1d2b423717a11 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 29 Aug 2024 21:35:54 -0700 Subject: [PATCH 145/655] KVM: Rename arch hooks related to per-CPU virtualization enabling Rename the per-CPU hooks used to enable virtualization in hardware to align with the KVM-wide helpers in kvm_main.c, and to better capture that the callbacks are invoked on every online CPU. No functional change intended. Suggested-by: Paolo Bonzini Signed-off-by: Sean Christopherson Reviewed-by: Kai Huang Message-ID: <20240830043600.127750-5-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/arm64/kvm/arm.c | 6 +++--- arch/loongarch/kvm/main.c | 4 ++-- arch/mips/kvm/mips.c | 4 ++-- arch/riscv/kvm/main.c | 4 ++-- arch/x86/kvm/x86.c | 6 +++--- include/linux/kvm_host.h | 4 ++-- virt/kvm/kvm_main.c | 4 ++-- 7 files changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 9bef7638342e..9c8f5390ec63 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -2163,7 +2163,7 @@ static void cpu_hyp_uninit(void *discard) } } -int kvm_arch_hardware_enable(void) +int kvm_arch_enable_virtualization_cpu(void) { /* * Most calls to this function are made with migration @@ -2183,7 +2183,7 @@ int kvm_arch_hardware_enable(void) return 0; } -void kvm_arch_hardware_disable(void) +void kvm_arch_disable_virtualization_cpu(void) { kvm_timer_cpu_down(); kvm_vgic_cpu_down(); @@ -2379,7 +2379,7 @@ static int __init do_pkvm_init(u32 hyp_va_bits) /* * The stub hypercalls are now disabled, so set our local flag to - * prevent a later re-init attempt in kvm_arch_hardware_enable(). + * prevent a later re-init attempt in kvm_arch_enable_virtualization_cpu(). */ __this_cpu_write(kvm_hyp_initialized, 1); preempt_enable(); diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c index 844736b99d38..27e9b94c0a0b 100644 --- a/arch/loongarch/kvm/main.c +++ b/arch/loongarch/kvm/main.c @@ -261,7 +261,7 @@ long kvm_arch_dev_ioctl(struct file *filp, return -ENOIOCTLCMD; } -int kvm_arch_hardware_enable(void) +int kvm_arch_enable_virtualization_cpu(void) { unsigned long env, gcfg = 0; @@ -300,7 +300,7 @@ int kvm_arch_hardware_enable(void) return 0; } -void kvm_arch_hardware_disable(void) +void kvm_arch_disable_virtualization_cpu(void) { write_csr_gcfg(0); write_csr_gstat(0); diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index b5de770b092e..52e1f275351e 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -125,12 +125,12 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) return 1; } -int kvm_arch_hardware_enable(void) +int kvm_arch_enable_virtualization_cpu(void) { return kvm_mips_callbacks->hardware_enable(); } -void kvm_arch_hardware_disable(void) +void kvm_arch_disable_virtualization_cpu(void) { kvm_mips_callbacks->hardware_disable(); } diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c index bab2ec34cd87..f3427f6de608 100644 --- a/arch/riscv/kvm/main.c +++ b/arch/riscv/kvm/main.c @@ -20,7 +20,7 @@ long kvm_arch_dev_ioctl(struct file *filp, return -EINVAL; } -int kvm_arch_hardware_enable(void) +int kvm_arch_enable_virtualization_cpu(void) { csr_write(CSR_HEDELEG, KVM_HEDELEG_DEFAULT); csr_write(CSR_HIDELEG, KVM_HIDELEG_DEFAULT); @@ -35,7 +35,7 @@ int kvm_arch_hardware_enable(void) return 0; } -void kvm_arch_hardware_disable(void) +void kvm_arch_disable_virtualization_cpu(void) { kvm_riscv_aia_disable(); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 70219e406987..1182baf0d487 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -355,7 +355,7 @@ static void kvm_on_user_return(struct user_return_notifier *urn) /* * Disabling irqs at this point since the following code could be - * interrupted and executed through kvm_arch_hardware_disable() + * interrupted and executed through kvm_arch_disable_virtualization_cpu() */ local_irq_save(flags); if (msrs->registered) { @@ -12512,7 +12512,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) } EXPORT_SYMBOL_GPL(kvm_vcpu_deliver_sipi_vector); -int kvm_arch_hardware_enable(void) +int kvm_arch_enable_virtualization_cpu(void) { struct kvm *kvm; struct kvm_vcpu *vcpu; @@ -12608,7 +12608,7 @@ int kvm_arch_hardware_enable(void) return 0; } -void kvm_arch_hardware_disable(void) +void kvm_arch_disable_virtualization_cpu(void) { kvm_x86_call(hardware_disable)(); drop_user_return_notifiers(); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b23c6d48392f..4ceecba64f93 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1521,8 +1521,8 @@ static inline void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) {} #endif #ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING -int kvm_arch_hardware_enable(void); -void kvm_arch_hardware_disable(void); +int kvm_arch_enable_virtualization_cpu(void); +void kvm_arch_disable_virtualization_cpu(void); #endif int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 4efbf9f84149..7ada2b669d51 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -5583,7 +5583,7 @@ static int kvm_enable_virtualization_cpu(void) if (__this_cpu_read(virtualization_enabled)) return 0; - if (kvm_arch_hardware_enable()) { + if (kvm_arch_enable_virtualization_cpu()) { pr_info("kvm: enabling virtualization on CPU%d failed\n", raw_smp_processor_id()); return -EIO; @@ -5608,7 +5608,7 @@ static void kvm_disable_virtualization_cpu(void *ign) if (!__this_cpu_read(virtualization_enabled)) return; - kvm_arch_hardware_disable(); + kvm_arch_disable_virtualization_cpu(); __this_cpu_write(virtualization_enabled, false); } From 5381eca101fd80a0f0ad028482181e6180e3561c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 29 Aug 2024 21:35:55 -0700 Subject: [PATCH 146/655] KVM: MIPS: Rename virtualization {en,dis}abling APIs to match common KVM Rename MIPS's trampoline hooks for virtualization enabling to match the recently renamed arch hooks. No functional change intended. Signed-off-by: Sean Christopherson Message-ID: <20240830043600.127750-6-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/mips/include/asm/kvm_host.h | 4 ++-- arch/mips/kvm/mips.c | 4 ++-- arch/mips/kvm/vz.c | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 6743a57c1ab4..f7222eb594ea 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -728,8 +728,8 @@ struct kvm_mips_callbacks { int (*handle_fpe)(struct kvm_vcpu *vcpu); int (*handle_msa_disabled)(struct kvm_vcpu *vcpu); int (*handle_guest_exit)(struct kvm_vcpu *vcpu); - int (*hardware_enable)(void); - void (*hardware_disable)(void); + int (*enable_virtualization_cpu)(void); + void (*disable_virtualization_cpu)(void); int (*check_extension)(struct kvm *kvm, long ext); int (*vcpu_init)(struct kvm_vcpu *vcpu); void (*vcpu_uninit)(struct kvm_vcpu *vcpu); diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 52e1f275351e..60b43ea85c12 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -127,12 +127,12 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) int kvm_arch_enable_virtualization_cpu(void) { - return kvm_mips_callbacks->hardware_enable(); + return kvm_mips_callbacks->enable_virtualization_cpu(); } void kvm_arch_disable_virtualization_cpu(void) { - kvm_mips_callbacks->hardware_disable(); + kvm_mips_callbacks->disable_virtualization_cpu(); } int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) diff --git a/arch/mips/kvm/vz.c b/arch/mips/kvm/vz.c index 99d5a71e4300..ccab4d76b126 100644 --- a/arch/mips/kvm/vz.c +++ b/arch/mips/kvm/vz.c @@ -2869,7 +2869,7 @@ static unsigned int kvm_vz_resize_guest_vtlb(unsigned int size) return ret + 1; } -static int kvm_vz_hardware_enable(void) +static int kvm_vz_enable_virtualization_cpu(void) { unsigned int mmu_size, guest_mmu_size, ftlb_size; u64 guest_cvmctl, cvmvmconfig; @@ -2983,7 +2983,7 @@ static int kvm_vz_hardware_enable(void) return 0; } -static void kvm_vz_hardware_disable(void) +static void kvm_vz_disable_virtualization_cpu(void) { u64 cvmvmconfig; unsigned int mmu_size; @@ -3280,8 +3280,8 @@ static struct kvm_mips_callbacks kvm_vz_callbacks = { .handle_msa_disabled = kvm_trap_vz_handle_msa_disabled, .handle_guest_exit = kvm_trap_vz_handle_guest_exit, - .hardware_enable = kvm_vz_hardware_enable, - .hardware_disable = kvm_vz_hardware_disable, + .enable_virtualization_cpu = kvm_vz_enable_virtualization_cpu, + .disable_virtualization_cpu = kvm_vz_disable_virtualization_cpu, .check_extension = kvm_vz_check_extension, .vcpu_init = kvm_vz_vcpu_init, .vcpu_uninit = kvm_vz_vcpu_uninit, From 0617a769ce16b836659c8a712f394bfa3543a587 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 29 Aug 2024 21:35:56 -0700 Subject: [PATCH 147/655] KVM: x86: Rename virtualization {en,dis}abling APIs to match common KVM Rename x86's the per-CPU vendor hooks used to enable virtualization in hardware to align with the recently renamed arch hooks. No functional change intended. Signed-off-by: Sean Christopherson Reviewed-by: Kai Huang Message-ID: <20240830043600.127750-7-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm-x86-ops.h | 4 ++-- arch/x86/include/asm/kvm_host.h | 4 ++-- arch/x86/kvm/svm/svm.c | 18 +++++++++--------- arch/x86/kvm/vmx/main.c | 4 ++-- arch/x86/kvm/vmx/vmx.c | 10 +++++----- arch/x86/kvm/vmx/x86_ops.h | 4 ++-- arch/x86/kvm/x86.c | 10 +++++----- 7 files changed, 27 insertions(+), 27 deletions(-) diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index 68ad4f923664..03b7e13f15bb 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -14,8 +14,8 @@ BUILD_BUG_ON(1) * be __static_call_return0. */ KVM_X86_OP(check_processor_compatibility) -KVM_X86_OP(hardware_enable) -KVM_X86_OP(hardware_disable) +KVM_X86_OP(enable_virtualization_cpu) +KVM_X86_OP(disable_virtualization_cpu) KVM_X86_OP(hardware_unsetup) KVM_X86_OP(has_emulated_msr) KVM_X86_OP(vcpu_after_set_cpuid) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e4fc362ba3da..704aeecfe2c9 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1629,8 +1629,8 @@ struct kvm_x86_ops { int (*check_processor_compatibility)(void); - int (*hardware_enable)(void); - void (*hardware_disable)(void); + int (*enable_virtualization_cpu)(void); + void (*disable_virtualization_cpu)(void); void (*hardware_unsetup)(void); bool (*has_emulated_msr)(struct kvm *kvm, u32 index); void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index d6f252555ab3..a9adbe10c12e 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -592,14 +592,14 @@ static inline void kvm_cpu_svm_disable(void) } } -static void svm_emergency_disable(void) +static void svm_emergency_disable_virtualization_cpu(void) { kvm_rebooting = true; kvm_cpu_svm_disable(); } -static void svm_hardware_disable(void) +static void svm_disable_virtualization_cpu(void) { /* Make sure we clean up behind us */ if (tsc_scaling) @@ -610,7 +610,7 @@ static void svm_hardware_disable(void) amd_pmu_disable_virt(); } -static int svm_hardware_enable(void) +static int svm_enable_virtualization_cpu(void) { struct svm_cpu_data *sd; @@ -1533,7 +1533,7 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu) * TSC_AUX is always virtualized for SEV-ES guests when the feature is * available. The user return MSR support is not required in this case * because TSC_AUX is restored on #VMEXIT from the host save area - * (which has been initialized in svm_hardware_enable()). + * (which has been initialized in svm_enable_virtualization_cpu()). */ if (likely(tsc_aux_uret_slot >= 0) && (!boot_cpu_has(X86_FEATURE_V_TSC_AUX) || !sev_es_guest(vcpu->kvm))) @@ -3132,7 +3132,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) * feature is available. The user return MSR support is not * required in this case because TSC_AUX is restored on #VMEXIT * from the host save area (which has been initialized in - * svm_hardware_enable()). + * svm_enable_virtualization_cpu()). */ if (boot_cpu_has(X86_FEATURE_V_TSC_AUX) && sev_es_guest(vcpu->kvm)) break; @@ -4980,8 +4980,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .check_processor_compatibility = svm_check_processor_compat, .hardware_unsetup = svm_hardware_unsetup, - .hardware_enable = svm_hardware_enable, - .hardware_disable = svm_hardware_disable, + .enable_virtualization_cpu = svm_enable_virtualization_cpu, + .disable_virtualization_cpu = svm_disable_virtualization_cpu, .has_emulated_msr = svm_has_emulated_msr, .vcpu_create = svm_vcpu_create, @@ -5411,7 +5411,7 @@ static void __svm_exit(void) { kvm_x86_vendor_exit(); - cpu_emergency_unregister_virt_callback(svm_emergency_disable); + cpu_emergency_unregister_virt_callback(svm_emergency_disable_virtualization_cpu); } static int __init svm_init(void) @@ -5427,7 +5427,7 @@ static int __init svm_init(void) if (r) return r; - cpu_emergency_register_virt_callback(svm_emergency_disable); + cpu_emergency_register_virt_callback(svm_emergency_disable_virtualization_cpu); /* * Common KVM initialization _must_ come last, after this, /dev/kvm is diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c index 0bf35ebe8a1b..4a5bf92edccf 100644 --- a/arch/x86/kvm/vmx/main.c +++ b/arch/x86/kvm/vmx/main.c @@ -23,8 +23,8 @@ struct kvm_x86_ops vt_x86_ops __initdata = { .hardware_unsetup = vmx_hardware_unsetup, - .hardware_enable = vmx_hardware_enable, - .hardware_disable = vmx_hardware_disable, + .enable_virtualization_cpu = vmx_enable_virtualization_cpu, + .disable_virtualization_cpu = vmx_disable_virtualization_cpu, .has_emulated_msr = vmx_has_emulated_msr, .vm_size = sizeof(struct kvm_vmx), diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index f18c2d8c7476..cf7d937bfd2c 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -755,7 +755,7 @@ static int kvm_cpu_vmxoff(void) return -EIO; } -static void vmx_emergency_disable(void) +static void vmx_emergency_disable_virtualization_cpu(void) { int cpu = raw_smp_processor_id(); struct loaded_vmcs *v; @@ -2844,7 +2844,7 @@ static int kvm_cpu_vmxon(u64 vmxon_pointer) return -EFAULT; } -int vmx_hardware_enable(void) +int vmx_enable_virtualization_cpu(void) { int cpu = raw_smp_processor_id(); u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); @@ -2881,7 +2881,7 @@ static void vmclear_local_loaded_vmcss(void) __loaded_vmcs_clear(v); } -void vmx_hardware_disable(void) +void vmx_disable_virtualization_cpu(void) { vmclear_local_loaded_vmcss(); @@ -8584,7 +8584,7 @@ static void __vmx_exit(void) { allow_smaller_maxphyaddr = false; - cpu_emergency_unregister_virt_callback(vmx_emergency_disable); + cpu_emergency_unregister_virt_callback(vmx_emergency_disable_virtualization_cpu); vmx_cleanup_l1d_flush(); } @@ -8632,7 +8632,7 @@ static int __init vmx_init(void) pi_init_cpu(cpu); } - cpu_emergency_register_virt_callback(vmx_emergency_disable); + cpu_emergency_register_virt_callback(vmx_emergency_disable_virtualization_cpu); vmx_check_vmcs12_offsets(); diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h index ce3221cd1d01..205692c43a8e 100644 --- a/arch/x86/kvm/vmx/x86_ops.h +++ b/arch/x86/kvm/vmx/x86_ops.h @@ -13,8 +13,8 @@ extern struct kvm_x86_init_ops vt_init_ops __initdata; void vmx_hardware_unsetup(void); int vmx_check_processor_compat(void); -int vmx_hardware_enable(void); -void vmx_hardware_disable(void); +int vmx_enable_virtualization_cpu(void); +void vmx_disable_virtualization_cpu(void); int vmx_vm_init(struct kvm *kvm); void vmx_vm_destroy(struct kvm *kvm); int vmx_vcpu_precreate(struct kvm *kvm); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1182baf0d487..431358167fa8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9749,7 +9749,7 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops) guard(mutex)(&vendor_module_lock); - if (kvm_x86_ops.hardware_enable) { + if (kvm_x86_ops.enable_virtualization_cpu) { pr_err("already loaded vendor module '%s'\n", kvm_x86_ops.name); return -EEXIST; } @@ -9876,7 +9876,7 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops) return 0; out_unwind_ops: - kvm_x86_ops.hardware_enable = NULL; + kvm_x86_ops.enable_virtualization_cpu = NULL; kvm_x86_call(hardware_unsetup)(); out_mmu_exit: kvm_mmu_vendor_module_exit(); @@ -9917,7 +9917,7 @@ void kvm_x86_vendor_exit(void) WARN_ON(static_branch_unlikely(&kvm_xen_enabled.key)); #endif mutex_lock(&vendor_module_lock); - kvm_x86_ops.hardware_enable = NULL; + kvm_x86_ops.enable_virtualization_cpu = NULL; mutex_unlock(&vendor_module_lock); } EXPORT_SYMBOL_GPL(kvm_x86_vendor_exit); @@ -12528,7 +12528,7 @@ int kvm_arch_enable_virtualization_cpu(void) if (ret) return ret; - ret = kvm_x86_call(hardware_enable)(); + ret = kvm_x86_call(enable_virtualization_cpu)(); if (ret != 0) return ret; @@ -12610,7 +12610,7 @@ int kvm_arch_enable_virtualization_cpu(void) void kvm_arch_disable_virtualization_cpu(void) { - kvm_x86_call(hardware_disable)(); + kvm_x86_call(disable_virtualization_cpu)(); drop_user_return_notifiers(); } From b4886fab6fb620b96ad7eeefb9801c42dfa91741 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 29 Aug 2024 21:35:57 -0700 Subject: [PATCH 148/655] KVM: Add a module param to allow enabling virtualization when KVM is loaded Add an on-by-default module param, enable_virt_at_load, to let userspace force virtualization to be enabled in hardware when KVM is initialized, i.e. just before /dev/kvm is exposed to userspace. Enabling virtualization during KVM initialization allows userspace to avoid the additional latency when creating/destroying the first/last VM (or more specifically, on the 0=>1 and 1=>0 edges of creation/destruction). Now that KVM uses the cpuhp framework to do per-CPU enabling, the latency could be non-trivial as the cpuhup bringup/teardown is serialized across CPUs, e.g. the latency could be problematic for use case that need to spin up VMs quickly. Prior to commit 10474ae8945c ("KVM: Activate Virtualization On Demand"), KVM _unconditionally_ enabled virtualization during load, i.e. there's no fundamental reason KVM needs to dynamically toggle virtualization. These days, the only known argument for not enabling virtualization is to allow KVM to be autoloaded without blocking other out-of-tree hypervisors, and such use cases can simply change the module param, e.g. via command line. Note, the aforementioned commit also mentioned that enabling SVM (AMD's virtualization extensions) can result in "using invalid TLB entries". It's not clear whether the changelog was referring to a KVM bug, a CPU bug, or something else entirely. Regardless, leaving virtualization off by default is not a robust "fix", as any protection provided is lost the instant userspace creates the first VM. Reviewed-by: Chao Gao Acked-by: Kai Huang Reviewed-by: Kai Huang Tested-by: Farrah Chen Signed-off-by: Sean Christopherson Message-ID: <20240830043600.127750-8-seanjc@google.com> Signed-off-by: Paolo Bonzini --- .../admin-guide/kernel-parameters.txt | 17 +++++++++ virt/kvm/kvm_main.c | 35 +++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 09126bb8cc9f..1b52b1b7bbc4 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2648,6 +2648,23 @@ Default is Y (on). + kvm.enable_virt_at_load=[KVM,ARM64,LOONGARCH,MIPS,RISCV,X86] + If enabled, KVM will enable virtualization in hardware + when KVM is loaded, and disable virtualization when KVM + is unloaded (if KVM is built as a module). + + If disabled, KVM will dynamically enable and disable + virtualization on-demand when creating and destroying + VMs, i.e. on the 0=>1 and 1=>0 transitions of the + number of VMs. + + Enabling virtualization at module lode avoids potential + latency for creation of the 0=>1 VM, as KVM serializes + virtualization enabling across all online CPUs. The + "cost" of enabling virtualization when KVM is loaded, + is that doing so may interfere with using out-of-tree + hypervisors that want to "own" virtualization hardware. + kvm.enable_vmware_backdoor=[KVM] Support VMware backdoor PV interface. Default is false (don't support). diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7ada2b669d51..3a18da68b0ca 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -5571,6 +5571,9 @@ static struct miscdevice kvm_dev = { }; #ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING +static bool enable_virt_at_load = true; +module_param(enable_virt_at_load, bool, 0444); + __visible bool kvm_rebooting; EXPORT_SYMBOL_GPL(kvm_rebooting); @@ -5721,15 +5724,39 @@ static void kvm_disable_virtualization(void) unregister_syscore_ops(&kvm_syscore_ops); cpuhp_remove_state(CPUHP_AP_KVM_ONLINE); } + +static int kvm_init_virtualization(void) +{ + if (enable_virt_at_load) + return kvm_enable_virtualization(); + + return 0; +} + +static void kvm_uninit_virtualization(void) +{ + if (enable_virt_at_load) + kvm_disable_virtualization(); +} #else /* CONFIG_KVM_GENERIC_HARDWARE_ENABLING */ static int kvm_enable_virtualization(void) { return 0; } +static int kvm_init_virtualization(void) +{ + return 0; +} + static void kvm_disable_virtualization(void) { +} + +static void kvm_uninit_virtualization(void) +{ + } #endif /* CONFIG_KVM_GENERIC_HARDWARE_ENABLING */ @@ -6474,6 +6501,10 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module) kvm_gmem_init(module); + r = kvm_init_virtualization(); + if (r) + goto err_virt; + /* * Registration _must_ be the very last thing done, as this exposes * /dev/kvm to userspace, i.e. all infrastructure must be setup! @@ -6487,6 +6518,8 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module) return 0; err_register: + kvm_uninit_virtualization(); +err_virt: kvm_vfio_ops_exit(); err_vfio: kvm_async_pf_deinit(); @@ -6512,6 +6545,8 @@ void kvm_exit(void) */ misc_deregister(&kvm_dev); + kvm_uninit_virtualization(); + debugfs_remove_recursive(kvm_debugfs_dir); for_each_possible_cpu(cpu) free_cpumask_var(per_cpu(cpu_kick_mask, cpu)); From b67107a251b01161956892352d53fb122346eda1 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 29 Aug 2024 21:35:58 -0700 Subject: [PATCH 149/655] KVM: Add arch hooks for enabling/disabling virtualization Add arch hooks that are invoked when KVM enables/disable virtualization. x86 will use the hooks to register an "emergency disable" callback, which is essentially an x86-specific shutdown notifier that is used when the kernel is doing an emergency reboot/shutdown/kexec. Add comments for the declarations to help arch code understand exactly when the callbacks are invoked. Alternatively, the APIs themselves could communicate most of the same info, but kvm_arch_pre_enable_virtualization() and kvm_arch_post_disable_virtualization() are a bit cumbersome, and make it a bit less obvious that they are intended to be implemented as a pair. Reviewed-by: Chao Gao Reviewed-by: Kai Huang Acked-by: Kai Huang Tested-by: Farrah Chen Signed-off-by: Sean Christopherson Message-ID: <20240830043600.127750-9-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 14 ++++++++++++++ virt/kvm/kvm_main.c | 14 ++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4ceecba64f93..dfd6ad66efcc 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1521,6 +1521,20 @@ static inline void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) {} #endif #ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING +/* + * kvm_arch_{enable,disable}_virtualization() are called on one CPU, under + * kvm_usage_lock, immediately after/before 0=>1 and 1=>0 transitions of + * kvm_usage_count, i.e. at the beginning of the generic hardware enabling + * sequence, and at the end of the generic hardware disabling sequence. + */ +void kvm_arch_enable_virtualization(void); +void kvm_arch_disable_virtualization(void); +/* + * kvm_arch_{enable,disable}_virtualization_cpu() are called on "every" CPU to + * do the actual twiddling of hardware bits. The hooks are called on all + * online CPUs when KVM enables/disabled virtualization, and on a single CPU + * when that CPU is onlined/offlined (including for Resume/Suspend). + */ int kvm_arch_enable_virtualization_cpu(void); void kvm_arch_disable_virtualization_cpu(void); #endif diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 3a18da68b0ca..5f9a66df7bfb 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -5581,6 +5581,16 @@ static DEFINE_PER_CPU(bool, virtualization_enabled); static DEFINE_MUTEX(kvm_usage_lock); static int kvm_usage_count; +__weak void kvm_arch_enable_virtualization(void) +{ + +} + +__weak void kvm_arch_disable_virtualization(void) +{ + +} + static int kvm_enable_virtualization_cpu(void) { if (__this_cpu_read(virtualization_enabled)) @@ -5680,6 +5690,8 @@ static int kvm_enable_virtualization(void) if (kvm_usage_count++) return 0; + kvm_arch_enable_virtualization(); + r = cpuhp_setup_state(CPUHP_AP_KVM_ONLINE, "kvm/cpu:online", kvm_online_cpu, kvm_offline_cpu); if (r) @@ -5710,6 +5722,7 @@ static int kvm_enable_virtualization(void) unregister_syscore_ops(&kvm_syscore_ops); cpuhp_remove_state(CPUHP_AP_KVM_ONLINE); err_cpuhp: + kvm_arch_disable_virtualization(); --kvm_usage_count; return r; } @@ -5723,6 +5736,7 @@ static void kvm_disable_virtualization(void) unregister_syscore_ops(&kvm_syscore_ops); cpuhp_remove_state(CPUHP_AP_KVM_ONLINE); + kvm_arch_disable_virtualization(); } static int kvm_init_virtualization(void) From 6d55a94222dbc64754fc138dbe4578dc5cac1c8b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 29 Aug 2024 21:35:59 -0700 Subject: [PATCH 150/655] x86/reboot: Unconditionally define cpu_emergency_virt_cb typedef Define cpu_emergency_virt_cb even if the kernel is being built without KVM support so that KVM can reference the typedef in asm/kvm_host.h without needing yet more #ifdefs. No functional change intended. Acked-by: Kai Huang Reviewed-by: Chao Gao Reviewed-by: Kai Huang Tested-by: Farrah Chen Signed-off-by: Sean Christopherson Message-ID: <20240830043600.127750-10-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/reboot.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h index 6536873f8fc0..d0ef2a678d66 100644 --- a/arch/x86/include/asm/reboot.h +++ b/arch/x86/include/asm/reboot.h @@ -25,8 +25,8 @@ void __noreturn machine_real_restart(unsigned int type); #define MRR_BIOS 0 #define MRR_APM 1 -#if IS_ENABLED(CONFIG_KVM_INTEL) || IS_ENABLED(CONFIG_KVM_AMD) typedef void (cpu_emergency_virt_cb)(void); +#if IS_ENABLED(CONFIG_KVM_INTEL) || IS_ENABLED(CONFIG_KVM_AMD) void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback); void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback); void cpu_emergency_disable_virtualization(void); From 590b09b1d88e18ae57f89930a6f7b89795d2e9f3 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 29 Aug 2024 21:36:00 -0700 Subject: [PATCH 151/655] KVM: x86: Register "emergency disable" callbacks when virt is enabled Register the "disable virtualization in an emergency" callback just before KVM enables virtualization in hardware, as there is no functional need to keep the callbacks registered while KVM happens to be loaded, but is inactive, i.e. if KVM hasn't enabled virtualization. Note, unregistering the callback every time the last VM is destroyed could have measurable latency due to the synchronize_rcu() needed to ensure all references to the callback are dropped before KVM is unloaded. But the latency should be a small fraction of the total latency of disabling virtualization across all CPUs, and userspace can set enable_virt_at_load to completely eliminate the runtime overhead. Add a pointer in kvm_x86_ops to allow vendor code to provide its callback. There is no reason to force vendor code to do the registration, and either way KVM would need a new kvm_x86_ops hook. Suggested-by: Kai Huang Reviewed-by: Chao Gao Reviewed-by: Kai Huang Acked-by: Kai Huang Tested-by: Farrah Chen Signed-off-by: Sean Christopherson Message-ID: <20240830043600.127750-11-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 +++ arch/x86/kvm/svm/svm.c | 5 +---- arch/x86/kvm/vmx/main.c | 2 ++ arch/x86/kvm/vmx/vmx.c | 6 +----- arch/x86/kvm/vmx/x86_ops.h | 1 + arch/x86/kvm/x86.c | 10 ++++++++++ 6 files changed, 18 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 704aeecfe2c9..52443ccda320 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -36,6 +36,7 @@ #include #include #include +#include #define __KVM_HAVE_ARCH_VCPU_DEBUGFS @@ -1631,6 +1632,8 @@ struct kvm_x86_ops { int (*enable_virtualization_cpu)(void); void (*disable_virtualization_cpu)(void); + cpu_emergency_virt_cb *emergency_disable_virtualization_cpu; + void (*hardware_unsetup)(void); bool (*has_emulated_msr)(struct kvm *kvm, u32 index); void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index a9adbe10c12e..9a0506ef87df 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4982,6 +4982,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .hardware_unsetup = svm_hardware_unsetup, .enable_virtualization_cpu = svm_enable_virtualization_cpu, .disable_virtualization_cpu = svm_disable_virtualization_cpu, + .emergency_disable_virtualization_cpu = svm_emergency_disable_virtualization_cpu, .has_emulated_msr = svm_has_emulated_msr, .vcpu_create = svm_vcpu_create, @@ -5410,8 +5411,6 @@ static struct kvm_x86_init_ops svm_init_ops __initdata = { static void __svm_exit(void) { kvm_x86_vendor_exit(); - - cpu_emergency_unregister_virt_callback(svm_emergency_disable_virtualization_cpu); } static int __init svm_init(void) @@ -5427,8 +5426,6 @@ static int __init svm_init(void) if (r) return r; - cpu_emergency_register_virt_callback(svm_emergency_disable_virtualization_cpu); - /* * Common KVM initialization _must_ come last, after this, /dev/kvm is * exposed to userspace! diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c index 4a5bf92edccf..7e2e78a14257 100644 --- a/arch/x86/kvm/vmx/main.c +++ b/arch/x86/kvm/vmx/main.c @@ -25,6 +25,8 @@ struct kvm_x86_ops vt_x86_ops __initdata = { .enable_virtualization_cpu = vmx_enable_virtualization_cpu, .disable_virtualization_cpu = vmx_disable_virtualization_cpu, + .emergency_disable_virtualization_cpu = vmx_emergency_disable_virtualization_cpu, + .has_emulated_msr = vmx_has_emulated_msr, .vm_size = sizeof(struct kvm_vmx), diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index cf7d937bfd2c..89682832dded 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -755,7 +755,7 @@ static int kvm_cpu_vmxoff(void) return -EIO; } -static void vmx_emergency_disable_virtualization_cpu(void) +void vmx_emergency_disable_virtualization_cpu(void) { int cpu = raw_smp_processor_id(); struct loaded_vmcs *v; @@ -8584,8 +8584,6 @@ static void __vmx_exit(void) { allow_smaller_maxphyaddr = false; - cpu_emergency_unregister_virt_callback(vmx_emergency_disable_virtualization_cpu); - vmx_cleanup_l1d_flush(); } @@ -8632,8 +8630,6 @@ static int __init vmx_init(void) pi_init_cpu(cpu); } - cpu_emergency_register_virt_callback(vmx_emergency_disable_virtualization_cpu); - vmx_check_vmcs12_offsets(); /* diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h index 205692c43a8e..b6a7cfc6ae31 100644 --- a/arch/x86/kvm/vmx/x86_ops.h +++ b/arch/x86/kvm/vmx/x86_ops.h @@ -15,6 +15,7 @@ void vmx_hardware_unsetup(void); int vmx_check_processor_compat(void); int vmx_enable_virtualization_cpu(void); void vmx_disable_virtualization_cpu(void); +void vmx_emergency_disable_virtualization_cpu(void); int vmx_vm_init(struct kvm *kvm); void vmx_vm_destroy(struct kvm *kvm); int vmx_vcpu_precreate(struct kvm *kvm); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 431358167fa8..f72e5d89e942 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12512,6 +12512,16 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) } EXPORT_SYMBOL_GPL(kvm_vcpu_deliver_sipi_vector); +void kvm_arch_enable_virtualization(void) +{ + cpu_emergency_register_virt_callback(kvm_x86_ops.emergency_disable_virtualization_cpu); +} + +void kvm_arch_disable_virtualization(void) +{ + cpu_emergency_unregister_virt_callback(kvm_x86_ops.emergency_disable_virtualization_cpu); +} + int kvm_arch_enable_virtualization_cpu(void) { struct kvm *kvm; From a7722b82c2ca9ca771d6c698643883be76f61a7e Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Thu, 5 Sep 2024 10:28:32 +0800 Subject: [PATCH 152/655] dm integrity: Convert comma to semicolon Replace comma between expressions with semicolons. Using a ',' in place of a ';' can have unintended side effects. Although that is not the case here, it is seems best to use ';' unless ',' is intended. Found by inspection. No functional change intended. Compile tested only. Signed-off-by: Chen Ni Reviewed-by: Mike Snitzer Signed-off-by: Mikulas Patocka --- drivers/md/dm-integrity.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index b3489d3fe7db..34fa98efa9fa 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -1500,15 +1500,15 @@ static void dm_integrity_flush_buffers(struct dm_integrity_c *ic, bool flush_dat if (!ic->meta_dev) flush_data = false; if (flush_data) { - fr.io_req.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC, - fr.io_req.mem.type = DM_IO_KMEM, - fr.io_req.mem.ptr.addr = NULL, - fr.io_req.notify.fn = flush_notify, + fr.io_req.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC; + fr.io_req.mem.type = DM_IO_KMEM; + fr.io_req.mem.ptr.addr = NULL; + fr.io_req.notify.fn = flush_notify; fr.io_req.notify.context = &fr; - fr.io_req.client = dm_bufio_get_dm_io_client(ic->bufio), - fr.io_reg.bdev = ic->dev->bdev, - fr.io_reg.sector = 0, - fr.io_reg.count = 0, + fr.io_req.client = dm_bufio_get_dm_io_client(ic->bufio); + fr.io_reg.bdev = ic->dev->bdev; + fr.io_reg.sector = 0; + fr.io_reg.count = 0; fr.ic = ic; init_completion(&fr.comp); r = dm_io(&fr.io_req, 1, &fr.io_reg, NULL, IOPRIO_DEFAULT); From 90da77987dd59c8f6ec6d508d23d5a77c7af64f1 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 5 Sep 2024 21:01:52 +0200 Subject: [PATCH 153/655] dm-integrity: support recalculation in the 'I' mode In the kernel 6.11, dm-integrity was enhanced with an inline ('I') mode. This mode uses devices with non-power-of-2 sector size. The extra metadata after each sector are used to hold the integrity hash. This commit enhances the inline mode, so that there is automatic recalculation of the integrity hashes when the 'reclaculate' parameter is used. It allows us to activate the device instantly, and the recalculation is done on background. If the device is deactivated while recalculation is in progress, it will remember the point where it stopped and it will continue from this point when activated again. Signed-off-by: Mikulas Patocka --- drivers/md/dm-integrity.c | 288 ++++++++++++++++++++++++++++++++------ 1 file changed, 246 insertions(+), 42 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 34fa98efa9fa..c40df05e0521 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -284,6 +284,7 @@ struct dm_integrity_c { mempool_t recheck_pool; struct bio_set recheck_bios; + struct bio_set recalc_bios; struct notifier_block reboot_notifier; }; @@ -321,7 +322,9 @@ struct dm_integrity_io { struct dm_bio_details bio_details; char *integrity_payload; + unsigned payload_len; bool integrity_payload_from_mempool; + bool integrity_range_locked; }; struct journal_completion { @@ -359,7 +362,7 @@ static struct kmem_cache *journal_io_cache; #endif static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map); -static int dm_integrity_map_inline(struct dm_integrity_io *dio); +static int dm_integrity_map_inline(struct dm_integrity_io *dio, bool from_map); static void integrity_bio_wait(struct work_struct *w); static void dm_integrity_dtr(struct dm_target *ti); @@ -1946,8 +1949,13 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio) dio->bi_status = 0; dio->op = bio_op(bio); - if (ic->mode == 'I') - return dm_integrity_map_inline(dio); + if (ic->mode == 'I') { + bio->bi_iter.bi_sector = dm_target_offset(ic->ti, bio->bi_iter.bi_sector); + dio->integrity_payload = NULL; + dio->integrity_payload_from_mempool = false; + dio->integrity_range_locked = false; + return dm_integrity_map_inline(dio, true); + } if (unlikely(dio->op == REQ_OP_DISCARD)) { if (ti->max_io_len) { @@ -2395,15 +2403,13 @@ static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map do_endio_flush(ic, dio); } -static int dm_integrity_map_inline(struct dm_integrity_io *dio) +static int dm_integrity_map_inline(struct dm_integrity_io *dio, bool from_map) { struct dm_integrity_c *ic = dio->ic; struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); struct bio_integrity_payload *bip; - unsigned payload_len, digest_size, extra_size, ret; - - dio->integrity_payload = NULL; - dio->integrity_payload_from_mempool = false; + unsigned ret; + sector_t recalc_sector; if (unlikely(bio_integrity(bio))) { bio->bi_status = BLK_STS_NOTSUPP; @@ -2416,28 +2422,67 @@ static int dm_integrity_map_inline(struct dm_integrity_io *dio) return DM_MAPIO_REMAPPED; retry: - payload_len = ic->tuple_size * (bio_sectors(bio) >> ic->sb->log2_sectors_per_block); - digest_size = crypto_shash_digestsize(ic->internal_hash); - extra_size = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0; - payload_len += extra_size; - dio->integrity_payload = kmalloc(payload_len, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); - if (unlikely(!dio->integrity_payload)) { - const unsigned x_size = PAGE_SIZE << 1; - if (payload_len > x_size) { - unsigned sectors = ((x_size - extra_size) / ic->tuple_size) << ic->sb->log2_sectors_per_block; - if (WARN_ON(!sectors || sectors >= bio_sectors(bio))) { - bio->bi_status = BLK_STS_NOTSUPP; - bio_endio(bio); - return DM_MAPIO_SUBMITTED; + if (!dio->integrity_payload) { + unsigned digest_size, extra_size; + dio->payload_len = ic->tuple_size * (bio_sectors(bio) >> ic->sb->log2_sectors_per_block); + digest_size = crypto_shash_digestsize(ic->internal_hash); + extra_size = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0; + dio->payload_len += extra_size; + dio->integrity_payload = kmalloc(dio->payload_len, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); + if (unlikely(!dio->integrity_payload)) { + const unsigned x_size = PAGE_SIZE << 1; + if (dio->payload_len > x_size) { + unsigned sectors = ((x_size - extra_size) / ic->tuple_size) << ic->sb->log2_sectors_per_block; + if (WARN_ON(!sectors || sectors >= bio_sectors(bio))) { + bio->bi_status = BLK_STS_NOTSUPP; + bio_endio(bio); + return DM_MAPIO_SUBMITTED; + } + dm_accept_partial_bio(bio, sectors); + goto retry; } - dm_accept_partial_bio(bio, sectors); - goto retry; } + } + + dio->range.logical_sector = bio->bi_iter.bi_sector; + dio->range.n_sectors = bio_sectors(bio); + + if (!(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))) + goto skip_spinlock; +#ifdef CONFIG_64BIT + /* + * On 64-bit CPUs we can optimize the lock away (so that it won't cause + * cache line bouncing) and use acquire/release barriers instead. + * + * Paired with smp_store_release in integrity_recalc_inline. + */ + recalc_sector = le64_to_cpu(smp_load_acquire(&ic->sb->recalc_sector)); + if (likely(dio->range.logical_sector + dio->range.n_sectors <= recalc_sector)) + goto skip_spinlock; +#endif + spin_lock_irq(&ic->endio_wait.lock); + recalc_sector = le64_to_cpu(ic->sb->recalc_sector); + if (dio->range.logical_sector + dio->range.n_sectors <= recalc_sector) + goto skip_unlock; + if (unlikely(!add_new_range(ic, &dio->range, true))) { + if (from_map) { + spin_unlock_irq(&ic->endio_wait.lock); + INIT_WORK(&dio->work, integrity_bio_wait); + queue_work(ic->wait_wq, &dio->work); + return DM_MAPIO_SUBMITTED; + } + wait_and_add_new_range(ic, &dio->range); + } + dio->integrity_range_locked = true; +skip_unlock: + spin_unlock_irq(&ic->endio_wait.lock); +skip_spinlock: + + if (unlikely(!dio->integrity_payload)) { dio->integrity_payload = page_to_virt((struct page *)mempool_alloc(&ic->recheck_pool, GFP_NOIO)); dio->integrity_payload_from_mempool = true; } - bio->bi_iter.bi_sector = dm_target_offset(ic->ti, bio->bi_iter.bi_sector); dio->bio_details.bi_iter = bio->bi_iter; if (unlikely(!dm_integrity_check_limits(ic, bio->bi_iter.bi_sector, bio))) { @@ -2468,8 +2513,8 @@ static int dm_integrity_map_inline(struct dm_integrity_io *dio) } ret = bio_integrity_add_page(bio, virt_to_page(dio->integrity_payload), - payload_len, offset_in_page(dio->integrity_payload)); - if (unlikely(ret != payload_len)) { + dio->payload_len, offset_in_page(dio->integrity_payload)); + if (unlikely(ret != dio->payload_len)) { bio->bi_status = BLK_STS_RESOURCE; bio_endio(bio); return DM_MAPIO_SUBMITTED; @@ -2577,6 +2622,9 @@ static int dm_integrity_end_io(struct dm_target *ti, struct bio *bio, blk_status struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io)); if (dio->op == REQ_OP_READ && likely(*status == BLK_STS_OK)) { unsigned pos = 0; + if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) && + unlikely(dio->integrity_range_locked)) + goto skip_check; while (dio->bio_details.bi_iter.bi_size) { char digest[HASH_MAX_DIGESTSIZE]; struct bio_vec bv = bio_iter_iovec(bio, dio->bio_details.bi_iter); @@ -2596,9 +2644,10 @@ static int dm_integrity_end_io(struct dm_target *ti, struct bio *bio, blk_status bio_advance_iter_single(bio, &dio->bio_details.bi_iter, ic->sectors_per_block << SECTOR_SHIFT); } } - if (likely(dio->op == REQ_OP_READ) || likely(dio->op == REQ_OP_WRITE)) { - dm_integrity_free_payload(dio); - } +skip_check: + dm_integrity_free_payload(dio); + if (unlikely(dio->integrity_range_locked)) + remove_range(ic, &dio->range); } return DM_ENDIO_DONE; } @@ -2606,8 +2655,26 @@ static int dm_integrity_end_io(struct dm_target *ti, struct bio *bio, blk_status static void integrity_bio_wait(struct work_struct *w) { struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work); + struct dm_integrity_c *ic = dio->ic; - dm_integrity_map_continue(dio, false); + if (ic->mode == 'I') { + struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); + int r = dm_integrity_map_inline(dio, false); + switch (r) { + case DM_MAPIO_KILL: + bio->bi_status = BLK_STS_IOERR; + fallthrough; + case DM_MAPIO_REMAPPED: + submit_bio_noacct(bio); + fallthrough; + case DM_MAPIO_SUBMITTED: + return; + default: + BUG(); + } + } else { + dm_integrity_map_continue(dio, false); + } } static void pad_uncommitted(struct dm_integrity_c *ic) @@ -3079,6 +3146,133 @@ static void integrity_recalc(struct work_struct *w) kvfree(recalc_tags); } +static void integrity_recalc_inline(struct work_struct *w) +{ + struct dm_integrity_c *ic = container_of(w, struct dm_integrity_c, recalc_work); + size_t recalc_tags_size; + u8 *recalc_buffer = NULL; + u8 *recalc_tags = NULL; + struct dm_integrity_range range; + struct bio *bio; + struct bio_integrity_payload *bip; + __u8 *t; + unsigned int i; + int r; + unsigned ret; + unsigned int super_counter = 0; + unsigned recalc_sectors = RECALC_SECTORS; + +retry: + recalc_buffer = kmalloc(recalc_sectors << SECTOR_SHIFT, GFP_NOIO | __GFP_NOWARN); + if (!recalc_buffer) { +oom: + recalc_sectors >>= 1; + if (recalc_sectors >= 1U << ic->sb->log2_sectors_per_block) + goto retry; + DMCRIT("out of memory for recalculate buffer - recalculation disabled"); + goto free_ret; + } + + recalc_tags_size = (recalc_sectors >> ic->sb->log2_sectors_per_block) * ic->tuple_size; + if (crypto_shash_digestsize(ic->internal_hash) > ic->tuple_size) + recalc_tags_size += crypto_shash_digestsize(ic->internal_hash) - ic->tuple_size; + recalc_tags = kmalloc(recalc_tags_size, GFP_NOIO | __GFP_NOWARN); + if (!recalc_tags) { + kfree(recalc_buffer); + recalc_buffer = NULL; + goto oom; + } + + spin_lock_irq(&ic->endio_wait.lock); + +next_chunk: + if (unlikely(dm_post_suspending(ic->ti))) + goto unlock_ret; + + range.logical_sector = le64_to_cpu(ic->sb->recalc_sector); + if (unlikely(range.logical_sector >= ic->provided_data_sectors)) + goto unlock_ret; + range.n_sectors = min((sector_t)recalc_sectors, ic->provided_data_sectors - range.logical_sector); + + add_new_range_and_wait(ic, &range); + spin_unlock_irq(&ic->endio_wait.lock); + + if (unlikely(++super_counter == RECALC_WRITE_SUPER)) { + recalc_write_super(ic); + super_counter = 0; + } + + if (unlikely(dm_integrity_failed(ic))) + goto err; + + DEBUG_print("recalculating: %llx - %llx\n", range.logical_sector, range.n_sectors); + + bio = bio_alloc_bioset(ic->dev->bdev, 1, REQ_OP_READ, GFP_NOIO, &ic->recalc_bios); + bio->bi_iter.bi_sector = ic->start + SB_SECTORS + range.logical_sector; + __bio_add_page(bio, virt_to_page(recalc_buffer), range.n_sectors << SECTOR_SHIFT, offset_in_page(recalc_buffer)); + r = submit_bio_wait(bio); + bio_put(bio); + if (unlikely(r)) { + dm_integrity_io_error(ic, "reading data", r); + goto err; + } + + t = recalc_tags; + for (i = 0; i < range.n_sectors; i += ic->sectors_per_block) { + memset(t, 0, ic->tuple_size); + integrity_sector_checksum(ic, range.logical_sector + i, recalc_buffer + (i << SECTOR_SHIFT), t); + t += ic->tuple_size; + } + + bio = bio_alloc_bioset(ic->dev->bdev, 1, REQ_OP_WRITE, GFP_NOIO, &ic->recalc_bios); + bio->bi_iter.bi_sector = ic->start + SB_SECTORS + range.logical_sector; + __bio_add_page(bio, virt_to_page(recalc_buffer), range.n_sectors << SECTOR_SHIFT, offset_in_page(recalc_buffer)); + + bip = bio_integrity_alloc(bio, GFP_NOIO, 1); + if (unlikely(IS_ERR(bip))) { + bio_put(bio); + DMCRIT("out of memory for bio integrity payload - recalculation disabled"); + goto err; + } + ret = bio_integrity_add_page(bio, virt_to_page(recalc_tags), t - recalc_tags, offset_in_page(recalc_tags)); + if (unlikely(ret != t - recalc_tags)) { + bio_put(bio); + dm_integrity_io_error(ic, "attaching integrity tags", -ENOMEM); + goto err; + } + + r = submit_bio_wait(bio); + bio_put(bio); + if (unlikely(r)) { + dm_integrity_io_error(ic, "writing data", r); + goto err; + } + + cond_resched(); + spin_lock_irq(&ic->endio_wait.lock); + remove_range_unlocked(ic, &range); +#ifdef CONFIG_64BIT + /* Paired with smp_load_acquire in dm_integrity_map_inline. */ + smp_store_release(&ic->sb->recalc_sector, cpu_to_le64(range.logical_sector + range.n_sectors)); +#else + ic->sb->recalc_sector = cpu_to_le64(range.logical_sector + range.n_sectors); +#endif + goto next_chunk; + +err: + remove_range(ic, &range); + goto free_ret; + +unlock_ret: + spin_unlock_irq(&ic->endio_wait.lock); + + recalc_write_super(ic); + +free_ret: + kfree(recalc_buffer); + kfree(recalc_tags); +} + static void bitmap_block_work(struct work_struct *w) { struct bitmap_block_status *bbs = container_of(w, struct bitmap_block_status, work); @@ -4617,6 +4811,17 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv r = -ENOMEM; goto bad; } + r = bioset_init(&ic->recalc_bios, 1, 0, BIOSET_NEED_BVECS); + if (r) { + ti->error = "Cannot allocate bio set"; + goto bad; + } + r = bioset_integrity_create(&ic->recalc_bios, 1); + if (r) { + ti->error = "Cannot allocate bio integrity set"; + r = -ENOMEM; + goto bad; + } } ic->metadata_wq = alloc_workqueue("dm-integrity-metadata", @@ -4833,7 +5038,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv r = -ENOMEM; goto bad; } - INIT_WORK(&ic->recalc_work, integrity_recalc); + INIT_WORK(&ic->recalc_work, ic->mode == 'I' ? integrity_recalc_inline : integrity_recalc); } else { if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) { ti->error = "Recalculate can only be specified with internal_hash"; @@ -4850,17 +5055,15 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv goto bad; } - if (ic->mode != 'I') { - ic->bufio = dm_bufio_client_create(ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev, - 1U << (SECTOR_SHIFT + ic->log2_buffer_sectors), 1, 0, NULL, NULL, 0); - if (IS_ERR(ic->bufio)) { - r = PTR_ERR(ic->bufio); - ti->error = "Cannot initialize dm-bufio"; - ic->bufio = NULL; - goto bad; - } - dm_bufio_set_sector_offset(ic->bufio, ic->start + ic->initial_sectors); + ic->bufio = dm_bufio_client_create(ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev, + 1U << (SECTOR_SHIFT + ic->log2_buffer_sectors), 1, 0, NULL, NULL, 0); + if (IS_ERR(ic->bufio)) { + r = PTR_ERR(ic->bufio); + ti->error = "Cannot initialize dm-bufio"; + ic->bufio = NULL; + goto bad; } + dm_bufio_set_sector_offset(ic->bufio, ic->start + ic->initial_sectors); if (ic->mode != 'R' && ic->mode != 'I') { r = create_journal(ic, &ti->error); @@ -4982,6 +5185,7 @@ static void dm_integrity_dtr(struct dm_target *ti) kvfree(ic->bbs); if (ic->bufio) dm_bufio_client_destroy(ic->bufio); + bioset_exit(&ic->recalc_bios); bioset_exit(&ic->recheck_bios); mempool_exit(&ic->recheck_pool); mempool_exit(&ic->journal_io_mempool); @@ -5036,7 +5240,7 @@ static void dm_integrity_dtr(struct dm_target *ti) static struct target_type integrity_target = { .name = "integrity", - .version = {1, 12, 0}, + .version = {1, 13, 0}, .module = THIS_MODULE, .features = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY, .ctr = dm_integrity_ctr, From 4b30051c4864234ec57290c3d142db7c88f10d8a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 4 Sep 2024 11:09:07 +0200 Subject: [PATCH 154/655] static_call: Handle module init failure correctly in static_call_del_module() Module insertion invokes static_call_add_module() to initialize the static calls in a module. static_call_add_module() invokes __static_call_init(), which allocates a struct static_call_mod to either encapsulate the built-in static call sites of the associated key into it so further modules can be added or to append the module to the module chain. If that allocation fails the function returns with an error code and the module core invokes static_call_del_module() to clean up eventually added static_call_mod entries. This works correctly, when all keys used by the module were converted over to a module chain before the failure. If not then static_call_del_module() causes a #GP as it blindly assumes that key::mods points to a valid struct static_call_mod. The problem is that key::mods is not a individual struct member of struct static_call_key, it's part of a union to save space: union { /* bit 0: 0 = mods, 1 = sites */ unsigned long type; struct static_call_mod *mods; struct static_call_site *sites; }; key::sites is a pointer to the list of built-in usage sites of the static call. The type of the pointer is differentiated by bit 0. A mods pointer has the bit clear, the sites pointer has the bit set. As static_call_del_module() blidly assumes that the pointer is a valid static_call_mod type, it fails to check for this failure case and dereferences the pointer to the list of built-in call sites, which is obviously bogus. Cure it by checking whether the key has a sites or a mods pointer. If it's a sites pointer then the key is not to be touched. As the sites are walked in the same order as in __static_call_init() the site walk can be terminated because all subsequent sites have not been touched by the init code due to the error exit. If it was converted before the allocation fail, then the inner loop which searches for a module match will find nothing. A fail in the second allocation in __static_call_init() is harmless and does not require special treatment. The first allocation succeeded and converted the key to a module chain. That first entry has mod::mod == NULL and mod::next == NULL, so the inner loop of static_call_del_module() will neither find a module match nor a module chain. The next site in the walk was either already converted, but can't match the module, or it will exit the outer loop because it has a static_call_site pointer and not a static_call_mod pointer. Fixes: 9183c3f9ed71 ("static_call: Add inline static call infrastructure") Closes: https://lore.kernel.org/all/20230915082126.4187913-1-ruanjinjie@huawei.com Reported-by: Jinjie Ruan Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Jinjie Ruan Link: https://lore.kernel.org/r/87zfon6b0s.ffs@tglx --- kernel/static_call_inline.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/kernel/static_call_inline.c b/kernel/static_call_inline.c index 639397b5491c..7bb0962b5229 100644 --- a/kernel/static_call_inline.c +++ b/kernel/static_call_inline.c @@ -411,6 +411,17 @@ static void static_call_del_module(struct module *mod) for (site = start; site < stop; site++) { key = static_call_key(site); + + /* + * If the key was not updated due to a memory allocation + * failure in __static_call_init() then treating key::sites + * as key::mods in the code below would cause random memory + * access and #GP. In that case all subsequent sites have + * not been touched either, so stop iterating. + */ + if (!static_call_key_has_mods(key)) + break; + if (key == prev_key) continue; From fe513c2ef0a172a58f158e2e70465c4317f0a9a2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 4 Sep 2024 11:08:28 +0200 Subject: [PATCH 155/655] static_call: Replace pointless WARN_ON() in static_call_module_notify() static_call_module_notify() triggers a WARN_ON(), when memory allocation fails in __static_call_add_module(). That's not really justified, because the failure case must be correctly handled by the well known call chain and the error code is passed through to the initiating userspace application. A memory allocation fail is not a fatal problem, but the WARN_ON() takes the machine out when panic_on_warn is set. Replace it with a pr_warn(). Fixes: 9183c3f9ed71 ("static_call: Add inline static call infrastructure") Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/8734mf7pmb.ffs@tglx --- kernel/static_call_inline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/static_call_inline.c b/kernel/static_call_inline.c index 7bb0962b5229..5259cda486d0 100644 --- a/kernel/static_call_inline.c +++ b/kernel/static_call_inline.c @@ -453,7 +453,7 @@ static int static_call_module_notify(struct notifier_block *nb, case MODULE_STATE_COMING: ret = static_call_add_module(mod); if (ret) { - WARN(1, "Failed to allocate memory for static calls"); + pr_warn("Failed to allocate memory for static calls\n"); static_call_del_module(mod); } break; From 55e268694e8b07026c88191f9b6949b6887d9ce3 Mon Sep 17 00:00:00 2001 From: Yanfei Xu Date: Wed, 28 Aug 2024 16:42:28 +0800 Subject: [PATCH 156/655] cxl/pci: Fix to record only non-zero ranges The function cxl_dvsec_rr_decode() retrieves and records DVSEC ranges into info->dvsec_range[], regardless of whether it is non-zero range, and the variable info->ranges indicates the number of non-zero ranges. However, in cxl_hdm_decode_init(), the validation for info->dvsec_range[] occurs in a for loop that iterates based on info->ranges. It may result in zero range to be validated but non-zero range not be validated, in turn, the number of allowed ranges is to be 0. Address it by only record non-zero ranges. This fix is not urgent as it requires a configuration that zeroes out the first dvsec range while populating the second. This has not been observed, but it is theoretically possible. If this gets picked up for -stable, no harm done, but there is no urgency to backport. Fixes: 560f78559006 ("cxl/pci: Retrieve CXL DVSEC memory info") Reviewed-by: Jonathan Cameron Signed-off-by: Yanfei Xu Reviewed-by: Alison Schofield Link: https://patch.msgid.link/20240828084231.1378789-2-yanfei.xu@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/pci.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 5e5c24eed545..075a2b2d2ea0 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -390,10 +390,6 @@ int cxl_dvsec_rr_decode(struct device *dev, int d, size |= temp & CXL_DVSEC_MEM_SIZE_LOW_MASK; if (!size) { - info->dvsec_range[i] = (struct range) { - .start = 0, - .end = CXL_RESOURCE_NONE, - }; continue; } @@ -411,12 +407,10 @@ int cxl_dvsec_rr_decode(struct device *dev, int d, base |= temp & CXL_DVSEC_MEM_BASE_LOW_MASK; - info->dvsec_range[i] = (struct range) { + info->dvsec_range[ranges++] = (struct range) { .start = base, .end = base + size - 1 }; - - ranges++; } info->ranges = ranges; From 5c6e3d5a5da118be2ae074bd70d111994147c708 Mon Sep 17 00:00:00 2001 From: Yanfei Xu Date: Wed, 28 Aug 2024 16:42:29 +0800 Subject: [PATCH 157/655] cxl/pci: Remove duplicated implementation of waiting for memory_info_valid commit ce17ad0d5498 ("cxl: Wait Memory_Info_Valid before access memory related info") added another implementation, which is cxl_dvsec_mem_range_valid(), of waiting for memory_info_valid without realizing it duplicated wait_for_valid(). Remove wait_for_valid() and retain cxl_dvsec_mem_range_valid() as the former is hardcoded to check only the Memory_Info_Valid bit of DVSEC range 1, while the latter allows for selection between DVSEC range 1 or 2 via parameter. Suggested-by: Dan Williams Reviewed-by: Jonathan Cameron Signed-off-by: Yanfei Xu Reviewed-by: Alison Schofield Link: https://patch.msgid.link/20240828084231.1378789-3-yanfei.xu@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/pci.c | 41 +++++------------------------------ drivers/cxl/cxl.h | 2 +- drivers/cxl/port.c | 2 +- tools/testing/cxl/test/mock.c | 4 ++-- 4 files changed, 9 insertions(+), 40 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 075a2b2d2ea0..37e537e50b34 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -211,37 +211,6 @@ int cxl_await_media_ready(struct cxl_dev_state *cxlds) } EXPORT_SYMBOL_NS_GPL(cxl_await_media_ready, CXL); -static int wait_for_valid(struct pci_dev *pdev, int d) -{ - u32 val; - int rc; - - /* - * Memory_Info_Valid: When set, indicates that the CXL Range 1 Size high - * and Size Low registers are valid. Must be set within 1 second of - * deassertion of reset to CXL device. Likely it is already set by the - * time this runs, but otherwise give a 1.5 second timeout in case of - * clock skew. - */ - rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val); - if (rc) - return rc; - - if (val & CXL_DVSEC_MEM_INFO_VALID) - return 0; - - msleep(1500); - - rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val); - if (rc) - return rc; - - if (val & CXL_DVSEC_MEM_INFO_VALID) - return 0; - - return -ETIMEDOUT; -} - static int cxl_set_mem_enable(struct cxl_dev_state *cxlds, u16 val) { struct pci_dev *pdev = to_pci_dev(cxlds->dev); @@ -322,11 +291,13 @@ static int devm_cxl_enable_hdm(struct device *host, struct cxl_hdm *cxlhdm) return devm_add_action_or_reset(host, disable_hdm, cxlhdm); } -int cxl_dvsec_rr_decode(struct device *dev, int d, +int cxl_dvsec_rr_decode(struct device *dev, struct cxl_port *port, struct cxl_endpoint_dvsec_info *info) { struct pci_dev *pdev = to_pci_dev(dev); + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); int hdm_count, rc, i, ranges = 0; + int d = cxlds->cxl_dvsec; u16 cap, ctrl; if (!d) { @@ -353,11 +324,9 @@ int cxl_dvsec_rr_decode(struct device *dev, int d, if (!hdm_count || hdm_count > 2) return -EINVAL; - rc = wait_for_valid(pdev, d); - if (rc) { - dev_dbg(dev, "Failure awaiting MEM_INFO_VALID (%d)\n", rc); + rc = cxl_dvsec_mem_range_valid(cxlds, 0); + if (rc) return rc; - } /* * The current DVSEC values are moot if the memory capability is diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 2890f7e2aad5..0fc96f8bf15c 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -811,7 +811,7 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port, int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, struct cxl_endpoint_dvsec_info *info); int devm_cxl_add_passthrough_decoder(struct cxl_port *port); -int cxl_dvsec_rr_decode(struct device *dev, int dvsec, +int cxl_dvsec_rr_decode(struct device *dev, struct cxl_port *port, struct cxl_endpoint_dvsec_info *info); bool is_cxl_region(struct device *dev); diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index d7d5d982ce69..861dde65768f 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -98,7 +98,7 @@ static int cxl_endpoint_port_probe(struct cxl_port *port) struct cxl_port *root; int rc; - rc = cxl_dvsec_rr_decode(cxlds->dev, cxlds->cxl_dvsec, &info); + rc = cxl_dvsec_rr_decode(cxlds->dev, port, &info); if (rc < 0) return rc; diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index bbd7d938156d..f4ce96cc11d4 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -228,7 +228,7 @@ int __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds, } EXPORT_SYMBOL_NS_GPL(__wrap_cxl_hdm_decode_init, CXL); -int __wrap_cxl_dvsec_rr_decode(struct device *dev, int dvsec, +int __wrap_cxl_dvsec_rr_decode(struct device *dev, struct cxl_port *port, struct cxl_endpoint_dvsec_info *info) { int rc = 0, index; @@ -237,7 +237,7 @@ int __wrap_cxl_dvsec_rr_decode(struct device *dev, int dvsec, if (ops && ops->is_mock_dev(dev)) rc = 0; else - rc = cxl_dvsec_rr_decode(dev, dvsec, info); + rc = cxl_dvsec_rr_decode(dev, port, info); put_cxl_mock_ops(index); return rc; From 99bf0eebc75c9085440d6dca014724e7e49b5116 Mon Sep 17 00:00:00 2001 From: Yanfei Xu Date: Wed, 28 Aug 2024 16:42:30 +0800 Subject: [PATCH 158/655] cxl/pci: Check Mem_info_valid bit for each applicable DVSEC In theory a device might set the mem_info_valid bit for a first range after it is ready but before as second range has reached that state. Therefore, the correct approach is to check the Mem_info_valid bit for each applicable DVSEC range against HDM_COUNT, rather than only for the DVSEC range 1. Consequently, let's move the check into the "for loop" that handles each DVSEC range. Reviewed-by: Jonathan Cameron Signed-off-by: Yanfei Xu Reviewed-by: Alison Schofield Link: https://patch.msgid.link/20240828084231.1378789-4-yanfei.xu@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/pci.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 37e537e50b34..043775f3f9a5 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -324,10 +324,6 @@ int cxl_dvsec_rr_decode(struct device *dev, struct cxl_port *port, if (!hdm_count || hdm_count > 2) return -EINVAL; - rc = cxl_dvsec_mem_range_valid(cxlds, 0); - if (rc) - return rc; - /* * The current DVSEC values are moot if the memory capability is * disabled, and they will remain moot after the HDM Decoder @@ -345,6 +341,10 @@ int cxl_dvsec_rr_decode(struct device *dev, struct cxl_port *port, u64 base, size; u32 temp; + rc = cxl_dvsec_mem_range_valid(cxlds, i); + if (rc) + return rc; + rc = pci_read_config_dword( pdev, d + CXL_DVSEC_RANGE_SIZE_HIGH(i), &temp); if (rc) From 3f9e07531778ce66e0100d93f482e9a299d10d8d Mon Sep 17 00:00:00 2001 From: Yanfei Xu Date: Wed, 28 Aug 2024 16:42:31 +0800 Subject: [PATCH 159/655] cxl/pci: simplify the check of mem_enabled in cxl_hdm_decode_init() Cases can be divided into two categories which are DVSEC range enabled and not enabled when HDM decoders exist but is not enabled. To avoid checking info->mem_enabled, which indicates the enablement of DVSEC range, every time, we can check !info->mem_enabled once in advance. This simplification can make the code clearer. No functional change intended. Reviewed-by: Jonathan Cameron Signed-off-by: Yanfei Xu Reviewed-by: Alison Schofield Link: https://patch.msgid.link/20240828084231.1378789-5-yanfei.xu@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/pci.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 043775f3f9a5..9d396ef0aeed 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -426,7 +426,15 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm, return -ENODEV; } - for (i = 0, allowed = 0; info->mem_enabled && i < info->ranges; i++) { + if (!info->mem_enabled) { + rc = devm_cxl_enable_hdm(&port->dev, cxlhdm); + if (rc) + return rc; + + return devm_cxl_enable_mem(&port->dev, cxlds); + } + + for (i = 0, allowed = 0; i < info->ranges; i++) { struct device *cxld_dev; cxld_dev = device_find_child(&root->dev, &info->dvsec_range[i], @@ -440,7 +448,7 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm, allowed++; } - if (!allowed && info->mem_enabled) { + if (!allowed) { dev_err(dev, "Range register decodes outside platform defined CXL ranges.\n"); return -ENXIO; } @@ -454,14 +462,7 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm, * match. If at least one DVSEC range is enabled and allowed, skip HDM * Decoder Capability Enable. */ - if (info->mem_enabled) - return 0; - - rc = devm_cxl_enable_hdm(&port->dev, cxlhdm); - if (rc) - return rc; - - return devm_cxl_enable_mem(&port->dev, cxlds); + return 0; } EXPORT_SYMBOL_NS_GPL(cxl_hdm_decode_init, CXL); From d9a476c837fab38856c6b6ff9f794c33907a9f81 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 4 Sep 2024 09:47:54 -0500 Subject: [PATCH 160/655] cxl/region: Remove lock from memory notifier callback In testing Dynamic Capacity Device (DCD) support, a lockdep splat revealed an ABBA issue between the memory notifiers and the DCD extent processing code.[0] Changing the lock ordering within DCD proved difficult because regions must be stable while searching for the proper region and then the device lock must be held to properly notify the DAX region driver of memory changes. Dan points out in the thread that notifiers should be able to trust that it is safe to access static data. Region data is static once the device is realized and until it's destruction. Thus it is better to manage the notifiers within the region driver. Remove the need for a lock by ensuring the notifiers are active only during the region's lifetime. Furthermore, remove cxl_region_nid() because resource can't be NULL while the region is stable. Link: https://lore.kernel.org/all/66b4cf539a79b_a36e829416@iweiny-mobl.notmuch/ [0] Cc: Ying Huang Suggested-by: Dan Williams Reviewed-by: Davidlohr Bueso Reviewed-by: Jonathan Cameron Reviewed-by: Ying Huang Signed-off-by: Ira Weiny Link: https://patch.msgid.link/20240904-fix-notifiers-v3-1-576b4e950266@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/region.c | 54 ++++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 9a7c001eff1e..7bb79f3f318c 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -2313,8 +2313,6 @@ static void unregister_region(void *_cxlr) struct cxl_region_params *p = &cxlr->params; int i; - unregister_memory_notifier(&cxlr->memory_notifier); - unregister_mt_adistance_algorithm(&cxlr->adist_notifier); device_del(&cxlr->dev); /* @@ -2391,18 +2389,6 @@ static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid) return true; } -static int cxl_region_nid(struct cxl_region *cxlr) -{ - struct cxl_region_params *p = &cxlr->params; - struct resource *res; - - guard(rwsem_read)(&cxl_region_rwsem); - res = p->res; - if (!res) - return NUMA_NO_NODE; - return phys_to_target_node(res->start); -} - static int cxl_region_perf_attrs_callback(struct notifier_block *nb, unsigned long action, void *arg) { @@ -2415,7 +2401,11 @@ static int cxl_region_perf_attrs_callback(struct notifier_block *nb, if (nid == NUMA_NO_NODE || action != MEM_ONLINE) return NOTIFY_DONE; - region_nid = cxl_region_nid(cxlr); + /* + * No need to hold cxl_region_rwsem; region parameters are stable + * within the cxl_region driver. + */ + region_nid = phys_to_target_node(cxlr->params.res->start); if (nid != region_nid) return NOTIFY_DONE; @@ -2434,7 +2424,11 @@ static int cxl_region_calculate_adistance(struct notifier_block *nb, int *adist = data; int region_nid; - region_nid = cxl_region_nid(cxlr); + /* + * No need to hold cxl_region_rwsem; region parameters are stable + * within the cxl_region driver. + */ + region_nid = phys_to_target_node(cxlr->params.res->start); if (nid != region_nid) return NOTIFY_OK; @@ -2484,14 +2478,6 @@ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd, if (rc) goto err; - cxlr->memory_notifier.notifier_call = cxl_region_perf_attrs_callback; - cxlr->memory_notifier.priority = CXL_CALLBACK_PRI; - register_memory_notifier(&cxlr->memory_notifier); - - cxlr->adist_notifier.notifier_call = cxl_region_calculate_adistance; - cxlr->adist_notifier.priority = 100; - register_mt_adistance_algorithm(&cxlr->adist_notifier); - rc = devm_add_action_or_reset(port->uport_dev, unregister_region, cxlr); if (rc) return ERR_PTR(rc); @@ -3387,6 +3373,14 @@ static int is_system_ram(struct resource *res, void *arg) return 1; } +static void shutdown_notifiers(void *_cxlr) +{ + struct cxl_region *cxlr = _cxlr; + + unregister_memory_notifier(&cxlr->memory_notifier); + unregister_mt_adistance_algorithm(&cxlr->adist_notifier); +} + static int cxl_region_probe(struct device *dev) { struct cxl_region *cxlr = to_cxl_region(dev); @@ -3419,6 +3413,18 @@ static int cxl_region_probe(struct device *dev) out: up_read(&cxl_region_rwsem); + if (rc) + return rc; + + cxlr->memory_notifier.notifier_call = cxl_region_perf_attrs_callback; + cxlr->memory_notifier.priority = CXL_CALLBACK_PRI; + register_memory_notifier(&cxlr->memory_notifier); + + cxlr->adist_notifier.notifier_call = cxl_region_calculate_adistance; + cxlr->adist_notifier.priority = 100; + register_mt_adistance_algorithm(&cxlr->adist_notifier); + + rc = devm_add_action_or_reset(&cxlr->dev, shutdown_notifiers, cxlr); if (rc) return rc; From addf89774e48c992316449ffab4f29c2309ebefb Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 9 Sep 2024 21:17:40 +0800 Subject: [PATCH 161/655] ieee802154: Fix build error If REGMAP_SPI is m and IEEE802154_MCR20A is y, mcr20a.c:(.text+0x3ed6c5b): undefined reference to `__devm_regmap_init_spi' ld: mcr20a.c:(.text+0x3ed6cb5): undefined reference to `__devm_regmap_init_spi' Select REGMAP_SPI for IEEE802154_MCR20A to fix it. Fixes: 8c6ad9cc5157 ("ieee802154: Add NXP MCR20A IEEE 802.15.4 transceiver driver") Signed-off-by: Jinjie Ruan Link: https://lore.kernel.org/20240909131740.1296608-1-ruanjinjie@huawei.com Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ieee802154/Kconfig b/drivers/net/ieee802154/Kconfig index 95da876c5613..1075e24b11de 100644 --- a/drivers/net/ieee802154/Kconfig +++ b/drivers/net/ieee802154/Kconfig @@ -101,6 +101,7 @@ config IEEE802154_CA8210_DEBUGFS config IEEE802154_MCR20A tristate "MCR20A transceiver driver" + select REGMAP_SPI depends on IEEE802154_DRIVERS && MAC802154 depends on SPI help From 40a895fd9a358eea16901026360bd2cd3ca691a7 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 5 Sep 2024 15:35:45 -0700 Subject: [PATCH 162/655] cxl: move cxl headers to new include/cxl/ directory Group all cxl related kernel headers into include/cxl/ directory. Reviewed-by: Alison Schofield Reviewed-by: Ira Weiny Link: https://patch.msgid.link/20240905223711.1990186-2-dave.jiang@intel.com Signed-off-by: Dave Jiang --- MAINTAINERS | 3 +-- drivers/acpi/apei/einj-cxl.c | 2 +- drivers/acpi/apei/ghes.c | 2 +- drivers/cxl/core/port.c | 2 +- drivers/cxl/cxlmem.h | 2 +- include/{linux/einj-cxl.h => cxl/einj.h} | 0 include/{linux/cxl-event.h => cxl/event.h} | 0 7 files changed, 5 insertions(+), 6 deletions(-) rename include/{linux/einj-cxl.h => cxl/einj.h} (100%) rename include/{linux/cxl-event.h => cxl/event.h} (100%) diff --git a/MAINTAINERS b/MAINTAINERS index fe83ba7194ea..df59354af438 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5620,8 +5620,7 @@ L: linux-cxl@vger.kernel.org S: Maintained F: Documentation/driver-api/cxl F: drivers/cxl/ -F: include/linux/einj-cxl.h -F: include/linux/cxl-event.h +F: include/cxl/ F: include/uapi/linux/cxl_mem.h F: tools/testing/cxl/ diff --git a/drivers/acpi/apei/einj-cxl.c b/drivers/acpi/apei/einj-cxl.c index 8b8be0c90709..4f81a119ec08 100644 --- a/drivers/acpi/apei/einj-cxl.c +++ b/drivers/acpi/apei/einj-cxl.c @@ -7,9 +7,9 @@ * * Author: Ben Cheatham */ -#include #include #include +#include #include "apei-internal.h" diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 623cc0cb4a65..ada93cfde9ba 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include @@ -50,6 +49,7 @@ #include #include #include +#include #include #include "apei-internal.h" diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 7ca57285a7b7..a5e6f3d23cfb 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -3,7 +3,6 @@ #include #include #include -#include #include #include #include @@ -11,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index afb53d058d62..a81a8982bf93 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -6,8 +6,8 @@ #include #include #include -#include #include +#include #include "cxl.h" /* CXL 2.0 8.2.8.5.1.1 Memory Device Status Register */ diff --git a/include/linux/einj-cxl.h b/include/cxl/einj.h similarity index 100% rename from include/linux/einj-cxl.h rename to include/cxl/einj.h diff --git a/include/linux/cxl-event.h b/include/cxl/event.h similarity index 100% rename from include/linux/cxl-event.h rename to include/cxl/event.h From 3f6821aa147b6e6fe07e8b35999724518b74a632 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 6 Sep 2024 09:13:37 -0700 Subject: [PATCH 163/655] KVM: x86: Forcibly leave nested if RSM to L2 hits shutdown Leave nested mode before synthesizing shutdown (a.k.a. TRIPLE_FAULT) if RSM fails when resuming L2 (a.k.a. guest mode). Architecturally, shutdown on RSM occurs _before_ the transition back to guest mode on both Intel and AMD. On Intel, per the SDM pseudocode, SMRAM state is loaded before critical VMX state: restore state normally from SMRAM; ... CR4.VMXE := value stored internally; IF internal storage indicates that the logical processor had been in VMX operation (root or non-root) THEN enter VMX operation (root or non-root); restore VMX-critical state as defined in Section 32.14.1; ... restore current VMCS pointer; FI; AMD's APM is both less clearcut and more explicit. Because AMD CPUs save VMCB and guest state in SMRAM itself, given the lack of anything in the APM to indicate a shutdown in guest mode is possible, a straightforward reading of the clause on invalid state is that _what_ state is invalid is irrelevant, i.e. all roads lead to shutdown. An RSM causes a processor shutdown if an invalid-state condition is found in the SMRAM state-save area. This fixes a bug found by syzkaller where synthesizing shutdown for L2 led to a nested VM-Exit (if L1 is intercepting shutdown), which in turn caused KVM to complain about trying to cancel a nested VM-Enter (see commit 759cbd59674a ("KVM: x86: nSVM/nVMX: set nested_run_pending on VM entry which is a result of RSM"). Note, Paolo pointed out that KVM shouldn't set nested_run_pending until after loading SMRAM state. But as above, that's only half the story, KVM shouldn't transition to guest mode either. Unfortunately, fixing that mess requires rewriting the nVMX and nSVM RSM flows to not piggyback their nested VM-Enter flows, as executing the nested VM-Enter flows after loading state from SMRAM would clobber much of said state. For now, add a FIXME to call out that transitioning to guest mode before loading state from SMRAM is wrong. Link: https://lore.kernel.org/all/CABgObfYaUHXyRmsmg8UjRomnpQ0Jnaog9-L2gMjsjkqChjDYUQ@mail.gmail.com Reported-by: syzbot+988d9efcdf137bc05f66@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/0000000000007a9acb06151e1670@google.com Reported-by: Zheyu Ma Closes: https://lore.kernel.org/all/CAMhUBjmXMYsEoVYw_M8hSZjBMHh24i88QYm-RY6HDta5YZ7Wgw@mail.gmail.com Analyzed-by: Michal Wilczynski Cc: Kishen Maloor Reviewed-by: Maxim Levitsky Link: https://lore.kernel.org/r/20240906161337.1118412-1-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/smm.c | 24 +++++++++++++++++++----- arch/x86/kvm/x86.c | 6 ------ arch/x86/kvm/x86.h | 6 ++++++ 3 files changed, 25 insertions(+), 11 deletions(-) diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c index 00e3c27d2a87..85241c0c7f56 100644 --- a/arch/x86/kvm/smm.c +++ b/arch/x86/kvm/smm.c @@ -624,17 +624,31 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt) #endif /* - * Give leave_smm() a chance to make ISA-specific changes to the vCPU - * state (e.g. enter guest mode) before loading state from the SMM - * state-save area. + * FIXME: When resuming L2 (a.k.a. guest mode), the transition to guest + * mode should happen _after_ loading state from SMRAM. However, KVM + * piggybacks the nested VM-Enter flows (which is wrong for many other + * reasons), and so nSVM/nVMX would clobber state that is loaded from + * SMRAM and from the VMCS/VMCB. */ if (kvm_x86_call(leave_smm)(vcpu, &smram)) return X86EMUL_UNHANDLEABLE; #ifdef CONFIG_X86_64 if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) - return rsm_load_state_64(ctxt, &smram.smram64); + ret = rsm_load_state_64(ctxt, &smram.smram64); else #endif - return rsm_load_state_32(ctxt, &smram.smram32); + ret = rsm_load_state_32(ctxt, &smram.smram32); + + /* + * If RSM fails and triggers shutdown, architecturally the shutdown + * occurs *before* the transition to guest mode. But due to KVM's + * flawed handling of RSM to L2 (see above), the vCPU may already be + * in_guest_mode(). Force the vCPU out of guest mode before delivering + * the shutdown, so that L1 enters shutdown instead of seeing a VM-Exit + * that architecturally shouldn't be possible. + */ + if (ret != X86EMUL_CONTINUE && is_guest_mode(vcpu)) + kvm_leave_nested(vcpu); + return ret; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index fa455a60b557..92fade53c79f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -833,12 +833,6 @@ static void kvm_queue_exception_vmexit(struct kvm_vcpu *vcpu, unsigned int vecto ex->payload = payload; } -/* Forcibly leave the nested mode in cases like a vCPU reset */ -static void kvm_leave_nested(struct kvm_vcpu *vcpu) -{ - kvm_x86_ops.nested_ops->leave_nested(vcpu); -} - static void kvm_multiple_exception(struct kvm_vcpu *vcpu, unsigned nr, bool has_error, u32 error_code, bool has_payload, unsigned long payload, bool reinject) diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 516eb9e28752..121f5c19613e 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -108,6 +108,12 @@ static inline unsigned int __shrink_ple_window(unsigned int val, void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu); int kvm_check_nested_events(struct kvm_vcpu *vcpu); +/* Forcibly leave the nested mode in cases like a vCPU reset */ +static inline void kvm_leave_nested(struct kvm_vcpu *vcpu) +{ + kvm_x86_ops.nested_ops->leave_nested(vcpu); +} + static inline bool kvm_vcpu_has_run(struct kvm_vcpu *vcpu) { return vcpu->arch.last_vmentry_cpu != -1; From c32e028057f144f15c06e2f09dfec49b14311910 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 29 Aug 2024 21:44:48 -0700 Subject: [PATCH 164/655] KVM: selftests: Verify single-stepping a fastpath VM-Exit exits to userspace In x86's debug_regs test, change the RDMSR(MISC_ENABLES) in the single-step testcase to a WRMSR(TSC_DEADLINE) in order to verify that KVM honors KVM_GUESTDBG_SINGLESTEP when handling a fastpath VM-Exit. Note, the extra coverage is effectively Intel-only, as KVM only handles TSC_DEADLINE in the fastpath when the timer is emulated via the hypervisor timer, a.k.a. the VMX preemption timer. Link: https://lore.kernel.org/r/20240830044448.130449-1-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/x86_64/debug_regs.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86_64/debug_regs.c index f6b295e0b2d2..76cc2df9238a 100644 --- a/tools/testing/selftests/kvm/x86_64/debug_regs.c +++ b/tools/testing/selftests/kvm/x86_64/debug_regs.c @@ -47,15 +47,18 @@ static void guest_code(void) /* * Single step test, covers 2 basic instructions and 2 emulated * - * Enable interrupts during the single stepping to see that - * pending interrupt we raised is not handled due to KVM_GUESTDBG_BLOCKIRQ + * Enable interrupts during the single stepping to see that pending + * interrupt we raised is not handled due to KVM_GUESTDBG_BLOCKIRQ. + * + * Write MSR_IA32_TSC_DEADLINE to verify that KVM's fastpath handler + * exits to userspace due to single-step being enabled. */ asm volatile("ss_start: " "sti\n\t" "xor %%eax,%%eax\n\t" "cpuid\n\t" - "movl $0x1a0,%%ecx\n\t" - "rdmsr\n\t" + "movl $" __stringify(MSR_IA32_TSC_DEADLINE) ", %%ecx\n\t" + "wrmsr\n\t" "cli\n\t" : : : "eax", "ebx", "ecx", "edx"); From 4ca077f26d885cbc97e742a5f3572aac244a0f8a Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Fri, 30 Aug 2024 10:25:37 +0800 Subject: [PATCH 165/655] KVM: x86: Remove some unused declarations Commit 238adc77051a ("KVM: Cleanup LAPIC interface") removed kvm_lapic_get_base() but leave declaration. And other two declarations were never implenmented since introduction. Signed-off-by: Yue Haibing Link: https://lore.kernel.org/r/20240830022537.2403873-1-yuehaibing@huawei.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/lapic.h | 1 - arch/x86/kvm/mmu.h | 2 -- arch/x86/kvm/mmu/mmu_internal.h | 2 -- 3 files changed, 5 deletions(-) diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 7ef8ae73e82d..7c95eedd771e 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -96,7 +96,6 @@ u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu); void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8); void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu); void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value); -u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu); void kvm_recalculate_apic_map(struct kvm *kvm); void kvm_apic_set_version(struct kvm_vcpu *vcpu); void kvm_apic_after_set_mcg_cap(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 4341e0e28571..9dc5dd43ae7f 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -223,8 +223,6 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, bool kvm_mmu_may_ignore_guest_pat(void); -int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu); - int kvm_mmu_post_init_vm(struct kvm *kvm); void kvm_mmu_pre_destroy_vm(struct kvm *kvm); diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index 1721d97743e9..1469a1d9782d 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -349,8 +349,6 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm, void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault); void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_level); -void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc); - void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp); void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp); From 7efb4d8a392a18e37fcdb5e77c111af6e9a9e2f2 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Fri, 6 Sep 2024 00:08:37 +1200 Subject: [PATCH 166/655] KVM: VMX: Also clear SGX EDECCSSA in KVM CPU caps when SGX is disabled When SGX EDECCSSA support was added to KVM in commit 16a7fe3728a8 ("KVM/VMX: Allow exposing EDECCSSA user leaf function to KVM guest"), it forgot to clear the X86_FEATURE_SGX_EDECCSSA bit in KVM CPU caps when KVM SGX is disabled. Fix it. Fixes: 16a7fe3728a8 ("KVM/VMX: Allow exposing EDECCSSA user leaf function to KVM guest") Signed-off-by: Kai Huang Link: https://lore.kernel.org/r/20240905120837.579102-1-kai.huang@intel.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/vmx/vmx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 594db9afbc0f..98d737e0d416 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7963,6 +7963,7 @@ static __init void vmx_set_cpu_caps(void) kvm_cpu_cap_clear(X86_FEATURE_SGX_LC); kvm_cpu_cap_clear(X86_FEATURE_SGX1); kvm_cpu_cap_clear(X86_FEATURE_SGX2); + kvm_cpu_cap_clear(X86_FEATURE_SGX_EDECCSSA); } if (vmx_umip_emulated()) From a194a3a13ce0b4cce4b52f328405891ef3a85cb9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 5 Sep 2024 21:34:07 -0700 Subject: [PATCH 167/655] KVM: x86: Move "ack" phase of local APIC IRQ delivery to separate API Split the "ack" phase, i.e. the movement of an interrupt from IRR=>ISR, out of kvm_get_apic_interrupt() and into a separate API so that nested VMX can acknowledge a specific interrupt _after_ emulating a VM-Exit from L2 to L1. To correctly emulate nested posted interrupts while APICv is active, KVM must: 1. find the highest pending interrupt. 2. check if that IRQ is L2's notification vector 3. emulate VM-Exit if the IRQ is NOT the notification vector 4. ACK the IRQ in L1 _after_ VM-Exit When APICv is active, the process of moving the IRQ from the IRR to the ISR also requires a VMWRITE to update vmcs01.GUEST_INTERRUPT_STATUS.SVI, and so acknowledging the interrupt before switching to vmcs01 would result in marking the IRQ as in-service in the wrong VMCS. KVM currently fudges around this issue by doing kvm_get_apic_interrupt() smack dab in the middle of emulating VM-Exit, but that hack doesn't play nice with nested posted interrupts, as notification vector IRQs don't trigger a VM-Exit in the first place. Cc: Nathan Chancellor Link: https://lore.kernel.org/r/20240906043413.1049633-2-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/lapic.c | 17 +++++++++++++---- arch/x86/kvm/lapic.h | 1 + 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 5bb481aefcbc..c1b2a615acfd 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2922,14 +2922,13 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) } } -int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) +void kvm_apic_ack_interrupt(struct kvm_vcpu *vcpu, int vector) { - int vector = kvm_apic_has_interrupt(vcpu); struct kvm_lapic *apic = vcpu->arch.apic; u32 ppr; - if (vector == -1) - return -1; + if (WARN_ON_ONCE(vector < 0 || !apic)) + return; /* * We get here even with APIC virtualization enabled, if doing @@ -2957,6 +2956,16 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) __apic_update_ppr(apic, &ppr); } +} +EXPORT_SYMBOL_GPL(kvm_apic_ack_interrupt); + +int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) +{ + int vector = kvm_apic_has_interrupt(vcpu); + + if (vector != -1) + kvm_apic_ack_interrupt(vcpu, vector); + return vector; } diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 7ef8ae73e82d..db80a2965b9c 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -88,6 +88,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu); void kvm_free_lapic(struct kvm_vcpu *vcpu); int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu); +void kvm_apic_ack_interrupt(struct kvm_vcpu *vcpu, int vector); int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu); int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu); int kvm_apic_accept_events(struct kvm_vcpu *vcpu); From 363010e1dd0efd4778637c1a5a5aaffbcfcae919 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 5 Sep 2024 21:34:08 -0700 Subject: [PATCH 168/655] KVM: nVMX: Get to-be-acknowledge IRQ for nested VM-Exit at injection site Move the logic to get the to-be-acknowledge IRQ for a nested VM-Exit from nested_vmx_vmexit() to vmx_check_nested_events(), which is subtly the one and only path where KVM invokes nested_vmx_vmexit() with EXIT_REASON_EXTERNAL_INTERRUPT. A future fix will perform a last-minute check on L2's nested posted interrupt notification vector, just before injecting a nested VM-Exit. To handle that scenario correctly, KVM needs to get the interrupt _before_ injecting VM-Exit, as simply querying the highest priority interrupt, via kvm_cpu_has_interrupt(), would result in TOCTOU bug, as a new, higher priority interrupt could arrive between kvm_cpu_has_interrupt() and kvm_cpu_get_interrupt(). Unfortunately, simply moving the call to kvm_cpu_get_interrupt() doesn't suffice, as a VMWRITE to GUEST_INTERRUPT_STATUS.SVI is hiding in kvm_get_apic_interrupt(), and acknowledging the interrupt before nested VM-Exit would cause the VMWRITE to hit vmcs02 instead of vmcs01. Open code a rough equivalent to kvm_cpu_get_interrupt() so that the IRQ is acknowledged after emulating VM-Exit, taking care to avoid the TOCTOU issue described above. Opportunistically convert the WARN_ON() to a WARN_ON_ONCE(). If KVM has a bug that results in a false positive from kvm_cpu_has_interrupt(), spamming dmesg won't help the situation. Note, nested_vmx_reflect_vmexit() can never reflect external interrupts as they are always "wanted" by L0. Link: https://lore.kernel.org/r/20240906043413.1049633-3-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/irq.c | 3 ++- arch/x86/kvm/vmx/nested.c | 36 ++++++++++++++++++++++++--------- 3 files changed, 30 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4a93ac1b9be9..aa31c4b94977 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -2256,6 +2256,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v); int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); int kvm_cpu_has_extint(struct kvm_vcpu *v); int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); +int kvm_cpu_get_extint(struct kvm_vcpu *v); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index 3d7eb11d0e45..810da99ff7ed 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -108,7 +108,7 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt); * Read pending interrupt(from non-APIC source) * vector and intack. */ -static int kvm_cpu_get_extint(struct kvm_vcpu *v) +int kvm_cpu_get_extint(struct kvm_vcpu *v) { if (!kvm_cpu_has_extint(v)) { WARN_ON(!lapic_in_kernel(v)); @@ -131,6 +131,7 @@ static int kvm_cpu_get_extint(struct kvm_vcpu *v) } else return kvm_pic_read_irq(v->kvm); /* PIC */ } +EXPORT_SYMBOL_GPL(kvm_cpu_get_extint); /* * Read pending interrupt vector and intack. diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 867de342df33..1a6dc85cde18 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4285,11 +4285,37 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu) } if (kvm_cpu_has_interrupt(vcpu) && !vmx_interrupt_blocked(vcpu)) { + int irq; + if (block_nested_events) return -EBUSY; if (!nested_exit_on_intr(vcpu)) goto no_vmexit; - nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0); + + if (!nested_exit_intr_ack_set(vcpu)) { + nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0); + return 0; + } + + irq = kvm_cpu_get_extint(vcpu); + if (irq != -1) { + nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, + INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR | irq, 0); + return 0; + } + + irq = kvm_apic_has_interrupt(vcpu); + WARN_ON_ONCE(irq < 0); + + nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, + INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR | irq, 0); + + /* + * ACK the interrupt _after_ emulating VM-Exit, as the IRQ must + * be marked as in-service in vmcs01.GUEST_INTERRUPT_STATUS.SVI + * if APICv is active. + */ + kvm_apic_ack_interrupt(vcpu, irq); return 0; } @@ -4970,14 +4996,6 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; if (likely(!vmx->fail)) { - if ((u16)vm_exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT && - nested_exit_intr_ack_set(vcpu)) { - int irq = kvm_cpu_get_interrupt(vcpu); - WARN_ON(irq < 0); - vmcs12->vm_exit_intr_info = irq | - INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR; - } - if (vm_exit_reason != -1) trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason, vmcs12->exit_qualification, From 8c23670f2b0004edb8f7135e314114f0c3452085 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 5 Sep 2024 21:34:09 -0700 Subject: [PATCH 169/655] KVM: nVMX: Suppress external interrupt VM-Exit injection if there's no IRQ In the should-be-impossible scenario that kvm_cpu_get_interrupt() doesn't return a valid vector after checking kvm_cpu_has_interrupt(), skip VM-Exit injection to reduce the probability of crashing/confusing L1. Now that KVM gets the IRQ _before_ calling nested_vmx_vmexit(), squashing the VM-Exit injection is trivial since there are no actions that need to be undone. Reviewed-by: Chao Gao Link: https://lore.kernel.org/r/20240906043413.1049633-4-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/vmx/nested.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 1a6dc85cde18..e57cd849354b 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4305,7 +4305,8 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu) } irq = kvm_apic_has_interrupt(vcpu); - WARN_ON_ONCE(irq < 0); + if (WARN_ON_ONCE(irq < 0)) + goto no_vmexit; nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR | irq, 0); From 6e0b456547f41bafdad2dc3a72adc7f69326ca4c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 5 Sep 2024 21:34:10 -0700 Subject: [PATCH 170/655] KVM: nVMX: Detect nested posted interrupt NV at nested VM-Exit injection When synthensizing a nested VM-Exit due to an external interrupt, pend a nested posted interrupt if the external interrupt vector matches L2's PI notification vector, i.e. if the interrupt is a PI notification for L2. This fixes a bug where KVM will incorrectly inject VM-Exit instead of processing nested posted interrupt when IPI virtualization is enabled. Per the SDM, detection of the notification vector doesn't occur until the interrupt is acknowledge and deliver to the CPU core. If the external-interrupt exiting VM-execution control is 1, any unmasked external interrupt causes a VM exit (see Section 26.2). If the "process posted interrupts" VM-execution control is also 1, this behavior is changed and the processor handles an external interrupt as follows: 1. The local APIC is acknowledged; this provides the processor core with an interrupt vector, called here the physical vector. 2. If the physical vector equals the posted-interrupt notification vector, the logical processor continues to the next step. Otherwise, a VM exit occurs as it would normally due to an external interrupt; the vector is saved in the VM-exit interruption-information field. For the most part, KVM has avoided problems because a PI NV for L2 that arrives will L2 is active will be processed by hardware, and KVM checks for a pending notification vector during nested VM-Enter. Thus, to hit the bug, the PI NV interrupt needs to sneak its way into L1's vIRR while L2 is active. Without IPI virtualization, the scenario is practically impossible to hit, modulo L1 doing weird things (see below), as the ordering between vmx_deliver_posted_interrupt() and nested VM-Enter effectively guarantees that either the sender will see the vCPU as being in_guest_mode(), or the receiver will see the interrupt in its vIRR. With IPI virtualization, introduced by commit d588bb9be1da ("KVM: VMX: enable IPI virtualization"), the sending CPU effectively implements a rough equivalent of vmx_deliver_posted_interrupt(), sans the nested PI NV check. If the target vCPU has a valid PID, the CPU will send a PI NV interrupt based on _L1's_ PID, as the sender's because IPIv table points at L1 PIDs. PIR := 32 bytes at PID_ADDR; // under lock PIR[V] := 1; store PIR at PID_ADDR; // release lock NotifyInfo := 8 bytes at PID_ADDR + 32; // under lock IF NotifyInfo.ON = 0 AND NotifyInfo.SN = 0; THEN NotifyInfo.ON := 1; SendNotify := 1; ELSE SendNotify := 0; FI; store NotifyInfo at PID_ADDR + 32; // release lock IF SendNotify = 1; THEN send an IPI specified by NotifyInfo.NDST and NotifyInfo.NV; FI; As a result, the target vCPU ends up receiving an interrupt on KVM's POSTED_INTR_VECTOR while L2 is running, with an interrupt in L1's PIR for L2's nested PI NV. The POSTED_INTR_VECTOR interrupt triggers a VM-Exit from L2 to L0, KVM moves the interrupt from L1's PIR to vIRR, triggers a KVM_REQ_EVENT prior to re-entry to L2, and calls vmx_check_nested_events(), effectively bypassing all of KVM's "early" checks on nested PI NV. Without IPI virtualization, the bug can likely be hit only if L1 programs an assigned device to _post_ an interrupt to L2's notification vector, by way of L1's PID.PIR. Doing so would allow the interrupt to get into L1's vIRR without KVM checking vmcs12's NV. Which is architecturally allowed, but unlikely behavior for a hypervisor. Cc: Zeng Guang Reviewed-by: Chao Gao Link: https://lore.kernel.org/r/20240906043413.1049633-5-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/vmx/nested.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index e57cd849354b..02d6a3a1e80f 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4308,6 +4308,20 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu) if (WARN_ON_ONCE(irq < 0)) goto no_vmexit; + /* + * If the IRQ is L2's PI notification vector, process posted + * interrupts for L2 instead of injecting VM-Exit, as the + * detection/morphing architecturally occurs when the IRQ is + * delivered to the CPU. Note, only interrupts that are routed + * through the local APIC trigger posted interrupt processing, + * and enabling posted interrupts requires ACK-on-exit. + */ + if (irq == vmx->nested.posted_intr_nv) { + vmx->nested.pi_pending = true; + kvm_apic_clear_irr(vcpu, irq); + goto no_vmexit; + } + nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR | irq, 0); From aa9477966aabc344ae83555002bd31809f6a9546 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 5 Sep 2024 21:34:11 -0700 Subject: [PATCH 171/655] KVM: x86: Fold kvm_get_apic_interrupt() into kvm_cpu_get_interrupt() Fold kvm_get_apic_interrupt() into kvm_cpu_get_interrupt() now that nVMX essentially open codes kvm_get_apic_interrupt() in order to correctly emulate nested posted interrupts. Opportunistically stop exporting kvm_cpu_get_interrupt(), as the aforementioned nVMX flow was the only user in vendor code. Link: https://lore.kernel.org/r/20240906043413.1049633-6-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/irq.c | 7 +++++-- arch/x86/kvm/lapic.c | 10 ---------- arch/x86/kvm/lapic.h | 1 - 3 files changed, 5 insertions(+), 13 deletions(-) diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index 810da99ff7ed..63f66c51975a 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -142,9 +142,12 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v) if (vector != -1) return vector; /* PIC */ - return kvm_get_apic_interrupt(v); /* APIC */ + vector = kvm_apic_has_interrupt(v); /* APIC */ + if (vector != -1) + kvm_apic_ack_interrupt(v, vector); + + return vector; } -EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt); void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) { diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index c1b2a615acfd..63e67b6301ec 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2959,16 +2959,6 @@ void kvm_apic_ack_interrupt(struct kvm_vcpu *vcpu, int vector) } EXPORT_SYMBOL_GPL(kvm_apic_ack_interrupt); -int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) -{ - int vector = kvm_apic_has_interrupt(vcpu); - - if (vector != -1) - kvm_apic_ack_interrupt(vcpu, vector); - - return vector; -} - static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s, bool set) { diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index db80a2965b9c..8310ff74be29 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -90,7 +90,6 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu); int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu); void kvm_apic_ack_interrupt(struct kvm_vcpu *vcpu, int vector); int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu); -int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu); int kvm_apic_accept_events(struct kvm_vcpu *vcpu); void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event); u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu); From 1ed0f119c5ff66bec663ba5507539ec4a4f33775 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 5 Sep 2024 21:34:12 -0700 Subject: [PATCH 172/655] KVM: nVMX: Explicitly invalidate posted_intr_nv if PI is disabled at VM-Enter Explicitly invalidate posted_intr_nv when emulating nested VM-Enter and posted interrupts are disabled to make it clear that posted_intr_nv is valid if and only if nested posted interrupts are enabled, and as a cheap way to harden against KVM bugs. KVM initializes posted_intr_nv to -1 at vCPU creation and resets it to -1 when unloading vmcs12 and/or leaving nested mode, i.e. this is not a bug fix (or at least, it's not intended to be a bug fix). Note, tracking nested.posted_intr_nv as a u16 subtly adds a measure of safety, as it prevents unintentionally matching KVM's informal "no IRQ" vector of -1, stored as a signed int. Because a u16 can be always be represented as a signed int, the effective "invalid" value of posted_intr_nv, 65535, will be preserved as-is when comparing against an int, i.e. will be zero-extended, not sign-extended, and thus won't get a false positive if KVM is buggy and compares posted_intr_nv against -1. Opportunistically add a comment in vmx_deliver_nested_posted_interrupt() to call out that it must check vmx->nested.posted_intr_nv, not the vector in vmcs12, which is presumably the _entire_ reason nested.posted_intr_nv exists. E.g. vmcs12 is a KVM-controlled snapshot, so there are no TOCTOU races to worry about, the only potential badness is if the vCPU leaves nested and frees vmcs12 between the sender checking is_guest_mode() and dereferencing the vmcs12 pointer. Link: https://lore.kernel.org/r/20240906043413.1049633-7-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/vmx/nested.c | 6 ++++-- arch/x86/kvm/vmx/vmx.c | 7 +++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 02d6a3a1e80f..fc3d2ba036f6 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2317,10 +2317,12 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0 /* Posted interrupts setting is only taken from vmcs12. */ vmx->nested.pi_pending = false; - if (nested_cpu_has_posted_intr(vmcs12)) + if (nested_cpu_has_posted_intr(vmcs12)) { vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv; - else + } else { + vmx->nested.posted_intr_nv = -1; exec_control &= ~PIN_BASED_POSTED_INTR; + } pin_controls_set(vmx, exec_control); /* diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 98d737e0d416..fe99deceebbd 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4215,6 +4215,13 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu, { struct vcpu_vmx *vmx = to_vmx(vcpu); + /* + * DO NOT query the vCPU's vmcs12, as vmcs12 is dynamically allocated + * and freed, and must not be accessed outside of vcpu->mutex. The + * vCPU's cached PI NV is valid if and only if posted interrupts + * enabled in its vmcs12, i.e. checking the vector also checks that + * L1 has enabled posted interrupts for L2. + */ if (is_guest_mode(vcpu) && vector == vmx->nested.posted_intr_nv) { /* From 3dde46a21aa72a3640bf3f6ff5ce7838af06a1f9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 5 Sep 2024 21:34:13 -0700 Subject: [PATCH 173/655] KVM: nVMX: Assert that vcpu->mutex is held when accessing secondary VMCSes Add lockdep assertions in get_vmcs12() and get_shadow_vmcs12() to verify the vCPU's mutex is held, as the returned VMCS objects are dynamically allocated/freed when nested VMX is turned on/off, i.e. accessing vmcs12 structures without holding vcpu->mutex is susceptible to use-after-free. Waive the assertion if the VM is being destroyed, as KVM currently forces a nested VM-Exit when freeing the vCPU. If/when that wart is fixed, the assertion can/should be converted to an unqualified lockdep assertion. See also https://lore.kernel.org/all/Zsd0TqCeY3B5Sb5b@google.com. Link: https://lore.kernel.org/r/20240906043413.1049633-8-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/vmx/nested.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h index cce4e2aa30fb..668b6c83a373 100644 --- a/arch/x86/kvm/vmx/nested.h +++ b/arch/x86/kvm/vmx/nested.h @@ -39,11 +39,17 @@ bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port, static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) { + lockdep_assert_once(lockdep_is_held(&vcpu->mutex) || + !refcount_read(&vcpu->kvm->users_count)); + return to_vmx(vcpu)->nested.cached_vmcs12; } static inline struct vmcs12 *get_shadow_vmcs12(struct kvm_vcpu *vcpu) { + lockdep_assert_once(lockdep_is_held(&vcpu->mutex) || + !refcount_read(&vcpu->kvm->users_count)); + return to_vmx(vcpu)->nested.cached_shadow_vmcs12; } From ec495f2ab12290b008a691e826b39b895f458945 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 29 Aug 2024 12:14:12 -0700 Subject: [PATCH 174/655] KVM: Write the per-page "segment" when clearing (part of) a guest page Pass "seg" instead of "len" when writing guest memory in kvm_clear_guest(), as "seg" holds the number of bytes to write for the current page, while "len" holds the total bytes remaining. Luckily, all users of kvm_clear_guest() are guaranteed to not cross a page boundary, and so the bug is unhittable in the current code base. Fixes: 2f5414423ef5 ("KVM: remove kvm_clear_guest_page") Reported-by: zyr_ms@outlook.com Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219104 Link: https://lore.kernel.org/r/20240829191413.900740-2-seanjc@google.com Signed-off-by: Sean Christopherson --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index cb2b78e92910..04011b94edec 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3581,7 +3581,7 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len) int ret; while ((seg = next_segment(len, offset)) != 0) { - ret = kvm_write_guest_page(kvm, gfn, zero_page, offset, len); + ret = kvm_write_guest_page(kvm, gfn, zero_page, offset, seg); if (ret < 0) return ret; offset = 0; From 025dde582bbf31e7618f9283594ef5e2408e384b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 29 Aug 2024 12:14:13 -0700 Subject: [PATCH 175/655] KVM: Harden guest memory APIs against out-of-bounds accesses When reading or writing a guest page, WARN and bail if offset+len would result in a read to a different page so that KVM bugs are more likely to be detected, and so that any such bugs are less likely to escalate to an out-of-bounds access. E.g. if userspace isn't using guard pages and the target page is at the end of a memslot. Note, KVM already hardens itself in similar APIs, e.g. in the "cached" variants, it's just the vanilla APIs that are playing with fire. Link: https://lore.kernel.org/r/20240829191413.900740-3-seanjc@google.com Signed-off-by: Sean Christopherson --- virt/kvm/kvm_main.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 04011b94edec..d51357fd28d7 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3275,6 +3275,9 @@ static int __kvm_read_guest_page(struct kvm_memory_slot *slot, gfn_t gfn, int r; unsigned long addr; + if (WARN_ON_ONCE(offset + len > PAGE_SIZE)) + return -EFAULT; + addr = gfn_to_hva_memslot_prot(slot, gfn, NULL); if (kvm_is_error_hva(addr)) return -EFAULT; @@ -3348,6 +3351,9 @@ static int __kvm_read_guest_atomic(struct kvm_memory_slot *slot, gfn_t gfn, int r; unsigned long addr; + if (WARN_ON_ONCE(offset + len > PAGE_SIZE)) + return -EFAULT; + addr = gfn_to_hva_memslot_prot(slot, gfn, NULL); if (kvm_is_error_hva(addr)) return -EFAULT; @@ -3378,6 +3384,9 @@ static int __kvm_write_guest_page(struct kvm *kvm, int r; unsigned long addr; + if (WARN_ON_ONCE(offset + len > PAGE_SIZE)) + return -EFAULT; + addr = gfn_to_hva_memslot(memslot, gfn); if (kvm_is_error_hva(addr)) return -EFAULT; From 4ececec19a0914873634ad69bbaca5557c33e855 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 30 Aug 2024 17:15:17 -0700 Subject: [PATCH 176/655] KVM: x86/mmu: Replace PFERR_NESTED_GUEST_PAGE with a more descriptive helper Drop the globally visible PFERR_NESTED_GUEST_PAGE and replace it with a more appropriately named is_write_to_guest_page_table(). The macro name is misleading, because while all nNPT walks match PAGE|WRITE|PRESENT, the reverse is not true. No functional change intended. Link: https://lore.kernel.org/r/20240831001538.336683-3-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/include/asm/kvm_host.h | 4 ---- arch/x86/kvm/mmu/mmu.c | 9 ++++++++- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4a68cb3eba78..797433994d56 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -280,10 +280,6 @@ enum x86_intercept_stage; #define PFERR_PRIVATE_ACCESS BIT_ULL(49) #define PFERR_SYNTHETIC_MASK (PFERR_IMPLICIT_ACCESS | PFERR_PRIVATE_ACCESS) -#define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK | \ - PFERR_WRITE_MASK | \ - PFERR_PRESENT_MASK) - /* apic attention bits */ #define KVM_APIC_CHECK_VAPIC 0 /* diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 5226bb055d99..1eedd1932e5f 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -5945,6 +5945,13 @@ void kvm_mmu_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, write_unlock(&vcpu->kvm->mmu_lock); } +static bool is_write_to_guest_page_table(u64 error_code) +{ + const u64 mask = PFERR_GUEST_PAGE_MASK | PFERR_WRITE_MASK | PFERR_PRESENT_MASK; + + return (error_code & mask) == mask; +} + int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, void *insn, int insn_len) { @@ -6008,7 +6015,7 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err * and resume the guest. */ if (vcpu->arch.mmu->root_role.direct && - (error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE) { + is_write_to_guest_page_table(error_code)) { kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2_or_gpa)); return 1; } From 989a84c93f592e6b288fb3b96d2eeec827d75bef Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 30 Aug 2024 17:15:18 -0700 Subject: [PATCH 177/655] KVM: x86/mmu: Trigger unprotect logic only on write-protection page faults Trigger KVM's various "unprotect gfn" paths if and only if the page fault was a write to a write-protected gfn. To do so, add a new page fault return code, RET_PF_WRITE_PROTECTED, to explicitly and precisely track such page faults. If a page fault requires emulation for any MMIO (or any reason besides write-protection), trying to unprotect the gfn is pointless and risks putting the vCPU into an infinite loop. E.g. KVM will put the vCPU into an infinite loop if the vCPU manages to trigger MMIO on a page table walk. Fixes: 147277540bbc ("kvm: svm: Add support for additional SVM NPF error codes") Reviewed-by: Yuan Yao Link: https://lore.kernel.org/r/20240831001538.336683-4-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/mmu/mmu.c | 75 +++++++++++++++++++-------------- arch/x86/kvm/mmu/mmu_internal.h | 3 ++ arch/x86/kvm/mmu/mmutrace.h | 1 + arch/x86/kvm/mmu/paging_tmpl.h | 2 +- arch/x86/kvm/mmu/tdp_mmu.c | 6 +-- 5 files changed, 50 insertions(+), 37 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 1eedd1932e5f..d8deed29091b 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -2896,10 +2896,8 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot, trace_kvm_mmu_set_spte(level, gfn, sptep); } - if (wrprot) { - if (write_fault) - ret = RET_PF_EMULATE; - } + if (wrprot && write_fault) + ret = RET_PF_WRITE_PROTECTED; if (flush) kvm_flush_remote_tlbs_gfn(vcpu->kvm, gfn, level); @@ -4531,7 +4529,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault return RET_PF_RETRY; if (page_fault_handle_page_track(vcpu, fault)) - return RET_PF_EMULATE; + return RET_PF_WRITE_PROTECTED; r = fast_page_fault(vcpu, fault); if (r != RET_PF_INVALID) @@ -4624,7 +4622,7 @@ static int kvm_tdp_mmu_page_fault(struct kvm_vcpu *vcpu, int r; if (page_fault_handle_page_track(vcpu, fault)) - return RET_PF_EMULATE; + return RET_PF_WRITE_PROTECTED; r = fast_page_fault(vcpu, fault); if (r != RET_PF_INVALID) @@ -4703,6 +4701,7 @@ static int kvm_tdp_map_page(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code, switch (r) { case RET_PF_FIXED: case RET_PF_SPURIOUS: + case RET_PF_WRITE_PROTECTED: return 0; case RET_PF_EMULATE: @@ -5952,6 +5951,40 @@ static bool is_write_to_guest_page_table(u64 error_code) return (error_code & mask) == mask; } +static int kvm_mmu_write_protect_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, + u64 error_code, int *emulation_type) +{ + bool direct = vcpu->arch.mmu->root_role.direct; + + /* + * Before emulating the instruction, check if the error code + * was due to a RO violation while translating the guest page. + * This can occur when using nested virtualization with nested + * paging in both guests. If true, we simply unprotect the page + * and resume the guest. + */ + if (direct && is_write_to_guest_page_table(error_code)) { + kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2_or_gpa)); + return RET_PF_RETRY; + } + + /* + * The gfn is write-protected, but if emulation fails we can still + * optimistically try to just unprotect the page and let the processor + * re-execute the instruction that caused the page fault. Do not allow + * retrying MMIO emulation, as it's not only pointless but could also + * cause us to enter an infinite loop because the processor will keep + * faulting on the non-existent MMIO address. Retrying an instruction + * from a nested guest is also pointless and dangerous as we are only + * explicitly shadowing L1's page tables, i.e. unprotecting something + * for L1 isn't going to magically fix whatever issue cause L2 to fail. + */ + if (!mmio_info_in_cache(vcpu, cr2_or_gpa, direct) && !is_guest_mode(vcpu)) + *emulation_type |= EMULTYPE_ALLOW_RETRY_PF; + + return RET_PF_EMULATE; +} + int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, void *insn, int insn_len) { @@ -5997,6 +6030,10 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err if (r < 0) return r; + if (r == RET_PF_WRITE_PROTECTED) + r = kvm_mmu_write_protect_fault(vcpu, cr2_or_gpa, error_code, + &emulation_type); + if (r == RET_PF_FIXED) vcpu->stat.pf_fixed++; else if (r == RET_PF_EMULATE) @@ -6007,32 +6044,6 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err if (r != RET_PF_EMULATE) return 1; - /* - * Before emulating the instruction, check if the error code - * was due to a RO violation while translating the guest page. - * This can occur when using nested virtualization with nested - * paging in both guests. If true, we simply unprotect the page - * and resume the guest. - */ - if (vcpu->arch.mmu->root_role.direct && - is_write_to_guest_page_table(error_code)) { - kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2_or_gpa)); - return 1; - } - - /* - * vcpu->arch.mmu.page_fault returned RET_PF_EMULATE, but we can still - * optimistically try to just unprotect the page and let the processor - * re-execute the instruction that caused the page fault. Do not allow - * retrying MMIO emulation, as it's not only pointless but could also - * cause us to enter an infinite loop because the processor will keep - * faulting on the non-existent MMIO address. Retrying an instruction - * from a nested guest is also pointless and dangerous as we are only - * explicitly shadowing L1's page tables, i.e. unprotecting something - * for L1 isn't going to magically fix whatever issue cause L2 to fail. - */ - if (!mmio_info_in_cache(vcpu, cr2_or_gpa, direct) && !is_guest_mode(vcpu)) - emulation_type |= EMULTYPE_ALLOW_RETRY_PF; emulate: return x86_emulate_instruction(vcpu, cr2_or_gpa, emulation_type, insn, insn_len); diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index 1721d97743e9..50d2624111f8 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -258,6 +258,8 @@ int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault); * RET_PF_CONTINUE: So far, so good, keep handling the page fault. * RET_PF_RETRY: let CPU fault again on the address. * RET_PF_EMULATE: mmio page fault, emulate the instruction directly. + * RET_PF_WRITE_PROTECTED: the gfn is write-protected, either unprotected the + * gfn and retry, or emulate the instruction directly. * RET_PF_INVALID: the spte is invalid, let the real page fault path update it. * RET_PF_FIXED: The faulting entry has been fixed. * RET_PF_SPURIOUS: The faulting entry was already fixed, e.g. by another vCPU. @@ -274,6 +276,7 @@ enum { RET_PF_CONTINUE = 0, RET_PF_RETRY, RET_PF_EMULATE, + RET_PF_WRITE_PROTECTED, RET_PF_INVALID, RET_PF_FIXED, RET_PF_SPURIOUS, diff --git a/arch/x86/kvm/mmu/mmutrace.h b/arch/x86/kvm/mmu/mmutrace.h index 195d98bc8de8..f35a830ce469 100644 --- a/arch/x86/kvm/mmu/mmutrace.h +++ b/arch/x86/kvm/mmu/mmutrace.h @@ -57,6 +57,7 @@ TRACE_DEFINE_ENUM(RET_PF_CONTINUE); TRACE_DEFINE_ENUM(RET_PF_RETRY); TRACE_DEFINE_ENUM(RET_PF_EMULATE); +TRACE_DEFINE_ENUM(RET_PF_WRITE_PROTECTED); TRACE_DEFINE_ENUM(RET_PF_INVALID); TRACE_DEFINE_ENUM(RET_PF_FIXED); TRACE_DEFINE_ENUM(RET_PF_SPURIOUS); diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 405bd7ceee2a..ae7d39ff2d07 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -806,7 +806,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault if (page_fault_handle_page_track(vcpu, fault)) { shadow_page_table_clear_flood(vcpu, fault->addr); - return RET_PF_EMULATE; + return RET_PF_WRITE_PROTECTED; } r = mmu_topup_memory_caches(vcpu, true); diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index c7dc49ee7388..8bf44ac9372f 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1046,10 +1046,8 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, * protected, emulation is needed. If the emulation was skipped, * the vCPU would have the same fault again. */ - if (wrprot) { - if (fault->write) - ret = RET_PF_EMULATE; - } + if (wrprot && fault->write) + ret = RET_PF_WRITE_PROTECTED; /* If a MMIO SPTE is installed, the MMIO will need to be emulated. */ if (unlikely(is_mmio_spte(vcpu->kvm, new_spte))) { From 2fb2b7877b3a4cac4de070ef92437b38f13559b0 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 30 Aug 2024 17:15:19 -0700 Subject: [PATCH 178/655] KVM: x86/mmu: Skip emulation on page fault iff 1+ SPs were unprotected When doing "fast unprotection" of nested TDP page tables, skip emulation if and only if at least one gfn was unprotected, i.e. continue with emulation if simply resuming is likely to hit the same fault and risk putting the vCPU into an infinite loop. Note, it's entirely possible to get a false negative, e.g. if a different vCPU faults on the same gfn and unprotects the gfn first, but that's a relatively rare edge case, and emulating is still functionally ok, i.e. saving a few cycles by avoiding emulation isn't worth the risk of putting the vCPU into an infinite loop. Opportunistically rewrite the relevant comment to document in gory detail exactly what scenario the "fast unprotect" logic is handling. Fixes: 147277540bbc ("kvm: svm: Add support for additional SVM NPF error codes") Cc: Yuan Yao Reviewed-by: Yuan Yao Link: https://lore.kernel.org/r/20240831001538.336683-5-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/mmu/mmu.c | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index d8deed29091b..edca32a4e874 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -5957,16 +5957,37 @@ static int kvm_mmu_write_protect_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, bool direct = vcpu->arch.mmu->root_role.direct; /* - * Before emulating the instruction, check if the error code - * was due to a RO violation while translating the guest page. - * This can occur when using nested virtualization with nested - * paging in both guests. If true, we simply unprotect the page - * and resume the guest. + * Before emulating the instruction, check to see if the access was due + * to a read-only violation while the CPU was walking non-nested NPT + * page tables, i.e. for a direct MMU, for _guest_ page tables in L1. + * If L1 is sharing (a subset of) its page tables with L2, e.g. by + * having nCR3 share lower level page tables with hCR3, then when KVM + * (L0) write-protects the nested NPTs, i.e. npt12 entries, KVM is also + * unknowingly write-protecting L1's guest page tables, which KVM isn't + * shadowing. + * + * Because the CPU (by default) walks NPT page tables using a write + * access (to ensure the CPU can do A/D updates), page walks in L1 can + * trigger write faults for the above case even when L1 isn't modifying + * PTEs. As a result, KVM will unnecessarily emulate (or at least, try + * to emulate) an excessive number of L1 instructions; because L1's MMU + * isn't shadowed by KVM, there is no need to write-protect L1's gPTEs + * and thus no need to emulate in order to guarantee forward progress. + * + * Try to unprotect the gfn, i.e. zap any shadow pages, so that L1 can + * proceed without triggering emulation. If one or more shadow pages + * was zapped, skip emulation and resume L1 to let it natively execute + * the instruction. If no shadow pages were zapped, then the write- + * fault is due to something else entirely, i.e. KVM needs to emulate, + * as resuming the guest will put it into an infinite loop. + * + * Note, this code also applies to Intel CPUs, even though it is *very* + * unlikely that an L1 will share its page tables (IA32/PAE/paging64 + * format) with L2's page tables (EPT format). */ - if (direct && is_write_to_guest_page_table(error_code)) { - kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2_or_gpa)); + if (direct && is_write_to_guest_page_table(error_code) && + kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2_or_gpa))) return RET_PF_RETRY; - } /* * The gfn is write-protected, but if emulation fails we can still From c1edcc41c3603c65f34000ae031a20971f4e56f9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 30 Aug 2024 17:15:20 -0700 Subject: [PATCH 179/655] KVM: x86: Retry to-be-emulated insn in "slow" unprotect path iff sp is zapped Resume the guest and thus skip emulation of a non-PTE-writing instruction if and only if unprotecting the gfn actually zapped at least one shadow page. If the gfn is write-protected for some reason other than shadow paging, attempting to unprotect the gfn will effectively fail, and thus retrying the instruction is all but guaranteed to be pointless. This bug has existed for a long time, but was effectively fudged around by the retry RIP+address anti-loop detection. Reviewed-by: Yuan Yao Link: https://lore.kernel.org/r/20240831001538.336683-6-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 70219e406987..5d4bcb999df2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8965,14 +8965,14 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt, if (ctxt->eip == last_retry_eip && last_retry_addr == cr2_or_gpa) return false; - vcpu->arch.last_retry_eip = ctxt->eip; - vcpu->arch.last_retry_addr = cr2_or_gpa; - if (!vcpu->arch.mmu->root_role.direct) gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL); - kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); + if (!kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa))) + return false; + vcpu->arch.last_retry_eip = ctxt->eip; + vcpu->arch.last_retry_addr = cr2_or_gpa; return true; } From 019f3f84a40c88b68ca4d455306b92c20733e784 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 30 Aug 2024 17:15:21 -0700 Subject: [PATCH 180/655] KVM: x86: Get RIP from vCPU state when storing it to last_retry_eip Read RIP from vCPU state instead of pulling it from the emulation context when filling last_retry_eip, which is part of the anti-infinite-loop protection used when unprotecting and retrying instructions that hit a write-protected gfn. This will allow reusing the anti-infinite-loop protection in flows that never make it into the emulator. No functional change intended, as ctxt->eip is set to kvm_rip_read() in init_emulate_ctxt(), and EMULTYPE_PF emulation is mutually exclusive with EMULTYPE_NO_DECODE and EMULTYPE_SKIP, i.e. always goes through x86_decode_emulated_instruction() and hasn't advanced ctxt->eip (yet). Reviewed-by: Yuan Yao Link: https://lore.kernel.org/r/20240831001538.336683-7-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5d4bcb999df2..760d455f77ac 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8971,7 +8971,7 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt, if (!kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa))) return false; - vcpu->arch.last_retry_eip = ctxt->eip; + vcpu->arch.last_retry_eip = kvm_rip_read(vcpu); vcpu->arch.last_retry_addr = cr2_or_gpa; return true; } From 9c19129e535bfff85bdfcb5a804e19e5aae935b2 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 30 Aug 2024 17:15:22 -0700 Subject: [PATCH 181/655] KVM: x86: Store gpa as gpa_t, not unsigned long, when unprotecting for retry Store the gpa used to unprotect the faulting gfn for retry as a gpa_t, not an unsigned long. This fixes a bug where 32-bit KVM would unprotect and retry the wrong gfn if the gpa had bits 63:32!=0. In practice, this bug is functionally benign, as unprotecting the wrong gfn is purely a performance issue (thanks to the anti-infinite-loop logic). And of course, almost no one runs 32-bit KVM these days. Reviewed-by: Yuan Yao Link: https://lore.kernel.org/r/20240831001538.336683-8-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 760d455f77ac..d26e107225f7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8932,7 +8932,8 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt, gpa_t cr2_or_gpa, int emulation_type) { struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); - unsigned long last_retry_eip, last_retry_addr, gpa = cr2_or_gpa; + unsigned long last_retry_eip, last_retry_addr; + gpa_t gpa = cr2_or_gpa; last_retry_eip = vcpu->arch.last_retry_eip; last_retry_addr = vcpu->arch.last_retry_addr; From 01dd4d319207c4cfd51a1c9a1812909e944d8c86 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 30 Aug 2024 17:15:23 -0700 Subject: [PATCH 182/655] KVM: x86/mmu: Apply retry protection to "fast nTDP unprotect" path Move the anti-infinite-loop protection provided by last_retry_{eip,addr} into kvm_mmu_write_protect_fault() so that it guards unprotect+retry that never hits the emulator, as well as reexecute_instruction(), which is the last ditch "might as well try it" logic that kicks in when emulation fails on an instruction that faulted on a write-protected gfn. Add a new helper, kvm_mmu_unprotect_gfn_and_retry(), to set the retry fields and deduplicate other code (with more to come). Link: https://lore.kernel.org/r/20240831001538.336683-9-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/mmu/mmu.c | 39 ++++++++++++++++++++++++++++++++- arch/x86/kvm/x86.c | 27 +---------------------- 3 files changed, 40 insertions(+), 27 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 797433994d56..fd115feb49b3 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -2133,6 +2133,7 @@ int kvm_get_nr_pending_nmis(struct kvm_vcpu *vcpu); void kvm_update_dr7(struct kvm_vcpu *vcpu); int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn); +bool kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa); void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu, ulong roots_to_free); void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu); diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index edca32a4e874..e992da21f4e5 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -2713,6 +2713,22 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) return r; } +bool kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa) +{ + gpa_t gpa = cr2_or_gpa; + bool r; + + if (!vcpu->arch.mmu->root_role.direct) + gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL); + + r = kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); + if (r) { + vcpu->arch.last_retry_eip = kvm_rip_read(vcpu); + vcpu->arch.last_retry_addr = cr2_or_gpa; + } + return r; +} + static int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) { gpa_t gpa; @@ -5956,6 +5972,27 @@ static int kvm_mmu_write_protect_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, { bool direct = vcpu->arch.mmu->root_role.direct; + /* + * Do not try to unprotect and retry if the vCPU re-faulted on the same + * RIP with the same address that was previously unprotected, as doing + * so will likely put the vCPU into an infinite. E.g. if the vCPU uses + * a non-page-table modifying instruction on the PDE that points to the + * instruction, then unprotecting the gfn will unmap the instruction's + * code, i.e. make it impossible for the instruction to ever complete. + */ + if (vcpu->arch.last_retry_eip == kvm_rip_read(vcpu) && + vcpu->arch.last_retry_addr == cr2_or_gpa) + return RET_PF_EMULATE; + + /* + * Reset the unprotect+retry values that guard against infinite loops. + * The values will be refreshed if KVM explicitly unprotects a gfn and + * retries, in all other cases it's safe to retry in the future even if + * the next page fault happens on the same RIP+address. + */ + vcpu->arch.last_retry_eip = 0; + vcpu->arch.last_retry_addr = 0; + /* * Before emulating the instruction, check to see if the access was due * to a read-only violation while the CPU was walking non-nested NPT @@ -5986,7 +6023,7 @@ static int kvm_mmu_write_protect_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, * format) with L2's page tables (EPT format). */ if (direct && is_write_to_guest_page_table(error_code) && - kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2_or_gpa))) + kvm_mmu_unprotect_gfn_and_retry(vcpu, cr2_or_gpa)) return RET_PF_RETRY; /* diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d26e107225f7..ce075e07126b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8932,27 +8932,13 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt, gpa_t cr2_or_gpa, int emulation_type) { struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); - unsigned long last_retry_eip, last_retry_addr; - gpa_t gpa = cr2_or_gpa; - - last_retry_eip = vcpu->arch.last_retry_eip; - last_retry_addr = vcpu->arch.last_retry_addr; /* * If the emulation is caused by #PF and it is non-page_table * writing instruction, it means the VM-EXIT is caused by shadow * page protected, we can zap the shadow page and retry this * instruction directly. - * - * Note: if the guest uses a non-page-table modifying instruction - * on the PDE that points to the instruction, then we will unmap - * the instruction and go to an infinite loop. So, we cache the - * last retried eip and the last fault address, if we meet the eip - * and the address again, we can break out of the potential infinite - * loop. */ - vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0; - if (!(emulation_type & EMULTYPE_ALLOW_RETRY_PF)) return false; @@ -8963,18 +8949,7 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt, if (x86_page_table_writing_insn(ctxt)) return false; - if (ctxt->eip == last_retry_eip && last_retry_addr == cr2_or_gpa) - return false; - - if (!vcpu->arch.mmu->root_role.direct) - gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL); - - if (!kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa))) - return false; - - vcpu->arch.last_retry_eip = kvm_rip_read(vcpu); - vcpu->arch.last_retry_addr = cr2_or_gpa; - return true; + return kvm_mmu_unprotect_gfn_and_retry(vcpu, cr2_or_gpa); } static int complete_emulated_mmio(struct kvm_vcpu *vcpu); From dfaae8447c53819749cf3ba10ce24d3c609752e3 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 30 Aug 2024 17:15:24 -0700 Subject: [PATCH 183/655] KVM: x86/mmu: Try "unprotect for retry" iff there are indirect SPs Try to unprotect shadow pages if and only if indirect_shadow_pages is non- zero, i.e. iff there is at least one protected such shadow page. Pre- checking indirect_shadow_pages avoids taking mmu_lock for write when the gfn is write-protected by a third party, i.e. not for KVM shadow paging, and in the *extremely* unlikely case that a different task has already unprotected the last shadow page. Link: https://lore.kernel.org/r/20240831001538.336683-10-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/mmu/mmu.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index e992da21f4e5..e24de1f3d1db 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -2718,6 +2718,17 @@ bool kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa) gpa_t gpa = cr2_or_gpa; bool r; + /* + * Bail early if there aren't any write-protected shadow pages to avoid + * unnecessarily taking mmu_lock lock, e.g. if the gfn is write-tracked + * by a third party. Reading indirect_shadow_pages without holding + * mmu_lock is safe, as this is purely an optimization, i.e. a false + * positive is benign, and a false negative will simply result in KVM + * skipping the unprotect+retry path, which is also an optimization. + */ + if (!READ_ONCE(vcpu->kvm->arch.indirect_shadow_pages)) + return false; + if (!vcpu->arch.mmu->root_role.direct) gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL); From 41e6e367d576ce1801dc5c2b106e14cde35e3c80 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 30 Aug 2024 17:15:25 -0700 Subject: [PATCH 184/655] KVM: x86: Move EMULTYPE_ALLOW_RETRY_PF to x86_emulate_instruction() Move the sanity checks for EMULTYPE_ALLOW_RETRY_PF to the top of x86_emulate_instruction(). In addition to deduplicating a small amount of code, this makes the connection between EMULTYPE_ALLOW_RETRY_PF and EMULTYPE_PF even more explicit, and will allow dropping retry_instruction() entirely. Reviewed-by: Yuan Yao Link: https://lore.kernel.org/r/20240831001538.336683-11-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ce075e07126b..51d22d9de8d7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8870,10 +8870,6 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, if (!(emulation_type & EMULTYPE_ALLOW_RETRY_PF)) return false; - if (WARN_ON_ONCE(is_guest_mode(vcpu)) || - WARN_ON_ONCE(!(emulation_type & EMULTYPE_PF))) - return false; - if (!vcpu->arch.mmu->root_role.direct) { /* * Write permission should be allowed since only @@ -8942,10 +8938,6 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt, if (!(emulation_type & EMULTYPE_ALLOW_RETRY_PF)) return false; - if (WARN_ON_ONCE(is_guest_mode(vcpu)) || - WARN_ON_ONCE(!(emulation_type & EMULTYPE_PF))) - return false; - if (x86_page_table_writing_insn(ctxt)) return false; @@ -9148,6 +9140,11 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt; bool writeback = true; + if ((emulation_type & EMULTYPE_ALLOW_RETRY_PF) && + (WARN_ON_ONCE(is_guest_mode(vcpu)) || + WARN_ON_ONCE(!(emulation_type & EMULTYPE_PF)))) + emulation_type &= ~EMULTYPE_ALLOW_RETRY_PF; + r = kvm_check_emulate_insn(vcpu, emulation_type, insn, insn_len); if (r != X86EMUL_CONTINUE) { if (r == X86EMUL_RETRY_INSTR || r == X86EMUL_PROPAGATE_FAULT) From 2df354e37c1398a85bb43cbbf1f913eb3f91d035 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 30 Aug 2024 17:15:26 -0700 Subject: [PATCH 185/655] KVM: x86: Fold retry_instruction() into x86_emulate_instruction() Now that retry_instruction() is reasonably tiny, fold it into its sole caller, x86_emulate_instruction(). In addition to getting rid of the absurdly confusing retry_instruction() name, handling the retry in x86_emulate_instruction() pairs it back up with the code that resets last_retry_{eip,address}. No functional change intended. Reviewed-by: Yuan Yao Link: https://lore.kernel.org/r/20240831001538.336683-12-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 30 +++++++++--------------------- 1 file changed, 9 insertions(+), 21 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 51d22d9de8d7..a7961f8a6429 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8924,26 +8924,6 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, return !(emulation_type & EMULTYPE_WRITE_PF_TO_SP); } -static bool retry_instruction(struct x86_emulate_ctxt *ctxt, - gpa_t cr2_or_gpa, int emulation_type) -{ - struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); - - /* - * If the emulation is caused by #PF and it is non-page_table - * writing instruction, it means the VM-EXIT is caused by shadow - * page protected, we can zap the shadow page and retry this - * instruction directly. - */ - if (!(emulation_type & EMULTYPE_ALLOW_RETRY_PF)) - return false; - - if (x86_page_table_writing_insn(ctxt)) - return false; - - return kvm_mmu_unprotect_gfn_and_retry(vcpu, cr2_or_gpa); -} - static int complete_emulated_mmio(struct kvm_vcpu *vcpu); static int complete_emulated_pio(struct kvm_vcpu *vcpu); @@ -9223,7 +9203,15 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, return 1; } - if (retry_instruction(ctxt, cr2_or_gpa, emulation_type)) + /* + * If emulation was caused by a write-protection #PF on a non-page_table + * writing instruction, try to unprotect the gfn, i.e. zap shadow pages, + * and retry the instruction, as the vCPU is likely no longer using the + * gfn as a page table. + */ + if ((emulation_type & EMULTYPE_ALLOW_RETRY_PF) && + !x86_page_table_writing_insn(ctxt) && + kvm_mmu_unprotect_gfn_and_retry(vcpu, cr2_or_gpa)) return 1; /* this is needed for vmware backdoor interface to work since it From b7e948898e772ac900950c0dac4ca90e905cd0c0 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 30 Aug 2024 17:15:27 -0700 Subject: [PATCH 186/655] KVM: x86/mmu: Don't try to unprotect an INVALID_GPA If getting the gpa for a gva fails, e.g. because the gva isn't mapped in the guest page tables, don't try to unprotect the invalid gfn. This is mostly a performance fix (avoids unnecessarily taking mmu_lock), as for_each_gfn_valid_sp_with_gptes() won't explode on garbage input, it's simply pointless. Link: https://lore.kernel.org/r/20240831001538.336683-13-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/mmu/mmu.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index e24de1f3d1db..bafec04b07ea 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -2729,8 +2729,11 @@ bool kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa) if (!READ_ONCE(vcpu->kvm->arch.indirect_shadow_pages)) return false; - if (!vcpu->arch.mmu->root_role.direct) + if (!vcpu->arch.mmu->root_role.direct) { gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL); + if (gpa == INVALID_GPA) + return false; + } r = kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); if (r) { @@ -2749,6 +2752,8 @@ static int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) return 0; gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL); + if (gpa == INVALID_GPA) + return 0; r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); From 29e495bdf847ac6ad0e0d03e5db39a3ed9f12858 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 30 Aug 2024 17:15:28 -0700 Subject: [PATCH 187/655] KVM: x86/mmu: Always walk guest PTEs with WRITE access when unprotecting When getting a gpa from a gva to unprotect the associated gfn when an event is awating reinjection, walk the guest PTEs for WRITE as there's no point in unprotecting the gfn if the guest is unable to write the page, i.e. if write-protection can't trigger emulation. Note, the entire flow should be guarded on the access being a write, and even better should be conditioned on actually triggering a write-protect fault. This will be addressed in a future commit. Reviewed-by: Yuan Yao Link: https://lore.kernel.org/r/20240831001538.336683-14-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/mmu/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index bafec04b07ea..937fa9a82a43 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -2751,7 +2751,7 @@ static int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) if (vcpu->arch.mmu->root_role.direct) return 0; - gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL); + gpa = kvm_mmu_gva_to_gpa_write(vcpu, gva, NULL); if (gpa == INVALID_GPA) return 0; From b299c273c06f005976cdc1b9e9299d492527607e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 30 Aug 2024 17:15:29 -0700 Subject: [PATCH 188/655] KVM: x86/mmu: Move event re-injection unprotect+retry into common path Move the event re-injection unprotect+retry logic into kvm_mmu_write_protect_fault(), i.e. unprotect and retry if and only if the #PF actually hit a write-protected gfn. Note, there is a small possibility that the gfn was unprotected by a different tasking between hitting the #PF and acquiring mmu_lock, but in that case, KVM will resume the guest immediately anyways because KVM will treat the fault as spurious. As a bonus, unprotecting _after_ handling the page fault also addresses the case where the installing a SPTE to handle fault encounters a shadowed PTE, i.e. *creates* a read-only SPTE. Opportunstically add a comment explaining what on earth the intent of the code is, as based on the changelog from commit 577bdc496614 ("KVM: Avoid instruction emulation when event delivery is pending"). Link: https://lore.kernel.org/r/20240831001538.336683-15-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/mmu/mmu.c | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 937fa9a82a43..195ba7430720 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -2743,23 +2743,6 @@ bool kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa) return r; } -static int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) -{ - gpa_t gpa; - int r; - - if (vcpu->arch.mmu->root_role.direct) - return 0; - - gpa = kvm_mmu_gva_to_gpa_write(vcpu, gva, NULL); - if (gpa == INVALID_GPA) - return 0; - - r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); - - return r; -} - static void kvm_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) { trace_kvm_mmu_unsync_page(sp); @@ -4630,8 +4613,6 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, if (!flags) { trace_kvm_page_fault(vcpu, fault_address, error_code); - if (kvm_event_needs_reinjection(vcpu)) - kvm_mmu_unprotect_page_virt(vcpu, fault_address); r = kvm_mmu_page_fault(vcpu, fault_address, error_code, insn, insn_len); } else if (flags & KVM_PV_REASON_PAGE_NOT_PRESENT) { @@ -6037,8 +6018,15 @@ static int kvm_mmu_write_protect_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, * Note, this code also applies to Intel CPUs, even though it is *very* * unlikely that an L1 will share its page tables (IA32/PAE/paging64 * format) with L2's page tables (EPT format). + * + * For indirect MMUs, i.e. if KVM is shadowing the current MMU, try to + * unprotect the gfn and retry if an event is awaiting reinjection. If + * KVM emulates multiple instructions before completing event injection, + * the event could be delayed beyond what is architecturally allowed, + * e.g. KVM could inject an IRQ after the TPR has been raised. */ - if (direct && is_write_to_guest_page_table(error_code) && + if (((direct && is_write_to_guest_page_table(error_code)) || + (!direct && kvm_event_needs_reinjection(vcpu))) && kvm_mmu_unprotect_gfn_and_retry(vcpu, cr2_or_gpa)) return RET_PF_RETRY; From 620525739521376a65a690df899e1596d56791f8 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 30 Aug 2024 17:15:30 -0700 Subject: [PATCH 189/655] KVM: x86: Remove manual pfn lookup when retrying #PF after failed emulation Drop the manual pfn look when retrying an instruction that KVM failed to emulation in response to a #PF due to a write-protected gfn. Now that KVM sets EMULTYPE_ALLOW_RETRY_PF if and only if the page fault hit a write- protected gfn, i.e. if and only if there's a writable memslot, there's no need to redo the lookup to avoid retrying an instruction that failed on emulated MMIO (no slot, or a write to a read-only slot). I.e. KVM will never attempt to retry an instruction that failed on emulated MMIO, whereas that was not the case prior to the introduction of RET_PF_WRITE_PROTECTED. Reviewed-by: Yuan Yao Link: https://lore.kernel.org/r/20240831001538.336683-16-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a7961f8a6429..1e9c5ef4a9f5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8865,7 +8865,6 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int emulation_type) { gpa_t gpa = cr2_or_gpa; - kvm_pfn_t pfn; if (!(emulation_type & EMULTYPE_ALLOW_RETRY_PF)) return false; @@ -8885,23 +8884,6 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, return true; } - /* - * Do not retry the unhandleable instruction if it faults on the - * readonly host memory, otherwise it will goto a infinite loop: - * retry instruction -> write #PF -> emulation fail -> retry - * instruction -> ... - */ - pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa)); - - /* - * If the instruction failed on the error pfn, it can not be fixed, - * report the error to userspace. - */ - if (is_error_noslot_pfn(pfn)) - return false; - - kvm_release_pfn_clean(pfn); - /* * If emulation may have been triggered by a write to a shadowed page * table, unprotect the gfn (zap any relevant SPTEs) and re-enter the From 19ab2c8be070160be70a88027b3b93106fef7b89 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 30 Aug 2024 17:15:31 -0700 Subject: [PATCH 190/655] KVM: x86: Check EMULTYPE_WRITE_PF_TO_SP before unprotecting gfn Don't bother unprotecting the target gfn if EMULTYPE_WRITE_PF_TO_SP is set, as KVM will simply report the emulation failure to userspace. This will allow converting reexecute_instruction() to use kvm_mmu_unprotect_gfn_instead_retry() instead of kvm_mmu_unprotect_page(). Link: https://lore.kernel.org/r/20240831001538.336683-17-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1e9c5ef4a9f5..7170eee23597 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8869,6 +8869,19 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, if (!(emulation_type & EMULTYPE_ALLOW_RETRY_PF)) return false; + /* + * If the failed instruction faulted on an access to page tables that + * are used to translate any part of the instruction, KVM can't resolve + * the issue by unprotecting the gfn, as zapping the shadow page will + * result in the instruction taking a !PRESENT page fault and thus put + * the vCPU into an infinite loop of page faults. E.g. KVM will create + * a SPTE and write-protect the gfn to resolve the !PRESENT fault, and + * then zap the SPTE to unprotect the gfn, and then do it all over + * again. Report the error to userspace. + */ + if (emulation_type & EMULTYPE_WRITE_PF_TO_SP) + return false; + if (!vcpu->arch.mmu->root_role.direct) { /* * Write permission should be allowed since only @@ -8894,16 +8907,13 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); /* - * If the failed instruction faulted on an access to page tables that - * are used to translate any part of the instruction, KVM can't resolve - * the issue by unprotecting the gfn, as zapping the shadow page will - * result in the instruction taking a !PRESENT page fault and thus put - * the vCPU into an infinite loop of page faults. E.g. KVM will create - * a SPTE and write-protect the gfn to resolve the !PRESENT fault, and - * then zap the SPTE to unprotect the gfn, and then do it all over - * again. Report the error to userspace. + * Retry even if _this_ vCPU didn't unprotect the gfn, as it's possible + * all SPTEs were already zapped by a different task. The alternative + * is to report the error to userspace and likely terminate the guest, + * and the last_retry_{eip,addr} checks will prevent retrying the page + * fault indefinitely, i.e. there's nothing to lose by retrying. */ - return !(emulation_type & EMULTYPE_WRITE_PF_TO_SP); + return true; } static int complete_emulated_mmio(struct kvm_vcpu *vcpu); From dabc4ff70c35756bc107bc5d035d0f0746396a9a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 30 Aug 2024 17:15:32 -0700 Subject: [PATCH 191/655] KVM: x86: Apply retry protection to "unprotect on failure" path Use kvm_mmu_unprotect_gfn_and_retry() in reexecute_instruction() to pick up protection against infinite loops, e.g. if KVM somehow manages to encounter an unsupported instruction and unprotecting the gfn doesn't allow the vCPU to make forward progress. Other than that, the retry-on- failure logic is a functionally equivalent, open coded version of kvm_mmu_unprotect_gfn_and_retry(). Note, the emulation failure path still isn't fully protected, as KVM won't update the retry protection fields if no shadow pages are zapped (but this change is still a step forward). That flaw will be addressed in a future patch. Reviewed-by: Yuan Yao Link: https://lore.kernel.org/r/20240831001538.336683-18-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7170eee23597..ad942892fa2c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8864,8 +8864,6 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int emulation_type) { - gpa_t gpa = cr2_or_gpa; - if (!(emulation_type & EMULTYPE_ALLOW_RETRY_PF)) return false; @@ -8882,29 +8880,13 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, if (emulation_type & EMULTYPE_WRITE_PF_TO_SP) return false; - if (!vcpu->arch.mmu->root_role.direct) { - /* - * Write permission should be allowed since only - * write access need to be emulated. - */ - gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL); - - /* - * If the mapping is invalid in guest, let cpu retry - * it to generate fault. - */ - if (gpa == INVALID_GPA) - return true; - } - /* * If emulation may have been triggered by a write to a shadowed page * table, unprotect the gfn (zap any relevant SPTEs) and re-enter the * guest to let the CPU re-execute the instruction in the hope that the * CPU can cleanly execute the instruction that KVM failed to emulate. */ - if (vcpu->kvm->arch.indirect_shadow_pages) - kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); + kvm_mmu_unprotect_gfn_and_retry(vcpu, cr2_or_gpa); /* * Retry even if _this_ vCPU didn't unprotect the gfn, as it's possible From 4df685664bed04794ad72b58d8af1fa4fcc60261 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 30 Aug 2024 17:15:33 -0700 Subject: [PATCH 192/655] KVM: x86: Update retry protection fields when forcing retry on emulation failure When retrying the faulting instruction after emulation failure, refresh the infinite loop protection fields even if no shadow pages were zapped, i.e. avoid hitting an infinite loop even when retrying the instruction as a last-ditch effort to avoid terminating the guest. Link: https://lore.kernel.org/r/20240831001538.336683-19-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/include/asm/kvm_host.h | 10 +++++++++- arch/x86/kvm/mmu/mmu.c | 12 +++++++----- arch/x86/kvm/x86.c | 2 +- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index fd115feb49b3..cdee59f3d15b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -2133,7 +2133,15 @@ int kvm_get_nr_pending_nmis(struct kvm_vcpu *vcpu); void kvm_update_dr7(struct kvm_vcpu *vcpu); int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn); -bool kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa); +bool __kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, + bool always_retry); + +static inline bool kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu, + gpa_t cr2_or_gpa) +{ + return __kvm_mmu_unprotect_gfn_and_retry(vcpu, cr2_or_gpa, false); +} + void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu, ulong roots_to_free); void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu); diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 195ba7430720..4b4edaf7dc06 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -2713,10 +2713,11 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) return r; } -bool kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa) +bool __kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, + bool always_retry) { gpa_t gpa = cr2_or_gpa; - bool r; + bool r = false; /* * Bail early if there aren't any write-protected shadow pages to avoid @@ -2727,16 +2728,17 @@ bool kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa) * skipping the unprotect+retry path, which is also an optimization. */ if (!READ_ONCE(vcpu->kvm->arch.indirect_shadow_pages)) - return false; + goto out; if (!vcpu->arch.mmu->root_role.direct) { gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL); if (gpa == INVALID_GPA) - return false; + goto out; } r = kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); - if (r) { +out: + if (r || always_retry) { vcpu->arch.last_retry_eip = kvm_rip_read(vcpu); vcpu->arch.last_retry_addr = cr2_or_gpa; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ad942892fa2c..843ddb982b35 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8886,7 +8886,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, * guest to let the CPU re-execute the instruction in the hope that the * CPU can cleanly execute the instruction that KVM failed to emulate. */ - kvm_mmu_unprotect_gfn_and_retry(vcpu, cr2_or_gpa); + __kvm_mmu_unprotect_gfn_and_retry(vcpu, cr2_or_gpa, true); /* * Retry even if _this_ vCPU didn't unprotect the gfn, as it's possible From 2876624e1adcd9a3a3ffa8c4fe3bf8dbba969d95 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 30 Aug 2024 17:15:34 -0700 Subject: [PATCH 193/655] KVM: x86: Rename reexecute_instruction()=>kvm_unprotect_and_retry_on_failure() Rename reexecute_instruction() to kvm_unprotect_and_retry_on_failure() to make the intent and purpose of the helper much more obvious. No functional change intended. Reviewed-by: Yuan Yao Link: https://lore.kernel.org/r/20240831001538.336683-20-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 843ddb982b35..cd9725df3680 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8861,8 +8861,9 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) return 1; } -static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, - int emulation_type) +static bool kvm_unprotect_and_retry_on_failure(struct kvm_vcpu *vcpu, + gpa_t cr2_or_gpa, + int emulation_type) { if (!(emulation_type & EMULTYPE_ALLOW_RETRY_PF)) return false; @@ -9129,8 +9130,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, kvm_queue_exception(vcpu, UD_VECTOR); return 1; } - if (reexecute_instruction(vcpu, cr2_or_gpa, - emulation_type)) + if (kvm_unprotect_and_retry_on_failure(vcpu, cr2_or_gpa, + emulation_type)) return 1; if (ctxt->have_exception && @@ -9216,7 +9217,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, return 1; if (r == EMULATION_FAILED) { - if (reexecute_instruction(vcpu, cr2_or_gpa, emulation_type)) + if (kvm_unprotect_and_retry_on_failure(vcpu, cr2_or_gpa, + emulation_type)) return 1; return handle_emulation_failure(vcpu, emulation_type); From 6b3dcabc10911711eba15816d808e2a18f130406 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 30 Aug 2024 17:15:35 -0700 Subject: [PATCH 194/655] KVM: x86/mmu: Subsume kvm_mmu_unprotect_page() into the and_retry() version Fold kvm_mmu_unprotect_page() into kvm_mmu_unprotect_gfn_and_retry() now that all other direct usage is gone. No functional change intended. Link: https://lore.kernel.org/r/20240831001538.336683-21-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/include/asm/kvm_host.h | 1 - arch/x86/kvm/mmu/mmu.c | 33 +++++++++++++-------------------- 2 files changed, 13 insertions(+), 21 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index cdee59f3d15b..8f4164f58b6c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -2132,7 +2132,6 @@ int kvm_get_nr_pending_nmis(struct kvm_vcpu *vcpu); void kvm_update_dr7(struct kvm_vcpu *vcpu); -int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn); bool __kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, bool always_retry); diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 4b4edaf7dc06..29305403f956 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -2695,27 +2695,12 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long goal_nr_mmu_pages) write_unlock(&kvm->mmu_lock); } -int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) -{ - struct kvm_mmu_page *sp; - LIST_HEAD(invalid_list); - int r; - - r = 0; - write_lock(&kvm->mmu_lock); - for_each_gfn_valid_sp_with_gptes(kvm, sp, gfn) { - r = 1; - kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); - } - kvm_mmu_commit_zap_page(kvm, &invalid_list); - write_unlock(&kvm->mmu_lock); - - return r; -} - bool __kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, bool always_retry) { + struct kvm *kvm = vcpu->kvm; + LIST_HEAD(invalid_list); + struct kvm_mmu_page *sp; gpa_t gpa = cr2_or_gpa; bool r = false; @@ -2727,7 +2712,7 @@ bool __kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, * positive is benign, and a false negative will simply result in KVM * skipping the unprotect+retry path, which is also an optimization. */ - if (!READ_ONCE(vcpu->kvm->arch.indirect_shadow_pages)) + if (!READ_ONCE(kvm->arch.indirect_shadow_pages)) goto out; if (!vcpu->arch.mmu->root_role.direct) { @@ -2736,7 +2721,15 @@ bool __kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, goto out; } - r = kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); + r = false; + write_lock(&kvm->mmu_lock); + for_each_gfn_valid_sp_with_gptes(kvm, sp, gpa_to_gfn(gpa)) { + r = true; + kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); + } + kvm_mmu_commit_zap_page(kvm, &invalid_list); + write_unlock(&kvm->mmu_lock); + out: if (r || always_retry) { vcpu->arch.last_retry_eip = kvm_rip_read(vcpu); From d859b16161c81ee929b7b02a85227b8e3250bc97 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 30 Aug 2024 17:15:36 -0700 Subject: [PATCH 195/655] KVM: x86/mmu: Detect if unprotect will do anything based on invalid_list Explicitly query the list of to-be-zapped shadow pages when checking to see if unprotecting a gfn for retry has succeeded, i.e. if KVM should retry the faulting instruction. Add a comment to explain why the list needs to be checked before zapping, which is the primary motivation for this change. No functional change intended. Reviewed-by: Yuan Yao Link: https://lore.kernel.org/r/20240831001538.336683-22-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/mmu/mmu.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 29305403f956..ebbdc979a069 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -2721,12 +2721,15 @@ bool __kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, goto out; } - r = false; write_lock(&kvm->mmu_lock); - for_each_gfn_valid_sp_with_gptes(kvm, sp, gpa_to_gfn(gpa)) { - r = true; + for_each_gfn_valid_sp_with_gptes(kvm, sp, gpa_to_gfn(gpa)) kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); - } + + /* + * Snapshot the result before zapping, as zapping will remove all list + * entries, i.e. checking the list later would yield a false negative. + */ + r = !list_empty(&invalid_list); kvm_mmu_commit_zap_page(kvm, &invalid_list); write_unlock(&kvm->mmu_lock); From 98a69b96caca3e07aff57ca91fd7cc3a3853871a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 30 Aug 2024 17:15:37 -0700 Subject: [PATCH 196/655] KVM: x86/mmu: WARN on MMIO cache hit when emulating write-protected gfn WARN if KVM gets an MMIO cache hit on a RET_PF_WRITE_PROTECTED fault, as KVM should return RET_PF_WRITE_PROTECTED if and only if there is a memslot, and creating a memslot is supposed to invalidate the MMIO cache by virtue of changing the memslot generation. Keep the code around mainly to provide a convenient location to document why emulated MMIO should be impossible. Suggested-by: Yuan Yao Link: https://lore.kernel.org/r/20240831001538.336683-23-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/mmu/mmu.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index ebbdc979a069..330b87a1c80a 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -5988,6 +5988,18 @@ static int kvm_mmu_write_protect_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, vcpu->arch.last_retry_eip = 0; vcpu->arch.last_retry_addr = 0; + /* + * It should be impossible to reach this point with an MMIO cache hit, + * as RET_PF_WRITE_PROTECTED is returned if and only if there's a valid, + * writable memslot, and creating a memslot should invalidate the MMIO + * cache by way of changing the memslot generation. WARN and disallow + * retry if MMIO is detected, as retrying MMIO emulation is pointless + * and could put the vCPU into an infinite loop because the processor + * will keep faulting on the non-existent MMIO address. + */ + if (WARN_ON_ONCE(mmio_info_in_cache(vcpu, cr2_or_gpa, direct))) + return RET_PF_EMULATE; + /* * Before emulating the instruction, check to see if the access was due * to a read-only violation while the CPU was walking non-nested NPT @@ -6029,17 +6041,15 @@ static int kvm_mmu_write_protect_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, return RET_PF_RETRY; /* - * The gfn is write-protected, but if emulation fails we can still - * optimistically try to just unprotect the page and let the processor + * The gfn is write-protected, but if KVM detects its emulating an + * instruction that is unlikely to be used to modify page tables, or if + * emulation fails, KVM can try to unprotect the gfn and let the CPU * re-execute the instruction that caused the page fault. Do not allow - * retrying MMIO emulation, as it's not only pointless but could also - * cause us to enter an infinite loop because the processor will keep - * faulting on the non-existent MMIO address. Retrying an instruction - * from a nested guest is also pointless and dangerous as we are only - * explicitly shadowing L1's page tables, i.e. unprotecting something - * for L1 isn't going to magically fix whatever issue cause L2 to fail. + * retrying an instruction from a nested guest as KVM is only explicitly + * shadowing L1's page tables, i.e. unprotecting something for L1 isn't + * going to magically fix whatever issue caused L2 to fail. */ - if (!mmio_info_in_cache(vcpu, cr2_or_gpa, direct) && !is_guest_mode(vcpu)) + if (!is_guest_mode(vcpu)) *emulation_type |= EMULTYPE_ALLOW_RETRY_PF; return RET_PF_EMULATE; From 0a37fffda14538036f5402a29920285284fe5d5b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 9 Aug 2024 12:43:22 -0700 Subject: [PATCH 197/655] KVM: x86/mmu: Move walk_slot_rmaps() up near for_each_slot_rmap_range() Move walk_slot_rmaps() and friends up near for_each_slot_rmap_range() so that the walkers can be used to handle mmu_notifier invalidations, and so that similar function has some amount of locality in code. No functional change intended. Link: https://lore.kernel.org/r/20240809194335.1726916-11-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/mmu/mmu.c | 106 ++++++++++++++++++++--------------------- 1 file changed, 53 insertions(+), 53 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 330b87a1c80a..17edf1499be7 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1516,6 +1516,59 @@ static void slot_rmap_walk_next(struct slot_rmap_walk_iterator *iterator) slot_rmap_walk_okay(_iter_); \ slot_rmap_walk_next(_iter_)) +/* The return value indicates if tlb flush on all vcpus is needed. */ +typedef bool (*slot_rmaps_handler) (struct kvm *kvm, + struct kvm_rmap_head *rmap_head, + const struct kvm_memory_slot *slot); + +static __always_inline bool __walk_slot_rmaps(struct kvm *kvm, + const struct kvm_memory_slot *slot, + slot_rmaps_handler fn, + int start_level, int end_level, + gfn_t start_gfn, gfn_t end_gfn, + bool flush_on_yield, bool flush) +{ + struct slot_rmap_walk_iterator iterator; + + lockdep_assert_held_write(&kvm->mmu_lock); + + for_each_slot_rmap_range(slot, start_level, end_level, start_gfn, + end_gfn, &iterator) { + if (iterator.rmap) + flush |= fn(kvm, iterator.rmap, slot); + + if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) { + if (flush && flush_on_yield) { + kvm_flush_remote_tlbs_range(kvm, start_gfn, + iterator.gfn - start_gfn + 1); + flush = false; + } + cond_resched_rwlock_write(&kvm->mmu_lock); + } + } + + return flush; +} + +static __always_inline bool walk_slot_rmaps(struct kvm *kvm, + const struct kvm_memory_slot *slot, + slot_rmaps_handler fn, + int start_level, int end_level, + bool flush_on_yield) +{ + return __walk_slot_rmaps(kvm, slot, fn, start_level, end_level, + slot->base_gfn, slot->base_gfn + slot->npages - 1, + flush_on_yield, false); +} + +static __always_inline bool walk_slot_rmaps_4k(struct kvm *kvm, + const struct kvm_memory_slot *slot, + slot_rmaps_handler fn, + bool flush_on_yield) +{ + return walk_slot_rmaps(kvm, slot, fn, PG_LEVEL_4K, PG_LEVEL_4K, flush_on_yield); +} + typedef bool (*rmap_handler_t)(struct kvm *kvm, struct kvm_rmap_head *rmap_head, struct kvm_memory_slot *slot, gfn_t gfn, int level); @@ -6272,59 +6325,6 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, } EXPORT_SYMBOL_GPL(kvm_configure_mmu); -/* The return value indicates if tlb flush on all vcpus is needed. */ -typedef bool (*slot_rmaps_handler) (struct kvm *kvm, - struct kvm_rmap_head *rmap_head, - const struct kvm_memory_slot *slot); - -static __always_inline bool __walk_slot_rmaps(struct kvm *kvm, - const struct kvm_memory_slot *slot, - slot_rmaps_handler fn, - int start_level, int end_level, - gfn_t start_gfn, gfn_t end_gfn, - bool flush_on_yield, bool flush) -{ - struct slot_rmap_walk_iterator iterator; - - lockdep_assert_held_write(&kvm->mmu_lock); - - for_each_slot_rmap_range(slot, start_level, end_level, start_gfn, - end_gfn, &iterator) { - if (iterator.rmap) - flush |= fn(kvm, iterator.rmap, slot); - - if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) { - if (flush && flush_on_yield) { - kvm_flush_remote_tlbs_range(kvm, start_gfn, - iterator.gfn - start_gfn + 1); - flush = false; - } - cond_resched_rwlock_write(&kvm->mmu_lock); - } - } - - return flush; -} - -static __always_inline bool walk_slot_rmaps(struct kvm *kvm, - const struct kvm_memory_slot *slot, - slot_rmaps_handler fn, - int start_level, int end_level, - bool flush_on_yield) -{ - return __walk_slot_rmaps(kvm, slot, fn, start_level, end_level, - slot->base_gfn, slot->base_gfn + slot->npages - 1, - flush_on_yield, false); -} - -static __always_inline bool walk_slot_rmaps_4k(struct kvm *kvm, - const struct kvm_memory_slot *slot, - slot_rmaps_handler fn, - bool flush_on_yield) -{ - return walk_slot_rmaps(kvm, slot, fn, PG_LEVEL_4K, PG_LEVEL_4K, flush_on_yield); -} - static void free_mmu_pages(struct kvm_mmu *mmu) { if (!tdp_enabled && mmu->pae_root) From 5b1fb116e1a636701627a6eb202d17be93e8f7a8 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 9 Aug 2024 12:43:23 -0700 Subject: [PATCH 198/655] KVM: x86/mmu: Plumb a @can_yield parameter into __walk_slot_rmaps() Add a @can_yield param to __walk_slot_rmaps() to control whether or not dropping mmu_lock and conditionally rescheduling is allowed. This will allow using __walk_slot_rmaps() and thus cond_resched() to handle mmu_notifier invalidations, which usually allow blocking/yielding, but not when invoked by the OOM killer. Link: https://lore.kernel.org/r/20240809194335.1726916-12-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/mmu/mmu.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 17edf1499be7..e3adc934559d 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1526,7 +1526,8 @@ static __always_inline bool __walk_slot_rmaps(struct kvm *kvm, slot_rmaps_handler fn, int start_level, int end_level, gfn_t start_gfn, gfn_t end_gfn, - bool flush_on_yield, bool flush) + bool can_yield, bool flush_on_yield, + bool flush) { struct slot_rmap_walk_iterator iterator; @@ -1537,6 +1538,9 @@ static __always_inline bool __walk_slot_rmaps(struct kvm *kvm, if (iterator.rmap) flush |= fn(kvm, iterator.rmap, slot); + if (!can_yield) + continue; + if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) { if (flush && flush_on_yield) { kvm_flush_remote_tlbs_range(kvm, start_gfn, @@ -1558,7 +1562,7 @@ static __always_inline bool walk_slot_rmaps(struct kvm *kvm, { return __walk_slot_rmaps(kvm, slot, fn, start_level, end_level, slot->base_gfn, slot->base_gfn + slot->npages - 1, - flush_on_yield, false); + true, flush_on_yield, false); } static __always_inline bool walk_slot_rmaps_4k(struct kvm *kvm, @@ -6600,7 +6604,7 @@ static bool kvm_rmap_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_e flush = __walk_slot_rmaps(kvm, memslot, __kvm_zap_rmap, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL, - start, end - 1, true, flush); + start, end - 1, true, true, flush); } } @@ -6888,7 +6892,7 @@ static void kvm_shadow_mmu_try_split_huge_pages(struct kvm *kvm, */ for (level = KVM_MAX_HUGEPAGE_LEVEL; level > target_level; level--) __walk_slot_rmaps(kvm, slot, shadow_mmu_try_split_huge_pages, - level, level, start, end - 1, true, false); + level, level, start, end - 1, true, true, false); } /* Must be called with the mmu_lock held in write-mode. */ From dd9eaad744f4ed30913cca423439a1765a760c71 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 9 Aug 2024 12:43:24 -0700 Subject: [PATCH 199/655] KVM: x86/mmu: Add a helper to walk and zap rmaps for a memslot Add a dedicated helper to walk and zap rmaps for a given memslot so that the code can be shared between KVM-initiated zaps and mmu_notifier invalidations. No functional change intended. Link: https://lore.kernel.org/r/20240809194335.1726916-13-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/mmu/mmu.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index e3adc934559d..70b043d7701d 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1573,6 +1573,16 @@ static __always_inline bool walk_slot_rmaps_4k(struct kvm *kvm, return walk_slot_rmaps(kvm, slot, fn, PG_LEVEL_4K, PG_LEVEL_4K, flush_on_yield); } +static bool __kvm_rmap_zap_gfn_range(struct kvm *kvm, + const struct kvm_memory_slot *slot, + gfn_t start, gfn_t end, bool can_yield, + bool flush) +{ + return __walk_slot_rmaps(kvm, slot, __kvm_zap_rmap, + PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL, + start, end - 1, can_yield, true, flush); +} + typedef bool (*rmap_handler_t)(struct kvm *kvm, struct kvm_rmap_head *rmap_head, struct kvm_memory_slot *slot, gfn_t gfn, int level); @@ -6602,9 +6612,8 @@ static bool kvm_rmap_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_e if (WARN_ON_ONCE(start >= end)) continue; - flush = __walk_slot_rmaps(kvm, memslot, __kvm_zap_rmap, - PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL, - start, end - 1, true, true, flush); + flush = __kvm_rmap_zap_gfn_range(kvm, memslot, start, + end, true, flush); } } From 548f87f667a38ffeb2f021d9cfbc1f1b34fb4cb5 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 9 Aug 2024 12:43:25 -0700 Subject: [PATCH 200/655] KVM: x86/mmu: Honor NEED_RESCHED when zapping rmaps and blocking is allowed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert kvm_unmap_gfn_range(), which is the helper that zaps rmap SPTEs in response to an mmu_notifier invalidation, to use __kvm_rmap_zap_gfn_range() and feed in range->may_block. In other words, honor NEED_RESCHED by way of cond_resched() when zapping rmaps. This fixes a long-standing issue where KVM could process an absurd number of rmap entries without ever yielding, e.g. if an mmu_notifier fired on a PUD (or larger) range. Opportunistically rename __kvm_zap_rmap() to kvm_zap_rmap(), and drop the old kvm_zap_rmap(). Ideally, the shuffling would be done in a different patch, but that just makes the compiler unhappy, e.g. arch/x86/kvm/mmu/mmu.c:1462:13: error: ‘kvm_zap_rmap’ defined but not used Reported-by: Peter Xu Link: https://lore.kernel.org/r/20240809194335.1726916-14-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/mmu/mmu.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 70b043d7701d..27a8a4f486c5 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1435,18 +1435,12 @@ static bool kvm_vcpu_write_protect_gfn(struct kvm_vcpu *vcpu, u64 gfn) return kvm_mmu_slot_gfn_write_protect(vcpu->kvm, slot, gfn, PG_LEVEL_4K); } -static bool __kvm_zap_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head, - const struct kvm_memory_slot *slot) +static bool kvm_zap_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head, + const struct kvm_memory_slot *slot) { return kvm_zap_all_rmap_sptes(kvm, rmap_head); } -static bool kvm_zap_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head, - struct kvm_memory_slot *slot, gfn_t gfn, int level) -{ - return __kvm_zap_rmap(kvm, rmap_head, slot); -} - struct slot_rmap_walk_iterator { /* input fields. */ const struct kvm_memory_slot *slot; @@ -1578,7 +1572,7 @@ static bool __kvm_rmap_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end, bool can_yield, bool flush) { - return __walk_slot_rmaps(kvm, slot, __kvm_zap_rmap, + return __walk_slot_rmaps(kvm, slot, kvm_zap_rmap, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL, start, end - 1, can_yield, true, flush); } @@ -1607,7 +1601,9 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) bool flush = false; if (kvm_memslots_have_rmaps(kvm)) - flush = kvm_handle_gfn_range(kvm, range, kvm_zap_rmap); + flush = __kvm_rmap_zap_gfn_range(kvm, range->slot, + range->start, range->end, + range->may_block, flush); if (tdp_mmu_enabled) flush = kvm_tdp_mmu_unmap_gfn_range(kvm, range, flush); From c17f150000f6b06061dc109bc2dd2858898a62b2 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 9 Aug 2024 12:43:26 -0700 Subject: [PATCH 201/655] KVM: x86/mmu: Morph kvm_handle_gfn_range() into an aging specific helper Rework kvm_handle_gfn_range() into an aging-specic helper, kvm_rmap_age_gfn_range(). In addition to purging a bunch of unnecessary boilerplate code, this sets the stage for aging rmap SPTEs outside of mmu_lock. Note, there's a small functional change, as kvm_test_age_gfn() will now return immediately if a young SPTE is found, whereas previously KVM would continue iterating over other levels. Link: https://lore.kernel.org/r/20240809194335.1726916-15-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/mmu/mmu.c | 68 ++++++++++++++---------------------------- 1 file changed, 22 insertions(+), 46 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 27a8a4f486c5..9b977560677b 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1577,25 +1577,6 @@ static bool __kvm_rmap_zap_gfn_range(struct kvm *kvm, start, end - 1, can_yield, true, flush); } -typedef bool (*rmap_handler_t)(struct kvm *kvm, struct kvm_rmap_head *rmap_head, - struct kvm_memory_slot *slot, gfn_t gfn, - int level); - -static __always_inline bool kvm_handle_gfn_range(struct kvm *kvm, - struct kvm_gfn_range *range, - rmap_handler_t handler) -{ - struct slot_rmap_walk_iterator iterator; - bool ret = false; - - for_each_slot_rmap_range(range->slot, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL, - range->start, range->end - 1, &iterator) - ret |= handler(kvm, iterator.rmap, range->slot, iterator.gfn, - iterator.level); - - return ret; -} - bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) { bool flush = false; @@ -1615,31 +1596,6 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) return flush; } -static bool kvm_age_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head, - struct kvm_memory_slot *slot, gfn_t gfn, int level) -{ - u64 *sptep; - struct rmap_iterator iter; - int young = 0; - - for_each_rmap_spte(rmap_head, &iter, sptep) - young |= mmu_spte_age(sptep); - - return young; -} - -static bool kvm_test_age_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head, - struct kvm_memory_slot *slot, gfn_t gfn, int level) -{ - u64 *sptep; - struct rmap_iterator iter; - - for_each_rmap_spte(rmap_head, &iter, sptep) - if (is_accessed_spte(*sptep)) - return true; - return false; -} - #define RMAP_RECYCLE_THRESHOLD 1000 static void __rmap_add(struct kvm *kvm, @@ -1674,12 +1630,32 @@ static void rmap_add(struct kvm_vcpu *vcpu, const struct kvm_memory_slot *slot, __rmap_add(vcpu->kvm, cache, slot, spte, gfn, access); } +static bool kvm_rmap_age_gfn_range(struct kvm *kvm, + struct kvm_gfn_range *range, bool test_only) +{ + struct slot_rmap_walk_iterator iterator; + struct rmap_iterator iter; + bool young = false; + u64 *sptep; + + for_each_slot_rmap_range(range->slot, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL, + range->start, range->end - 1, &iterator) { + for_each_rmap_spte(iterator.rmap, &iter, sptep) { + if (test_only && is_accessed_spte(*sptep)) + return true; + + young = mmu_spte_age(sptep); + } + } + return young; +} + bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { bool young = false; if (kvm_memslots_have_rmaps(kvm)) - young = kvm_handle_gfn_range(kvm, range, kvm_age_rmap); + young = kvm_rmap_age_gfn_range(kvm, range, false); if (tdp_mmu_enabled) young |= kvm_tdp_mmu_age_gfn_range(kvm, range); @@ -1692,7 +1668,7 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) bool young = false; if (kvm_memslots_have_rmaps(kvm)) - young = kvm_handle_gfn_range(kvm, range, kvm_test_age_rmap); + young = kvm_rmap_age_gfn_range(kvm, range, true); if (tdp_mmu_enabled) young |= kvm_tdp_mmu_test_age_gfn(kvm, range); From 7aac9dc680da9390a22515c3f822c9d1907c4f02 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 9 Aug 2024 12:43:27 -0700 Subject: [PATCH 202/655] KVM: x86/mmu: Fold mmu_spte_age() into kvm_rmap_age_gfn_range() Fold mmu_spte_age() into its sole caller now that aging and testing for young SPTEs is handled in a common location, i.e. doesn't require more helpers. Opportunistically remove the use of mmu_spte_get_lockless(), as mmu_lock is held (for write!), and marking SPTEs for access tracking outside of mmu_lock is unsafe (at least, as written). I.e. using the lockless accessor is quite misleading. No functional change intended. Link: https://lore.kernel.org/r/20240809194335.1726916-16-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/mmu/mmu.c | 50 +++++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 28 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 9b977560677b..24ca233d78cd 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -614,32 +614,6 @@ static u64 mmu_spte_get_lockless(u64 *sptep) return __get_spte_lockless(sptep); } -/* Returns the Accessed status of the PTE and resets it at the same time. */ -static bool mmu_spte_age(u64 *sptep) -{ - u64 spte = mmu_spte_get_lockless(sptep); - - if (!is_accessed_spte(spte)) - return false; - - if (spte_ad_enabled(spte)) { - clear_bit((ffs(shadow_accessed_mask) - 1), - (unsigned long *)sptep); - } else { - /* - * Capture the dirty status of the page, so that it doesn't get - * lost when the SPTE is marked for access tracking. - */ - if (is_writable_pte(spte)) - kvm_set_pfn_dirty(spte_to_pfn(spte)); - - spte = mark_spte_for_access_track(spte); - mmu_spte_update_no_track(sptep, spte); - } - - return true; -} - static inline bool is_tdp_mmu_active(struct kvm_vcpu *vcpu) { return tdp_mmu_enabled && vcpu->arch.mmu->root_role.direct; @@ -1641,10 +1615,30 @@ static bool kvm_rmap_age_gfn_range(struct kvm *kvm, for_each_slot_rmap_range(range->slot, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL, range->start, range->end - 1, &iterator) { for_each_rmap_spte(iterator.rmap, &iter, sptep) { - if (test_only && is_accessed_spte(*sptep)) + u64 spte = *sptep; + + if (!is_accessed_spte(spte)) + continue; + + if (test_only) return true; - young = mmu_spte_age(sptep); + if (spte_ad_enabled(spte)) { + clear_bit((ffs(shadow_accessed_mask) - 1), + (unsigned long *)sptep); + } else { + /* + * Capture the dirty status of the page, so that + * it doesn't get lost when the SPTE is marked + * for access tracking. + */ + if (is_writable_pte(spte)) + kvm_set_pfn_dirty(spte_to_pfn(spte)); + + spte = mark_spte_for_access_track(spte); + mmu_spte_update_no_track(sptep, spte); + } + young = true; } } return young; From 7645829145a91a3e13fdc322492500dae46ca17c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 9 Aug 2024 12:43:28 -0700 Subject: [PATCH 203/655] KVM: x86/mmu: Add KVM_RMAP_MANY to replace open coded '1' and '1ul' literals Replace all of the open coded '1' literals used to mark a PTE list as having many/multiple entries with a proper define. It's hard enough to read the code with one magic bit, and a future patch to support "locking" a single rmap will add another. No functional change intended. Link: https://lore.kernel.org/r/20240809194335.1726916-17-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/mmu/mmu.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 24ca233d78cd..53e24892055a 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -912,6 +912,7 @@ static struct kvm_memory_slot *gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu * in this rmap chain. Otherwise, (rmap_head->val & ~1) points to a struct * pte_list_desc containing more mappings. */ +#define KVM_RMAP_MANY BIT(0) /* * Returns the number of pointers in the rmap chain, not counting the new one. @@ -924,16 +925,16 @@ static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte, if (!rmap_head->val) { rmap_head->val = (unsigned long)spte; - } else if (!(rmap_head->val & 1)) { + } else if (!(rmap_head->val & KVM_RMAP_MANY)) { desc = kvm_mmu_memory_cache_alloc(cache); desc->sptes[0] = (u64 *)rmap_head->val; desc->sptes[1] = spte; desc->spte_count = 2; desc->tail_count = 0; - rmap_head->val = (unsigned long)desc | 1; + rmap_head->val = (unsigned long)desc | KVM_RMAP_MANY; ++count; } else { - desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); + desc = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY); count = desc->tail_count + desc->spte_count; /* @@ -942,10 +943,10 @@ static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte, */ if (desc->spte_count == PTE_LIST_EXT) { desc = kvm_mmu_memory_cache_alloc(cache); - desc->more = (struct pte_list_desc *)(rmap_head->val & ~1ul); + desc->more = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY); desc->spte_count = 0; desc->tail_count = count; - rmap_head->val = (unsigned long)desc | 1; + rmap_head->val = (unsigned long)desc | KVM_RMAP_MANY; } desc->sptes[desc->spte_count++] = spte; } @@ -956,7 +957,7 @@ static void pte_list_desc_remove_entry(struct kvm *kvm, struct kvm_rmap_head *rmap_head, struct pte_list_desc *desc, int i) { - struct pte_list_desc *head_desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); + struct pte_list_desc *head_desc = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY); int j = head_desc->spte_count - 1; /* @@ -985,7 +986,7 @@ static void pte_list_desc_remove_entry(struct kvm *kvm, if (!head_desc->more) rmap_head->val = 0; else - rmap_head->val = (unsigned long)head_desc->more | 1; + rmap_head->val = (unsigned long)head_desc->more | KVM_RMAP_MANY; mmu_free_pte_list_desc(head_desc); } @@ -998,13 +999,13 @@ static void pte_list_remove(struct kvm *kvm, u64 *spte, if (KVM_BUG_ON_DATA_CORRUPTION(!rmap_head->val, kvm)) return; - if (!(rmap_head->val & 1)) { + if (!(rmap_head->val & KVM_RMAP_MANY)) { if (KVM_BUG_ON_DATA_CORRUPTION((u64 *)rmap_head->val != spte, kvm)) return; rmap_head->val = 0; } else { - desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); + desc = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY); while (desc) { for (i = 0; i < desc->spte_count; ++i) { if (desc->sptes[i] == spte) { @@ -1037,12 +1038,12 @@ static bool kvm_zap_all_rmap_sptes(struct kvm *kvm, if (!rmap_head->val) return false; - if (!(rmap_head->val & 1)) { + if (!(rmap_head->val & KVM_RMAP_MANY)) { mmu_spte_clear_track_bits(kvm, (u64 *)rmap_head->val); goto out; } - desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); + desc = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY); for (; desc; desc = next) { for (i = 0; i < desc->spte_count; i++) @@ -1062,10 +1063,10 @@ unsigned int pte_list_count(struct kvm_rmap_head *rmap_head) if (!rmap_head->val) return 0; - else if (!(rmap_head->val & 1)) + else if (!(rmap_head->val & KVM_RMAP_MANY)) return 1; - desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); + desc = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY); return desc->tail_count + desc->spte_count; } @@ -1127,13 +1128,13 @@ static u64 *rmap_get_first(struct kvm_rmap_head *rmap_head, if (!rmap_head->val) return NULL; - if (!(rmap_head->val & 1)) { + if (!(rmap_head->val & KVM_RMAP_MANY)) { iter->desc = NULL; sptep = (u64 *)rmap_head->val; goto out; } - iter->desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); + iter->desc = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY); iter->pos = 0; sptep = iter->desc->sptes[iter->pos]; out: From 9a5bff7f5ec2383e3edac5eda561b52e267ccbb5 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 9 Aug 2024 12:43:30 -0700 Subject: [PATCH 204/655] KVM: x86/mmu: Use KVM_PAGES_PER_HPAGE() instead of an open coded equivalent Use KVM_PAGES_PER_HPAGE() instead of open coding equivalent logic that is anything but obvious. No functional change intended, and verified by compiling with the below assertions: BUILD_BUG_ON((1UL << KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K)) != KVM_PAGES_PER_HPAGE(PG_LEVEL_4K)); BUILD_BUG_ON((1UL << KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M)) != KVM_PAGES_PER_HPAGE(PG_LEVEL_2M)); BUILD_BUG_ON((1UL << KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G)) != KVM_PAGES_PER_HPAGE(PG_LEVEL_1G)); Link: https://lore.kernel.org/r/20240809194335.1726916-19-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/mmu/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 53e24892055a..b751e7e2a05e 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1464,7 +1464,7 @@ static bool slot_rmap_walk_okay(struct slot_rmap_walk_iterator *iterator) static void slot_rmap_walk_next(struct slot_rmap_walk_iterator *iterator) { while (++iterator->rmap <= iterator->end_rmap) { - iterator->gfn += (1UL << KVM_HPAGE_GFN_SHIFT(iterator->level)); + iterator->gfn += KVM_PAGES_PER_HPAGE(iterator->level); if (iterator->rmap->val) return; From f3009482512eb057e7161214a068c6bd7bae83a4 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 30 Aug 2024 17:15:16 -0700 Subject: [PATCH 205/655] KVM: VMX: Set PFERR_GUEST_{FINAL,PAGE}_MASK if and only if the GVA is valid MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set PFERR_GUEST_{FINAL,PAGE}_MASK based on EPT_VIOLATION_GVA_TRANSLATED if and only if EPT_VIOLATION_GVA_IS_VALID is also set in exit qualification. Per the SDM, bit 8 (EPT_VIOLATION_GVA_TRANSLATED) is valid if and only if bit 7 (EPT_VIOLATION_GVA_IS_VALID) is set, and is '0' if bit 7 is '0'. Bit 7 (a.k.a. EPT_VIOLATION_GVA_IS_VALID) Set if the guest linear-address field is valid. The guest linear-address field is valid for all EPT violations except those resulting from an attempt to load the guest PDPTEs as part of the execution of the MOV CR instruction and those due to trace-address pre-translation Bit 8 (a.k.a. EPT_VIOLATION_GVA_TRANSLATED) If bit 7 is 1: • Set if the access causing the EPT violation is to a guest-physical address that is the translation of a linear address. • Clear if the access causing the EPT violation is to a paging-structure entry as part of a page walk or the update of an accessed or dirty bit. Reserved if bit 7 is 0 (cleared to 0). Failure to guard the logic on GVA_IS_VALID results in KVM marking the page fault as PFERR_GUEST_PAGE_MASK when there is no known GVA, which can put the vCPU into an infinite loop due to kvm_mmu_page_fault() getting false positive on its PFERR_NESTED_GUEST_PAGE logic (though only because that logic is also buggy/flawed). In practice, this is largely a non-issue because so GVA_IS_VALID is almost always set. However, when TDX comes along, GVA_IS_VALID will *never* be set, as the TDX Module deliberately clears bits 12:7 in exit qualification, e.g. so that the faulting virtual address and other metadata that aren't practically useful for the hypervisor aren't leaked to the untrusted host. When exit is due to EPT violation, bits 12-7 of the exit qualification are cleared to 0. Fixes: eebed2438923 ("kvm: nVMX: Add support for fast unprotection of nested guest page tables") Reviewed-by: Yuan Yao Link: https://lore.kernel.org/r/20240831001538.336683-2-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/vmx/vmx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index fe99deceebbd..ec1aee1f9057 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5807,8 +5807,9 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) error_code |= (exit_qualification & EPT_VIOLATION_RWX_MASK) ? PFERR_PRESENT_MASK : 0; - error_code |= (exit_qualification & EPT_VIOLATION_GVA_TRANSLATED) != 0 ? - PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK; + if (error_code & EPT_VIOLATION_GVA_IS_VALID) + error_code |= (exit_qualification & EPT_VIOLATION_GVA_TRANSLATED) ? + PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK; /* * Check that the GPA doesn't exceed physical memory limits, as that is From 1d7f856c2ca449f04a22d876e36b464b7a9d28b6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 9 Sep 2024 12:50:09 +0200 Subject: [PATCH 206/655] jump_label: Fix static_key_slow_dec() yet again While commit 83ab38ef0a0b ("jump_label: Fix concurrency issues in static_key_slow_dec()") fixed one problem, it created yet another, notably the following is now possible: slow_dec if (try_dec) // dec_not_one-ish, false // enabled == 1 slow_inc if (inc_not_disabled) // inc_not_zero-ish // enabled == 2 return guard((mutex)(&jump_label_mutex); if (atomic_cmpxchg(1,0)==1) // false, we're 2 slow_dec if (try-dec) // dec_not_one, true // enabled == 1 return else try_dec() // dec_not_one, false WARN Use dec_and_test instead of cmpxchg(), like it was prior to 83ab38ef0a0b. Add a few WARNs for the paranoid. Fixes: 83ab38ef0a0b ("jump_label: Fix concurrency issues in static_key_slow_dec()") Reported-by: "Darrick J. Wong" Tested-by: Klara Modin Signed-off-by: Peter Zijlstra (Intel) --- kernel/jump_label.c | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 6dc76b590703..93a822d3c468 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -168,7 +168,7 @@ bool static_key_slow_inc_cpuslocked(struct static_key *key) jump_label_update(key); /* * Ensure that when static_key_fast_inc_not_disabled() or - * static_key_slow_try_dec() observe the positive value, + * static_key_dec_not_one() observe the positive value, * they must also observe all the text changes. */ atomic_set_release(&key->enabled, 1); @@ -250,7 +250,7 @@ void static_key_disable(struct static_key *key) } EXPORT_SYMBOL_GPL(static_key_disable); -static bool static_key_slow_try_dec(struct static_key *key) +static bool static_key_dec_not_one(struct static_key *key) { int v; @@ -274,6 +274,14 @@ static bool static_key_slow_try_dec(struct static_key *key) * enabled. This suggests an ordering problem on the user side. */ WARN_ON_ONCE(v < 0); + + /* + * Warn about underflow, and lie about success in an attempt to + * not make things worse. + */ + if (WARN_ON_ONCE(v == 0)) + return true; + if (v <= 1) return false; } while (!likely(atomic_try_cmpxchg(&key->enabled, &v, v - 1))); @@ -284,15 +292,27 @@ static bool static_key_slow_try_dec(struct static_key *key) static void __static_key_slow_dec_cpuslocked(struct static_key *key) { lockdep_assert_cpus_held(); + int val; - if (static_key_slow_try_dec(key)) + if (static_key_dec_not_one(key)) return; guard(mutex)(&jump_label_mutex); - if (atomic_cmpxchg(&key->enabled, 1, 0) == 1) + val = atomic_read(&key->enabled); + /* + * It should be impossible to observe -1 with jump_label_mutex held, + * see static_key_slow_inc_cpuslocked(). + */ + if (WARN_ON_ONCE(val == -1)) + return; + /* + * Cannot already be 0, something went sideways. + */ + if (WARN_ON_ONCE(val == 0)) + return; + + if (atomic_dec_and_test(&key->enabled)) jump_label_update(key); - else - WARN_ON_ONCE(!static_key_slow_try_dec(key)); } static void __static_key_slow_dec(struct static_key *key) @@ -329,7 +349,7 @@ void __static_key_slow_dec_deferred(struct static_key *key, { STATIC_KEY_CHECK_USE(key); - if (static_key_slow_try_dec(key)) + if (static_key_dec_not_one(key)) return; schedule_delayed_work(work, timeout); From d00b83d416e73bc3fa4d21b14bec920e88b70ce6 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Mon, 9 Sep 2024 14:29:05 -0400 Subject: [PATCH 207/655] locking/rwsem: Move is_rwsem_reader_owned() and rwsem_owner() under CONFIG_DEBUG_RWSEMS Both is_rwsem_reader_owned() and rwsem_owner() are currently only used when CONFIG_DEBUG_RWSEMS is defined. This causes a compilation error with clang when `make W=1` and CONFIG_WERROR=y: kernel/locking/rwsem.c:187:20: error: unused function 'is_rwsem_reader_owned' [-Werror,-Wunused-function] 187 | static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem) | ^~~~~~~~~~~~~~~~~~~~~ kernel/locking/rwsem.c:271:35: error: unused function 'rwsem_owner' [-Werror,-Wunused-function] 271 | static inline struct task_struct *rwsem_owner(struct rw_semaphore *sem) | ^~~~~~~~~~~ Fix this by moving these two functions under the CONFIG_DEBUG_RWSEMS define. Reported-by: Andy Shevchenko Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andy Shevchenko Tested-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240909182905.161156-1-longman@redhat.com --- kernel/locking/rwsem.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 33cac79e3994..4b041e9c408f 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -181,12 +181,21 @@ static inline void rwsem_set_reader_owned(struct rw_semaphore *sem) __rwsem_set_reader_owned(sem, current); } +#ifdef CONFIG_DEBUG_RWSEMS +/* + * Return just the real task structure pointer of the owner + */ +static inline struct task_struct *rwsem_owner(struct rw_semaphore *sem) +{ + return (struct task_struct *) + (atomic_long_read(&sem->owner) & ~RWSEM_OWNER_FLAGS_MASK); +} + /* * Return true if the rwsem is owned by a reader. */ static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem) { -#ifdef CONFIG_DEBUG_RWSEMS /* * Check the count to see if it is write-locked. */ @@ -194,11 +203,9 @@ static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem) if (count & RWSEM_WRITER_MASK) return false; -#endif return rwsem_test_oflags(sem, RWSEM_READER_OWNED); } -#ifdef CONFIG_DEBUG_RWSEMS /* * With CONFIG_DEBUG_RWSEMS configured, it will make sure that if there * is a task pointer in owner of a reader-owned rwsem, it will be the @@ -265,15 +272,6 @@ static inline bool rwsem_write_trylock(struct rw_semaphore *sem) return false; } -/* - * Return just the real task structure pointer of the owner - */ -static inline struct task_struct *rwsem_owner(struct rw_semaphore *sem) -{ - return (struct task_struct *) - (atomic_long_read(&sem->owner) & ~RWSEM_OWNER_FLAGS_MASK); -} - /* * Return the real task structure pointer of the owner and the embedded * flags in the owner. pflags must be non-NULL. From 4440337af4d415c8abf8b9b0e10c79b7518e6e3c Mon Sep 17 00:00:00 2001 From: Amit Shah Date: Wed, 7 Aug 2024 14:35:31 +0200 Subject: [PATCH 208/655] KVM: SVM: let alternatives handle the cases when RSB filling is required Remove superfluous RSB filling after a VMEXIT when the CPU already has flushed the RSB after a VMEXIT when AutoIBRS is enabled. The initial implementation for adding RETPOLINES added an ALTERNATIVES implementation for filling the RSB after a VMEXIT in commit 117cc7a908c8 ("x86/retpoline: Fill return stack buffer on vmexit"). Later, X86_FEATURE_RSB_VMEXIT was added in commit 9756bba28470 ("x86/speculation: Fill RSB on vmexit for IBRS") to handle stuffing the RSB if RETPOLINE=y *or* KERNEL_IBRS=y, i.e. to also stuff the RSB if the kernel is configured to do IBRS mitigations on entry/exit. The AutoIBRS (on AMD) feature implementation added in commit e7862eda309e ("x86/cpu: Support AMD Automatic IBRS") used the already-implemented logic for EIBRS in spectre_v2_determine_rsb_fill_type_on_vmexit() -- but did not update the code at VMEXIT to act on the mode selected in that function -- resulting in VMEXITs continuing to clear the RSB when RETPOLINES are enabled, despite the presence of AutoIBRS. Signed-off-by: Amit Shah Link: https://lore.kernel.org/r/20240807123531.69677-1-amit@kernel.org [sean: massage changeloge, drop comment about AMD not needing RSB_VMEXIT_LITE] Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/vmenter.S | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index a0c8eb37d3e1..2ed80aea3bb1 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -209,10 +209,8 @@ SYM_FUNC_START(__svm_vcpu_run) 7: vmload %_ASM_AX 8: -#ifdef CONFIG_MITIGATION_RETPOLINE /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ - FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE -#endif + FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT /* Clobbers RAX, RCX, RDX. */ RESTORE_HOST_SPEC_CTRL @@ -348,10 +346,8 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) 2: cli -#ifdef CONFIG_MITIGATION_RETPOLINE /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ - FILL_RETURN_BUFFER %rax, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE -#endif + FILL_RETURN_BUFFER %rax, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT /* Clobbers RAX, RCX, RDX, consumes RDI (@svm) and RSI (@spec_ctrl_intercepted). */ RESTORE_HOST_SPEC_CTRL From 9c2010bccc0ce012f52de18ebd0c3add241f75b8 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 10 Sep 2024 10:52:59 -0700 Subject: [PATCH 209/655] dm-integrity: check mac_size against HASH_MAX_DIGESTSIZE in sb_mac() sb_mac() verifies that the superblock + MAC don't exceed 512 bytes. Because the superblock is currently 64 bytes, this really verifies mac_size <= 448. This confuses smatch into thinking that mac_size may be as large as 448, which is inconsistent with the later code that assumes the MAC fits in a buffer of size HASH_MAX_DIGESTSIZE (64). In fact mac_size <= HASH_MAX_DIGESTSIZE is guaranteed by the crypto API, as that is the whole point of HASH_MAX_DIGESTSIZE. But, let's be defensive and explicitly check for this. This suppresses the false positive smatch warning. It does not fix an actual bug. Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202409061401.44rtN1bh-lkp@intel.com/ Signed-off-by: Eric Biggers Signed-off-by: Mikulas Patocka --- drivers/md/dm-integrity.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index c40df05e0521..42c9dc2ee0c0 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -494,7 +494,8 @@ static int sb_mac(struct dm_integrity_c *ic, bool wr) __u8 *sb = (__u8 *)ic->sb; __u8 *mac = sb + (1 << SECTOR_SHIFT) - mac_size; - if (sizeof(struct superblock) + mac_size > 1 << SECTOR_SHIFT) { + if (sizeof(struct superblock) + mac_size > 1 << SECTOR_SHIFT || + mac_size > HASH_MAX_DIGESTSIZE) { dm_integrity_io_error(ic, "digest is too long", -EINVAL); return -EINVAL; } From c7f06284a6427475e3df742215535ec3f6cd9662 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 11 Sep 2024 10:39:15 +0300 Subject: [PATCH 210/655] ep93xx: clock: Fix off by one in ep93xx_div_recalc_rate() The psc->div[] array has psc->num_div elements. These values come from when we call clk_hw_register_div(). It's adc_divisors and ARRAY_SIZE(adc_divisors)) and so on. So this condition needs to be >= instead of > to prevent an out of bounds read. Fixes: 9645ccc7bd7a ("ep93xx: clock: convert in-place to COMMON_CLK") Signed-off-by: Dan Carpenter Acked-by: Alexander Sverdlin Reviewed-by: Nikita Shubin Signed-off-by: Alexander Sverdlin Link: https://lore.kernel.org/r/1caf01ad4c0a8069535813c26c7f0b8ea011155e.camel@linaro.org Signed-off-by: Arnd Bergmann --- arch/arm/mach-ep93xx/clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-ep93xx/clock.c b/arch/arm/mach-ep93xx/clock.c index 85a496ddc619..e9f72a529b50 100644 --- a/arch/arm/mach-ep93xx/clock.c +++ b/arch/arm/mach-ep93xx/clock.c @@ -359,7 +359,7 @@ static unsigned long ep93xx_div_recalc_rate(struct clk_hw *hw, u32 val = __raw_readl(psc->reg); u8 index = (val & psc->mask) >> psc->shift; - if (index > psc->num_div) + if (index >= psc->num_div) return 0; return DIV_ROUND_UP_ULL(parent_rate, psc->div[index]); From efb0b309fa0d8a92f9b303d292944cda08349eed Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Sun, 8 Sep 2024 10:48:47 +0800 Subject: [PATCH 211/655] driver core: Trivially simplify ((struct device_private *)curr)->device->p to @curr Trivially simplify ((struct device_private *)curr)->device->p to @curr in deferred_devs_show() since both are same. Signed-off-by: Zijun Hu Link: https://lore.kernel.org/r/20240908-trivial_simpli-v1-1-53e0f1363299@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/dd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 9b745ba54de1..a7cc7ff0923b 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -248,7 +248,7 @@ static int deferred_devs_show(struct seq_file *s, void *data) list_for_each_entry(curr, &deferred_probe_pending_list, deferred_probe) seq_printf(s, "%s\t%s", dev_name(curr->device), - curr->device->p->deferred_probe_reason ?: "\n"); + curr->deferred_probe_reason ?: "\n"); mutex_unlock(&deferred_probe_mutex); From cbe8e464eb29057f0f368e493277bdaa6dc5d656 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:26 +0300 Subject: [PATCH 212/655] gpio: ep93xx: split device in multiple Prepare ep93xx SOC gpio to convert into device tree driver: - dropped banks and legacy defines - split AB IRQ and make it shared We are relying on IRQ number information A, B ports have single shared IRQ, while F port have dedicated IRQ for each line. Also we had to split single ep93xx platform_device into multiple, one for each port, without this we can't do a full working transition from legacy platform code into device tree capable. All GPIO_LOOKUP were change to match new chip namings. Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Reviewed-by: Linus Walleij Reviewed-by: Andy Shevchenko Reviewed-by: Andy Shevchenko Reviewed-by: Krzysztof Kozlowski Reviewed-by: Guenter Roeck Acked-by: Bartosz Golaszewski Acked-by: Miquel Raynal Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- arch/arm/mach-ep93xx/core.c | 121 +++++++++-- arch/arm/mach-ep93xx/edb93xx.c | 2 +- arch/arm/mach-ep93xx/ts72xx.c | 4 +- arch/arm/mach-ep93xx/vision_ep9307.c | 10 +- drivers/gpio/gpio-ep93xx.c | 311 ++++++++++----------------- 5 files changed, 228 insertions(+), 220 deletions(-) diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c index 8b1ec60a9a46..03bce5e9d1f1 100644 --- a/arch/arm/mach-ep93xx/core.c +++ b/arch/arm/mach-ep93xx/core.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "hardware.h" #include @@ -139,9 +140,80 @@ EXPORT_SYMBOL_GPL(ep93xx_chip_revision); /************************************************************************* * EP93xx GPIO *************************************************************************/ -static struct resource ep93xx_gpio_resource[] = { - DEFINE_RES_MEM(EP93XX_GPIO_PHYS_BASE, 0xcc), +/* port A */ +static struct resource ep93xx_a_gpio_resources[] = { + DEFINE_RES_MEM_NAMED(EP93XX_GPIO_PHYS_BASE, 0x04, "data"), + DEFINE_RES_MEM_NAMED(EP93XX_GPIO_PHYS_BASE + 0x10, 0x04, "dir"), + DEFINE_RES_MEM_NAMED(EP93XX_GPIO_PHYS_BASE + 0x90, 0x1c, "intr"), DEFINE_RES_IRQ(IRQ_EP93XX_GPIO_AB), +}; + +static struct platform_device ep93xx_a_gpio = { + .name = "gpio-ep93xx", + .id = 0, + .num_resources = ARRAY_SIZE(ep93xx_a_gpio_resources), + .resource = ep93xx_a_gpio_resources, +}; + +/* port B */ +static struct resource ep93xx_b_gpio_resources[] = { + DEFINE_RES_MEM_NAMED(EP93XX_GPIO_PHYS_BASE + 0x04, 0x04, "data"), + DEFINE_RES_MEM_NAMED(EP93XX_GPIO_PHYS_BASE + 0x14, 0x04, "dir"), + DEFINE_RES_MEM_NAMED(EP93XX_GPIO_PHYS_BASE + 0xac, 0x1c, "intr"), + DEFINE_RES_IRQ(IRQ_EP93XX_GPIO_AB), +}; + +static struct platform_device ep93xx_b_gpio = { + .name = "gpio-ep93xx", + .id = 1, + .num_resources = ARRAY_SIZE(ep93xx_b_gpio_resources), + .resource = ep93xx_b_gpio_resources, +}; + +/* port C */ +static struct resource ep93xx_c_gpio_resources[] = { + DEFINE_RES_MEM_NAMED(EP93XX_GPIO_PHYS_BASE + 0x08, 0x04, "data"), + DEFINE_RES_MEM_NAMED(EP93XX_GPIO_PHYS_BASE + 0x18, 0x04, "dir"), +}; + +static struct platform_device ep93xx_c_gpio = { + .name = "gpio-ep93xx", + .id = 2, + .num_resources = ARRAY_SIZE(ep93xx_c_gpio_resources), + .resource = ep93xx_c_gpio_resources, +}; + +/* port D */ +static struct resource ep93xx_d_gpio_resources[] = { + DEFINE_RES_MEM_NAMED(EP93XX_GPIO_PHYS_BASE + 0x0c, 0x04, "data"), + DEFINE_RES_MEM_NAMED(EP93XX_GPIO_PHYS_BASE + 0x1c, 0x04, "dir"), +}; + +static struct platform_device ep93xx_d_gpio = { + .name = "gpio-ep93xx", + .id = 3, + .num_resources = ARRAY_SIZE(ep93xx_d_gpio_resources), + .resource = ep93xx_d_gpio_resources, +}; + +/* port E */ +static struct resource ep93xx_e_gpio_resources[] = { + DEFINE_RES_MEM_NAMED(EP93XX_GPIO_PHYS_BASE + 0x20, 0x04, "data"), + DEFINE_RES_MEM_NAMED(EP93XX_GPIO_PHYS_BASE + 0x24, 0x04, "dir"), +}; + +static struct platform_device ep93xx_e_gpio = { + .name = "gpio-ep93xx", + .id = 4, + .num_resources = ARRAY_SIZE(ep93xx_e_gpio_resources), + .resource = ep93xx_e_gpio_resources, +}; + +/* port F */ +static struct resource ep93xx_f_gpio_resources[] = { + DEFINE_RES_MEM_NAMED(EP93XX_GPIO_PHYS_BASE + 0x30, 0x04, "data"), + DEFINE_RES_MEM_NAMED(EP93XX_GPIO_PHYS_BASE + 0x34, 0x04, "dir"), + DEFINE_RES_MEM_NAMED(EP93XX_GPIO_PHYS_BASE + 0x4c, 0x1c, "intr"), DEFINE_RES_IRQ(IRQ_EP93XX_GPIO0MUX), DEFINE_RES_IRQ(IRQ_EP93XX_GPIO1MUX), DEFINE_RES_IRQ(IRQ_EP93XX_GPIO2MUX), @@ -152,11 +224,34 @@ static struct resource ep93xx_gpio_resource[] = { DEFINE_RES_IRQ(IRQ_EP93XX_GPIO7MUX), }; -static struct platform_device ep93xx_gpio_device = { - .name = "gpio-ep93xx", - .id = -1, - .num_resources = ARRAY_SIZE(ep93xx_gpio_resource), - .resource = ep93xx_gpio_resource, +static struct platform_device ep93xx_f_gpio = { + .name = "gpio-ep93xx", + .id = 5, + .num_resources = ARRAY_SIZE(ep93xx_f_gpio_resources), + .resource = ep93xx_f_gpio_resources, +}; + +/* port G */ +static struct resource ep93xx_g_gpio_resources[] = { + DEFINE_RES_MEM_NAMED(EP93XX_GPIO_PHYS_BASE + 0x38, 0x04, "data"), + DEFINE_RES_MEM_NAMED(EP93XX_GPIO_PHYS_BASE + 0x3c, 0x04, "dir"), +}; + +static struct platform_device ep93xx_g_gpio = { + .name = "gpio-ep93xx", + .id = 6, + .num_resources = ARRAY_SIZE(ep93xx_g_gpio_resources), + .resource = ep93xx_g_gpio_resources, +}; + +static struct platform_device *ep93xx_gpio_device[] __initdata = { + &ep93xx_a_gpio, + &ep93xx_b_gpio, + &ep93xx_c_gpio, + &ep93xx_d_gpio, + &ep93xx_e_gpio, + &ep93xx_f_gpio, + &ep93xx_g_gpio, }; /************************************************************************* @@ -335,9 +430,9 @@ static struct gpiod_lookup_table ep93xx_i2c_gpiod_table = { .dev_id = "i2c-gpio.0", .table = { /* Use local offsets on gpiochip/port "G" */ - GPIO_LOOKUP_IDX("G", 1, NULL, 0, + GPIO_LOOKUP_IDX("gpio-ep93xx.6", 1, NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), - GPIO_LOOKUP_IDX("G", 0, NULL, 1, + GPIO_LOOKUP_IDX("gpio-ep93xx.6", 0, NULL, 1, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), { } }, @@ -441,8 +536,8 @@ static struct gpiod_lookup_table ep93xx_leds_gpio_table = { .dev_id = "leds-gpio", .table = { /* Use local offsets on gpiochip/port "E" */ - GPIO_LOOKUP_IDX("E", 0, NULL, 0, GPIO_ACTIVE_HIGH), - GPIO_LOOKUP_IDX("E", 1, NULL, 1, GPIO_ACTIVE_HIGH), + GPIO_LOOKUP_IDX("gpio-ep93xx.4", 0, NULL, 0, GPIO_ACTIVE_HIGH), + GPIO_LOOKUP_IDX("gpio-ep93xx.4", 1, NULL, 1, GPIO_ACTIVE_HIGH), { } }, }; @@ -975,6 +1070,7 @@ static struct device __init *ep93xx_init_soc(void) struct device __init *ep93xx_init_devices(void) { struct device *parent; + unsigned int i; /* Disallow access to MaverickCrunch initially */ ep93xx_devcfg_clear_bits(EP93XX_SYSCON_DEVCFG_CPENA); @@ -989,7 +1085,8 @@ struct device __init *ep93xx_init_devices(void) parent = ep93xx_init_soc(); /* Get the GPIO working early, other devices need it */ - platform_device_register(&ep93xx_gpio_device); + for (i = 0; i < ARRAY_SIZE(ep93xx_gpio_device); i++) + platform_device_register(ep93xx_gpio_device[i]); amba_device_register(&uart1_device, &iomem_resource); amba_device_register(&uart2_device, &iomem_resource); diff --git a/arch/arm/mach-ep93xx/edb93xx.c b/arch/arm/mach-ep93xx/edb93xx.c index dbdb822a0100..356b0460c7ed 100644 --- a/arch/arm/mach-ep93xx/edb93xx.c +++ b/arch/arm/mach-ep93xx/edb93xx.c @@ -105,7 +105,7 @@ static struct spi_board_info edb93xx_spi_board_info[] __initdata = { static struct gpiod_lookup_table edb93xx_spi_cs_gpio_table = { .dev_id = "spi0", .table = { - GPIO_LOOKUP("A", 6, "cs", GPIO_ACTIVE_LOW), + GPIO_LOOKUP("gpio-ep93xx.0", 6, "cs", GPIO_ACTIVE_LOW), { }, }, }; diff --git a/arch/arm/mach-ep93xx/ts72xx.c b/arch/arm/mach-ep93xx/ts72xx.c index d3de7283ecb3..0bbdf587c685 100644 --- a/arch/arm/mach-ep93xx/ts72xx.c +++ b/arch/arm/mach-ep93xx/ts72xx.c @@ -268,7 +268,7 @@ static struct spi_board_info bk3_spi_board_info[] __initdata = { static struct gpiod_lookup_table bk3_spi_cs_gpio_table = { .dev_id = "spi0", .table = { - GPIO_LOOKUP("F", 3, "cs", GPIO_ACTIVE_LOW), + GPIO_LOOKUP("gpio-ep93xx.5", 3, "cs", GPIO_ACTIVE_LOW), { }, }, }; @@ -318,7 +318,7 @@ static struct gpiod_lookup_table ts72xx_spi_cs_gpio_table = { .dev_id = "spi0", .table = { /* DIO_17 */ - GPIO_LOOKUP("F", 2, "cs", GPIO_ACTIVE_LOW), + GPIO_LOOKUP("gpio-ep93xx.5", 2, "cs", GPIO_ACTIVE_LOW), { }, }, }; diff --git a/arch/arm/mach-ep93xx/vision_ep9307.c b/arch/arm/mach-ep93xx/vision_ep9307.c index 9471938df64c..b3087b8eed3f 100644 --- a/arch/arm/mach-ep93xx/vision_ep9307.c +++ b/arch/arm/mach-ep93xx/vision_ep9307.c @@ -206,9 +206,9 @@ static struct gpiod_lookup_table vision_spi_mmc_gpio_table = { .dev_id = "mmc_spi.2", /* "mmc_spi @ CS2 */ .table = { /* Card detect */ - GPIO_LOOKUP_IDX("B", 7, NULL, 0, GPIO_ACTIVE_LOW), + GPIO_LOOKUP_IDX("gpio-ep93xx.1", 7, NULL, 0, GPIO_ACTIVE_LOW), /* Write protect */ - GPIO_LOOKUP_IDX("F", 0, NULL, 1, GPIO_ACTIVE_HIGH), + GPIO_LOOKUP_IDX("gpio-ep93xx.5", 0, NULL, 1, GPIO_ACTIVE_HIGH), { }, }, }; @@ -253,9 +253,9 @@ static struct gpiod_lookup_table vision_spi_cs4271_gpio_table = { static struct gpiod_lookup_table vision_spi_cs_gpio_table = { .dev_id = "spi0", .table = { - GPIO_LOOKUP_IDX("A", 6, "cs", 0, GPIO_ACTIVE_LOW), - GPIO_LOOKUP_IDX("A", 7, "cs", 1, GPIO_ACTIVE_LOW), - GPIO_LOOKUP_IDX("G", 2, "cs", 2, GPIO_ACTIVE_LOW), + GPIO_LOOKUP_IDX("gpio-ep93xx.0", 6, "cs", 0, GPIO_ACTIVE_LOW), + GPIO_LOOKUP_IDX("gpio-ep93xx.0", 7, "cs", 1, GPIO_ACTIVE_LOW), + GPIO_LOOKUP_IDX("gpio-ep93xx.6", 2, "cs", 2, GPIO_ACTIVE_LOW), { }, }, }; diff --git a/drivers/gpio/gpio-ep93xx.c b/drivers/gpio/gpio-ep93xx.c index 6cedf46efec6..a55f635585f4 100644 --- a/drivers/gpio/gpio-ep93xx.c +++ b/drivers/gpio/gpio-ep93xx.c @@ -18,30 +18,10 @@ #include #include #include - -#define EP93XX_GPIO_F_INT_STATUS 0x5c -#define EP93XX_GPIO_A_INT_STATUS 0xa0 -#define EP93XX_GPIO_B_INT_STATUS 0xbc - -/* Maximum value for gpio line identifiers */ -#define EP93XX_GPIO_LINE_MAX 63 - -/* Number of GPIO chips in EP93XX */ -#define EP93XX_GPIO_CHIP_NUM 8 - -/* Maximum value for irq capable line identifiers */ -#define EP93XX_GPIO_LINE_MAX_IRQ 23 - -#define EP93XX_GPIO_A_IRQ_BASE 64 -#define EP93XX_GPIO_B_IRQ_BASE 72 -/* - * Static mapping of GPIO bank F IRQS: - * F0..F7 (16..24) to irq 80..87. - */ -#define EP93XX_GPIO_F_IRQ_BASE 80 +#include struct ep93xx_gpio_irq_chip { - u8 irq_offset; + void __iomem *base; u8 int_unmasked; u8 int_enabled; u8 int_type1; @@ -50,15 +30,11 @@ struct ep93xx_gpio_irq_chip { }; struct ep93xx_gpio_chip { + void __iomem *base; struct gpio_chip gc; struct ep93xx_gpio_irq_chip *eic; }; -struct ep93xx_gpio { - void __iomem *base; - struct ep93xx_gpio_chip gc[EP93XX_GPIO_CHIP_NUM]; -}; - #define to_ep93xx_gpio_chip(x) container_of(x, struct ep93xx_gpio_chip, gc) static struct ep93xx_gpio_irq_chip *to_ep93xx_gpio_irq_chip(struct gpio_chip *gc) @@ -79,25 +55,23 @@ static struct ep93xx_gpio_irq_chip *to_ep93xx_gpio_irq_chip(struct gpio_chip *gc #define EP93XX_INT_RAW_STATUS_OFFSET 0x14 #define EP93XX_INT_DEBOUNCE_OFFSET 0x18 -static void ep93xx_gpio_update_int_params(struct ep93xx_gpio *epg, - struct ep93xx_gpio_irq_chip *eic) +static void ep93xx_gpio_update_int_params(struct ep93xx_gpio_irq_chip *eic) { - writeb_relaxed(0, epg->base + eic->irq_offset + EP93XX_INT_EN_OFFSET); + writeb_relaxed(0, eic->base + EP93XX_INT_EN_OFFSET); writeb_relaxed(eic->int_type2, - epg->base + eic->irq_offset + EP93XX_INT_TYPE2_OFFSET); + eic->base + EP93XX_INT_TYPE2_OFFSET); writeb_relaxed(eic->int_type1, - epg->base + eic->irq_offset + EP93XX_INT_TYPE1_OFFSET); + eic->base + EP93XX_INT_TYPE1_OFFSET); writeb_relaxed(eic->int_unmasked & eic->int_enabled, - epg->base + eic->irq_offset + EP93XX_INT_EN_OFFSET); + eic->base + EP93XX_INT_EN_OFFSET); } static void ep93xx_gpio_int_debounce(struct gpio_chip *gc, unsigned int offset, bool enable) { - struct ep93xx_gpio *epg = gpiochip_get_data(gc); struct ep93xx_gpio_irq_chip *eic = to_ep93xx_gpio_irq_chip(gc); int port_mask = BIT(offset); @@ -106,53 +80,43 @@ static void ep93xx_gpio_int_debounce(struct gpio_chip *gc, else eic->int_debounce &= ~port_mask; - writeb(eic->int_debounce, - epg->base + eic->irq_offset + EP93XX_INT_DEBOUNCE_OFFSET); + writeb(eic->int_debounce, eic->base + EP93XX_INT_DEBOUNCE_OFFSET); } -static void ep93xx_gpio_ab_irq_handler(struct irq_desc *desc) +static u32 ep93xx_gpio_ab_irq_handler(struct gpio_chip *gc) { - struct gpio_chip *gc = irq_desc_get_handler_data(desc); - struct ep93xx_gpio *epg = gpiochip_get_data(gc); - struct irq_chip *irqchip = irq_desc_get_chip(desc); + struct ep93xx_gpio_irq_chip *eic = to_ep93xx_gpio_irq_chip(gc); unsigned long stat; int offset; - chained_irq_enter(irqchip, desc); - - /* - * Dispatch the IRQs to the irqdomain of each A and B - * gpiochip irqdomains depending on what has fired. - * The tricky part is that the IRQ line is shared - * between bank A and B and each has their own gpiochip. - */ - stat = readb(epg->base + EP93XX_GPIO_A_INT_STATUS); + stat = readb(eic->base + EP93XX_INT_STATUS_OFFSET); for_each_set_bit(offset, &stat, 8) - generic_handle_domain_irq(epg->gc[0].gc.irq.domain, - offset); + generic_handle_domain_irq(gc->irq.domain, offset); - stat = readb(epg->base + EP93XX_GPIO_B_INT_STATUS); - for_each_set_bit(offset, &stat, 8) - generic_handle_domain_irq(epg->gc[1].gc.irq.domain, - offset); + return stat; +} - chained_irq_exit(irqchip, desc); +static irqreturn_t ep93xx_ab_irq_handler(int irq, void *dev_id) +{ + return IRQ_RETVAL(ep93xx_gpio_ab_irq_handler(dev_id)); } static void ep93xx_gpio_f_irq_handler(struct irq_desc *desc) { - /* - * map discontiguous hw irq range to continuous sw irq range: - * - * IRQ_EP93XX_GPIO{0..7}MUX -> EP93XX_GPIO_LINE_F{0..7} - */ struct irq_chip *irqchip = irq_desc_get_chip(desc); - unsigned int irq = irq_desc_get_irq(desc); - int port_f_idx = (irq & 7) ^ 4; /* {20..23,48..51} -> {0..7} */ - int gpio_irq = EP93XX_GPIO_F_IRQ_BASE + port_f_idx; + struct gpio_chip *gc = irq_desc_get_handler_data(desc); + struct gpio_irq_chip *gic = &gc->irq; + unsigned int parent = irq_desc_get_irq(desc); + unsigned int i; chained_irq_enter(irqchip, desc); - generic_handle_irq(gpio_irq); + for (i = 0; i < gic->num_parents; i++) + if (gic->parents[i] == parent) + break; + + if (i < gic->num_parents) + generic_handle_domain_irq(gc->irq.domain, i); + chained_irq_exit(irqchip, desc); } @@ -160,31 +124,29 @@ static void ep93xx_gpio_irq_ack(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct ep93xx_gpio_irq_chip *eic = to_ep93xx_gpio_irq_chip(gc); - struct ep93xx_gpio *epg = gpiochip_get_data(gc); - int port_mask = BIT(d->irq & 7); + int port_mask = BIT(irqd_to_hwirq(d)); if (irqd_get_trigger_type(d) == IRQ_TYPE_EDGE_BOTH) { eic->int_type2 ^= port_mask; /* switch edge direction */ - ep93xx_gpio_update_int_params(epg, eic); + ep93xx_gpio_update_int_params(eic); } - writeb(port_mask, epg->base + eic->irq_offset + EP93XX_INT_EOI_OFFSET); + writeb(port_mask, eic->base + EP93XX_INT_EOI_OFFSET); } static void ep93xx_gpio_irq_mask_ack(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct ep93xx_gpio_irq_chip *eic = to_ep93xx_gpio_irq_chip(gc); - struct ep93xx_gpio *epg = gpiochip_get_data(gc); - int port_mask = BIT(d->irq & 7); + int port_mask = BIT(irqd_to_hwirq(d)); if (irqd_get_trigger_type(d) == IRQ_TYPE_EDGE_BOTH) eic->int_type2 ^= port_mask; /* switch edge direction */ eic->int_unmasked &= ~port_mask; - ep93xx_gpio_update_int_params(epg, eic); + ep93xx_gpio_update_int_params(eic); - writeb(port_mask, epg->base + eic->irq_offset + EP93XX_INT_EOI_OFFSET); + writeb(port_mask, eic->base + EP93XX_INT_EOI_OFFSET); gpiochip_disable_irq(gc, irqd_to_hwirq(d)); } @@ -192,10 +154,9 @@ static void ep93xx_gpio_irq_mask(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct ep93xx_gpio_irq_chip *eic = to_ep93xx_gpio_irq_chip(gc); - struct ep93xx_gpio *epg = gpiochip_get_data(gc); - eic->int_unmasked &= ~BIT(d->irq & 7); - ep93xx_gpio_update_int_params(epg, eic); + eic->int_unmasked &= ~BIT(irqd_to_hwirq(d)); + ep93xx_gpio_update_int_params(eic); gpiochip_disable_irq(gc, irqd_to_hwirq(d)); } @@ -203,11 +164,10 @@ static void ep93xx_gpio_irq_unmask(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct ep93xx_gpio_irq_chip *eic = to_ep93xx_gpio_irq_chip(gc); - struct ep93xx_gpio *epg = gpiochip_get_data(gc); gpiochip_enable_irq(gc, irqd_to_hwirq(d)); - eic->int_unmasked |= BIT(d->irq & 7); - ep93xx_gpio_update_int_params(epg, eic); + eic->int_unmasked |= BIT(irqd_to_hwirq(d)); + ep93xx_gpio_update_int_params(eic); } /* @@ -219,8 +179,7 @@ static int ep93xx_gpio_irq_type(struct irq_data *d, unsigned int type) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct ep93xx_gpio_irq_chip *eic = to_ep93xx_gpio_irq_chip(gc); - struct ep93xx_gpio *epg = gpiochip_get_data(gc); - int offset = d->irq & 7; + irq_hw_number_t offset = irqd_to_hwirq(d); int port_mask = BIT(offset); irq_flow_handler_t handler; @@ -264,51 +223,11 @@ static int ep93xx_gpio_irq_type(struct irq_data *d, unsigned int type) eic->int_enabled |= port_mask; - ep93xx_gpio_update_int_params(epg, eic); + ep93xx_gpio_update_int_params(eic); return 0; } -/************************************************************************* - * gpiolib interface for EP93xx on-chip GPIOs - *************************************************************************/ -struct ep93xx_gpio_bank { - const char *label; - int data; - int dir; - int irq; - int base; - bool has_irq; - bool has_hierarchical_irq; - unsigned int irq_base; -}; - -#define EP93XX_GPIO_BANK(_label, _data, _dir, _irq, _base, _has_irq, _has_hier, _irq_base) \ - { \ - .label = _label, \ - .data = _data, \ - .dir = _dir, \ - .irq = _irq, \ - .base = _base, \ - .has_irq = _has_irq, \ - .has_hierarchical_irq = _has_hier, \ - .irq_base = _irq_base, \ - } - -static struct ep93xx_gpio_bank ep93xx_gpio_banks[] = { - /* Bank A has 8 IRQs */ - EP93XX_GPIO_BANK("A", 0x00, 0x10, 0x90, 0, true, false, EP93XX_GPIO_A_IRQ_BASE), - /* Bank B has 8 IRQs */ - EP93XX_GPIO_BANK("B", 0x04, 0x14, 0xac, 8, true, false, EP93XX_GPIO_B_IRQ_BASE), - EP93XX_GPIO_BANK("C", 0x08, 0x18, 0x00, 40, false, false, 0), - EP93XX_GPIO_BANK("D", 0x0c, 0x1c, 0x00, 24, false, false, 0), - EP93XX_GPIO_BANK("E", 0x20, 0x24, 0x00, 32, false, false, 0), - /* Bank F has 8 IRQs */ - EP93XX_GPIO_BANK("F", 0x30, 0x34, 0x4c, 16, false, true, EP93XX_GPIO_F_IRQ_BASE), - EP93XX_GPIO_BANK("G", 0x38, 0x3c, 0x00, 48, false, false, 0), - EP93XX_GPIO_BANK("H", 0x40, 0x44, 0x00, 56, false, false, 0), -}; - static int ep93xx_gpio_set_config(struct gpio_chip *gc, unsigned offset, unsigned long config) { @@ -342,110 +261,102 @@ static const struct irq_chip gpio_eic_irq_chip = { GPIOCHIP_IRQ_RESOURCE_HELPERS, }; -static int ep93xx_gpio_add_bank(struct ep93xx_gpio_chip *egc, - struct platform_device *pdev, - struct ep93xx_gpio *epg, - struct ep93xx_gpio_bank *bank) +static int ep93xx_setup_irqs(struct platform_device *pdev, + struct ep93xx_gpio_chip *egc) { - void __iomem *data = epg->base + bank->data; - void __iomem *dir = epg->base + bank->dir; struct gpio_chip *gc = &egc->gc; struct device *dev = &pdev->dev; - struct gpio_irq_chip *girq; - int err; + struct gpio_irq_chip *girq = &gc->irq; + int ret, irq, i; + void __iomem *intr; - err = bgpio_init(gc, dev, 1, data, NULL, NULL, dir, NULL, 0); - if (err) - return err; + intr = devm_platform_ioremap_resource_byname(pdev, "intr"); + if (IS_ERR(intr)) + return PTR_ERR(intr); - gc->label = bank->label; - gc->base = bank->base; + gc->set_config = ep93xx_gpio_set_config; + egc->eic = devm_kzalloc(dev, sizeof(*egc->eic), GFP_KERNEL); + if (!egc->eic) + return -ENOMEM; - girq = &gc->irq; - if (bank->has_irq || bank->has_hierarchical_irq) { - gc->set_config = ep93xx_gpio_set_config; - egc->eic = devm_kcalloc(dev, 1, - sizeof(*egc->eic), - GFP_KERNEL); - if (!egc->eic) - return -ENOMEM; - egc->eic->irq_offset = bank->irq; - gpio_irq_chip_set_chip(girq, &gpio_eic_irq_chip); - } + egc->eic->base = intr; + gpio_irq_chip_set_chip(girq, &gpio_eic_irq_chip); + girq->num_parents = platform_irq_count(pdev); + if (girq->num_parents == 0) + return -EINVAL; - if (bank->has_irq) { - int ab_parent_irq = platform_get_irq(pdev, 0); + girq->parents = devm_kcalloc(dev, girq->num_parents, + sizeof(*girq->parents), + GFP_KERNEL); + if (!girq->parents) + return -ENOMEM; - girq->parent_handler = ep93xx_gpio_ab_irq_handler; - girq->num_parents = 1; - girq->parents = devm_kcalloc(dev, girq->num_parents, - sizeof(*girq->parents), - GFP_KERNEL); - if (!girq->parents) - return -ENOMEM; - girq->default_type = IRQ_TYPE_NONE; - girq->handler = handle_level_irq; - girq->parents[0] = ab_parent_irq; - girq->first = bank->irq_base; - } + if (girq->num_parents == 1) { /* A/B irqchips */ + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; - /* Only bank F has especially funky IRQ handling */ - if (bank->has_hierarchical_irq) { - int gpio_irq; - int i; + ret = devm_request_irq(dev, irq, ep93xx_ab_irq_handler, + IRQF_SHARED, gc->label, gc); + if (ret) + return dev_err_probe(dev, ret, "requesting IRQ: %d\n", irq); - /* - * FIXME: convert this to use hierarchical IRQ support! - * this requires fixing the root irqchip to be hierarchical. - */ + girq->parents[0] = irq; + } else { /* F irqchip */ girq->parent_handler = ep93xx_gpio_f_irq_handler; - girq->num_parents = 8; - girq->parents = devm_kcalloc(dev, girq->num_parents, - sizeof(*girq->parents), - GFP_KERNEL); - if (!girq->parents) - return -ENOMEM; - /* Pick resources 1..8 for these IRQs */ + for (i = 0; i < girq->num_parents; i++) { - girq->parents[i] = platform_get_irq(pdev, i + 1); - gpio_irq = bank->irq_base + i; - irq_set_chip_data(gpio_irq, &epg->gc[5]); - irq_set_chip_and_handler(gpio_irq, - girq->chip, - handle_level_irq); - irq_clear_status_flags(gpio_irq, IRQ_NOREQUEST); + irq = platform_get_irq(pdev, i); + if (irq < 0) + continue; + + girq->parents[i] = irq; } - girq->default_type = IRQ_TYPE_NONE; - girq->handler = handle_level_irq; - girq->first = bank->irq_base; + + girq->map = girq->parents; } - return devm_gpiochip_add_data(dev, gc, epg); + girq->default_type = IRQ_TYPE_NONE; + /* TODO: replace with handle_bad_irq() once we are fully hierarchical */ + girq->handler = handle_simple_irq; + + return 0; } static int ep93xx_gpio_probe(struct platform_device *pdev) { - struct ep93xx_gpio *epg; - int i; + struct ep93xx_gpio_chip *egc; + struct gpio_chip *gc; + void __iomem *data; + void __iomem *dir; + int ret; - epg = devm_kzalloc(&pdev->dev, sizeof(*epg), GFP_KERNEL); - if (!epg) + egc = devm_kzalloc(&pdev->dev, sizeof(*egc), GFP_KERNEL); + if (!egc) return -ENOMEM; - epg->base = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(epg->base)) - return PTR_ERR(epg->base); + data = devm_platform_ioremap_resource_byname(pdev, "data"); + if (IS_ERR(data)) + return PTR_ERR(data); - for (i = 0; i < ARRAY_SIZE(ep93xx_gpio_banks); i++) { - struct ep93xx_gpio_chip *gc = &epg->gc[i]; - struct ep93xx_gpio_bank *bank = &ep93xx_gpio_banks[i]; + dir = devm_platform_ioremap_resource_byname(pdev, "dir"); + if (IS_ERR(dir)) + return PTR_ERR(dir); - if (ep93xx_gpio_add_bank(gc, pdev, epg, bank)) - dev_warn(&pdev->dev, "Unable to add gpio bank %s\n", - bank->label); + gc = &egc->gc; + ret = bgpio_init(gc, &pdev->dev, 1, data, NULL, NULL, dir, NULL, 0); + if (ret) + return dev_err_probe(&pdev->dev, ret, "unable to init generic GPIO\n"); + + gc->label = dev_name(&pdev->dev); + if (platform_irq_count(pdev) > 0) { + dev_dbg(&pdev->dev, "setting up irqs for %s\n", dev_name(&pdev->dev)); + ret = ep93xx_setup_irqs(pdev, egc); + if (ret) + dev_err_probe(&pdev->dev, ret, "setup irqs failed"); } - return 0; + return devm_gpiochip_add_data(&pdev->dev, gc, egc); } static struct platform_driver ep93xx_gpio_driver = { From ede5bbe488d162bcd572880e58f9044c9df84050 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:27 +0300 Subject: [PATCH 213/655] ARM: ep93xx: add regmap aux_dev The following driver's should be instantiated by ep93xx syscon driver: - reboot - pinctrl - clock They all require access to DEVCFG register with a shared lock held, to avoid conflict writing to swlocked parts of DEVCFG. Provide common resources such as base, regmap and spinlock via auxiliary bus framework. Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Reviewed-by: Linus Walleij Reviewed-by: Stephen Boyd Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- include/linux/soc/cirrus/ep93xx.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/include/linux/soc/cirrus/ep93xx.h b/include/linux/soc/cirrus/ep93xx.h index 56fbe2dc59b1..a27447971302 100644 --- a/include/linux/soc/cirrus/ep93xx.h +++ b/include/linux/soc/cirrus/ep93xx.h @@ -3,6 +3,18 @@ #define _SOC_EP93XX_H struct platform_device; +struct regmap; +struct spinlock_t; + +enum ep93xx_soc_model { + EP93XX_9301_SOC, + EP93XX_9307_SOC, + EP93XX_9312_SOC, +}; + +#include +#include +#include #define EP93XX_CHIP_REV_D0 3 #define EP93XX_CHIP_REV_D1 4 @@ -10,6 +22,20 @@ struct platform_device; #define EP93XX_CHIP_REV_E1 6 #define EP93XX_CHIP_REV_E2 7 +struct ep93xx_regmap_adev { + struct auxiliary_device adev; + struct regmap *map; + void __iomem *base; + spinlock_t *lock; + void (*write)(struct regmap *map, spinlock_t *lock, unsigned int reg, + unsigned int val); + void (*update_bits)(struct regmap *map, spinlock_t *lock, + unsigned int reg, unsigned int mask, unsigned int val); +}; + +#define to_ep93xx_regmap_adev(_adev) \ + container_of((_adev), struct ep93xx_regmap_adev, adev) + #ifdef CONFIG_ARCH_EP93XX int ep93xx_pwm_acquire_gpio(struct platform_device *pdev); void ep93xx_pwm_release_gpio(struct platform_device *pdev); From 8a6b7e2b3acfc1bd6f653a4d12c04aa1df736b84 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:28 +0300 Subject: [PATCH 214/655] clk: ep93xx: add DT support for Cirrus EP93xx Rewrite EP93xx clock driver located in arch/arm/mach-ep93xx/clock.c trying to do everything the device tree way: - provide clock acces via of - drop clk_hw_register_clkdev - drop init code and use module_auxiliary_driver Co-developed-by: Alexander Sverdlin Signed-off-by: Alexander Sverdlin Signed-off-by: Nikita Shubin Signed-off-by: Arnd Bergmann --- drivers/clk/Kconfig | 8 + drivers/clk/Makefile | 1 + drivers/clk/clk-ep93xx.c | 846 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 855 insertions(+) create mode 100644 drivers/clk/clk-ep93xx.c diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig index 983ef4f36d8c..abc438536f93 100644 --- a/drivers/clk/Kconfig +++ b/drivers/clk/Kconfig @@ -218,6 +218,14 @@ config COMMON_CLK_EN7523 This driver provides the fixed clocks and gates present on Airoha ARM silicon. +config COMMON_CLK_EP93XX + tristate "Clock driver for Cirrus Logic ep93xx SoC" + depends on ARCH_EP93XX || COMPILE_TEST + select AUXILIARY_BUS + select REGMAP_MMIO + help + This driver supports the SoC clocks on the Cirrus Logic ep93xx. + config COMMON_CLK_FSL_FLEXSPI tristate "Clock driver for FlexSPI on Layerscape SoCs" depends on ARCH_LAYERSCAPE || COMPILE_TEST diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile index f793a16cad40..52f57a569b4e 100644 --- a/drivers/clk/Makefile +++ b/drivers/clk/Makefile @@ -30,6 +30,7 @@ obj-$(CONFIG_COMMON_CLK_CDCE706) += clk-cdce706.o obj-$(CONFIG_COMMON_CLK_CDCE925) += clk-cdce925.o obj-$(CONFIG_ARCH_CLPS711X) += clk-clps711x.o obj-$(CONFIG_COMMON_CLK_CS2000_CP) += clk-cs2000-cp.o +obj-$(CONFIG_COMMON_CLK_EP93XX) += clk-ep93xx.o obj-$(CONFIG_ARCH_SPARX5) += clk-sparx5.o obj-$(CONFIG_COMMON_CLK_EN7523) += clk-en7523.o obj-$(CONFIG_COMMON_CLK_FIXED_MMIO) += clk-fixed-mmio.o diff --git a/drivers/clk/clk-ep93xx.c b/drivers/clk/clk-ep93xx.c new file mode 100644 index 000000000000..4727c06a59ba --- /dev/null +++ b/drivers/clk/clk-ep93xx.c @@ -0,0 +1,846 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Clock control for Cirrus EP93xx chips. + * Copyright (C) 2021 Nikita Shubin + * + * Based on a rewrite of arch/arm/mach-ep93xx/clock.c: + * Copyright (C) 2006 Lennert Buytenhek + */ +#define pr_fmt(fmt) "ep93xx " KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#define EP93XX_EXT_CLK_RATE 14745600 +#define EP93XX_EXT_RTC_RATE 32768 + +#define EP93XX_SYSCON_POWER_STATE 0x00 +#define EP93XX_SYSCON_PWRCNT 0x04 +#define EP93XX_SYSCON_PWRCNT_UARTBAUD BIT(29) +#define EP93XX_SYSCON_PWRCNT_USH_EN 28 +#define EP93XX_SYSCON_PWRCNT_DMA_M2M1 27 +#define EP93XX_SYSCON_PWRCNT_DMA_M2M0 26 +#define EP93XX_SYSCON_PWRCNT_DMA_M2P8 25 +#define EP93XX_SYSCON_PWRCNT_DMA_M2P9 24 +#define EP93XX_SYSCON_PWRCNT_DMA_M2P6 23 +#define EP93XX_SYSCON_PWRCNT_DMA_M2P7 22 +#define EP93XX_SYSCON_PWRCNT_DMA_M2P4 21 +#define EP93XX_SYSCON_PWRCNT_DMA_M2P5 20 +#define EP93XX_SYSCON_PWRCNT_DMA_M2P2 19 +#define EP93XX_SYSCON_PWRCNT_DMA_M2P3 18 +#define EP93XX_SYSCON_PWRCNT_DMA_M2P0 17 +#define EP93XX_SYSCON_PWRCNT_DMA_M2P1 16 +#define EP93XX_SYSCON_CLKSET1 0x20 +#define EP93XX_SYSCON_CLKSET1_NBYP1 BIT(23) +#define EP93XX_SYSCON_CLKSET2 0x24 +#define EP93XX_SYSCON_CLKSET2_NBYP2 BIT(19) +#define EP93XX_SYSCON_CLKSET2_PLL2_EN BIT(18) +#define EP93XX_SYSCON_DEVCFG 0x80 +#define EP93XX_SYSCON_DEVCFG_U3EN 24 +#define EP93XX_SYSCON_DEVCFG_U2EN 20 +#define EP93XX_SYSCON_DEVCFG_U1EN 18 +#define EP93XX_SYSCON_VIDCLKDIV 0x84 +#define EP93XX_SYSCON_CLKDIV_ENABLE 15 +#define EP93XX_SYSCON_CLKDIV_ESEL BIT(14) +#define EP93XX_SYSCON_CLKDIV_PSEL BIT(13) +#define EP93XX_SYSCON_CLKDIV_MASK GENMASK(14, 13) +#define EP93XX_SYSCON_CLKDIV_PDIV_SHIFT 8 +#define EP93XX_SYSCON_I2SCLKDIV 0x8c +#define EP93XX_SYSCON_I2SCLKDIV_SENA 31 +#define EP93XX_SYSCON_I2SCLKDIV_ORIDE BIT(29) +#define EP93XX_SYSCON_I2SCLKDIV_SPOL BIT(19) +#define EP93XX_SYSCON_KEYTCHCLKDIV 0x90 +#define EP93XX_SYSCON_KEYTCHCLKDIV_TSEN 31 +#define EP93XX_SYSCON_KEYTCHCLKDIV_ADIV 16 +#define EP93XX_SYSCON_KEYTCHCLKDIV_KEN 15 +#define EP93XX_SYSCON_KEYTCHCLKDIV_KDIV 0 +#define EP93XX_SYSCON_CHIPID 0x94 +#define EP93XX_SYSCON_CHIPID_ID 0x9213 + +#define EP93XX_FIXED_CLK_COUNT 21 + +static const char ep93xx_adc_divisors[] = { 16, 4 }; +static const char ep93xx_sclk_divisors[] = { 2, 4 }; +static const char ep93xx_lrclk_divisors[] = { 32, 64, 128 }; + +struct ep93xx_clk { + struct clk_hw hw; + u16 idx; + u16 reg; + u32 mask; + u8 bit_idx; + u8 shift; + u8 width; + u8 num_div; + const char *div; +}; + +struct ep93xx_clk_priv { + spinlock_t lock; + struct ep93xx_regmap_adev *aux_dev; + struct device *dev; + void __iomem *base; + struct regmap *map; + struct clk_hw *fixed[EP93XX_FIXED_CLK_COUNT]; + struct ep93xx_clk reg[]; +}; + +static struct ep93xx_clk *ep93xx_clk_from(struct clk_hw *hw) +{ + return container_of(hw, struct ep93xx_clk, hw); +} + +static struct ep93xx_clk_priv *ep93xx_priv_from(struct ep93xx_clk *clk) +{ + return container_of(clk, struct ep93xx_clk_priv, reg[clk->idx]); +} + +static void ep93xx_clk_write(struct ep93xx_clk_priv *priv, unsigned int reg, unsigned int val) +{ + struct ep93xx_regmap_adev *aux = priv->aux_dev; + + aux->write(aux->map, aux->lock, reg, val); +} + +static int ep93xx_clk_is_enabled(struct clk_hw *hw) +{ + struct ep93xx_clk *clk = ep93xx_clk_from(hw); + struct ep93xx_clk_priv *priv = ep93xx_priv_from(clk); + u32 val; + + regmap_read(priv->map, clk->reg, &val); + + return !!(val & BIT(clk->bit_idx)); +} + +static int ep93xx_clk_enable(struct clk_hw *hw) +{ + struct ep93xx_clk *clk = ep93xx_clk_from(hw); + struct ep93xx_clk_priv *priv = ep93xx_priv_from(clk); + u32 val; + + guard(spinlock_irqsave)(&priv->lock); + + regmap_read(priv->map, clk->reg, &val); + val |= BIT(clk->bit_idx); + + ep93xx_clk_write(priv, clk->reg, val); + + return 0; +} + +static void ep93xx_clk_disable(struct clk_hw *hw) +{ + struct ep93xx_clk *clk = ep93xx_clk_from(hw); + struct ep93xx_clk_priv *priv = ep93xx_priv_from(clk); + u32 val; + + guard(spinlock_irqsave)(&priv->lock); + + regmap_read(priv->map, clk->reg, &val); + val &= ~BIT(clk->bit_idx); + + ep93xx_clk_write(priv, clk->reg, val); +} + +static const struct clk_ops clk_ep93xx_gate_ops = { + .enable = ep93xx_clk_enable, + .disable = ep93xx_clk_disable, + .is_enabled = ep93xx_clk_is_enabled, +}; + +static int ep93xx_clk_register_gate(struct ep93xx_clk *clk, + const char *name, + struct clk_parent_data *parent_data, + unsigned long flags, + unsigned int reg, + u8 bit_idx) +{ + struct ep93xx_clk_priv *priv = ep93xx_priv_from(clk); + struct clk_init_data init = { }; + + init.name = name; + init.ops = &clk_ep93xx_gate_ops; + init.flags = flags; + init.parent_data = parent_data; + init.num_parents = 1; + + clk->reg = reg; + clk->bit_idx = bit_idx; + clk->hw.init = &init; + + return devm_clk_hw_register(priv->dev, &clk->hw); +} + +static u8 ep93xx_mux_get_parent(struct clk_hw *hw) +{ + struct ep93xx_clk *clk = ep93xx_clk_from(hw); + struct ep93xx_clk_priv *priv = ep93xx_priv_from(clk); + u32 val; + + regmap_read(priv->map, clk->reg, &val); + + val &= EP93XX_SYSCON_CLKDIV_MASK; + + switch (val) { + case EP93XX_SYSCON_CLKDIV_ESEL: + return 1; /* PLL1 */ + case EP93XX_SYSCON_CLKDIV_MASK: + return 2; /* PLL2 */ + default: + return 0; /* XTALI */ + }; +} + +static int ep93xx_mux_set_parent_lock(struct clk_hw *hw, u8 index) +{ + struct ep93xx_clk *clk = ep93xx_clk_from(hw); + struct ep93xx_clk_priv *priv = ep93xx_priv_from(clk); + u32 val; + + if (index >= 3) + return -EINVAL; + + guard(spinlock_irqsave)(&priv->lock); + + regmap_read(priv->map, clk->reg, &val); + val &= ~(EP93XX_SYSCON_CLKDIV_MASK); + val |= index > 0 ? EP93XX_SYSCON_CLKDIV_ESEL : 0; + val |= index > 1 ? EP93XX_SYSCON_CLKDIV_PSEL : 0; + + ep93xx_clk_write(priv, clk->reg, val); + + return 0; +} + +static bool is_best(unsigned long rate, unsigned long now, + unsigned long best) +{ + return abs_diff(rate, now) < abs_diff(rate, best); +} + +static int ep93xx_mux_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) +{ + unsigned long best_rate = 0, actual_rate, mclk_rate; + unsigned long rate = req->rate; + struct clk_hw *parent_best = NULL; + unsigned long parent_rate_best; + unsigned long parent_rate; + int div, pdiv; + unsigned int i; + + /* + * Try the two pll's and the external clock, + * because the valid predividers are 2, 2.5 and 3, we multiply + * all the clocks by 2 to avoid floating point math. + * + * This is based on the algorithm in the ep93xx raster guide: + * http://be-a-maverick.com/en/pubs/appNote/AN269REV1.pdf + * + */ + for (i = 0; i < clk_hw_get_num_parents(hw); i++) { + struct clk_hw *parent = clk_hw_get_parent_by_index(hw, i); + + parent_rate = clk_hw_get_rate(parent); + mclk_rate = parent_rate * 2; + + /* Try each predivider value */ + for (pdiv = 4; pdiv <= 6; pdiv++) { + div = DIV_ROUND_CLOSEST(mclk_rate, rate * pdiv); + if (!in_range(div, 1, 127)) + continue; + + actual_rate = DIV_ROUND_CLOSEST(mclk_rate, pdiv * div); + if (is_best(rate, actual_rate, best_rate)) { + best_rate = actual_rate; + parent_rate_best = parent_rate; + parent_best = parent; + } + } + } + + if (!parent_best) + return -EINVAL; + + req->best_parent_rate = parent_rate_best; + req->best_parent_hw = parent_best; + req->rate = best_rate; + + return 0; +} + +static unsigned long ep93xx_ddiv_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct ep93xx_clk *clk = ep93xx_clk_from(hw); + struct ep93xx_clk_priv *priv = ep93xx_priv_from(clk); + unsigned int pdiv, div; + u32 val; + + regmap_read(priv->map, clk->reg, &val); + pdiv = (val >> EP93XX_SYSCON_CLKDIV_PDIV_SHIFT) & GENMASK(1, 0); + div = val & GENMASK(6, 0); + if (!div) + return 0; + + return DIV_ROUND_CLOSEST(parent_rate * 2, (pdiv + 3) * div); +} + +static int ep93xx_ddiv_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct ep93xx_clk *clk = ep93xx_clk_from(hw); + struct ep93xx_clk_priv *priv = ep93xx_priv_from(clk); + int pdiv, div, npdiv, ndiv; + unsigned long actual_rate, mclk_rate, rate_err = ULONG_MAX; + u32 val; + + regmap_read(priv->map, clk->reg, &val); + mclk_rate = parent_rate * 2; + + for (pdiv = 4; pdiv <= 6; pdiv++) { + div = DIV_ROUND_CLOSEST(mclk_rate, rate * pdiv); + if (!in_range(div, 1, 127)) + continue; + + actual_rate = DIV_ROUND_CLOSEST(mclk_rate, pdiv * div); + if (abs(actual_rate - rate) < rate_err) { + npdiv = pdiv - 3; + ndiv = div; + rate_err = abs(actual_rate - rate); + } + } + + if (rate_err == ULONG_MAX) + return -EINVAL; + + /* + * Clear old dividers. + * Bit 7 is reserved bit in all ClkDiv registers. + */ + val &= ~(GENMASK(9, 0) & ~BIT(7)); + + /* Set the new pdiv and div bits for the new clock rate */ + val |= (npdiv << EP93XX_SYSCON_CLKDIV_PDIV_SHIFT) | ndiv; + + ep93xx_clk_write(priv, clk->reg, val); + + return 0; +} + +static const struct clk_ops clk_ddiv_ops = { + .enable = ep93xx_clk_enable, + .disable = ep93xx_clk_disable, + .is_enabled = ep93xx_clk_is_enabled, + .get_parent = ep93xx_mux_get_parent, + .set_parent = ep93xx_mux_set_parent_lock, + .determine_rate = ep93xx_mux_determine_rate, + .recalc_rate = ep93xx_ddiv_recalc_rate, + .set_rate = ep93xx_ddiv_set_rate, +}; + +static int ep93xx_clk_register_ddiv(struct ep93xx_clk *clk, + const char *name, + struct clk_parent_data *parent_data, + u8 num_parents, + unsigned int reg, + u8 bit_idx) +{ + struct ep93xx_clk_priv *priv = ep93xx_priv_from(clk); + struct clk_init_data init = { }; + + init.name = name; + init.ops = &clk_ddiv_ops; + init.flags = 0; + init.parent_data = parent_data; + init.num_parents = num_parents; + + clk->reg = reg; + clk->bit_idx = bit_idx; + clk->hw.init = &init; + + return devm_clk_hw_register(priv->dev, &clk->hw); +} + +static unsigned long ep93xx_div_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct ep93xx_clk *clk = ep93xx_clk_from(hw); + struct ep93xx_clk_priv *priv = ep93xx_priv_from(clk); + u32 val; + u8 index; + + regmap_read(priv->map, clk->reg, &val); + index = (val & clk->mask) >> clk->shift; + if (index > clk->num_div) + return 0; + + return DIV_ROUND_CLOSEST(parent_rate, clk->div[index]); +} + +static long ep93xx_div_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *parent_rate) +{ + struct ep93xx_clk *clk = ep93xx_clk_from(hw); + unsigned long best = 0, now; + unsigned int i; + + for (i = 0; i < clk->num_div; i++) { + if ((rate * clk->div[i]) == *parent_rate) + return rate; + + now = DIV_ROUND_CLOSEST(*parent_rate, clk->div[i]); + if (!best || is_best(rate, now, best)) + best = now; + } + + return best; +} + +static int ep93xx_div_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct ep93xx_clk *clk = ep93xx_clk_from(hw); + struct ep93xx_clk_priv *priv = ep93xx_priv_from(clk); + unsigned int i; + u32 val; + + regmap_read(priv->map, clk->reg, &val); + val &= ~clk->mask; + for (i = 0; i < clk->num_div; i++) + if (rate == DIV_ROUND_CLOSEST(parent_rate, clk->div[i])) + break; + + if (i == clk->num_div) + return -EINVAL; + + val |= i << clk->shift; + + ep93xx_clk_write(priv, clk->reg, val); + + return 0; +} + +static const struct clk_ops ep93xx_div_ops = { + .enable = ep93xx_clk_enable, + .disable = ep93xx_clk_disable, + .is_enabled = ep93xx_clk_is_enabled, + .recalc_rate = ep93xx_div_recalc_rate, + .round_rate = ep93xx_div_round_rate, + .set_rate = ep93xx_div_set_rate, +}; + +static int ep93xx_register_div(struct ep93xx_clk *clk, + const char *name, + const struct clk_parent_data *parent_data, + unsigned int reg, + u8 enable_bit, + u8 shift, + u8 width, + const char *clk_divisors, + u8 num_div) +{ + struct ep93xx_clk_priv *priv = ep93xx_priv_from(clk); + struct clk_init_data init = { }; + + init.name = name; + init.ops = &ep93xx_div_ops; + init.flags = 0; + init.parent_data = parent_data; + init.num_parents = 1; + + clk->reg = reg; + clk->bit_idx = enable_bit; + clk->mask = GENMASK(shift + width - 1, shift); + clk->shift = shift; + clk->div = clk_divisors; + clk->num_div = num_div; + clk->hw.init = &init; + + return devm_clk_hw_register(priv->dev, &clk->hw); +} + +struct ep93xx_gate { + unsigned int idx; + unsigned int bit; + const char *name; +}; + +static const struct ep93xx_gate ep93xx_uarts[] = { + { EP93XX_CLK_UART1, EP93XX_SYSCON_DEVCFG_U1EN, "uart1" }, + { EP93XX_CLK_UART2, EP93XX_SYSCON_DEVCFG_U2EN, "uart2" }, + { EP93XX_CLK_UART3, EP93XX_SYSCON_DEVCFG_U3EN, "uart3" }, +}; + +static int ep93xx_uart_clock_init(struct ep93xx_clk_priv *priv) +{ + struct clk_parent_data parent_data = { }; + unsigned int i, idx, ret, clk_uart_div; + struct ep93xx_clk *clk; + u32 val; + + regmap_read(priv->map, EP93XX_SYSCON_PWRCNT, &val); + if (val & EP93XX_SYSCON_PWRCNT_UARTBAUD) + clk_uart_div = 1; + else + clk_uart_div = 2; + + priv->fixed[EP93XX_CLK_UART] = + devm_clk_hw_register_fixed_factor_index(priv->dev, "uart", + 0, /* XTALI external clock */ + 0, 1, clk_uart_div); + parent_data.hw = priv->fixed[EP93XX_CLK_UART]; + + /* parenting uart gate clocks to uart clock */ + for (i = 0; i < ARRAY_SIZE(ep93xx_uarts); i++) { + idx = ep93xx_uarts[i].idx - EP93XX_CLK_UART1; + clk = &priv->reg[idx]; + clk->idx = idx; + ret = ep93xx_clk_register_gate(clk, + ep93xx_uarts[i].name, + &parent_data, CLK_SET_RATE_PARENT, + EP93XX_SYSCON_DEVCFG, + ep93xx_uarts[i].bit); + if (ret) + return dev_err_probe(priv->dev, ret, + "failed to register uart[%d] clock\n", i); + } + + return 0; +} + +static const struct ep93xx_gate ep93xx_dmas[] = { + { EP93XX_CLK_M2M0, EP93XX_SYSCON_PWRCNT_DMA_M2M0, "m2m0" }, + { EP93XX_CLK_M2M1, EP93XX_SYSCON_PWRCNT_DMA_M2M1, "m2m1" }, + { EP93XX_CLK_M2P0, EP93XX_SYSCON_PWRCNT_DMA_M2P0, "m2p0" }, + { EP93XX_CLK_M2P1, EP93XX_SYSCON_PWRCNT_DMA_M2P1, "m2p1" }, + { EP93XX_CLK_M2P2, EP93XX_SYSCON_PWRCNT_DMA_M2P2, "m2p2" }, + { EP93XX_CLK_M2P3, EP93XX_SYSCON_PWRCNT_DMA_M2P3, "m2p3" }, + { EP93XX_CLK_M2P4, EP93XX_SYSCON_PWRCNT_DMA_M2P4, "m2p4" }, + { EP93XX_CLK_M2P5, EP93XX_SYSCON_PWRCNT_DMA_M2P5, "m2p5" }, + { EP93XX_CLK_M2P6, EP93XX_SYSCON_PWRCNT_DMA_M2P6, "m2p6" }, + { EP93XX_CLK_M2P7, EP93XX_SYSCON_PWRCNT_DMA_M2P7, "m2p7" }, + { EP93XX_CLK_M2P8, EP93XX_SYSCON_PWRCNT_DMA_M2P8, "m2p8" }, + { EP93XX_CLK_M2P9, EP93XX_SYSCON_PWRCNT_DMA_M2P9, "m2p9" }, +}; + +static int ep93xx_dma_clock_init(struct ep93xx_clk_priv *priv) +{ + struct clk_parent_data parent_data = { }; + unsigned int i, idx; + + parent_data.hw = priv->fixed[EP93XX_CLK_HCLK]; + for (i = 0; i < ARRAY_SIZE(ep93xx_dmas); i++) { + idx = ep93xx_dmas[i].idx; + priv->fixed[idx] = devm_clk_hw_register_gate_parent_data(priv->dev, + ep93xx_dmas[i].name, + &parent_data, 0, + priv->base + EP93XX_SYSCON_PWRCNT, + ep93xx_dmas[i].bit, + 0, + &priv->lock); + if (IS_ERR(priv->fixed[idx])) + return PTR_ERR(priv->fixed[idx]); + } + + return 0; +} + +static struct clk_hw *of_clk_ep93xx_get(struct of_phandle_args *clkspec, void *data) +{ + struct ep93xx_clk_priv *priv = data; + unsigned int idx = clkspec->args[0]; + + if (idx < EP93XX_CLK_UART1) + return priv->fixed[idx]; + + if (idx <= EP93XX_CLK_I2S_LRCLK) + return &priv->reg[idx - EP93XX_CLK_UART1].hw; + + return ERR_PTR(-EINVAL); +} + +/* + * PLL rate = 14.7456 MHz * (X1FBD + 1) * (X2FBD + 1) / (X2IPD + 1) / 2^PS + */ +static unsigned long calc_pll_rate(u64 rate, u32 config_word) +{ + rate *= ((config_word >> 11) & GENMASK(4, 0)) + 1; /* X1FBD */ + rate *= ((config_word >> 5) & GENMASK(5, 0)) + 1; /* X2FBD */ + do_div(rate, (config_word & GENMASK(4, 0)) + 1); /* X2IPD */ + rate >>= (config_word >> 16) & GENMASK(1, 0); /* PS */ + + return rate; +} + +static int ep93xx_plls_init(struct ep93xx_clk_priv *priv) +{ + const char fclk_divisors[] = { 1, 2, 4, 8, 16, 1, 1, 1 }; + const char hclk_divisors[] = { 1, 2, 4, 5, 6, 8, 16, 32 }; + const char pclk_divisors[] = { 1, 2, 4, 8 }; + struct clk_parent_data xtali = { .index = 0 }; + unsigned int clk_f_div, clk_h_div, clk_p_div; + unsigned long clk_pll1_rate, clk_pll2_rate; + struct device *dev = priv->dev; + struct clk_hw *hw, *pll1; + u32 value; + + /* Determine the bootloader configured pll1 rate */ + regmap_read(priv->map, EP93XX_SYSCON_CLKSET1, &value); + + if (value & EP93XX_SYSCON_CLKSET1_NBYP1) + clk_pll1_rate = calc_pll_rate(EP93XX_EXT_CLK_RATE, value); + else + clk_pll1_rate = EP93XX_EXT_CLK_RATE; + + pll1 = devm_clk_hw_register_fixed_rate_parent_data(dev, "pll1", &xtali, + 0, clk_pll1_rate); + if (IS_ERR(pll1)) + return PTR_ERR(pll1); + + priv->fixed[EP93XX_CLK_PLL1] = pll1; + + /* Initialize the pll1 derived clocks */ + clk_f_div = fclk_divisors[(value >> 25) & GENMASK(2, 0)]; + clk_h_div = hclk_divisors[(value >> 20) & GENMASK(2, 0)]; + clk_p_div = pclk_divisors[(value >> 18) & GENMASK(1, 0)]; + + hw = devm_clk_hw_register_fixed_factor_parent_hw(dev, "fclk", pll1, 0, 1, clk_f_div); + if (IS_ERR(hw)) + return PTR_ERR(hw); + + priv->fixed[EP93XX_CLK_FCLK] = hw; + + hw = devm_clk_hw_register_fixed_factor_parent_hw(dev, "hclk", pll1, 0, 1, clk_h_div); + if (IS_ERR(hw)) + return PTR_ERR(hw); + + priv->fixed[EP93XX_CLK_HCLK] = hw; + + hw = devm_clk_hw_register_fixed_factor_parent_hw(dev, "pclk", hw, 0, 1, clk_p_div); + if (IS_ERR(hw)) + return PTR_ERR(hw); + + priv->fixed[EP93XX_CLK_PCLK] = hw; + + /* Determine the bootloader configured pll2 rate */ + regmap_read(priv->map, EP93XX_SYSCON_CLKSET2, &value); + if (!(value & EP93XX_SYSCON_CLKSET2_NBYP2)) + clk_pll2_rate = EP93XX_EXT_CLK_RATE; + else if (value & EP93XX_SYSCON_CLKSET2_PLL2_EN) + clk_pll2_rate = calc_pll_rate(EP93XX_EXT_CLK_RATE, value); + else + clk_pll2_rate = 0; + + hw = devm_clk_hw_register_fixed_rate_parent_data(dev, "pll2", &xtali, + 0, clk_pll2_rate); + if (IS_ERR(hw)) + return PTR_ERR(hw); + + priv->fixed[EP93XX_CLK_PLL2] = hw; + + return 0; +} + +static int ep93xx_clk_probe(struct auxiliary_device *adev, + const struct auxiliary_device_id *id) +{ + struct ep93xx_regmap_adev *rdev = to_ep93xx_regmap_adev(adev); + struct clk_parent_data xtali = { .index = 0 }; + struct clk_parent_data ddiv_pdata[3] = { }; + unsigned int clk_spi_div, clk_usb_div; + struct clk_parent_data pdata = {}; + struct device *dev = &adev->dev; + struct ep93xx_clk_priv *priv; + struct ep93xx_clk *clk; + struct clk_hw *hw; + unsigned int idx; + int ret; + u32 value; + + priv = devm_kzalloc(dev, struct_size(priv, reg, 10), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + spin_lock_init(&priv->lock); + priv->dev = dev; + priv->aux_dev = rdev; + priv->map = rdev->map; + priv->base = rdev->base; + + ret = ep93xx_plls_init(priv); + if (ret) + return ret; + + regmap_read(priv->map, EP93XX_SYSCON_CLKSET2, &value); + clk_usb_div = (value >> 28 & GENMASK(3, 0)) + 1; + hw = devm_clk_hw_register_fixed_factor_parent_hw(dev, "usb_clk", + priv->fixed[EP93XX_CLK_PLL2], 0, 1, + clk_usb_div); + if (IS_ERR(hw)) + return PTR_ERR(hw); + + priv->fixed[EP93XX_CLK_USB] = hw; + + ret = ep93xx_uart_clock_init(priv); + if (ret) + return ret; + + ret = ep93xx_dma_clock_init(priv); + if (ret) + return ret; + + clk_spi_div = id->driver_data; + hw = devm_clk_hw_register_fixed_factor_index(dev, "ep93xx-spi.0", + 0, /* XTALI external clock */ + 0, 1, clk_spi_div); + if (IS_ERR(hw)) + return PTR_ERR(hw); + + priv->fixed[EP93XX_CLK_SPI] = hw; + + /* PWM clock */ + hw = devm_clk_hw_register_fixed_factor_index(dev, "pwm_clk", 0, /* XTALI external clock */ + 0, 1, 1); + if (IS_ERR(hw)) + return PTR_ERR(hw); + + priv->fixed[EP93XX_CLK_PWM] = hw; + + /* USB clock */ + pdata.hw = priv->fixed[EP93XX_CLK_USB]; + hw = devm_clk_hw_register_gate_parent_data(priv->dev, "ohci-platform", &pdata, + 0, priv->base + EP93XX_SYSCON_PWRCNT, + EP93XX_SYSCON_PWRCNT_USH_EN, 0, + &priv->lock); + if (IS_ERR(hw)) + return PTR_ERR(hw); + + priv->fixed[EP93XX_CLK_USB] = hw; + + ddiv_pdata[0].index = 0; /* XTALI external clock */ + ddiv_pdata[1].hw = priv->fixed[EP93XX_CLK_PLL1]; + ddiv_pdata[2].hw = priv->fixed[EP93XX_CLK_PLL2]; + + /* touchscreen/ADC clock */ + idx = EP93XX_CLK_ADC - EP93XX_CLK_UART1; + clk = &priv->reg[idx]; + clk->idx = idx; + ret = ep93xx_register_div(clk, "ep93xx-adc", &xtali, + EP93XX_SYSCON_KEYTCHCLKDIV, + EP93XX_SYSCON_KEYTCHCLKDIV_TSEN, + EP93XX_SYSCON_KEYTCHCLKDIV_ADIV, + 1, + ep93xx_adc_divisors, + ARRAY_SIZE(ep93xx_adc_divisors)); + + + /* keypad clock */ + idx = EP93XX_CLK_KEYPAD - EP93XX_CLK_UART1; + clk = &priv->reg[idx]; + clk->idx = idx; + ret = ep93xx_register_div(clk, "ep93xx-keypad", &xtali, + EP93XX_SYSCON_KEYTCHCLKDIV, + EP93XX_SYSCON_KEYTCHCLKDIV_KEN, + EP93XX_SYSCON_KEYTCHCLKDIV_KDIV, + 1, + ep93xx_adc_divisors, + ARRAY_SIZE(ep93xx_adc_divisors)); + + /* + * On reset PDIV and VDIV is set to zero, while PDIV zero + * means clock disable, VDIV shouldn't be zero. + * So we set both video and i2s dividers to minimum. + * ENA - Enable CLK divider. + * PDIV - 00 - Disable clock + * VDIV - at least 2 + */ + + /* Check and enable video clk registers */ + regmap_read(priv->map, EP93XX_SYSCON_VIDCLKDIV, &value); + value |= BIT(EP93XX_SYSCON_CLKDIV_PDIV_SHIFT) | 2; + ep93xx_clk_write(priv, EP93XX_SYSCON_VIDCLKDIV, value); + + /* Check and enable i2s clk registers */ + regmap_read(priv->map, EP93XX_SYSCON_I2SCLKDIV, &value); + value |= BIT(EP93XX_SYSCON_CLKDIV_PDIV_SHIFT) | 2; + + /* + * Override the SAI_MSTR_CLK_CFG from the I2S block and use the + * I2SClkDiv Register settings. LRCLK transitions on the falling SCLK + * edge. + */ + value |= EP93XX_SYSCON_I2SCLKDIV_ORIDE | EP93XX_SYSCON_I2SCLKDIV_SPOL; + ep93xx_clk_write(priv, EP93XX_SYSCON_I2SCLKDIV, value); + + /* video clk */ + idx = EP93XX_CLK_VIDEO - EP93XX_CLK_UART1; + clk = &priv->reg[idx]; + clk->idx = idx; + ret = ep93xx_clk_register_ddiv(clk, "ep93xx-fb", + ddiv_pdata, ARRAY_SIZE(ddiv_pdata), + EP93XX_SYSCON_VIDCLKDIV, + EP93XX_SYSCON_CLKDIV_ENABLE); + + /* i2s clk */ + idx = EP93XX_CLK_I2S_MCLK - EP93XX_CLK_UART1; + clk = &priv->reg[idx]; + clk->idx = idx; + ret = ep93xx_clk_register_ddiv(clk, "mclk", + ddiv_pdata, ARRAY_SIZE(ddiv_pdata), + EP93XX_SYSCON_I2SCLKDIV, + EP93XX_SYSCON_CLKDIV_ENABLE); + + /* i2s sclk */ + idx = EP93XX_CLK_I2S_SCLK - EP93XX_CLK_UART1; + clk = &priv->reg[idx]; + clk->idx = idx; + pdata.hw = &priv->reg[EP93XX_CLK_I2S_MCLK - EP93XX_CLK_UART1].hw; + ret = ep93xx_register_div(clk, "sclk", &pdata, + EP93XX_SYSCON_I2SCLKDIV, + EP93XX_SYSCON_I2SCLKDIV_SENA, + 16, /* EP93XX_I2SCLKDIV_SDIV_SHIFT */ + 1, /* EP93XX_I2SCLKDIV_SDIV_WIDTH */ + ep93xx_sclk_divisors, + ARRAY_SIZE(ep93xx_sclk_divisors)); + + /* i2s lrclk */ + idx = EP93XX_CLK_I2S_LRCLK - EP93XX_CLK_UART1; + clk = &priv->reg[idx]; + clk->idx = idx; + pdata.hw = &priv->reg[EP93XX_CLK_I2S_SCLK - EP93XX_CLK_UART1].hw; + ret = ep93xx_register_div(clk, "lrclk", &pdata, + EP93XX_SYSCON_I2SCLKDIV, + EP93XX_SYSCON_I2SCLKDIV_SENA, + 17, /* EP93XX_I2SCLKDIV_LRDIV32_SHIFT */ + 2, /* EP93XX_I2SCLKDIV_LRDIV32_WIDTH */ + ep93xx_lrclk_divisors, + ARRAY_SIZE(ep93xx_lrclk_divisors)); + + /* IrDa clk uses same pattern but no init code presents in original clock driver */ + return devm_of_clk_add_hw_provider(priv->dev, of_clk_ep93xx_get, priv); +} + +static const struct auxiliary_device_id ep93xx_clk_ids[] = { + { .name = "soc_ep93xx.clk-ep93xx", .driver_data = 2, }, + { .name = "soc_ep93xx.clk-ep93xx.e2", .driver_data = 1, }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(auxiliary, ep93xx_clk_ids); + +static struct auxiliary_driver ep93xx_clk_driver = { + .probe = ep93xx_clk_probe, + .id_table = ep93xx_clk_ids, +}; +module_auxiliary_driver(ep93xx_clk_driver); From 035f90076fd1cafb17468a2dcef7aad189190980 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:29 +0300 Subject: [PATCH 215/655] pinctrl: add a Cirrus ep93xx SoC pin controller Add a pin control (only multiplexing) driver for ep93xx SoC so we can fully convert ep93xx to device tree. This driver is capable of muxing ep9301/ep9302/ep9307/ep9312/ep9315 variants, this is chosen based on "compatible" in device tree. Co-developed-by: Alexander Sverdlin Signed-off-by: Alexander Sverdlin Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Reviewed-by: Linus Walleij Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- drivers/pinctrl/Kconfig | 7 + drivers/pinctrl/Makefile | 1 + drivers/pinctrl/pinctrl-ep93xx.c | 1434 ++++++++++++++++++++++++++++++ 3 files changed, 1442 insertions(+) create mode 100644 drivers/pinctrl/pinctrl-ep93xx.c diff --git a/drivers/pinctrl/Kconfig b/drivers/pinctrl/Kconfig index 7e4f93a3bc7a..85f1c74330ce 100644 --- a/drivers/pinctrl/Kconfig +++ b/drivers/pinctrl/Kconfig @@ -194,6 +194,13 @@ config PINCTRL_DIGICOLOR select PINMUX select GENERIC_PINCONF +config PINCTRL_EP93XX + bool + depends on ARCH_EP93XX || COMPILE_TEST + select PINMUX + select GENERIC_PINCONF + select MFD_SYSCON + config PINCTRL_EQUILIBRIUM tristate "Generic pinctrl and GPIO driver for Intel Lightning Mountain SoC" depends on OF && HAS_IOMEM diff --git a/drivers/pinctrl/Makefile b/drivers/pinctrl/Makefile index cc809669405a..f28602d95424 100644 --- a/drivers/pinctrl/Makefile +++ b/drivers/pinctrl/Makefile @@ -23,6 +23,7 @@ obj-$(CONFIG_PINCTRL_DA850_PUPD) += pinctrl-da850-pupd.o obj-$(CONFIG_PINCTRL_DA9062) += pinctrl-da9062.o obj-$(CONFIG_PINCTRL_DIGICOLOR) += pinctrl-digicolor.o obj-$(CONFIG_PINCTRL_EQUILIBRIUM) += pinctrl-equilibrium.o +obj-$(CONFIG_PINCTRL_EP93XX) += pinctrl-ep93xx.o obj-$(CONFIG_PINCTRL_GEMINI) += pinctrl-gemini.o obj-$(CONFIG_PINCTRL_INGENIC) += pinctrl-ingenic.o obj-$(CONFIG_PINCTRL_K210) += pinctrl-k210.o diff --git a/drivers/pinctrl/pinctrl-ep93xx.c b/drivers/pinctrl/pinctrl-ep93xx.c new file mode 100644 index 000000000000..458cb0e99c65 --- /dev/null +++ b/drivers/pinctrl/pinctrl-ep93xx.c @@ -0,0 +1,1434 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Driver for the EP93xx pin controller + * based on linux/drivers/pinctrl/pinmux-gemini.c + * + * Copyright (C) 2022 Nikita Shubin + * + * This is a group-only pin controller. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +#include "pinctrl-utils.h" + +#define DRIVER_NAME "pinctrl-ep93xx" + +enum ep93xx_pinctrl_model { + EP93XX_9301_PINCTRL, + EP93XX_9307_PINCTRL, + EP93XX_9312_PINCTRL, +}; + +struct ep93xx_pmx { + struct device *dev; + struct pinctrl_dev *pctl; + struct ep93xx_regmap_adev *aux_dev; + struct regmap *map; + enum ep93xx_pinctrl_model model; +}; + +static void ep93xx_pinctrl_update_bits(struct ep93xx_pmx *pmx, unsigned int reg, + unsigned int mask, unsigned int val) +{ + struct ep93xx_regmap_adev *aux = pmx->aux_dev; + + aux->update_bits(aux->map, aux->lock, reg, mask, val); +} + +struct ep93xx_pin_group { + struct pingroup grp; + u32 mask; + u32 value; +}; + +#define PMX_GROUP(_name, _pins, _mask, _value) \ + { \ + .grp = PINCTRL_PINGROUP(_name, _pins, ARRAY_SIZE(_pins)), \ + .mask = _mask, \ + .value = _value, \ + } + +#define EP93XX_SYSCON_DEVCFG 0x80 + +/* + * There are several system configuration options selectable by the DeviceCfg and SysCfg + * registers. These registers provide the selection of several pin multiplexing options and also + * provide software access to the system reset configuration options. Please refer to the + * descriptions of the registers, “DeviceCfg” on page 5-25 and “SysCfg” on page 5-34, for a + * detailed explanation. + */ +#define EP93XX_SYSCON_DEVCFG_D1ONG BIT(30) +#define EP93XX_SYSCON_DEVCFG_D0ONG BIT(29) +#define EP93XX_SYSCON_DEVCFG_IONU2 BIT(28) +#define EP93XX_SYSCON_DEVCFG_GONK BIT(27) +#define EP93XX_SYSCON_DEVCFG_TONG BIT(26) +#define EP93XX_SYSCON_DEVCFG_MONG BIT(25) +#define EP93XX_SYSCON_DEVCFG_A2ONG BIT(22) +#define EP93XX_SYSCON_DEVCFG_A1ONG BIT(21) +#define EP93XX_SYSCON_DEVCFG_HONIDE BIT(11) +#define EP93XX_SYSCON_DEVCFG_GONIDE BIT(10) +#define EP93XX_SYSCON_DEVCFG_PONG BIT(9) +#define EP93XX_SYSCON_DEVCFG_EONIDE BIT(8) +#define EP93XX_SYSCON_DEVCFG_I2SONSSP BIT(7) +#define EP93XX_SYSCON_DEVCFG_I2SONAC97 BIT(6) +#define EP93XX_SYSCON_DEVCFG_RASONP3 BIT(4) + +#define PADS_MASK (GENMASK(30, 25) | BIT(22) | BIT(21) | GENMASK(11, 6) | BIT(4)) +#define PADS_MAXBIT 30 + +/* Ordered by bit index */ +static const char * const ep93xx_padgroups[] = { + NULL, NULL, NULL, NULL, + "RasOnP3", + NULL, + "I2SonAC97", + "I2SonSSP", + "EonIDE", + "PonG", + "GonIDE", + "HonIDE", + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + "A1onG", + "A2onG", + NULL, NULL, + "MonG", + "TonG", + "GonK", + "IonU2", + "D0onG", + "D1onG", +}; + +/* ep9301, ep9302 */ +static const struct pinctrl_pin_desc ep9301_pins[] = { + PINCTRL_PIN(1, "CSn[7]"), + PINCTRL_PIN(2, "CSn[6]"), + PINCTRL_PIN(3, "CSn[3]"), + PINCTRL_PIN(4, "CSn[2]"), + PINCTRL_PIN(5, "CSn[1]"), + PINCTRL_PIN(6, "AD[25]"), + PINCTRL_PIN(7, "vdd_ring"), + PINCTRL_PIN(8, "gnd_ring"), + PINCTRL_PIN(9, "AD[24]"), + PINCTRL_PIN(10, "SDCLK"), + PINCTRL_PIN(11, "AD[23]"), + PINCTRL_PIN(12, "vdd_core"), + PINCTRL_PIN(13, "gnd_core"), + PINCTRL_PIN(14, "SDWEn"), + PINCTRL_PIN(15, "SDCSn[3]"), + PINCTRL_PIN(16, "SDCSn[2]"), + PINCTRL_PIN(17, "SDCSn[1]"), + PINCTRL_PIN(18, "SDCSn[0]"), + PINCTRL_PIN(19, "vdd_ring"), + PINCTRL_PIN(20, "gnd_ring"), + PINCTRL_PIN(21, "RASn"), + PINCTRL_PIN(22, "CASn"), + PINCTRL_PIN(23, "DQMn[1]"), + PINCTRL_PIN(24, "DQMn[0]"), + PINCTRL_PIN(25, "AD[22]"), + PINCTRL_PIN(26, "AD[21]"), + PINCTRL_PIN(27, "vdd_ring"), + PINCTRL_PIN(28, "gnd_ring"), + PINCTRL_PIN(29, "DA[15]"), + PINCTRL_PIN(30, "AD[7]"), + PINCTRL_PIN(31, "DA[14]"), + PINCTRL_PIN(32, "AD[6]"), + PINCTRL_PIN(33, "DA[13]"), + PINCTRL_PIN(34, "vdd_core"), + PINCTRL_PIN(35, "gnd_core"), + PINCTRL_PIN(36, "AD[5]"), + PINCTRL_PIN(37, "DA[12]"), + PINCTRL_PIN(38, "AD[4]"), + PINCTRL_PIN(39, "DA[11]"), + PINCTRL_PIN(40, "AD[3]"), + PINCTRL_PIN(41, "vdd_ring"), + PINCTRL_PIN(42, "gnd_ring"), + PINCTRL_PIN(43, "DA[10]"), + PINCTRL_PIN(44, "AD[2]"), + PINCTRL_PIN(45, "DA[9]"), + PINCTRL_PIN(46, "AD[1]"), + PINCTRL_PIN(47, "DA[8]"), + PINCTRL_PIN(48, "AD[0]"), + PINCTRL_PIN(49, "vdd_ring"), + PINCTRL_PIN(50, "gnd_ring"), + PINCTRL_PIN(51, "NC"), + PINCTRL_PIN(52, "NC"), + PINCTRL_PIN(53, "vdd_ring"), + PINCTRL_PIN(54, "gnd_ring"), + PINCTRL_PIN(55, "AD[15]"), + PINCTRL_PIN(56, "DA[7]"), + PINCTRL_PIN(57, "vdd_core"), + PINCTRL_PIN(58, "gnd_core"), + PINCTRL_PIN(59, "AD[14]"), + PINCTRL_PIN(60, "DA[6]"), + PINCTRL_PIN(61, "AD[13]"), + PINCTRL_PIN(62, "DA[5]"), + PINCTRL_PIN(63, "AD[12]"), + PINCTRL_PIN(64, "DA[4]"), + PINCTRL_PIN(65, "AD[11]"), + PINCTRL_PIN(66, "vdd_ring"), + PINCTRL_PIN(67, "gnd_ring"), + PINCTRL_PIN(68, "DA[3]"), + PINCTRL_PIN(69, "AD[10]"), + PINCTRL_PIN(70, "DA[2]"), + PINCTRL_PIN(71, "AD[9]"), + PINCTRL_PIN(72, "DA[1]"), + PINCTRL_PIN(73, "AD[8]"), + PINCTRL_PIN(74, "DA[0]"), + PINCTRL_PIN(75, "DSRn"), + PINCTRL_PIN(76, "DTRn"), + PINCTRL_PIN(77, "TCK"), + PINCTRL_PIN(78, "TDI"), + PINCTRL_PIN(79, "TDO"), + PINCTRL_PIN(80, "TMS"), + PINCTRL_PIN(81, "vdd_ring"), + PINCTRL_PIN(82, "gnd_ring"), + PINCTRL_PIN(83, "BOOT[1]"), + PINCTRL_PIN(84, "BOOT[0]"), + PINCTRL_PIN(85, "gnd_ring"), + PINCTRL_PIN(86, "NC"), + PINCTRL_PIN(87, "EECLK"), + PINCTRL_PIN(88, "EEDAT"), + PINCTRL_PIN(89, "ASYNC"), + PINCTRL_PIN(90, "vdd_core"), + PINCTRL_PIN(91, "gnd_core"), + PINCTRL_PIN(92, "ASDO"), + PINCTRL_PIN(93, "SCLK1"), + PINCTRL_PIN(94, "SFRM1"), + PINCTRL_PIN(95, "SSPRX1"), + PINCTRL_PIN(96, "SSPTX1"), + PINCTRL_PIN(97, "GRLED"), + PINCTRL_PIN(98, "RDLED"), + PINCTRL_PIN(99, "vdd_ring"), + PINCTRL_PIN(100, "gnd_ring"), + PINCTRL_PIN(101, "INT[3]"), + PINCTRL_PIN(102, "INT[1]"), + PINCTRL_PIN(103, "INT[0]"), + PINCTRL_PIN(104, "RTSn"), + PINCTRL_PIN(105, "USBm[0]"), + PINCTRL_PIN(106, "USBp[0]"), + PINCTRL_PIN(107, "ABITCLK"), + PINCTRL_PIN(108, "CTSn"), + PINCTRL_PIN(109, "RXD[0]"), + PINCTRL_PIN(110, "RXD[1]"), + PINCTRL_PIN(111, "vdd_ring"), + PINCTRL_PIN(112, "gnd_ring"), + PINCTRL_PIN(113, "TXD[0]"), + PINCTRL_PIN(114, "TXD[1]"), + PINCTRL_PIN(115, "CGPIO[0]"), + PINCTRL_PIN(116, "gnd_core"), + PINCTRL_PIN(117, "PLL_GND"), + PINCTRL_PIN(118, "XTALI"), + PINCTRL_PIN(119, "XTALO"), + PINCTRL_PIN(120, "PLL_VDD"), + PINCTRL_PIN(121, "vdd_core"), + PINCTRL_PIN(122, "gnd_ring"), + PINCTRL_PIN(123, "vdd_ring"), + PINCTRL_PIN(124, "RSTOn"), + PINCTRL_PIN(125, "PRSTn"), + PINCTRL_PIN(126, "CSn[0]"), + PINCTRL_PIN(127, "gnd_core"), + PINCTRL_PIN(128, "vdd_core"), + PINCTRL_PIN(129, "gnd_ring"), + PINCTRL_PIN(130, "vdd_ring"), + PINCTRL_PIN(131, "ADC[4]"), + PINCTRL_PIN(132, "ADC[3]"), + PINCTRL_PIN(133, "ADC[2]"), + PINCTRL_PIN(134, "ADC[1]"), + PINCTRL_PIN(135, "ADC[0]"), + PINCTRL_PIN(136, "ADC_VDD"), + PINCTRL_PIN(137, "RTCXTALI"), + PINCTRL_PIN(138, "RTCXTALO"), + PINCTRL_PIN(139, "ADC_GND"), + PINCTRL_PIN(140, "EGPIO[11]"), + PINCTRL_PIN(141, "EGPIO[10]"), + PINCTRL_PIN(142, "EGPIO[9]"), + PINCTRL_PIN(143, "EGPIO[8]"), + PINCTRL_PIN(144, "EGPIO[7]"), + PINCTRL_PIN(145, "EGPIO[6]"), + PINCTRL_PIN(146, "EGPIO[5]"), + PINCTRL_PIN(147, "EGPIO[4]"), + PINCTRL_PIN(148, "EGPIO[3]"), + PINCTRL_PIN(149, "gnd_ring"), + PINCTRL_PIN(150, "vdd_ring"), + PINCTRL_PIN(151, "EGPIO[2]"), + PINCTRL_PIN(152, "EGPIO[1]"), + PINCTRL_PIN(153, "EGPIO[0]"), + PINCTRL_PIN(154, "ARSTn"), + PINCTRL_PIN(155, "TRSTn"), + PINCTRL_PIN(156, "ASDI"), + PINCTRL_PIN(157, "USBm[2]"), + PINCTRL_PIN(158, "USBp[2]"), + PINCTRL_PIN(159, "WAITn"), + PINCTRL_PIN(160, "EGPIO[15]"), + PINCTRL_PIN(161, "gnd_ring"), + PINCTRL_PIN(162, "vdd_ring"), + PINCTRL_PIN(163, "EGPIO[14]"), + PINCTRL_PIN(164, "EGPIO[13]"), + PINCTRL_PIN(165, "EGPIO[12]"), + PINCTRL_PIN(166, "gnd_core"), + PINCTRL_PIN(167, "vdd_core"), + PINCTRL_PIN(168, "FGPIO[3]"), + PINCTRL_PIN(169, "FGPIO[2]"), + PINCTRL_PIN(170, "FGPIO[1]"), + PINCTRL_PIN(171, "gnd_ring"), + PINCTRL_PIN(172, "vdd_ring"), + PINCTRL_PIN(173, "CLD"), + PINCTRL_PIN(174, "CRS"), + PINCTRL_PIN(175, "TXERR"), + PINCTRL_PIN(176, "TXEN"), + PINCTRL_PIN(177, "MIITXD[0]"), + PINCTRL_PIN(178, "MIITXD[1]"), + PINCTRL_PIN(179, "MIITXD[2]"), + PINCTRL_PIN(180, "MIITXD[3]"), + PINCTRL_PIN(181, "TXCLK"), + PINCTRL_PIN(182, "RXERR"), + PINCTRL_PIN(183, "RXDVAL"), + PINCTRL_PIN(184, "MIIRXD[0]"), + PINCTRL_PIN(185, "MIIRXD[1]"), + PINCTRL_PIN(186, "MIIRXD[2]"), + PINCTRL_PIN(187, "gnd_ring"), + PINCTRL_PIN(188, "vdd_ring"), + PINCTRL_PIN(189, "MIIRXD[3]"), + PINCTRL_PIN(190, "RXCLK"), + PINCTRL_PIN(191, "MDIO"), + PINCTRL_PIN(192, "MDC"), + PINCTRL_PIN(193, "RDn"), + PINCTRL_PIN(194, "WRn"), + PINCTRL_PIN(195, "AD[16]"), + PINCTRL_PIN(196, "AD[17]"), + PINCTRL_PIN(197, "gnd_core"), + PINCTRL_PIN(198, "vdd_core"), + PINCTRL_PIN(199, "HGPIO[2]"), + PINCTRL_PIN(200, "HGPIO[3]"), + PINCTRL_PIN(201, "HGPIO[4]"), + PINCTRL_PIN(202, "HGPIO[5]"), + PINCTRL_PIN(203, "gnd_ring"), + PINCTRL_PIN(204, "vdd_ring"), + PINCTRL_PIN(205, "AD[18]"), + PINCTRL_PIN(206, "AD[19]"), + PINCTRL_PIN(207, "AD[20]"), + PINCTRL_PIN(208, "SDCLKEN"), +}; + +static const unsigned int ssp_ep9301_pins[] = { + 93, 94, 95, 96, +}; + +static const unsigned int ac97_ep9301_pins[] = { + 89, 92, 107, 154, 156, +}; + +/* + * Note: The EP9307 processor has one PWM with one output, PWMOUT. + * Note: The EP9301, EP9302, EP9312, and EP9315 processors each have two PWMs with + * two outputs, PWMOUT and PWMO1. PWMO1 is an alternate function for EGPIO14. + */ +/* The GPIO14E (14) pin overlap with pwm1 */ +static const unsigned int pwm_9301_pins[] = { 163 }; + +static const unsigned int gpio1a_9301_pins[] = { 163 }; + +/* ep9301/9302 have only 0 pin of GPIO C Port exposed */ +static const unsigned int gpio2a_9301_pins[] = { 115 }; + +/* ep9301/9302 have only 4,5 pin of GPIO E Port exposed */ +static const unsigned int gpio4a_9301_pins[] = { 97, 98 }; + +/* ep9301/9302 have only 4,5 pin of GPIO G Port exposed */ +static const unsigned int gpio6a_9301_pins[] = { 87, 88 }; + +static const unsigned int gpio7a_9301_pins[] = { 199, 200, 201, 202 }; + +/* Groups for the ep9301/ep9302 SoC/package */ +static const struct ep93xx_pin_group ep9301_pin_groups[] = { + PMX_GROUP("ssp", ssp_ep9301_pins, EP93XX_SYSCON_DEVCFG_I2SONSSP, 0), + PMX_GROUP("i2s_on_ssp", ssp_ep9301_pins, EP93XX_SYSCON_DEVCFG_I2SONSSP, + EP93XX_SYSCON_DEVCFG_I2SONSSP), + PMX_GROUP("ac97", ac97_ep9301_pins, EP93XX_SYSCON_DEVCFG_I2SONAC97, 0), + PMX_GROUP("i2s_on_ac97", ac97_ep9301_pins, EP93XX_SYSCON_DEVCFG_I2SONAC97, + EP93XX_SYSCON_DEVCFG_I2SONAC97), + PMX_GROUP("pwm1", pwm_9301_pins, EP93XX_SYSCON_DEVCFG_PONG, EP93XX_SYSCON_DEVCFG_PONG), + PMX_GROUP("gpio1agrp", gpio1a_9301_pins, EP93XX_SYSCON_DEVCFG_PONG, 0), + PMX_GROUP("gpio2agrp", gpio2a_9301_pins, EP93XX_SYSCON_DEVCFG_GONK, + EP93XX_SYSCON_DEVCFG_GONK), + PMX_GROUP("gpio4agrp", gpio4a_9301_pins, EP93XX_SYSCON_DEVCFG_EONIDE, + EP93XX_SYSCON_DEVCFG_EONIDE), + PMX_GROUP("gpio6agrp", gpio6a_9301_pins, EP93XX_SYSCON_DEVCFG_GONIDE, + EP93XX_SYSCON_DEVCFG_GONIDE), + PMX_GROUP("gpio7agrp", gpio7a_9301_pins, EP93XX_SYSCON_DEVCFG_HONIDE, + EP93XX_SYSCON_DEVCFG_HONIDE), +}; + +static const struct pinctrl_pin_desc ep9307_pins[] = { + /* Row A */ + PINCTRL_PIN(0, "CSn[1]"), /* A1 */ + PINCTRL_PIN(1, "CSn[7]"), /* A2 */ + PINCTRL_PIN(2, "SDCLKEN"), /* A3 */ + PINCTRL_PIN(3, "DA[31]"), /* A4 */ + PINCTRL_PIN(4, "DA[29]"), /* A5 */ + PINCTRL_PIN(5, "DA[27]"), /* A6 */ + PINCTRL_PIN(6, "HGPIO[2]"), /* A7 */ + PINCTRL_PIN(7, "RDn"), /* A8 */ + PINCTRL_PIN(8, "MIIRXD[3]"), /* A9 */ + PINCTRL_PIN(9, "RXDVAL"), /* A10 */ + PINCTRL_PIN(10, "MIITXD[1]"), /* A11 */ + PINCTRL_PIN(11, "CRS"), /* A12 */ + PINCTRL_PIN(12, "FGPIO[7]"), /* A13 */ + PINCTRL_PIN(13, "FGPIO[0]"), /* A14 */ + PINCTRL_PIN(14, "WAITn"), /* A15 */ + PINCTRL_PIN(15, "USBm[2]"), /* A16 */ + PINCTRL_PIN(16, "ASDI"), /* A17 */ + /* Row B */ + PINCTRL_PIN(17, "AD[25]"), /* B1 */ + PINCTRL_PIN(18, "CSn[2]"), /* B2 */ + PINCTRL_PIN(19, "CSn[6]"), /* B3 */ + PINCTRL_PIN(20, "AD[20]"), /* B4 */ + PINCTRL_PIN(21, "DA[30]"), /* B5 */ + PINCTRL_PIN(22, "AD[18]"), /* B6 */ + PINCTRL_PIN(23, "HGPIO[3]"), /* B7 */ + PINCTRL_PIN(24, "AD[17]"), /* B8 */ + PINCTRL_PIN(25, "RXCLK"), /* B9 */ + PINCTRL_PIN(26, "MIIRXD[1]"), /* B10 */ + PINCTRL_PIN(27, "MIITXD[2]"), /* B11 */ + PINCTRL_PIN(28, "TXEN"), /* B12 */ + PINCTRL_PIN(29, "FGPIO[5]"), /* B13 */ + PINCTRL_PIN(30, "EGPIO[15]"), /* B14 */ + PINCTRL_PIN(31, "USBp[2]"), /* B15 */ + PINCTRL_PIN(32, "ARSTn"), /* B16 */ + PINCTRL_PIN(33, "ADC_VDD"), /* B17 */ + /* Row C */ + PINCTRL_PIN(34, "AD[23]"), /* C1 */ + PINCTRL_PIN(35, "DA[26]"), /* C2 */ + PINCTRL_PIN(36, "CSn[3]"), /* C3 */ + PINCTRL_PIN(37, "DA[25]"), /* C4 */ + PINCTRL_PIN(38, "AD[24]"), /* C5 */ + PINCTRL_PIN(39, "AD[19]"), /* C6 */ + PINCTRL_PIN(40, "HGPIO[5]"), /* C7 */ + PINCTRL_PIN(41, "WRn"), /* C8 */ + PINCTRL_PIN(42, "MDIO"), /* C9 */ + PINCTRL_PIN(43, "MIIRXD[2]"), /* C10 */ + PINCTRL_PIN(44, "TXCLK"), /* C11 */ + PINCTRL_PIN(45, "MIITXD[0]"), /* C12 */ + PINCTRL_PIN(46, "CLD"), /* C13 */ + PINCTRL_PIN(47, "EGPIO[13]"), /* C14 */ + PINCTRL_PIN(48, "TRSTn"), /* C15 */ + PINCTRL_PIN(49, "Xp"), /* C16 */ + PINCTRL_PIN(50, "Xm"), /* C17 */ + /* Row D */ + PINCTRL_PIN(51, "SDCSn[3]"), /* D1 */ + PINCTRL_PIN(52, "DA[23]"), /* D2 */ + PINCTRL_PIN(53, "SDCLK"), /* D3 */ + PINCTRL_PIN(54, "DA[24]"), /* D4 */ + PINCTRL_PIN(55, "HGPIO[7]"), /* D5 */ + PINCTRL_PIN(56, "HGPIO[6]"), /* D6 */ + PINCTRL_PIN(57, "A[28]"), /* D7 */ + PINCTRL_PIN(58, "HGPIO[4]"), /* D8 */ + PINCTRL_PIN(59, "AD[16]"), /* D9 */ + PINCTRL_PIN(60, "MDC"), /* D10 */ + PINCTRL_PIN(61, "RXERR"), /* D11 */ + PINCTRL_PIN(62, "MIITXD[3]"), /* D12 */ + PINCTRL_PIN(63, "EGPIO[12]"), /* D13 */ + PINCTRL_PIN(64, "EGPIO[1]"), /* D14 */ + PINCTRL_PIN(65, "EGPIO[0]"), /* D15 */ + PINCTRL_PIN(66, "Ym"), /* D16 */ + PINCTRL_PIN(67, "Yp"), /* D17 */ + /* Row E */ + PINCTRL_PIN(68, "SDCSn[2]"), /* E1 */ + PINCTRL_PIN(69, "SDWEN"), /* E2 */ + PINCTRL_PIN(70, "DA[22]"), /* E3 */ + PINCTRL_PIN(71, "AD[3]"), /* E4 */ + PINCTRL_PIN(72, "DA[15]"), /* E5 */ + PINCTRL_PIN(73, "AD[21]"), /* E6 */ + PINCTRL_PIN(74, "DA[17]"), /* E7 */ + PINCTRL_PIN(75, "vddr"), /* E8 */ + PINCTRL_PIN(76, "vddr"), /* E9 */ + PINCTRL_PIN(77, "vddr"), /* E10 */ + PINCTRL_PIN(78, "MIIRXD[0]"), /* E11 */ + PINCTRL_PIN(79, "TXERR"), /* E12 */ + PINCTRL_PIN(80, "EGPIO[2]"), /* E13 */ + PINCTRL_PIN(81, "EGPIO[4]"), /* E14 */ + PINCTRL_PIN(82, "EGPIO[3]"), /* E15 */ + PINCTRL_PIN(83, "sXp"), /* E16 */ + PINCTRL_PIN(84, "sXm"), /* E17 */ + /* Row F */ + PINCTRL_PIN(85, "RASn"), /* F1 */ + PINCTRL_PIN(86, "SDCSn[1]"), /* F2 */ + PINCTRL_PIN(87, "SDCSn[0]"), /* F3 */ + PINCTRL_PIN(88, "DQMn[3]"), /* F4 */ + PINCTRL_PIN(89, "AD[5]"), /* F5 */ + PINCTRL_PIN(90, "gndr"), /* F6 */ + PINCTRL_PIN(91, "gndr"), /* F7 */ + PINCTRL_PIN(92, "gndr"), /* F8 */ + PINCTRL_PIN(93, "vddc"), /* F9 */ + PINCTRL_PIN(94, "vddc"), /* F10 */ + PINCTRL_PIN(95, "gndr"), /* F11 */ + PINCTRL_PIN(96, "EGPIO[7]"), /* F12 */ + PINCTRL_PIN(97, "EGPIO[5]"), /* F13 */ + PINCTRL_PIN(98, "ADC GND"), /* F14 */ + PINCTRL_PIN(99, "EGPIO[6]"), /* F15 */ + PINCTRL_PIN(100, "sYm"), /* F16 */ + PINCTRL_PIN(101, "syp"), /* F17 */ + /* Row G */ + PINCTRL_PIN(102, "DQMn[0]"), /* G1 */ + PINCTRL_PIN(103, "CASn"), /* G2 */ + PINCTRL_PIN(104, "DA[21]"), /* G3 */ + PINCTRL_PIN(105, "AD[22]"), /* G4 */ + PINCTRL_PIN(106, "vddr"), /* G5 */ + PINCTRL_PIN(107, "gndr"), /* G6 */ + PINCTRL_PIN(108, "gndr"), /* G12 */ + PINCTRL_PIN(109, "EGPIO[9]"), /* G13 */ + PINCTRL_PIN(110, "EGPIO[10]"), /* G14 */ + PINCTRL_PIN(111, "EGPIO[11]"), /* G15 */ + PINCTRL_PIN(112, "RTCXTALO"), /* G16 */ + PINCTRL_PIN(113, "RTCXTALI"), /* G17 */ + /* Row H */ + PINCTRL_PIN(114, "DA[18]"), /* H1 */ + PINCTRL_PIN(115, "DA[20]"), /* H2 */ + PINCTRL_PIN(116, "DA[19]"), /* H3 */ + PINCTRL_PIN(117, "DA[16]"), /* H4 */ + PINCTRL_PIN(118, "vddr"), /* H5 */ + PINCTRL_PIN(119, "vddc"), /* H6 */ + PINCTRL_PIN(120, "gndc"), /* H7 */ + PINCTRL_PIN(121, "gndc"), /* H9 */ + PINCTRL_PIN(122, "gndc"), /* H10 */ + PINCTRL_PIN(123, "gndr"), /* H12 */ + PINCTRL_PIN(124, "vddr"), /* H13 */ + PINCTRL_PIN(125, "EGPIO[8]"), /* H14 */ + PINCTRL_PIN(126, "PRSTN"), /* H15 */ + PINCTRL_PIN(127, "COL[7]"), /* H16 */ + PINCTRL_PIN(128, "RSTON"), /* H17 */ + /* Row J */ + PINCTRL_PIN(129, "AD[6]"), /* J1 */ + PINCTRL_PIN(130, "DA[14]"), /* J2 */ + PINCTRL_PIN(131, "AD[7]"), /* J3 */ + PINCTRL_PIN(132, "DA[13]"), /* J4 */ + PINCTRL_PIN(133, "vddr"), /* J5 */ + PINCTRL_PIN(134, "vddc"), /* J6 */ + PINCTRL_PIN(135, "gndc"), /* J8 */ + PINCTRL_PIN(136, "gndc"), /* J10 */ + PINCTRL_PIN(137, "vddc"), /* J12 */ + PINCTRL_PIN(138, "vddr"), /* J13 */ + PINCTRL_PIN(139, "COL[5]"), /* J14 */ + PINCTRL_PIN(140, "COL[6]"), /* J15 */ + PINCTRL_PIN(141, "CSn[0]"), /* J16 */ + PINCTRL_PIN(142, "COL[3]"), /* J17 */ + /* Row K */ + PINCTRL_PIN(143, "AD[4]"), /* K1 */ + PINCTRL_PIN(144, "DA[12]"), /* K2 */ + PINCTRL_PIN(145, "DA[10]"), /* K3 */ + PINCTRL_PIN(146, "DA[11]"), /* K4 */ + PINCTRL_PIN(147, "vddr"), /* K5 */ + PINCTRL_PIN(148, "gndr"), /* K6 */ + PINCTRL_PIN(149, "gndc"), /* K8 */ + PINCTRL_PIN(150, "gndc"), /* K9 */ + PINCTRL_PIN(151, "gndc"), /* K10 */ + PINCTRL_PIN(152, "vddc"), /* K12 */ + PINCTRL_PIN(153, "COL[4]"), /* K13 */ + PINCTRL_PIN(154, "PLL_VDD"), /* K14 */ + PINCTRL_PIN(155, "COL[2]"), /* K15 */ + PINCTRL_PIN(156, "COL[1]"), /* K16 */ + PINCTRL_PIN(157, "COL[0]"), /* K17 */ + /* Row L */ + PINCTRL_PIN(158, "DA[9]"), /* L1 */ + PINCTRL_PIN(159, "AD[2]"), /* L2 */ + PINCTRL_PIN(160, "AD[1]"), /* L3 */ + PINCTRL_PIN(161, "DA[8]"), /* L4 */ + PINCTRL_PIN(162, "BLANK"), /* L5 */ + PINCTRL_PIN(163, "gndr"), /* L6 */ + PINCTRL_PIN(164, "gndr"), /* L7 */ + PINCTRL_PIN(165, "ROW[7]"), /* L8 */ + PINCTRL_PIN(166, "ROW[5]"), /* L9 */ + PINCTRL_PIN(167, "PLL GND"), /* L10 */ + PINCTRL_PIN(168, "XTALI"), /* L11 */ + PINCTRL_PIN(169, "XTALO"), /* L12 */ + /* Row M */ + PINCTRL_PIN(170, "BRIGHT"), /* M1 */ + PINCTRL_PIN(171, "AD[0]"), /* M2 */ + PINCTRL_PIN(172, "DQMn[1]"), /* M3 */ + PINCTRL_PIN(173, "DQMn[2]"), /* M4 */ + PINCTRL_PIN(174, "P[17]"), /* M5 */ + PINCTRL_PIN(175, "gndr"), /* M6 */ + PINCTRL_PIN(176, "gndr"), /* M7 */ + PINCTRL_PIN(177, "vddc"), /* M8 */ + PINCTRL_PIN(178, "vddc"), /* M9 */ + PINCTRL_PIN(179, "gndr"), /* M10 */ + PINCTRL_PIN(180, "gndr"), /* M11 */ + PINCTRL_PIN(181, "ROW[6]"), /* M12 */ + PINCTRL_PIN(182, "ROW[4]"), /* M13 */ + PINCTRL_PIN(183, "ROW[1]"), /* M14 */ + PINCTRL_PIN(184, "ROW[0]"), /* M15 */ + PINCTRL_PIN(185, "ROW[3]"), /* M16 */ + PINCTRL_PIN(186, "ROW[2]"), /* M17 */ + /* Row N */ + PINCTRL_PIN(187, "P[14]"), /* N1 */ + PINCTRL_PIN(188, "P[16]"), /* N2 */ + PINCTRL_PIN(189, "P[15]"), /* N3 */ + PINCTRL_PIN(190, "P[13]"), /* N4 */ + PINCTRL_PIN(191, "P[12]"), /* N5 */ + PINCTRL_PIN(192, "DA[5]"), /* N6 */ + PINCTRL_PIN(193, "vddr"), /* N7 */ + PINCTRL_PIN(194, "vddr"), /* N8 */ + PINCTRL_PIN(195, "vddr"), /* N9 */ + PINCTRL_PIN(196, "vddr"), /* N10 */ + PINCTRL_PIN(197, "EECLK"), /* N11 */ + PINCTRL_PIN(198, "ASDO"), /* N12 */ + PINCTRL_PIN(199, "CTSn"), /* N13 */ + PINCTRL_PIN(200, "RXD[0]"), /* N14 */ + PINCTRL_PIN(201, "TXD[0]"), /* N15 */ + PINCTRL_PIN(202, "TXD[1]"), /* N16 */ + PINCTRL_PIN(203, "TXD[2]"), /* N17 */ + /* Row P */ + PINCTRL_PIN(204, "SPCLK"), /* P1 */ + PINCTRL_PIN(205, "P[10]"), /* P2 */ + PINCTRL_PIN(206, "P[11]"), /* P3 */ + PINCTRL_PIN(207, "P[3]"), /* P4 */ + PINCTRL_PIN(208, "AD[15]"), /* P5 */ + PINCTRL_PIN(209, "AD[13]"), /* P6 */ + PINCTRL_PIN(210, "AD[12]"), /* P7 */ + PINCTRL_PIN(211, "DA[2]"), /* P8 */ + PINCTRL_PIN(212, "AD[8]"), /* P9 */ + PINCTRL_PIN(213, "TCK"), /* P10 */ + PINCTRL_PIN(214, "BOOT[1]"), /* P11 */ + PINCTRL_PIN(215, "EEDAT"), /* P12 */ + PINCTRL_PIN(216, "GRLED"), /* P13 */ + PINCTRL_PIN(217, "RDLED"), /* P14 */ + PINCTRL_PIN(218, "GGPIO[2]"), /* P15 */ + PINCTRL_PIN(219, "RXD[1]"), /* P16 */ + PINCTRL_PIN(220, "RXD[2]"), /* P17 */ + /* Row R */ + PINCTRL_PIN(221, "P[9]"), /* R1 */ + PINCTRL_PIN(222, "HSYNC"), /* R2 */ + PINCTRL_PIN(223, "P[6]"), /* R3 */ + PINCTRL_PIN(224, "P[5]"), /* R4 */ + PINCTRL_PIN(225, "P[0]"), /* R5 */ + PINCTRL_PIN(226, "AD[14]"), /* R6 */ + PINCTRL_PIN(227, "DA[4]"), /* R7 */ + PINCTRL_PIN(228, "DA[1]"), /* R8 */ + PINCTRL_PIN(229, "DTRn"), /* R9 */ + PINCTRL_PIN(230, "TDI"), /* R10 */ + PINCTRL_PIN(231, "BOOT[0]"), /* R11 */ + PINCTRL_PIN(232, "ASYNC"), /* R12 */ + PINCTRL_PIN(233, "SSPTX[1]"), /* R13 */ + PINCTRL_PIN(234, "PWMOUT"), /* R14 */ + PINCTRL_PIN(235, "USBm[0]"), /* R15 */ + PINCTRL_PIN(236, "ABITCLK"), /* R16 */ + PINCTRL_PIN(237, "USBp[0]"), /* R17 */ + /* Row T */ + PINCTRL_PIN(238, "NC"), /* T1 */ + PINCTRL_PIN(239, "NC"), /* T2 */ + PINCTRL_PIN(240, "V_CSYNC"), /* T3 */ + PINCTRL_PIN(241, "P[7]"), /* T4 */ + PINCTRL_PIN(242, "P[2]"), /* T5 */ + PINCTRL_PIN(243, "DA[7]"), /* T6 */ + PINCTRL_PIN(244, "AD[11]"), /* T7 */ + PINCTRL_PIN(245, "AD[9]"), /* T8 */ + PINCTRL_PIN(246, "DSRn"), /* T9 */ + PINCTRL_PIN(247, "TMS"), /* T10 */ + PINCTRL_PIN(248, "gndr"), /* T11 */ + PINCTRL_PIN(249, "SFRM[1]"), /* T12 */ + PINCTRL_PIN(250, "INT[2]"), /* T13 */ + PINCTRL_PIN(251, "INT[0]"), /* T14 */ + PINCTRL_PIN(252, "USBp[1]"), /* T15 */ + PINCTRL_PIN(253, "NC"), /* T16 */ + PINCTRL_PIN(254, "NC"), /* T17 */ + /* Row U */ + PINCTRL_PIN(255, "NC"), /* U1 */ + PINCTRL_PIN(256, "NC"), /* U2 */ + PINCTRL_PIN(257, "P[8]"), /* U3 */ + PINCTRL_PIN(258, "P[4]"), /* U4 */ + PINCTRL_PIN(259, "P[1]"), /* U5 */ + PINCTRL_PIN(260, "DA[6]"), /* U6 */ + PINCTRL_PIN(261, "DA[3]"), /* U7 */ + PINCTRL_PIN(262, "AD[10]"), /* U8 */ + PINCTRL_PIN(263, "DA[0]"), /* U9 */ + PINCTRL_PIN(264, "TDO"), /* U10 */ + PINCTRL_PIN(265, "NC"), /* U11 */ + PINCTRL_PIN(266, "SCLK[1]"), /* U12 */ + PINCTRL_PIN(267, "SSPRX[1]"), /* U13 */ + PINCTRL_PIN(268, "INT[1]"), /* U14 */ + PINCTRL_PIN(269, "RTSn"), /* U15 */ + PINCTRL_PIN(270, "USBm[1]"), /* U16 */ + PINCTRL_PIN(271, "NC"), /* U17 */ +}; + +static const unsigned int ssp_ep9307_pins[] = { + 233, 249, 266, 267, +}; + +static const unsigned int ac97_ep9307_pins[] = { + 16, 32, 198, 232, 236, +}; + +/* I can't find info on those - it's some internal state */ +static const unsigned int raster_on_sdram0_pins[] = { +}; + +static const unsigned int raster_on_sdram3_pins[] = { +}; + +/* ROW[N] */ +static const unsigned int gpio2a_9307_pins[] = { + 165, 166, 181, 182, 183, 184, 185, 186, +}; + +/* COL[N] */ +static const unsigned int gpio3a_9307_pins[] = { + 127, 139, 140, 142, 153, 155, 156, 157, +}; + +static const unsigned int keypad_9307_pins[] = { + 127, 139, 140, 142, 153, 155, 156, 157, + 165, 166, 181, 182, 183, 184, 185, 186, +}; + +/* ep9307 have only 4,5 pin of GPIO E Port exposed */ +static const unsigned int gpio4a_9307_pins[] = { 216, 217 }; + +/* ep9307 have only 2 pin of GPIO G Port exposed */ +static const unsigned int gpio6a_9307_pins[] = { 219 }; + +static const unsigned int gpio7a_9307_pins[] = { 7, 24, 41, 56, 57, 59 }; + +static const struct ep93xx_pin_group ep9307_pin_groups[] = { + PMX_GROUP("ssp", ssp_ep9307_pins, EP93XX_SYSCON_DEVCFG_I2SONSSP, 0), + PMX_GROUP("i2s_on_ssp", ssp_ep9307_pins, EP93XX_SYSCON_DEVCFG_I2SONSSP, + EP93XX_SYSCON_DEVCFG_I2SONSSP), + PMX_GROUP("ac97", ac97_ep9307_pins, EP93XX_SYSCON_DEVCFG_I2SONAC97, 0), + PMX_GROUP("i2s_on_ac97", ac97_ep9301_pins, EP93XX_SYSCON_DEVCFG_I2SONAC97, + EP93XX_SYSCON_DEVCFG_I2SONAC97), + PMX_GROUP("rasteronsdram0grp", raster_on_sdram0_pins, EP93XX_SYSCON_DEVCFG_RASONP3, 0), + PMX_GROUP("rasteronsdram3grp", raster_on_sdram0_pins, EP93XX_SYSCON_DEVCFG_RASONP3, + EP93XX_SYSCON_DEVCFG_RASONP3), + PMX_GROUP("gpio2agrp", gpio2a_9307_pins, EP93XX_SYSCON_DEVCFG_GONK, + EP93XX_SYSCON_DEVCFG_GONK), + PMX_GROUP("gpio3agrp", gpio3a_9307_pins, EP93XX_SYSCON_DEVCFG_GONK, + EP93XX_SYSCON_DEVCFG_GONK), + PMX_GROUP("keypadgrp", keypad_9307_pins, EP93XX_SYSCON_DEVCFG_GONK, 0), + PMX_GROUP("gpio4agrp", gpio4a_9307_pins, EP93XX_SYSCON_DEVCFG_EONIDE, + EP93XX_SYSCON_DEVCFG_EONIDE), + PMX_GROUP("gpio6agrp", gpio6a_9307_pins, EP93XX_SYSCON_DEVCFG_GONIDE, + EP93XX_SYSCON_DEVCFG_GONIDE), + PMX_GROUP("gpio7agrp", gpio7a_9307_pins, EP93XX_SYSCON_DEVCFG_HONIDE, + EP93XX_SYSCON_DEVCFG_HONIDE), +}; + +/* ep9312, ep9315 */ +static const struct pinctrl_pin_desc ep9312_pins[] = { + /* Row A */ + PINCTRL_PIN(0, "CSN[7]"), /* A1 */ + PINCTRL_PIN(1, "DA[28]"), /* A2 */ + PINCTRL_PIN(2, "AD[18]"), /* A3 */ + PINCTRL_PIN(3, "DD[8]"), /* A4 */ + PINCTRL_PIN(4, "DD[4]"), /* A5 */ + PINCTRL_PIN(5, "AD[17]"), /* A6 */ + PINCTRL_PIN(6, "RDN"), /* A7 */ + PINCTRL_PIN(7, "RXCLK"), /* A8 */ + PINCTRL_PIN(8, "MIIRXD[0]"), /* A9 */ + PINCTRL_PIN(9, "RXDVAL"), /* A10 */ + PINCTRL_PIN(10, "MIITXD[2]"), /* A11 */ + PINCTRL_PIN(11, "TXERR"), /* A12 */ + PINCTRL_PIN(12, "CLD"), /* A13 */ + PINCTRL_PIN(13, "NC"), /* A14 */ + PINCTRL_PIN(14, "NC"), /* A15 */ + PINCTRL_PIN(15, "NC"), /* A16 */ + PINCTRL_PIN(16, "EGPIO[12]"), /* A17 */ + PINCTRL_PIN(17, "EGPIO[15]"), /* A18 */ + PINCTRL_PIN(18, "NC"), /* A19 */ + PINCTRL_PIN(19, "NC"), /* A20 */ + /* Row B */ + PINCTRL_PIN(20, "CSN[2]"), /* B1 */ + PINCTRL_PIN(21, "DA[31]"), /* B2 */ + PINCTRL_PIN(22, "DA[30]"), /* B3 */ + PINCTRL_PIN(23, "DA[27]"), /* B4 */ + PINCTRL_PIN(24, "DD[7]"), /* B5 */ + PINCTRL_PIN(25, "DD[3]"), /* B6 */ + PINCTRL_PIN(26, "WRN"), /* B7 */ + PINCTRL_PIN(27, "MDIO"), /* B8 */ + PINCTRL_PIN(28, "MIIRXD[1]"), /* B9 */ + PINCTRL_PIN(29, "RXERR"), /* B10 */ + PINCTRL_PIN(30, "MIITXD[1]"), /* B11 */ + PINCTRL_PIN(31, "CRS"), /* B12 */ + PINCTRL_PIN(32, "NC"), /* B13 */ + PINCTRL_PIN(33, "NC"), /* B14 */ + PINCTRL_PIN(34, "NC"), /* B15 */ + PINCTRL_PIN(35, "NC"), /* B16 */ + PINCTRL_PIN(36, "EGPIO[13]"), /* B17 */ + PINCTRL_PIN(37, "NC"), /* B18 */ + PINCTRL_PIN(38, "WAITN"), /* B19 */ + PINCTRL_PIN(39, "TRSTN"), /* B20 */ + /* Row C */ + PINCTRL_PIN(40, "CSN[1]"), /* C1 */ + PINCTRL_PIN(41, "CSN[3]"), /* C2 */ + PINCTRL_PIN(42, "AD[20]"), /* C3 */ + PINCTRL_PIN(43, "DA[29]"), /* C4 */ + PINCTRL_PIN(44, "DD[10]"), /* C5 */ + PINCTRL_PIN(45, "DD[6]"), /* C6 */ + PINCTRL_PIN(46, "DD[2]"), /* C7 */ + PINCTRL_PIN(47, "MDC"), /* C8 */ + PINCTRL_PIN(48, "MIIRXD[3]"), /* C9 */ + PINCTRL_PIN(49, "TXCLK"), /* C10 */ + PINCTRL_PIN(50, "MIITXD[0]"), /* C11 */ + PINCTRL_PIN(51, "NC"), /* C12 */ + PINCTRL_PIN(52, "NC"), /* C13 */ + PINCTRL_PIN(53, "NC"), /* C14 */ + PINCTRL_PIN(54, "NC"), /* C15 */ + PINCTRL_PIN(55, "NC"), /* C16 */ + PINCTRL_PIN(56, "NC"), /* C17 */ + PINCTRL_PIN(57, "USBP[2]"), /* C18 */ + PINCTRL_PIN(58, "IORDY"), /* C19 */ + PINCTRL_PIN(59, "DMACKN"), /* C20 */ + /* Row D */ + PINCTRL_PIN(60, "AD[24]"), /* D1 */ + PINCTRL_PIN(61, "DA[25]"), /* D2 */ + PINCTRL_PIN(62, "DD[11]"), /* D3 */ + PINCTRL_PIN(63, "SDCLKEN"), /* D4 */ + PINCTRL_PIN(64, "AD[19]"), /* D5 */ + PINCTRL_PIN(65, "DD[9]"), /* D6 */ + PINCTRL_PIN(66, "DD[5]"), /* D7 */ + PINCTRL_PIN(67, "AD[16]"), /* D8 */ + PINCTRL_PIN(68, "MIIRXD[2]"), /* D9 */ + PINCTRL_PIN(69, "MIITXD[3]"), /* D10 */ + PINCTRL_PIN(70, "TXEN"), /* D11 */ + PINCTRL_PIN(71, "NC"), /* D12 */ + PINCTRL_PIN(72, "NC"), /* D13 */ + PINCTRL_PIN(73, "NC"), /* D14 */ + PINCTRL_PIN(74, "EGPIO[14]"), /* D15 */ + PINCTRL_PIN(75, "NC"), /* D16 */ + PINCTRL_PIN(76, "USBM[2]"), /* D17 */ + PINCTRL_PIN(77, "ARSTN"), /* D18 */ + PINCTRL_PIN(78, "DIORN"), /* D19 */ + PINCTRL_PIN(79, "EGPIO[1]"), /* D20 */ + /* Row E */ + PINCTRL_PIN(80, "AD[23]"), /* E1 */ + PINCTRL_PIN(81, "DA[23]"), /* E2 */ + PINCTRL_PIN(82, "DA[26]"), /* E3 */ + PINCTRL_PIN(83, "CSN[6]"), /* E4 */ + PINCTRL_PIN(84, "GND"), /* E5 */ + PINCTRL_PIN(85, "GND"), /* E6 */ + PINCTRL_PIN(86, "CVDD"), /* E7 */ + PINCTRL_PIN(87, "CVDD"), /* E8 */ + PINCTRL_PIN(88, "RVDD"), /* E9 */ + PINCTRL_PIN(89, "GND"), /* E10 */ + PINCTRL_PIN(90, "GND"), /* E11 */ + PINCTRL_PIN(91, "RVDD"), /* E12 */ + PINCTRL_PIN(92, "CVDD"), /* E13 */ + PINCTRL_PIN(93, "CVDD"), /* E14 */ + PINCTRL_PIN(94, "GND"), /* E15 */ + PINCTRL_PIN(95, "ASDI"), /* E16 */ + PINCTRL_PIN(96, "DIOWN"), /* E17 */ + PINCTRL_PIN(97, "EGPIO[0]"), /* E18 */ + PINCTRL_PIN(98, "EGPIO[3]"), /* E19 */ + PINCTRL_PIN(99, "EGPIO[5]"), /* E20 */ + /* Row F */ + PINCTRL_PIN(100, "SDCSN[3]"), /* F1 */ + PINCTRL_PIN(101, "DA[22]"), /* F2 */ + PINCTRL_PIN(102, "DA[24]"), /* F3 */ + PINCTRL_PIN(103, "AD[25]"), /* F4 */ + PINCTRL_PIN(104, "RVDD"), /* F5 */ + PINCTRL_PIN(105, "GND"), /* F6 */ + PINCTRL_PIN(106, "CVDD"), /* F7 */ + PINCTRL_PIN(107, "CVDD"), /* F14 */ + PINCTRL_PIN(108, "GND"), /* F15 */ + PINCTRL_PIN(109, "GND"), /* F16 */ + PINCTRL_PIN(110, "EGPIO[2]"), /* F17 */ + PINCTRL_PIN(111, "EGPIO[4]"), /* F18 */ + PINCTRL_PIN(112, "EGPIO[6]"), /* F19 */ + PINCTRL_PIN(113, "EGPIO[8]"), /* F20 */ + /* Row G */ + PINCTRL_PIN(114, "SDCSN[0]"), /* G1 */ + PINCTRL_PIN(115, "SDCSN[1]"), /* G2 */ + PINCTRL_PIN(116, "SDWEN"), /* G3 */ + PINCTRL_PIN(117, "SDCLK"), /* G4 */ + PINCTRL_PIN(118, "RVDD"), /* G5 */ + PINCTRL_PIN(119, "RVDD"), /* G6 */ + PINCTRL_PIN(120, "RVDD"), /* G15 */ + PINCTRL_PIN(121, "RVDD"), /* G16 */ + PINCTRL_PIN(122, "EGPIO[7]"), /* G17 */ + PINCTRL_PIN(123, "EGPIO[9]"), /* G18 */ + PINCTRL_PIN(124, "EGPIO[10]"), /* G19 */ + PINCTRL_PIN(125, "EGPIO[11]"), /* G20 */ + /* Row H */ + PINCTRL_PIN(126, "DQMN[3]"), /* H1 */ + PINCTRL_PIN(127, "CASN"), /* H2 */ + PINCTRL_PIN(128, "RASN"), /* H3 */ + PINCTRL_PIN(129, "SDCSN[2]"), /* H4 */ + PINCTRL_PIN(130, "CVDD"), /* H5 */ + PINCTRL_PIN(131, "GND"), /* H8 */ + PINCTRL_PIN(132, "GND"), /* H9 */ + PINCTRL_PIN(133, "GND"), /* H10 */ + PINCTRL_PIN(134, "GND"), /* H11 */ + PINCTRL_PIN(135, "GND"), /* H12 */ + PINCTRL_PIN(136, "GND"), /* H13 */ + PINCTRL_PIN(137, "RVDD"), /* H16 */ + PINCTRL_PIN(138, "RTCXTALO"), /* H17 */ + PINCTRL_PIN(139, "ADC_VDD"), /* H18 */ + PINCTRL_PIN(140, "ADC_GND"), /* H19 */ + PINCTRL_PIN(141, "XP"), /* H20 */ + /* Row J */ + PINCTRL_PIN(142, "DA[21]"), /* J1 */ + PINCTRL_PIN(143, "DQMN[0]"), /* J2 */ + PINCTRL_PIN(144, "DQMN[1]"), /* J3 */ + PINCTRL_PIN(145, "DQMN[2]"), /* J4 */ + PINCTRL_PIN(146, "GND"), /* J5 */ + PINCTRL_PIN(147, "GND"), /* J8 */ + PINCTRL_PIN(148, "GND"), /* J9 */ + PINCTRL_PIN(149, "GND"), /* J10 */ + PINCTRL_PIN(150, "GND"), /* J11 */ + PINCTRL_PIN(151, "GND"), /* J12 */ + PINCTRL_PIN(152, "GND"), /* J13 */ + PINCTRL_PIN(153, "CVDD"), /* J16 */ + PINCTRL_PIN(154, "RTCXTALI"), /* J17 */ + PINCTRL_PIN(155, "XM"), /* J18 */ + PINCTRL_PIN(156, "YP"), /* J19 */ + PINCTRL_PIN(157, "YM"), /* J20 */ + /* Row K */ + PINCTRL_PIN(158, "AD[22]"), /* K1 */ + PINCTRL_PIN(159, "DA[20]"), /* K2 */ + PINCTRL_PIN(160, "AD[21]"), /* K3 */ + PINCTRL_PIN(161, "DA[19]"), /* K4 */ + PINCTRL_PIN(162, "RVDD"), /* K5 */ + PINCTRL_PIN(163, "GND"), /* K8 */ + PINCTRL_PIN(164, "GND"), /* K9 */ + PINCTRL_PIN(165, "GND"), /* K10 */ + PINCTRL_PIN(166, "GND"), /* K11 */ + PINCTRL_PIN(167, "GND"), /* K12 */ + PINCTRL_PIN(168, "GND"), /* K13 */ + PINCTRL_PIN(169, "CVDD"), /* K16 */ + PINCTRL_PIN(170, "SYM"), /* K17 */ + PINCTRL_PIN(171, "SYP"), /* K18 */ + PINCTRL_PIN(172, "SXM"), /* K19 */ + PINCTRL_PIN(173, "SXP"), /* K20 */ + /* Row L */ + PINCTRL_PIN(174, "DA[18]"), /* L1 */ + PINCTRL_PIN(175, "DA[17]"), /* L2 */ + PINCTRL_PIN(176, "DA[16]"), /* L3 */ + PINCTRL_PIN(177, "DA[15]"), /* L4 */ + PINCTRL_PIN(178, "GND"), /* L5 */ + PINCTRL_PIN(179, "GND"), /* L8 */ + PINCTRL_PIN(180, "GND"), /* L9 */ + PINCTRL_PIN(181, "GND"), /* L10 */ + PINCTRL_PIN(182, "GND"), /* L11 */ + PINCTRL_PIN(183, "GND"), /* L12 */ + PINCTRL_PIN(184, "GND"), /* L13 */ + PINCTRL_PIN(185, "CVDD"), /* L16 */ + PINCTRL_PIN(186, "COL[5]"), /* L17 */ + PINCTRL_PIN(187, "COL[7]"), /* L18 */ + PINCTRL_PIN(188, "RSTON"), /* L19 */ + PINCTRL_PIN(189, "PRSTN"), /* L20 */ + /* Row M */ + PINCTRL_PIN(190, "AD[7]"), /* M1 */ + PINCTRL_PIN(191, "DA[14]"), /* M2 */ + PINCTRL_PIN(192, "AD[6]"), /* M3 */ + PINCTRL_PIN(193, "AD[5]"), /* M4 */ + PINCTRL_PIN(194, "CVDD"), /* M5 */ + PINCTRL_PIN(195, "GND"), /* M8 */ + PINCTRL_PIN(196, "GND"), /* M9 */ + PINCTRL_PIN(197, "GND"), /* M10 */ + PINCTRL_PIN(198, "GND"), /* M11 */ + PINCTRL_PIN(199, "GND"), /* M12 */ + PINCTRL_PIN(200, "GND"), /* M13 */ + PINCTRL_PIN(201, "GND"), /* M16 */ + PINCTRL_PIN(202, "COL[4]"), /* M17 */ + PINCTRL_PIN(203, "COL[3]"), /* M18 */ + PINCTRL_PIN(204, "COL[6]"), /* M19 */ + PINCTRL_PIN(205, "CSN[0]"), /* M20 */ + /* Row N */ + PINCTRL_PIN(206, "DA[13]"), /* N1 */ + PINCTRL_PIN(207, "DA[12]"), /* N2 */ + PINCTRL_PIN(208, "DA[11]"), /* N3 */ + PINCTRL_PIN(209, "AD[3]"), /* N4 */ + PINCTRL_PIN(210, "CVDD"), /* N5 */ + PINCTRL_PIN(211, "CVDD"), /* N6 */ + PINCTRL_PIN(212, "GND"), /* N8 */ + PINCTRL_PIN(213, "GND"), /* N9 */ + PINCTRL_PIN(214, "GND"), /* N10 */ + PINCTRL_PIN(215, "GND"), /* N11 */ + PINCTRL_PIN(216, "GND"), /* N12 */ + PINCTRL_PIN(217, "GND"), /* N13 */ + PINCTRL_PIN(218, "GND"), /* N15 */ + PINCTRL_PIN(219, "GND"), /* N16 */ + PINCTRL_PIN(220, "XTALO"), /* N17 */ + PINCTRL_PIN(221, "COL[0]"), /* N18 */ + PINCTRL_PIN(222, "COL[1]"), /* N19 */ + PINCTRL_PIN(223, "COL[2]"), /* N20 */ + /* Row P */ + PINCTRL_PIN(224, "AD[4]"), /* P1 */ + PINCTRL_PIN(225, "DA[10]"), /* P2 */ + PINCTRL_PIN(226, "DA[9]"), /* P3 */ + PINCTRL_PIN(227, "BRIGHT"), /* P4 */ + PINCTRL_PIN(228, "RVDD"), /* P5 */ + PINCTRL_PIN(229, "RVDD"), /* P6 */ + PINCTRL_PIN(230, "RVDD"), /* P15 */ + PINCTRL_PIN(231, "RVDD"), /* P16 */ + PINCTRL_PIN(232, "XTALI"), /* P17 */ + PINCTRL_PIN(233, "PLL_VDD"), /* P18 */ + PINCTRL_PIN(234, "ROW[6]"), /* P19 */ + PINCTRL_PIN(235, "ROW[7]"), /* P20 */ + /* Row R */ + PINCTRL_PIN(236, "AD[2]"), /* R1 */ + PINCTRL_PIN(237, "AD[1]"), /* R2 */ + PINCTRL_PIN(238, "P[17]"), /* R3 */ + PINCTRL_PIN(239, "P[14]"), /* R4 */ + PINCTRL_PIN(240, "RVDD"), /* R5 */ + PINCTRL_PIN(241, "RVDD"), /* R6 */ + PINCTRL_PIN(242, "GND"), /* R7 */ + PINCTRL_PIN(243, "CVDD"), /* R8 */ + PINCTRL_PIN(244, "CVDD"), /* R13 */ + PINCTRL_PIN(245, "GND"), /* R14 */ + PINCTRL_PIN(246, "RVDD"), /* R15 */ + PINCTRL_PIN(247, "RVDD"), /* R16 */ + PINCTRL_PIN(248, "ROW[0]"), /* R17 */ + PINCTRL_PIN(249, "ROW[3]"), /* R18 */ + PINCTRL_PIN(250, "PLL_GND"), /* R19 */ + PINCTRL_PIN(251, "ROW[5]"), /* R20 */ + /* Row T */ + PINCTRL_PIN(252, "DA[8]"), /* T1 */ + PINCTRL_PIN(253, "BLANK"), /* T2 */ + PINCTRL_PIN(254, "P[13]"), /* T3 */ + PINCTRL_PIN(255, "SPCLK"), /* T4 */ + PINCTRL_PIN(256, "V_CSYNC"), /* T5 */ + PINCTRL_PIN(257, "DD[14]"), /* T6 */ + PINCTRL_PIN(258, "GND"), /* T7 */ + PINCTRL_PIN(259, "CVDD"), /* T8 */ + PINCTRL_PIN(260, "RVDD"), /* T9 */ + PINCTRL_PIN(261, "GND"), /* T10 */ + PINCTRL_PIN(262, "GND"), /* T11 */ + PINCTRL_PIN(263, "RVDD"), /* T12 */ + PINCTRL_PIN(264, "CVDD"), /* T13 */ + PINCTRL_PIN(265, "GND"), /* T14 */ + PINCTRL_PIN(266, "INT[0]"), /* T15 */ + PINCTRL_PIN(267, "USBM[1]"), /* T16 */ + PINCTRL_PIN(268, "RXD[0]"), /* T17 */ + PINCTRL_PIN(269, "TXD[2]"), /* T18 */ + PINCTRL_PIN(270, "ROW[2]"), /* T19 */ + PINCTRL_PIN(271, "ROW[4]"), /* T20 */ + /* Row U */ + PINCTRL_PIN(272, "AD[0]"), /* U1 */ + PINCTRL_PIN(273, "P[15]"), /* U2 */ + PINCTRL_PIN(274, "P[10]"), /* U3 */ + PINCTRL_PIN(275, "P[7]"), /* U4 */ + PINCTRL_PIN(276, "P[6]"), /* U5 */ + PINCTRL_PIN(277, "P[4]"), /* U6 */ + PINCTRL_PIN(278, "P[0]"), /* U7 */ + PINCTRL_PIN(279, "AD[13]"), /* U8 */ + PINCTRL_PIN(280, "DA[3]"), /* U9 */ + PINCTRL_PIN(281, "DA[0]"), /* U10 */ + PINCTRL_PIN(282, "DSRN"), /* U11 */ + PINCTRL_PIN(283, "BOOT[1]"), /* U12 */ + PINCTRL_PIN(284, "NC"), /* U13 */ + PINCTRL_PIN(285, "SSPRX1"), /* U14 */ + PINCTRL_PIN(286, "INT[1]"), /* U15 */ + PINCTRL_PIN(287, "PWMOUT"), /* U16 */ + PINCTRL_PIN(288, "USBM[0]"), /* U17 */ + PINCTRL_PIN(289, "RXD[1]"), /* U18 */ + PINCTRL_PIN(290, "TXD[1]"), /* U19 */ + PINCTRL_PIN(291, "ROW[1]"), /* U20 */ + /* Row V */ + PINCTRL_PIN(292, "P[16]"), /* V1 */ + PINCTRL_PIN(293, "P[11]"), /* V2 */ + PINCTRL_PIN(294, "P[8]"), /* V3 */ + PINCTRL_PIN(295, "DD[15]"), /* V4 */ + PINCTRL_PIN(296, "DD[13]"), /* V5 */ + PINCTRL_PIN(297, "P[1]"), /* V6 */ + PINCTRL_PIN(298, "AD[14]"), /* V7 */ + PINCTRL_PIN(299, "AD[12]"), /* V8 */ + PINCTRL_PIN(300, "DA[2]"), /* V9 */ + PINCTRL_PIN(301, "IDECS0N"), /* V10 */ + PINCTRL_PIN(302, "IDEDA[2]"), /* V11 */ + PINCTRL_PIN(303, "TDI"), /* V12 */ + PINCTRL_PIN(304, "GND"), /* V13 */ + PINCTRL_PIN(305, "ASYNC"), /* V14 */ + PINCTRL_PIN(306, "SSPTX1"), /* V15 */ + PINCTRL_PIN(307, "INT[2]"), /* V16 */ + PINCTRL_PIN(308, "RTSN"), /* V17 */ + PINCTRL_PIN(309, "USBP[0]"), /* V18 */ + PINCTRL_PIN(310, "CTSN"), /* V19 */ + PINCTRL_PIN(311, "TXD[0]"), /* V20 */ + /* Row W */ + PINCTRL_PIN(312, "P[12]"), /* W1 */ + PINCTRL_PIN(313, "P[9]"), /* W2 */ + PINCTRL_PIN(314, "DD[0]"), /* W3 */ + PINCTRL_PIN(315, "P[5]"), /* W4 */ + PINCTRL_PIN(316, "P[3]"), /* W5 */ + PINCTRL_PIN(317, "DA[7]"), /* W6 */ + PINCTRL_PIN(318, "DA[5]"), /* W7 */ + PINCTRL_PIN(319, "AD[11]"), /* W8 */ + PINCTRL_PIN(320, "AD[9]"), /* W9 */ + PINCTRL_PIN(321, "IDECS1N"), /* W10 */ + PINCTRL_PIN(322, "IDEDA[1]"), /* W11 */ + PINCTRL_PIN(323, "TCK"), /* W12 */ + PINCTRL_PIN(324, "TMS"), /* W13 */ + PINCTRL_PIN(325, "EECLK"), /* W14 */ + PINCTRL_PIN(326, "SCLK1"), /* W15 */ + PINCTRL_PIN(327, "GRLED"), /* W16 */ + PINCTRL_PIN(328, "INT[3]"), /* W17 */ + PINCTRL_PIN(329, "SLA[1]"), /* W18 */ + PINCTRL_PIN(330, "SLA[0]"), /* W19 */ + PINCTRL_PIN(331, "RXD[2]"), /* W20 */ + /* Row Y */ + PINCTRL_PIN(332, "HSYNC"), /* Y1 */ + PINCTRL_PIN(333, "DD[1]"), /* Y2 */ + PINCTRL_PIN(334, "DD[12]"), /* Y3 */ + PINCTRL_PIN(335, "P[2]"), /* Y4 */ + PINCTRL_PIN(336, "AD[15]"), /* Y5 */ + PINCTRL_PIN(337, "DA[6]"), /* Y6 */ + PINCTRL_PIN(338, "DA[4]"), /* Y7 */ + PINCTRL_PIN(339, "AD[10]"), /* Y8 */ + PINCTRL_PIN(340, "DA[1]"), /* Y9 */ + PINCTRL_PIN(341, "AD[8]"), /* Y10 */ + PINCTRL_PIN(342, "IDEDA[0]"), /* Y11 */ + PINCTRL_PIN(343, "DTRN"), /* Y12 */ + PINCTRL_PIN(344, "TDO"), /* Y13 */ + PINCTRL_PIN(345, "BOOT[0]"), /* Y14 */ + PINCTRL_PIN(346, "EEDAT"), /* Y15 */ + PINCTRL_PIN(347, "ASDO"), /* Y16 */ + PINCTRL_PIN(348, "SFRM1"), /* Y17 */ + PINCTRL_PIN(349, "RDLED"), /* Y18 */ + PINCTRL_PIN(350, "USBP[1]"), /* Y19 */ + PINCTRL_PIN(351, "ABITCLK"), /* Y20 */ +}; + +static const unsigned int ssp_ep9312_pins[] = { + 285, 306, 326, 348, +}; + +static const unsigned int ac97_ep9312_pins[] = { + 77, 95, 305, 347, 351, +}; + +static const unsigned int pwm_ep9312_pins[] = { 74 }; + +static const unsigned int gpio1a_ep9312_pins[] = { 74 }; + +static const unsigned int gpio2a_9312_pins[] = { + 234, 235, 248, 249, 251, 270, 271, 291, +}; + +static const unsigned int gpio3a_9312_pins[] = { + 186, 187, 202, 203, 204, 221, 222, 223, +}; + +static const unsigned int keypad_9312_pins[] = { + 186, 187, 202, 203, 204, 221, 222, 223, + 234, 235, 248, 249, 251, 270, 271, 291, +}; + +static const unsigned int gpio4a_9312_pins[] = { + 78, 301, 302, 321, 322, 342, +}; + +static const unsigned int gpio6a_9312_pins[] = { + 257, 295, 296, 334, +}; + +static const unsigned int gpio7a_9312_pins[] = { + 4, 24, 25, 45, 46, 66, 314, 333, +}; + +static const unsigned int ide_9312_pins[] = { + 78, 301, 302, 321, 322, 342, 257, 295, + 296, 334, 4, 24, 25, 45, 46, 66, + 314, 333, +}; + +static const struct ep93xx_pin_group ep9312_pin_groups[] = { + PMX_GROUP("ssp", ssp_ep9312_pins, EP93XX_SYSCON_DEVCFG_I2SONSSP, 0), + PMX_GROUP("i2s_on_ssp", ssp_ep9312_pins, EP93XX_SYSCON_DEVCFG_I2SONSSP, + EP93XX_SYSCON_DEVCFG_I2SONSSP), + PMX_GROUP("pwm1", pwm_ep9312_pins, EP93XX_SYSCON_DEVCFG_PONG, + EP93XX_SYSCON_DEVCFG_PONG), + PMX_GROUP("gpio1agrp", gpio1a_ep9312_pins, EP93XX_SYSCON_DEVCFG_PONG, 0), + PMX_GROUP("ac97", ac97_ep9312_pins, EP93XX_SYSCON_DEVCFG_I2SONAC97, 0), + PMX_GROUP("i2s_on_ac97", ac97_ep9312_pins, EP93XX_SYSCON_DEVCFG_I2SONAC97, + EP93XX_SYSCON_DEVCFG_I2SONAC97), + PMX_GROUP("rasteronsdram0grp", raster_on_sdram0_pins, EP93XX_SYSCON_DEVCFG_RASONP3, 0), + PMX_GROUP("rasteronsdram3grp", raster_on_sdram0_pins, EP93XX_SYSCON_DEVCFG_RASONP3, + EP93XX_SYSCON_DEVCFG_RASONP3), + PMX_GROUP("gpio2agrp", gpio2a_9312_pins, EP93XX_SYSCON_DEVCFG_GONK, + EP93XX_SYSCON_DEVCFG_GONK), + PMX_GROUP("gpio3agrp", gpio3a_9312_pins, EP93XX_SYSCON_DEVCFG_GONK, + EP93XX_SYSCON_DEVCFG_GONK), + PMX_GROUP("keypadgrp", keypad_9312_pins, EP93XX_SYSCON_DEVCFG_GONK, 0), + PMX_GROUP("gpio4agrp", gpio4a_9312_pins, EP93XX_SYSCON_DEVCFG_EONIDE, + EP93XX_SYSCON_DEVCFG_EONIDE), + PMX_GROUP("gpio6agrp", gpio6a_9312_pins, EP93XX_SYSCON_DEVCFG_GONIDE, + EP93XX_SYSCON_DEVCFG_GONIDE), + PMX_GROUP("gpio7agrp", gpio7a_9312_pins, EP93XX_SYSCON_DEVCFG_HONIDE, + EP93XX_SYSCON_DEVCFG_HONIDE), + PMX_GROUP("idegrp", ide_9312_pins, EP93XX_SYSCON_DEVCFG_EONIDE | + EP93XX_SYSCON_DEVCFG_GONIDE | EP93XX_SYSCON_DEVCFG_HONIDE, 0), +}; + +static int ep93xx_get_groups_count(struct pinctrl_dev *pctldev) +{ + struct ep93xx_pmx *pmx = pinctrl_dev_get_drvdata(pctldev); + + switch (pmx->model) { + case EP93XX_9301_PINCTRL: + return ARRAY_SIZE(ep9301_pin_groups); + case EP93XX_9307_PINCTRL: + return ARRAY_SIZE(ep9307_pin_groups); + case EP93XX_9312_PINCTRL: + return ARRAY_SIZE(ep9312_pin_groups); + default: + return 0; + } +} + +static const char *ep93xx_get_group_name(struct pinctrl_dev *pctldev, + unsigned int selector) +{ + struct ep93xx_pmx *pmx = pinctrl_dev_get_drvdata(pctldev); + + switch (pmx->model) { + case EP93XX_9301_PINCTRL: + return ep9301_pin_groups[selector].grp.name; + case EP93XX_9307_PINCTRL: + return ep9307_pin_groups[selector].grp.name; + case EP93XX_9312_PINCTRL: + return ep9312_pin_groups[selector].grp.name; + default: + return NULL; + } +} + +static int ep93xx_get_group_pins(struct pinctrl_dev *pctldev, + unsigned int selector, + const unsigned int **pins, + unsigned int *num_pins) +{ + struct ep93xx_pmx *pmx = pinctrl_dev_get_drvdata(pctldev); + + switch (pmx->model) { + case EP93XX_9301_PINCTRL: + *pins = ep9301_pin_groups[selector].grp.pins; + *num_pins = ep9301_pin_groups[selector].grp.npins; + break; + case EP93XX_9307_PINCTRL: + *pins = ep9307_pin_groups[selector].grp.pins; + *num_pins = ep9307_pin_groups[selector].grp.npins; + break; + case EP93XX_9312_PINCTRL: + *pins = ep9312_pin_groups[selector].grp.pins; + *num_pins = ep9312_pin_groups[selector].grp.npins; + break; + default: + return -EINVAL; + } + + return 0; +} + +static const struct pinctrl_ops ep93xx_pctrl_ops = { + .get_groups_count = ep93xx_get_groups_count, + .get_group_name = ep93xx_get_group_name, + .get_group_pins = ep93xx_get_group_pins, + .dt_node_to_map = pinconf_generic_dt_node_to_map_all, + .dt_free_map = pinconf_generic_dt_free_map, +}; + +static const char * const spigrps[] = { "ssp" }; +static const char * const ac97grps[] = { "ac97" }; +static const char * const i2sgrps[] = { "i2s_on_ssp", "i2s_on_ac97" }; +static const char * const pwm1grps[] = { "pwm1" }; +static const char * const gpiogrps[] = { "gpio1agrp", "gpio2agrp", "gpio3agrp", + "gpio4agrp", "gpio6agrp", "gpio7agrp" }; +static const char * const rastergrps[] = { "rasteronsdram0grp", "rasteronsdram3grp"}; +static const char * const keypadgrps[] = { "keypadgrp"}; +static const char * const idegrps[] = { "idegrp"}; + +static const struct pinfunction ep93xx_pmx_functions[] = { + PINCTRL_PINFUNCTION("spi", spigrps, ARRAY_SIZE(spigrps)), + PINCTRL_PINFUNCTION("ac97", ac97grps, ARRAY_SIZE(ac97grps)), + PINCTRL_PINFUNCTION("i2s", i2sgrps, ARRAY_SIZE(i2sgrps)), + PINCTRL_PINFUNCTION("pwm", pwm1grps, ARRAY_SIZE(pwm1grps)), + PINCTRL_PINFUNCTION("keypad", keypadgrps, ARRAY_SIZE(keypadgrps)), + PINCTRL_PINFUNCTION("pata", idegrps, ARRAY_SIZE(idegrps)), + PINCTRL_PINFUNCTION("lcd", rastergrps, ARRAY_SIZE(rastergrps)), + PINCTRL_PINFUNCTION("gpio", gpiogrps, ARRAY_SIZE(gpiogrps)), +}; + +static int ep93xx_pmx_set_mux(struct pinctrl_dev *pctldev, + unsigned int selector, + unsigned int group) +{ + struct ep93xx_pmx *pmx; + const struct pinfunction *func; + const struct ep93xx_pin_group *grp; + u32 before, after, expected; + unsigned long tmp; + int i; + + pmx = pinctrl_dev_get_drvdata(pctldev); + + switch (pmx->model) { + case EP93XX_9301_PINCTRL: + grp = &ep9301_pin_groups[group]; + break; + case EP93XX_9307_PINCTRL: + grp = &ep9307_pin_groups[group]; + break; + case EP93XX_9312_PINCTRL: + grp = &ep9312_pin_groups[group]; + break; + default: + dev_err(pmx->dev, "invalid SoC type\n"); + return -ENODEV; + } + + func = &ep93xx_pmx_functions[selector]; + + dev_dbg(pmx->dev, + "ACTIVATE function \"%s\" with group \"%s\" (mask=0x%x, value=0x%x)\n", + func->name, grp->grp.name, grp->mask, grp->value); + + regmap_read(pmx->map, EP93XX_SYSCON_DEVCFG, &before); + ep93xx_pinctrl_update_bits(pmx, EP93XX_SYSCON_DEVCFG, + grp->mask, grp->value); + regmap_read(pmx->map, EP93XX_SYSCON_DEVCFG, &after); + + dev_dbg(pmx->dev, "before=0x%x, after=0x%x, mask=0x%lx\n", + before, after, PADS_MASK); + + /* Which bits changed */ + before &= PADS_MASK; + after &= PADS_MASK; + expected = before & ~grp->mask; + expected |= grp->value; + expected &= PADS_MASK; + + /* Print changed states */ + tmp = expected ^ after; + for_each_set_bit(i, &tmp, PADS_MAXBIT) { + bool enabled = expected & BIT(i); + + dev_err(pmx->dev, + "pin group %s could not be %s: probably a hardware limitation\n", + ep93xx_padgroups[i], str_enabled_disabled(enabled)); + dev_err(pmx->dev, + "DeviceCfg before: %08x, after %08x, expected %08x\n", + before, after, expected); + } + + return tmp ? -EINVAL : 0; +}; + +static int ep93xx_pmx_get_funcs_count(struct pinctrl_dev *pctldev) +{ + return ARRAY_SIZE(ep93xx_pmx_functions); +} + +static const char *ep93xx_pmx_get_func_name(struct pinctrl_dev *pctldev, + unsigned int selector) +{ + return ep93xx_pmx_functions[selector].name; +} + +static int ep93xx_pmx_get_groups(struct pinctrl_dev *pctldev, + unsigned int selector, + const char * const **groups, + unsigned int * const num_groups) +{ + *groups = ep93xx_pmx_functions[selector].groups; + *num_groups = ep93xx_pmx_functions[selector].ngroups; + return 0; +} + +static const struct pinmux_ops ep93xx_pmx_ops = { + .get_functions_count = ep93xx_pmx_get_funcs_count, + .get_function_name = ep93xx_pmx_get_func_name, + .get_function_groups = ep93xx_pmx_get_groups, + .set_mux = ep93xx_pmx_set_mux, +}; + +static struct pinctrl_desc ep93xx_pmx_desc = { + .name = DRIVER_NAME, + .pctlops = &ep93xx_pctrl_ops, + .pmxops = &ep93xx_pmx_ops, + .owner = THIS_MODULE, +}; + +static int ep93xx_pmx_probe(struct auxiliary_device *adev, + const struct auxiliary_device_id *id) +{ + struct ep93xx_regmap_adev *rdev = to_ep93xx_regmap_adev(adev); + struct device *dev = &adev->dev; + struct ep93xx_pmx *pmx; + + /* Create state holders etc for this driver */ + pmx = devm_kzalloc(dev, sizeof(*pmx), GFP_KERNEL); + if (!pmx) + return -ENOMEM; + + pmx->dev = dev; + pmx->map = rdev->map; + pmx->aux_dev = rdev; + pmx->model = (enum ep93xx_pinctrl_model)(uintptr_t)id->driver_data; + switch (pmx->model) { + case EP93XX_9301_PINCTRL: + ep93xx_pmx_desc.pins = ep9301_pins; + ep93xx_pmx_desc.npins = ARRAY_SIZE(ep9301_pins); + dev_info(dev, "detected 9301/9302 chip variant\n"); + break; + case EP93XX_9307_PINCTRL: + ep93xx_pmx_desc.pins = ep9307_pins; + ep93xx_pmx_desc.npins = ARRAY_SIZE(ep9307_pins); + dev_info(dev, "detected 9307 chip variant\n"); + break; + case EP93XX_9312_PINCTRL: + ep93xx_pmx_desc.pins = ep9312_pins; + ep93xx_pmx_desc.npins = ARRAY_SIZE(ep9312_pins); + dev_info(dev, "detected 9312/9315 chip variant\n"); + break; + default: + return dev_err_probe(dev, -EINVAL, "unknown pin control model: %u\n", pmx->model); + } + + /* using parent of_node to match in get_pinctrl_dev_from_of_node() */ + device_set_node(dev, dev_fwnode(adev->dev.parent)); + pmx->pctl = devm_pinctrl_register(dev, &ep93xx_pmx_desc, pmx); + if (IS_ERR(pmx->pctl)) + return dev_err_probe(dev, PTR_ERR(pmx->pctl), "could not register pinmux driver\n"); + + return 0; +}; + +static const struct auxiliary_device_id ep93xx_pinctrl_ids[] = { + { + .name = "soc_ep93xx.pinctrl-ep9301", + .driver_data = (kernel_ulong_t)EP93XX_9301_PINCTRL, + }, + { + .name = "soc_ep93xx.pinctrl-ep9307", + .driver_data = (kernel_ulong_t)EP93XX_9307_PINCTRL, + }, + { + .name = "soc_ep93xx.pinctrl-ep9312", + .driver_data = (kernel_ulong_t)EP93XX_9312_PINCTRL, + }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(auxiliary, ep93xx_pinctrl_ids); + +static struct auxiliary_driver ep93xx_pmx_driver = { + .probe = ep93xx_pmx_probe, + .id_table = ep93xx_pinctrl_ids, +}; +module_auxiliary_driver(ep93xx_pmx_driver); From 9fa7cdb4368f5da184e5050856ca3329318de1ed Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:30 +0300 Subject: [PATCH 216/655] power: reset: Add a driver for the ep93xx reset Implement the reset behaviour of the various EP93xx SoCS in drivers/power/reset. It used to be located in arch/arm/mach-ep93xx. Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Acked-by: Sebastian Reichel Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- drivers/power/reset/Kconfig | 10 ++++ drivers/power/reset/Makefile | 1 + drivers/power/reset/ep93xx-restart.c | 84 ++++++++++++++++++++++++++++ 3 files changed, 95 insertions(+) create mode 100644 drivers/power/reset/ep93xx-restart.c diff --git a/drivers/power/reset/Kconfig b/drivers/power/reset/Kconfig index fece990af4a7..389d5a193e5d 100644 --- a/drivers/power/reset/Kconfig +++ b/drivers/power/reset/Kconfig @@ -75,6 +75,16 @@ config POWER_RESET_BRCMSTB Say Y here if you have a Broadcom STB board and you wish to have restart support. +config POWER_RESET_EP93XX + bool "Cirrus EP93XX reset driver" if COMPILE_TEST + depends on MFD_SYSCON + default ARCH_EP93XX + help + This driver provides restart support for Cirrus EP93XX SoC. + + Say Y here if you have a Cirrus EP93XX SoC and you wish + to have restart support. + config POWER_RESET_GEMINI_POWEROFF bool "Cortina Gemini power-off driver" depends on ARCH_GEMINI || COMPILE_TEST diff --git a/drivers/power/reset/Makefile b/drivers/power/reset/Makefile index a95d1bd275d1..10782d32e1da 100644 --- a/drivers/power/reset/Makefile +++ b/drivers/power/reset/Makefile @@ -7,6 +7,7 @@ obj-$(CONFIG_POWER_RESET_ATC260X) += atc260x-poweroff.o obj-$(CONFIG_POWER_RESET_AXXIA) += axxia-reset.o obj-$(CONFIG_POWER_RESET_BRCMKONA) += brcm-kona-reset.o obj-$(CONFIG_POWER_RESET_BRCMSTB) += brcmstb-reboot.o +obj-$(CONFIG_POWER_RESET_EP93XX) += ep93xx-restart.o obj-$(CONFIG_POWER_RESET_GEMINI_POWEROFF) += gemini-poweroff.o obj-$(CONFIG_POWER_RESET_GPIO) += gpio-poweroff.o obj-$(CONFIG_POWER_RESET_GPIO_RESTART) += gpio-restart.o diff --git a/drivers/power/reset/ep93xx-restart.c b/drivers/power/reset/ep93xx-restart.c new file mode 100644 index 000000000000..57cfb8620faf --- /dev/null +++ b/drivers/power/reset/ep93xx-restart.c @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Cirrus EP93xx SoC reset driver + * + * Copyright (C) 2021 Nikita Shubin + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define EP93XX_SYSCON_DEVCFG 0x80 +#define EP93XX_SYSCON_DEVCFG_SWRST BIT(31) + +struct ep93xx_restart { + struct ep93xx_regmap_adev *aux_dev; + struct notifier_block restart_handler; +}; + +static int ep93xx_restart_handle(struct notifier_block *this, + unsigned long mode, void *cmd) +{ + struct ep93xx_restart *priv = + container_of(this, struct ep93xx_restart, restart_handler); + struct ep93xx_regmap_adev *aux = priv->aux_dev; + + /* Issue the reboot */ + aux->update_bits(aux->map, aux->lock, EP93XX_SYSCON_DEVCFG, + EP93XX_SYSCON_DEVCFG_SWRST, EP93XX_SYSCON_DEVCFG_SWRST); + aux->update_bits(aux->map, aux->lock, EP93XX_SYSCON_DEVCFG, + EP93XX_SYSCON_DEVCFG_SWRST, 0); + + return NOTIFY_DONE; +} + +static int ep93xx_reboot_probe(struct auxiliary_device *adev, + const struct auxiliary_device_id *id) +{ + struct ep93xx_regmap_adev *rdev = to_ep93xx_regmap_adev(adev); + struct device *dev = &adev->dev; + struct ep93xx_restart *priv; + int err; + + if (!rdev->update_bits) + return -ENODEV; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->aux_dev = rdev; + + priv->restart_handler.notifier_call = ep93xx_restart_handle; + priv->restart_handler.priority = 128; + + err = register_restart_handler(&priv->restart_handler); + if (err) + return dev_err_probe(dev, err, "can't register restart notifier\n"); + + return 0; +} + +static const struct auxiliary_device_id ep93xx_reboot_ids[] = { + { + .name = "soc_ep93xx.reset-ep93xx", + }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(auxiliary, ep93xx_reboot_ids); + +static struct auxiliary_driver ep93xx_reboot_driver = { + .probe = ep93xx_reboot_probe, + .id_table = ep93xx_reboot_ids, +}; +module_auxiliary_driver(ep93xx_reboot_driver); From eeb3dd5b32e68989bae1a3d4420204cf3a90ce73 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:31 +0300 Subject: [PATCH 217/655] dt-bindings: soc: Add Cirrus EP93xx Add device tree bindings for the Cirrus Logic EP93xx SoC. Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Reviewed-by: Krzysztof Kozlowski Reviewed-by: Stephen Boyd Reviewed-by: Andy Shevchenko Reviewed-by: Guenter Roeck Acked-by: Miquel Raynal Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- .../bindings/arm/cirrus/cirrus,ep9301.yaml | 38 ++++++++ .../soc/cirrus/cirrus,ep9301-syscon.yaml | 94 +++++++++++++++++++ .../dt-bindings/clock/cirrus,ep9301-syscon.h | 46 +++++++++ 3 files changed, 178 insertions(+) create mode 100644 Documentation/devicetree/bindings/arm/cirrus/cirrus,ep9301.yaml create mode 100644 Documentation/devicetree/bindings/soc/cirrus/cirrus,ep9301-syscon.yaml create mode 100644 include/dt-bindings/clock/cirrus,ep9301-syscon.h diff --git a/Documentation/devicetree/bindings/arm/cirrus/cirrus,ep9301.yaml b/Documentation/devicetree/bindings/arm/cirrus/cirrus,ep9301.yaml new file mode 100644 index 000000000000..170aad5dd7ed --- /dev/null +++ b/Documentation/devicetree/bindings/arm/cirrus/cirrus,ep9301.yaml @@ -0,0 +1,38 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/arm/cirrus/cirrus,ep9301.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Cirrus Logic EP93xx platforms + +description: + The EP93xx SoC is a ARMv4T-based with 200 MHz ARM9 CPU. + +maintainers: + - Alexander Sverdlin + - Nikita Shubin + +properties: + $nodename: + const: '/' + compatible: + oneOf: + - description: The TS-7250 is a compact, full-featured Single Board + Computer (SBC) based upon the Cirrus EP9302 ARM9 CPU + items: + - const: technologic,ts7250 + - const: cirrus,ep9301 + + - description: The Liebherr BK3 is a derivate from ts7250 board + items: + - const: liebherr,bk3 + - const: cirrus,ep9301 + + - description: EDB302 is an evaluation board by Cirrus Logic, + based on a Cirrus Logic EP9302 CPU + items: + - const: cirrus,edb9302 + - const: cirrus,ep9301 + +additionalProperties: true diff --git a/Documentation/devicetree/bindings/soc/cirrus/cirrus,ep9301-syscon.yaml b/Documentation/devicetree/bindings/soc/cirrus/cirrus,ep9301-syscon.yaml new file mode 100644 index 000000000000..7cb1b4114985 --- /dev/null +++ b/Documentation/devicetree/bindings/soc/cirrus/cirrus,ep9301-syscon.yaml @@ -0,0 +1,94 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/soc/cirrus/cirrus,ep9301-syscon.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Cirrus Logic EP93xx Platforms System Controller + +maintainers: + - Alexander Sverdlin + - Nikita Shubin + +description: | + Central resources are controlled by a set of software-locked registers, + which can be used to prevent accidental accesses. Syscon generates + the various bus and peripheral clocks and controls the system startup + configuration. + + The System Controller (Syscon) provides: + - Clock control + - Power management + - System configuration management + + Syscon registers are common for all EP93xx SoC's, through some actual peripheral + may be missing depending on actual SoC model. + +properties: + compatible: + oneOf: + - items: + - enum: + - cirrus,ep9302-syscon + - cirrus,ep9307-syscon + - cirrus,ep9312-syscon + - cirrus,ep9315-syscon + - const: cirrus,ep9301-syscon + - const: syscon + - items: + - const: cirrus,ep9301-syscon + - const: syscon + + reg: + maxItems: 1 + + "#clock-cells": + const: 1 + + clocks: + items: + - description: reference clock + +patternProperties: + '^pins-': + type: object + description: pin node + $ref: /schemas/pinctrl/pinmux-node.yaml + + properties: + function: + enum: [ spi, ac97, i2s, pwm, keypad, pata, lcd, gpio ] + + groups: + enum: [ ssp, ac97, i2s_on_ssp, i2s_on_ac97, pwm1, gpio1agrp, + gpio2agrp, gpio3agrp, gpio4agrp, gpio6agrp, gpio7agrp, + rasteronsdram0grp, rasteronsdram3grp, keypadgrp, idegrp ] + + required: + - function + - groups + + unevaluatedProperties: false + +required: + - compatible + - reg + - "#clock-cells" + - clocks + +additionalProperties: false + +examples: + - | + syscon@80930000 { + compatible = "cirrus,ep9301-syscon", "syscon"; + reg = <0x80930000 0x1000>; + + #clock-cells = <1>; + clocks = <&xtali>; + + spi_default_pins: pins-spi { + function = "spi"; + groups = "ssp"; + }; + }; diff --git a/include/dt-bindings/clock/cirrus,ep9301-syscon.h b/include/dt-bindings/clock/cirrus,ep9301-syscon.h new file mode 100644 index 000000000000..6bb8f532e7d0 --- /dev/null +++ b/include/dt-bindings/clock/cirrus,ep9301-syscon.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ +#ifndef DT_BINDINGS_CIRRUS_EP93XX_CLOCK_H +#define DT_BINDINGS_CIRRUS_EP93XX_CLOCK_H + +#define EP93XX_CLK_PLL1 0 +#define EP93XX_CLK_PLL2 1 + +#define EP93XX_CLK_FCLK 2 +#define EP93XX_CLK_HCLK 3 +#define EP93XX_CLK_PCLK 4 + +#define EP93XX_CLK_UART 5 +#define EP93XX_CLK_SPI 6 +#define EP93XX_CLK_PWM 7 +#define EP93XX_CLK_USB 8 + +#define EP93XX_CLK_M2M0 9 +#define EP93XX_CLK_M2M1 10 + +#define EP93XX_CLK_M2P0 11 +#define EP93XX_CLK_M2P1 12 +#define EP93XX_CLK_M2P2 13 +#define EP93XX_CLK_M2P3 14 +#define EP93XX_CLK_M2P4 15 +#define EP93XX_CLK_M2P5 16 +#define EP93XX_CLK_M2P6 17 +#define EP93XX_CLK_M2P7 18 +#define EP93XX_CLK_M2P8 19 +#define EP93XX_CLK_M2P9 20 + +#define EP93XX_CLK_UART1 21 +#define EP93XX_CLK_UART2 22 +#define EP93XX_CLK_UART3 23 + +#define EP93XX_CLK_ADC 24 +#define EP93XX_CLK_ADC_EN 25 + +#define EP93XX_CLK_KEYPAD 26 + +#define EP93XX_CLK_VIDEO 27 + +#define EP93XX_CLK_I2S_MCLK 28 +#define EP93XX_CLK_I2S_SCLK 29 +#define EP93XX_CLK_I2S_LRCLK 30 + +#endif /* DT_BINDINGS_CIRRUS_EP93XX_CLOCK_H */ From 6eab0ce6e1c6358f4fb3d9f301bfcf3d527f3da9 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:32 +0300 Subject: [PATCH 218/655] soc: Add SoC driver for Cirrus ep93xx Add an SoC driver for the ep93xx. Currently there is only one thing not fitting into any other framework, and that is the swlock setting. Used for clock settings, pinctrl and restart. Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Reviewed-by: Linus Walleij Acked-by: Alexander Sverdlin Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- drivers/soc/Kconfig | 1 + drivers/soc/Makefile | 1 + drivers/soc/cirrus/Kconfig | 17 +++ drivers/soc/cirrus/Makefile | 2 + drivers/soc/cirrus/soc-ep93xx.c | 252 ++++++++++++++++++++++++++++++++ 5 files changed, 273 insertions(+) create mode 100644 drivers/soc/cirrus/Kconfig create mode 100644 drivers/soc/cirrus/Makefile create mode 100644 drivers/soc/cirrus/soc-ep93xx.c diff --git a/drivers/soc/Kconfig b/drivers/soc/Kconfig index 5d924e946507..6a8daeb8c4b9 100644 --- a/drivers/soc/Kconfig +++ b/drivers/soc/Kconfig @@ -7,6 +7,7 @@ source "drivers/soc/aspeed/Kconfig" source "drivers/soc/atmel/Kconfig" source "drivers/soc/bcm/Kconfig" source "drivers/soc/canaan/Kconfig" +source "drivers/soc/cirrus/Kconfig" source "drivers/soc/fsl/Kconfig" source "drivers/soc/fujitsu/Kconfig" source "drivers/soc/hisilicon/Kconfig" diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile index fb2bd31387d0..b10a52a91fe4 100644 --- a/drivers/soc/Makefile +++ b/drivers/soc/Makefile @@ -8,6 +8,7 @@ obj-y += aspeed/ obj-$(CONFIG_ARCH_AT91) += atmel/ obj-y += bcm/ obj-$(CONFIG_ARCH_CANAAN) += canaan/ +obj-$(CONFIG_EP93XX_SOC) += cirrus/ obj-$(CONFIG_ARCH_DOVE) += dove/ obj-$(CONFIG_MACH_DOVE) += dove/ obj-y += fsl/ diff --git a/drivers/soc/cirrus/Kconfig b/drivers/soc/cirrus/Kconfig new file mode 100644 index 000000000000..f2fd0e16a196 --- /dev/null +++ b/drivers/soc/cirrus/Kconfig @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0-only + +if ARCH_EP93XX + +config EP93XX_SOC + bool "Cirrus EP93xx chips SoC" + select SOC_BUS + select AUXILIARY_BUS + default y if !EP93XX_SOC_COMMON + help + Enable support SoC for Cirrus EP93xx chips. + + Cirrus EP93xx chips have several swlocked registers, + this driver provides locked access for reset, pinctrl + and clk devices implemented as auxiliary devices. + +endif diff --git a/drivers/soc/cirrus/Makefile b/drivers/soc/cirrus/Makefile new file mode 100644 index 000000000000..9e6608b67f76 --- /dev/null +++ b/drivers/soc/cirrus/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-y += soc-ep93xx.o diff --git a/drivers/soc/cirrus/soc-ep93xx.c b/drivers/soc/cirrus/soc-ep93xx.c new file mode 100644 index 000000000000..3e79b3b13aef --- /dev/null +++ b/drivers/soc/cirrus/soc-ep93xx.c @@ -0,0 +1,252 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * SoC driver for Cirrus EP93xx chips. + * Copyright (C) 2022 Nikita Shubin + * + * Based on a rewrite of arch/arm/mach-ep93xx/core.c + * Copyright (C) 2006 Lennert Buytenhek + * Copyright (C) 2007 Herbert Valerio Riedel + * + * Thanks go to Michael Burian and Ray Lehtiniemi for their key + * role in the ep93xx Linux community. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define EP93XX_SYSCON_DEVCFG 0x80 + +#define EP93XX_SWLOCK_MAGICK 0xaa +#define EP93XX_SYSCON_SWLOCK 0xc0 +#define EP93XX_SYSCON_SYSCFG 0x9c +#define EP93XX_SYSCON_SYSCFG_REV_MASK GENMASK(31, 28) +#define EP93XX_SYSCON_SYSCFG_REV_SHIFT 28 + +struct ep93xx_map_info { + spinlock_t lock; + void __iomem *base; + struct regmap *map; +}; + +/* + * EP93xx System Controller software locked register write + * + * Logic safeguards are included to condition the control signals for + * power connection to the matrix to prevent part damage. In addition, a + * software lock register is included that must be written with 0xAA + * before each register write to change the values of the four switch + * matrix control registers. + */ +static void ep93xx_regmap_write(struct regmap *map, spinlock_t *lock, + unsigned int reg, unsigned int val) +{ + guard(spinlock_irqsave)(lock); + + regmap_write(map, EP93XX_SYSCON_SWLOCK, EP93XX_SWLOCK_MAGICK); + regmap_write(map, reg, val); +} + +static void ep93xx_regmap_update_bits(struct regmap *map, spinlock_t *lock, + unsigned int reg, unsigned int mask, + unsigned int val) +{ + guard(spinlock_irqsave)(lock); + + regmap_write(map, EP93XX_SYSCON_SWLOCK, EP93XX_SWLOCK_MAGICK); + /* force write is required to clear swlock if no changes are made */ + regmap_update_bits_base(map, reg, mask, val, NULL, false, true); +} + +static void ep93xx_unregister_adev(void *_adev) +{ + struct auxiliary_device *adev = _adev; + + auxiliary_device_delete(adev); + auxiliary_device_uninit(adev); +} + +static void ep93xx_adev_release(struct device *dev) +{ + struct auxiliary_device *adev = to_auxiliary_dev(dev); + struct ep93xx_regmap_adev *rdev = to_ep93xx_regmap_adev(adev); + + kfree(rdev); +} + +static struct auxiliary_device __init *ep93xx_adev_alloc(struct device *parent, + const char *name, + struct ep93xx_map_info *info) +{ + struct ep93xx_regmap_adev *rdev __free(kfree) = NULL; + struct auxiliary_device *adev; + int ret; + + rdev = kzalloc(sizeof(*rdev), GFP_KERNEL); + if (!rdev) + return ERR_PTR(-ENOMEM); + + rdev->map = info->map; + rdev->base = info->base; + rdev->lock = &info->lock; + rdev->write = ep93xx_regmap_write; + rdev->update_bits = ep93xx_regmap_update_bits; + + adev = &rdev->adev; + adev->name = name; + adev->dev.parent = parent; + adev->dev.release = ep93xx_adev_release; + + ret = auxiliary_device_init(adev); + if (ret) + return ERR_PTR(ret); + + return &no_free_ptr(rdev)->adev; +} + +static int __init ep93xx_controller_register(struct device *parent, const char *name, + struct ep93xx_map_info *info) +{ + struct auxiliary_device *adev; + int ret; + + adev = ep93xx_adev_alloc(parent, name, info); + if (IS_ERR(adev)) + return PTR_ERR(adev); + + ret = auxiliary_device_add(adev); + if (ret) { + auxiliary_device_uninit(adev); + return ret; + } + + return devm_add_action_or_reset(parent, ep93xx_unregister_adev, adev); +} + +static unsigned int __init ep93xx_soc_revision(struct regmap *map) +{ + unsigned int val; + + regmap_read(map, EP93XX_SYSCON_SYSCFG, &val); + val &= EP93XX_SYSCON_SYSCFG_REV_MASK; + val >>= EP93XX_SYSCON_SYSCFG_REV_SHIFT; + return val; +} + +static const char __init *ep93xx_get_soc_rev(unsigned int rev) +{ + switch (rev) { + case EP93XX_CHIP_REV_D0: + return "D0"; + case EP93XX_CHIP_REV_D1: + return "D1"; + case EP93XX_CHIP_REV_E0: + return "E0"; + case EP93XX_CHIP_REV_E1: + return "E1"; + case EP93XX_CHIP_REV_E2: + return "E2"; + default: + return "unknown"; + } +} + +static const char *pinctrl_names[] __initconst = { + "pinctrl-ep9301", /* EP93XX_9301_SOC */ + "pinctrl-ep9307", /* EP93XX_9307_SOC */ + "pinctrl-ep9312", /* EP93XX_9312_SOC */ +}; + +static int __init ep93xx_syscon_probe(struct platform_device *pdev) +{ + enum ep93xx_soc_model model; + struct ep93xx_map_info *map_info; + struct soc_device_attribute *attrs; + struct soc_device *soc_dev; + struct device *dev = &pdev->dev; + struct regmap *map; + void __iomem *base; + unsigned int rev; + int ret; + + model = (enum ep93xx_soc_model)(uintptr_t)device_get_match_data(dev); + + map = device_node_to_regmap(dev->of_node); + if (IS_ERR(map)) + return PTR_ERR(map); + + base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(base)) + return PTR_ERR(base); + + attrs = devm_kzalloc(dev, sizeof(*attrs), GFP_KERNEL); + if (!attrs) + return -ENOMEM; + + rev = ep93xx_soc_revision(map); + + attrs->machine = of_flat_dt_get_machine_name(); + attrs->family = "Cirrus Logic EP93xx"; + attrs->revision = ep93xx_get_soc_rev(rev); + + soc_dev = soc_device_register(attrs); + if (IS_ERR(soc_dev)) + return PTR_ERR(soc_dev); + + map_info = devm_kzalloc(dev, sizeof(*map_info), GFP_KERNEL); + if (!map_info) + return -ENOMEM; + + spin_lock_init(&map_info->lock); + map_info->map = map; + map_info->base = base; + + ret = ep93xx_controller_register(dev, pinctrl_names[model], map_info); + if (ret) + dev_err(dev, "registering pinctrl controller failed\n"); + + /* + * EP93xx SSP clock rate was doubled in version E2. For more information + * see section 6 "2x SSP (Synchronous Serial Port) Clock – Revision E2 only": + * http://www.cirrus.com/en/pubs/appNote/AN273REV4.pdf + */ + if (rev == EP93XX_CHIP_REV_E2) + ret = ep93xx_controller_register(dev, "clk-ep93xx.e2", map_info); + else + ret = ep93xx_controller_register(dev, "clk-ep93xx", map_info); + if (ret) + dev_err(dev, "registering clock controller failed\n"); + + ret = ep93xx_controller_register(dev, "reset-ep93xx", map_info); + if (ret) + dev_err(dev, "registering reset controller failed\n"); + + return 0; +} + +static const struct of_device_id ep9301_syscon_of_device_ids[] = { + { .compatible = "cirrus,ep9301-syscon", .data = (void *)EP93XX_9301_SOC }, + { .compatible = "cirrus,ep9302-syscon", .data = (void *)EP93XX_9301_SOC }, + { .compatible = "cirrus,ep9307-syscon", .data = (void *)EP93XX_9307_SOC }, + { .compatible = "cirrus,ep9312-syscon", .data = (void *)EP93XX_9312_SOC }, + { .compatible = "cirrus,ep9315-syscon", .data = (void *)EP93XX_9312_SOC }, + { /* sentinel */ } +}; + +static struct platform_driver ep9301_syscon_driver = { + .driver = { + .name = "ep9301-syscon", + .of_match_table = ep9301_syscon_of_device_ids, + }, +}; +builtin_platform_driver_probe(ep9301_syscon_driver, ep93xx_syscon_probe); From 581e2ff84f2d708885da10414c65cb41b3c13dc3 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:33 +0300 Subject: [PATCH 219/655] dt-bindings: dma: Add Cirrus EP93xx Add YAML bindings for ep93xx SoC DMA. Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Reviewed-by: Krzysztof Kozlowski Reviewed-by: Andy Shevchenko Reviewed-by: Guenter Roeck Acked-by: Vinod Koul Acked-by: Miquel Raynal Signed-off-by: Arnd Bergmann --- .../bindings/dma/cirrus,ep9301-dma-m2m.yaml | 84 ++++++++++ .../bindings/dma/cirrus,ep9301-dma-m2p.yaml | 144 ++++++++++++++++++ 2 files changed, 228 insertions(+) create mode 100644 Documentation/devicetree/bindings/dma/cirrus,ep9301-dma-m2m.yaml create mode 100644 Documentation/devicetree/bindings/dma/cirrus,ep9301-dma-m2p.yaml diff --git a/Documentation/devicetree/bindings/dma/cirrus,ep9301-dma-m2m.yaml b/Documentation/devicetree/bindings/dma/cirrus,ep9301-dma-m2m.yaml new file mode 100644 index 000000000000..871b76ddf90f --- /dev/null +++ b/Documentation/devicetree/bindings/dma/cirrus,ep9301-dma-m2m.yaml @@ -0,0 +1,84 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/dma/cirrus,ep9301-dma-m2m.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Cirrus Logic ep93xx SoC DMA controller + +maintainers: + - Alexander Sverdlin + - Nikita Shubin + +allOf: + - $ref: dma-controller.yaml# + +properties: + compatible: + oneOf: + - const: cirrus,ep9301-dma-m2m + - items: + - enum: + - cirrus,ep9302-dma-m2m + - cirrus,ep9307-dma-m2m + - cirrus,ep9312-dma-m2m + - cirrus,ep9315-dma-m2m + - const: cirrus,ep9301-dma-m2m + + reg: + items: + - description: m2m0 channel registers + - description: m2m1 channel registers + + clocks: + items: + - description: m2m0 channel gate clock + - description: m2m1 channel gate clock + + clock-names: + items: + - const: m2m0 + - const: m2m1 + + interrupts: + items: + - description: m2m0 channel interrupt + - description: m2m1 channel interrupt + + '#dma-cells': + const: 2 + description: | + The first cell is the unique device channel number as indicated by this + table for ep93xx: + + 10: SPI controller + 11: IDE controller + + The second cell is the DMA direction line number: + + 1: Memory to device + 2: Device to memory + +required: + - compatible + - reg + - clocks + - clock-names + - interrupts + +additionalProperties: false + +examples: + - | + #include + dma-controller@80000100 { + compatible = "cirrus,ep9301-dma-m2m"; + reg = <0x80000100 0x0040>, + <0x80000140 0x0040>; + clocks = <&syscon EP93XX_CLK_M2M0>, + <&syscon EP93XX_CLK_M2M1>; + clock-names = "m2m0", "m2m1"; + interrupt-parent = <&vic0>; + interrupts = <17>, <18>; + #dma-cells = <2>; + }; diff --git a/Documentation/devicetree/bindings/dma/cirrus,ep9301-dma-m2p.yaml b/Documentation/devicetree/bindings/dma/cirrus,ep9301-dma-m2p.yaml new file mode 100644 index 000000000000..d14c31553543 --- /dev/null +++ b/Documentation/devicetree/bindings/dma/cirrus,ep9301-dma-m2p.yaml @@ -0,0 +1,144 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/dma/cirrus,ep9301-dma-m2p.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Cirrus Logic ep93xx SoC M2P DMA controller + +maintainers: + - Alexander Sverdlin + - Nikita Shubin + +allOf: + - $ref: dma-controller.yaml# + +properties: + compatible: + oneOf: + - const: cirrus,ep9301-dma-m2p + - items: + - enum: + - cirrus,ep9302-dma-m2p + - cirrus,ep9307-dma-m2p + - cirrus,ep9312-dma-m2p + - cirrus,ep9315-dma-m2p + - const: cirrus,ep9301-dma-m2p + + reg: + items: + - description: m2p0 channel registers + - description: m2p1 channel registers + - description: m2p2 channel registers + - description: m2p3 channel registers + - description: m2p4 channel registers + - description: m2p5 channel registers + - description: m2p6 channel registers + - description: m2p7 channel registers + - description: m2p8 channel registers + - description: m2p9 channel registers + + clocks: + items: + - description: m2p0 channel gate clock + - description: m2p1 channel gate clock + - description: m2p2 channel gate clock + - description: m2p3 channel gate clock + - description: m2p4 channel gate clock + - description: m2p5 channel gate clock + - description: m2p6 channel gate clock + - description: m2p7 channel gate clock + - description: m2p8 channel gate clock + - description: m2p9 channel gate clock + + clock-names: + items: + - const: m2p0 + - const: m2p1 + - const: m2p2 + - const: m2p3 + - const: m2p4 + - const: m2p5 + - const: m2p6 + - const: m2p7 + - const: m2p8 + - const: m2p9 + + interrupts: + items: + - description: m2p0 channel interrupt + - description: m2p1 channel interrupt + - description: m2p2 channel interrupt + - description: m2p3 channel interrupt + - description: m2p4 channel interrupt + - description: m2p5 channel interrupt + - description: m2p6 channel interrupt + - description: m2p7 channel interrupt + - description: m2p8 channel interrupt + - description: m2p9 channel interrupt + + '#dma-cells': + const: 2 + description: | + The first cell is the unique device channel number as indicated by this + table for ep93xx: + + 0: I2S channel 1 + 1: I2S channel 2 (unused) + 2: AC97 channel 1 (unused) + 3: AC97 channel 2 (unused) + 4: AC97 channel 3 (unused) + 5: I2S channel 3 (unused) + 6: UART1 (unused) + 7: UART2 (unused) + 8: UART3 (unused) + 9: IRDA (unused) + + The second cell is the DMA direction line number: + + 1: Memory to device + 2: Device to memory + +required: + - compatible + - reg + - clocks + - clock-names + - interrupts + +additionalProperties: false + +examples: + - | + #include + dma-controller@80000000 { + compatible = "cirrus,ep9301-dma-m2p"; + reg = <0x80000000 0x0040>, + <0x80000040 0x0040>, + <0x80000080 0x0040>, + <0x800000c0 0x0040>, + <0x80000240 0x0040>, + <0x80000200 0x0040>, + <0x800002c0 0x0040>, + <0x80000280 0x0040>, + <0x80000340 0x0040>, + <0x80000300 0x0040>; + clocks = <&syscon EP93XX_CLK_M2P0>, + <&syscon EP93XX_CLK_M2P1>, + <&syscon EP93XX_CLK_M2P2>, + <&syscon EP93XX_CLK_M2P3>, + <&syscon EP93XX_CLK_M2P4>, + <&syscon EP93XX_CLK_M2P5>, + <&syscon EP93XX_CLK_M2P6>, + <&syscon EP93XX_CLK_M2P7>, + <&syscon EP93XX_CLK_M2P8>, + <&syscon EP93XX_CLK_M2P9>; + clock-names = "m2p0", "m2p1", + "m2p2", "m2p3", + "m2p4", "m2p5", + "m2p6", "m2p7", + "m2p8", "m2p9"; + interrupt-parent = <&vic0>; + interrupts = <7>, <8>, <9>, <10>, <11>, <12>, <13>, <14>, <15>, <16>; + #dma-cells = <2>; + }; From 2e7f55ce430240a5547b8a94b4c532fc8c20b18b Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:34 +0300 Subject: [PATCH 220/655] dmaengine: cirrus: Convert to DT for Cirrus EP93xx Convert Cirrus EP93xx DMA to device tree usage: - add OF ID match table with data - add of_probe for device tree - add xlate for m2m/m2p - drop subsys_initcall code - drop platform probe - drop platform structs usage >From now on it only supports device tree probing. Co-developed-by: Alexander Sverdlin Signed-off-by: Alexander Sverdlin Acked-by: Vinod Koul Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Signed-off-by: Arnd Bergmann --- drivers/dma/ep93xx_dma.c | 239 ++++++++++++++++++----- include/linux/platform_data/dma-ep93xx.h | 6 + 2 files changed, 191 insertions(+), 54 deletions(-) diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c index d6c60635e90d..17c8e2badee2 100644 --- a/drivers/dma/ep93xx_dma.c +++ b/drivers/dma/ep93xx_dma.c @@ -20,6 +20,8 @@ #include #include #include +#include +#include #include #include @@ -104,6 +106,11 @@ #define DMA_MAX_CHAN_BYTES 0xffff #define DMA_MAX_CHAN_DESCRIPTORS 32 +enum ep93xx_dma_type { + M2P_DMA, + M2M_DMA, +}; + struct ep93xx_dma_engine; static int ep93xx_dma_slave_config_write(struct dma_chan *chan, enum dma_transfer_direction dir, @@ -129,11 +136,17 @@ struct ep93xx_dma_desc { struct list_head node; }; +struct ep93xx_dma_chan_cfg { + u8 port; + enum dma_transfer_direction dir; +}; + /** * struct ep93xx_dma_chan - an EP93xx DMA M2P/M2M channel * @chan: dmaengine API channel * @edma: pointer to the engine device * @regs: memory mapped registers + * @dma_cfg: channel number, direction * @irq: interrupt number of the channel * @clk: clock used by this channel * @tasklet: channel specific tasklet used for callbacks @@ -157,14 +170,12 @@ struct ep93xx_dma_desc { * descriptor in the chain. When a descriptor is moved to the @active queue, * the first and chained descriptors are flattened into a single list. * - * @chan.private holds pointer to &struct ep93xx_dma_data which contains - * necessary channel configuration information. For memcpy channels this must - * be %NULL. */ struct ep93xx_dma_chan { struct dma_chan chan; const struct ep93xx_dma_engine *edma; void __iomem *regs; + struct ep93xx_dma_chan_cfg dma_cfg; int irq; struct clk *clk; struct tasklet_struct tasklet; @@ -216,6 +227,11 @@ struct ep93xx_dma_engine { struct ep93xx_dma_chan channels[] __counted_by(num_channels); }; +struct ep93xx_edma_data { + u32 id; + size_t num_channels; +}; + static inline struct device *chan2dev(struct ep93xx_dma_chan *edmac) { return &edmac->chan.dev->device; @@ -318,10 +334,9 @@ static void m2p_set_control(struct ep93xx_dma_chan *edmac, u32 control) static int m2p_hw_setup(struct ep93xx_dma_chan *edmac) { - struct ep93xx_dma_data *data = edmac->chan.private; u32 control; - writel(data->port & 0xf, edmac->regs + M2P_PPALLOC); + writel(edmac->dma_cfg.port & 0xf, edmac->regs + M2P_PPALLOC); control = M2P_CONTROL_CH_ERROR_INT | M2P_CONTROL_ICE | M2P_CONTROL_ENABLE; @@ -458,16 +473,15 @@ static int m2p_hw_interrupt(struct ep93xx_dma_chan *edmac) static int m2m_hw_setup(struct ep93xx_dma_chan *edmac) { - const struct ep93xx_dma_data *data = edmac->chan.private; u32 control = 0; - if (!data) { + if (edmac->dma_cfg.dir == DMA_MEM_TO_MEM) { /* This is memcpy channel, nothing to configure */ writel(control, edmac->regs + M2M_CONTROL); return 0; } - switch (data->port) { + switch (edmac->dma_cfg.port) { case EP93XX_DMA_SSP: /* * This was found via experimenting - anything less than 5 @@ -477,7 +491,7 @@ static int m2m_hw_setup(struct ep93xx_dma_chan *edmac) control = (5 << M2M_CONTROL_PWSC_SHIFT); control |= M2M_CONTROL_NO_HDSK; - if (data->direction == DMA_MEM_TO_DEV) { + if (edmac->dma_cfg.dir == DMA_MEM_TO_DEV) { control |= M2M_CONTROL_DAH; control |= M2M_CONTROL_TM_TX; control |= M2M_CONTROL_RSS_SSPTX; @@ -493,7 +507,7 @@ static int m2m_hw_setup(struct ep93xx_dma_chan *edmac) * This IDE part is totally untested. Values below are taken * from the EP93xx Users's Guide and might not be correct. */ - if (data->direction == DMA_MEM_TO_DEV) { + if (edmac->dma_cfg.dir == DMA_MEM_TO_DEV) { /* Worst case from the UG */ control = (3 << M2M_CONTROL_PWSC_SHIFT); control |= M2M_CONTROL_DAH; @@ -548,7 +562,6 @@ static void m2m_fill_desc(struct ep93xx_dma_chan *edmac) static void m2m_hw_submit(struct ep93xx_dma_chan *edmac) { - struct ep93xx_dma_data *data = edmac->chan.private; u32 control = readl(edmac->regs + M2M_CONTROL); /* @@ -574,7 +587,7 @@ static void m2m_hw_submit(struct ep93xx_dma_chan *edmac) control |= M2M_CONTROL_ENABLE; writel(control, edmac->regs + M2M_CONTROL); - if (!data) { + if (edmac->dma_cfg.dir == DMA_MEM_TO_MEM) { /* * For memcpy channels the software trigger must be asserted * in order to start the memcpy operation. @@ -636,7 +649,7 @@ static int m2m_hw_interrupt(struct ep93xx_dma_chan *edmac) */ if (ep93xx_dma_advance_active(edmac)) { m2m_fill_desc(edmac); - if (done && !edmac->chan.private) { + if (done && edmac->dma_cfg.dir == DMA_MEM_TO_MEM) { /* Software trigger for memcpy channel */ control = readl(edmac->regs + M2M_CONTROL); control |= M2M_CONTROL_START; @@ -867,25 +880,22 @@ static dma_cookie_t ep93xx_dma_tx_submit(struct dma_async_tx_descriptor *tx) static int ep93xx_dma_alloc_chan_resources(struct dma_chan *chan) { struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); - struct ep93xx_dma_data *data = chan->private; const char *name = dma_chan_name(chan); int ret, i; /* Sanity check the channel parameters */ if (!edmac->edma->m2m) { - if (!data) + if (edmac->dma_cfg.port < EP93XX_DMA_I2S1 || + edmac->dma_cfg.port > EP93XX_DMA_IRDA) return -EINVAL; - if (data->port < EP93XX_DMA_I2S1 || - data->port > EP93XX_DMA_IRDA) - return -EINVAL; - if (data->direction != ep93xx_dma_chan_direction(chan)) + if (edmac->dma_cfg.dir != ep93xx_dma_chan_direction(chan)) return -EINVAL; } else { - if (data) { - switch (data->port) { + if (edmac->dma_cfg.dir != DMA_MEM_TO_MEM) { + switch (edmac->dma_cfg.port) { case EP93XX_DMA_SSP: case EP93XX_DMA_IDE: - if (!is_slave_direction(data->direction)) + if (!is_slave_direction(edmac->dma_cfg.dir)) return -EINVAL; break; default: @@ -894,9 +904,6 @@ static int ep93xx_dma_alloc_chan_resources(struct dma_chan *chan) } } - if (data && data->name) - name = data->name; - ret = clk_prepare_enable(edmac->clk); if (ret) return ret; @@ -1315,36 +1322,53 @@ static void ep93xx_dma_issue_pending(struct dma_chan *chan) ep93xx_dma_advance_work(to_ep93xx_dma_chan(chan)); } -static int __init ep93xx_dma_probe(struct platform_device *pdev) +static struct ep93xx_dma_engine *ep93xx_dma_of_probe(struct platform_device *pdev) { - struct ep93xx_dma_platform_data *pdata = dev_get_platdata(&pdev->dev); + const struct ep93xx_edma_data *data; + struct device *dev = &pdev->dev; struct ep93xx_dma_engine *edma; struct dma_device *dma_dev; - int ret, i; + char dma_clk_name[5]; + int i; - edma = kzalloc(struct_size(edma, channels, pdata->num_channels), GFP_KERNEL); + data = device_get_match_data(dev); + if (!data) + return ERR_PTR(dev_err_probe(dev, -ENODEV, "No device match found\n")); + + edma = devm_kzalloc(dev, struct_size(edma, channels, data->num_channels), + GFP_KERNEL); if (!edma) - return -ENOMEM; + return ERR_PTR(-ENOMEM); + edma->m2m = data->id; + edma->num_channels = data->num_channels; dma_dev = &edma->dma_dev; - edma->m2m = platform_get_device_id(pdev)->driver_data; - edma->num_channels = pdata->num_channels; INIT_LIST_HEAD(&dma_dev->channels); - for (i = 0; i < pdata->num_channels; i++) { - const struct ep93xx_dma_chan_data *cdata = &pdata->channels[i]; + for (i = 0; i < edma->num_channels; i++) { struct ep93xx_dma_chan *edmac = &edma->channels[i]; edmac->chan.device = dma_dev; - edmac->regs = cdata->base; - edmac->irq = cdata->irq; + edmac->regs = devm_platform_ioremap_resource(pdev, i); + if (IS_ERR(edmac->regs)) + return edmac->regs; + + edmac->irq = fwnode_irq_get(dev_fwnode(dev), i); + if (edmac->irq < 0) + return ERR_PTR(edmac->irq); + edmac->edma = edma; - edmac->clk = clk_get(NULL, cdata->name); + if (edma->m2m) + sprintf(dma_clk_name, "m2m%u", i); + else + sprintf(dma_clk_name, "m2p%u", i); + + edmac->clk = devm_clk_get(dev, dma_clk_name); if (IS_ERR(edmac->clk)) { - dev_warn(&pdev->dev, "failed to get clock for %s\n", - cdata->name); - continue; + dev_err_probe(dev, PTR_ERR(edmac->clk), + "no %s clock found\n", dma_clk_name); + return ERR_CAST(edmac->clk); } spin_lock_init(&edmac->lock); @@ -1357,6 +1381,90 @@ static int __init ep93xx_dma_probe(struct platform_device *pdev) &dma_dev->channels); } + return edma; +} + +static bool ep93xx_m2p_dma_filter(struct dma_chan *chan, void *filter_param) +{ + struct ep93xx_dma_chan *echan = to_ep93xx_dma_chan(chan); + struct ep93xx_dma_chan_cfg *cfg = filter_param; + + if (cfg->dir != ep93xx_dma_chan_direction(chan)) + return false; + + echan->dma_cfg = *cfg; + return true; +} + +static struct dma_chan *ep93xx_m2p_dma_of_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct ep93xx_dma_engine *edma = ofdma->of_dma_data; + dma_cap_mask_t mask = edma->dma_dev.cap_mask; + struct ep93xx_dma_chan_cfg dma_cfg; + u8 port = dma_spec->args[0]; + u8 direction = dma_spec->args[1]; + + if (port > EP93XX_DMA_IRDA) + return NULL; + + if (!is_slave_direction(direction)) + return NULL; + + dma_cfg.port = port; + dma_cfg.dir = direction; + + return __dma_request_channel(&mask, ep93xx_m2p_dma_filter, &dma_cfg, ofdma->of_node); +} + +static bool ep93xx_m2m_dma_filter(struct dma_chan *chan, void *filter_param) +{ + struct ep93xx_dma_chan *echan = to_ep93xx_dma_chan(chan); + struct ep93xx_dma_chan_cfg *cfg = filter_param; + + echan->dma_cfg = *cfg; + + return true; +} + +static struct dma_chan *ep93xx_m2m_dma_of_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct ep93xx_dma_engine *edma = ofdma->of_dma_data; + dma_cap_mask_t mask = edma->dma_dev.cap_mask; + struct ep93xx_dma_chan_cfg dma_cfg; + u8 port = dma_spec->args[0]; + u8 direction = dma_spec->args[1]; + + if (!is_slave_direction(direction)) + return NULL; + + switch (port) { + case EP93XX_DMA_SSP: + case EP93XX_DMA_IDE: + break; + default: + return NULL; + } + + dma_cfg.port = port; + dma_cfg.dir = direction; + + return __dma_request_channel(&mask, ep93xx_m2m_dma_filter, &dma_cfg, ofdma->of_node); +} + +static int ep93xx_dma_probe(struct platform_device *pdev) +{ + struct ep93xx_dma_engine *edma; + struct dma_device *dma_dev; + int ret; + + edma = ep93xx_dma_of_probe(pdev); + if (!edma) + return PTR_ERR(edma); + + dma_dev = &edma->dma_dev; + dma_cap_zero(dma_dev->cap_mask); dma_cap_set(DMA_SLAVE, dma_dev->cap_mask); dma_cap_set(DMA_CYCLIC, dma_dev->cap_mask); @@ -1393,21 +1501,46 @@ static int __init ep93xx_dma_probe(struct platform_device *pdev) } ret = dma_async_device_register(dma_dev); - if (unlikely(ret)) { - for (i = 0; i < edma->num_channels; i++) { - struct ep93xx_dma_chan *edmac = &edma->channels[i]; - if (!IS_ERR_OR_NULL(edmac->clk)) - clk_put(edmac->clk); - } - kfree(edma); + if (ret) + return ret; + + if (edma->m2m) { + ret = of_dma_controller_register(pdev->dev.of_node, ep93xx_m2m_dma_of_xlate, + edma); } else { - dev_info(dma_dev->dev, "EP93xx M2%s DMA ready\n", - edma->m2m ? "M" : "P"); + ret = of_dma_controller_register(pdev->dev.of_node, ep93xx_m2p_dma_of_xlate, + edma); } + if (ret) + goto err_dma_unregister; + + dev_info(dma_dev->dev, "EP93xx M2%s DMA ready\n", edma->m2m ? "M" : "P"); + + return 0; + +err_dma_unregister: + dma_async_device_unregister(dma_dev); return ret; } +static const struct ep93xx_edma_data edma_m2p = { + .id = M2P_DMA, + .num_channels = 10, +}; + +static const struct ep93xx_edma_data edma_m2m = { + .id = M2M_DMA, + .num_channels = 2, +}; + +static const struct of_device_id ep93xx_dma_of_ids[] = { + { .compatible = "cirrus,ep9301-dma-m2p", .data = &edma_m2p }, + { .compatible = "cirrus,ep9301-dma-m2m", .data = &edma_m2m }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, ep93xx_dma_of_ids); + static const struct platform_device_id ep93xx_dma_driver_ids[] = { { "ep93xx-dma-m2p", 0 }, { "ep93xx-dma-m2m", 1 }, @@ -1417,15 +1550,13 @@ static const struct platform_device_id ep93xx_dma_driver_ids[] = { static struct platform_driver ep93xx_dma_driver = { .driver = { .name = "ep93xx-dma", + .of_match_table = ep93xx_dma_of_ids, }, .id_table = ep93xx_dma_driver_ids, + .probe = ep93xx_dma_probe, }; -static int __init ep93xx_dma_module_init(void) -{ - return platform_driver_probe(&ep93xx_dma_driver, ep93xx_dma_probe); -} -subsys_initcall(ep93xx_dma_module_init); +module_platform_driver(ep93xx_dma_driver); MODULE_AUTHOR("Mika Westerberg "); MODULE_DESCRIPTION("EP93xx DMA driver"); diff --git a/include/linux/platform_data/dma-ep93xx.h b/include/linux/platform_data/dma-ep93xx.h index eb9805bb3fe8..9ec5cdd5a1eb 100644 --- a/include/linux/platform_data/dma-ep93xx.h +++ b/include/linux/platform_data/dma-ep93xx.h @@ -3,8 +3,11 @@ #define __ASM_ARCH_DMA_H #include +#include #include #include +#include +#include /* * M2P channels. @@ -70,6 +73,9 @@ struct ep93xx_dma_platform_data { static inline bool ep93xx_dma_chan_is_m2p(struct dma_chan *chan) { + if (device_is_compatible(chan->device->dev, "cirrus,ep9301-dma-m2p")) + return true; + return !strcmp(dev_name(chan->device->dev), "ep93xx-dma-m2p"); } From 824ccabd73aa2aea35ec5ef979ef67be6b691d15 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:37 +0300 Subject: [PATCH 221/655] dt-bindings: pwm: Add Cirrus EP93xx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add YAML bindings for ep93xx SoC PWM. Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Reviewed-by: Krzysztof Kozlowski Reviewed-by: Andy Shevchenko Reviewed-by: Guenter Roeck Acked-by: Uwe Kleine-König Acked-by: Miquel Raynal Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- .../bindings/pwm/cirrus,ep9301-pwm.yaml | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 Documentation/devicetree/bindings/pwm/cirrus,ep9301-pwm.yaml diff --git a/Documentation/devicetree/bindings/pwm/cirrus,ep9301-pwm.yaml b/Documentation/devicetree/bindings/pwm/cirrus,ep9301-pwm.yaml new file mode 100644 index 000000000000..903210ef9c31 --- /dev/null +++ b/Documentation/devicetree/bindings/pwm/cirrus,ep9301-pwm.yaml @@ -0,0 +1,53 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pwm/cirrus,ep9301-pwm.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Cirrus Logic ep93xx PWM controller + +maintainers: + - Alexander Sverdlin + - Nikita Shubin + +allOf: + - $ref: pwm.yaml# + +properties: + compatible: + oneOf: + - const: cirrus,ep9301-pwm + - items: + - enum: + - cirrus,ep9302-pwm + - cirrus,ep9307-pwm + - cirrus,ep9312-pwm + - cirrus,ep9315-pwm + - const: cirrus,ep9301-pwm + + reg: + maxItems: 1 + + clocks: + items: + - description: SoC PWM clock + + "#pwm-cells": + const: 3 + +required: + - compatible + - reg + - clocks + +unevaluatedProperties: false + +examples: + - | + #include + pwm@80910000 { + compatible = "cirrus,ep9301-pwm"; + reg = <0x80910000 0x10>; + clocks = <&syscon EP93XX_CLK_PWM>; + #pwm-cells = <3>; + }; From 4a0f1f0993f532890e7746a30dee8a826149cd3b Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:38 +0300 Subject: [PATCH 222/655] pwm: ep93xx: add DT support for Cirrus EP93xx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add OF ID match table. Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Tested-by: Michael Peters Reviewed-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Reviewed-by: Andy Shevchenko Reviewed-by: Krzysztof Kozlowski Reviewed-by: Guenter Roeck Reviewed-by: Mark Brown Reviewed-by: Andy Shevchenko Reviewed-by: Linus Walleij Reviewed-by: Kris Bahnsen Reviewed-by: Andrew Lunn Reviewed-by: Sergey Shtylyov Acked-by: Uwe Kleine-König Acked-by: Miquel Raynal Acked-by: Alexander Sverdlin Acked-by: Damien Le Moal Acked-by: Sebastian Reichel Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- drivers/pwm/pwm-ep93xx.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/pwm/pwm-ep93xx.c b/drivers/pwm/pwm-ep93xx.c index 666f2954133c..67c3fdbf7ae3 100644 --- a/drivers/pwm/pwm-ep93xx.c +++ b/drivers/pwm/pwm-ep93xx.c @@ -17,6 +17,7 @@ */ #include +#include #include #include #include @@ -188,9 +189,16 @@ static int ep93xx_pwm_probe(struct platform_device *pdev) return 0; } +static const struct of_device_id ep93xx_pwm_of_ids[] = { + { .compatible = "cirrus,ep9301-pwm" }, + { /* sentinel */} +}; +MODULE_DEVICE_TABLE(of, ep93xx_pwm_of_ids); + static struct platform_driver ep93xx_pwm_driver = { .driver = { .name = "ep93xx-pwm", + .of_match_table = ep93xx_pwm_of_ids, }, .probe = ep93xx_pwm_probe, }; From cb0291776fa691027ad1de8da6e8678a20ca89c3 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:39 +0300 Subject: [PATCH 223/655] dt-bindings: spi: Add Cirrus EP93xx Add YAML bindings for ep93xx SoC SPI. Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Reviewed-by: Krzysztof Kozlowski Reviewed-by: Mark Brown Reviewed-by: Andy Shevchenko Reviewed-by: Guenter Roeck Acked-by: Miquel Raynal Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- .../bindings/spi/cirrus,ep9301-spi.yaml | 70 +++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 Documentation/devicetree/bindings/spi/cirrus,ep9301-spi.yaml diff --git a/Documentation/devicetree/bindings/spi/cirrus,ep9301-spi.yaml b/Documentation/devicetree/bindings/spi/cirrus,ep9301-spi.yaml new file mode 100644 index 000000000000..73980a27dc00 --- /dev/null +++ b/Documentation/devicetree/bindings/spi/cirrus,ep9301-spi.yaml @@ -0,0 +1,70 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/spi/cirrus,ep9301-spi.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: EP93xx SoC SPI controller + +maintainers: + - Alexander Sverdlin + - Nikita Shubin + +allOf: + - $ref: spi-controller.yaml# + +properties: + compatible: + oneOf: + - const: cirrus,ep9301-spi + - items: + - enum: + - cirrus,ep9302-spi + - cirrus,ep9307-spi + - cirrus,ep9312-spi + - cirrus,ep9315-spi + - const: cirrus,ep9301-spi + + reg: + items: + - description: SPI registers region + + interrupts: + maxItems: 1 + + clocks: + items: + - description: SPI Controller reference clock source + + dmas: + items: + - description: rx DMA channel + - description: tx DMA channel + + dma-names: + items: + - const: rx + - const: tx + +required: + - compatible + - reg + - interrupts + - clocks + +unevaluatedProperties: false + +examples: + - | + #include + #include + spi@808a0000 { + compatible = "cirrus,ep9301-spi"; + reg = <0x808a0000 0x18>; + interrupt-parent = <&vic1>; + interrupts = <21>; + clocks = <&syscon EP93XX_CLK_SPI>; + dmas = <&dma1 10 2>, <&dma1 10 1>; + dma-names = "rx", "tx"; + cs-gpios = <&gpio5 2 GPIO_ACTIVE_HIGH>; + }; From e79e7c2df6277ee1ad9364a231f6183da4492415 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:40 +0300 Subject: [PATCH 224/655] spi: ep93xx: add DT support for Cirrus EP93xx - add OF ID match table - add device tree DMA request, so we can probe defer, in case DMA is not ready yet - drop DMA platform code Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Reviewed-by: Linus Walleij Reviewed-by: Mark Brown Acked-by: Alexander Sverdlin Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- drivers/spi/spi-ep93xx.c | 66 ++++++++++++++-------------------------- 1 file changed, 23 insertions(+), 43 deletions(-) diff --git a/drivers/spi/spi-ep93xx.c b/drivers/spi/spi-ep93xx.c index a1d60e51c053..ffbe0d522bce 100644 --- a/drivers/spi/spi-ep93xx.c +++ b/drivers/spi/spi-ep93xx.c @@ -18,18 +18,18 @@ #include #include #include +#include +#include #include #include #include #include +#include #include #include #include #include -#include -#include - #define SSPCR0 0x0000 #define SSPCR0_SPO BIT(6) #define SSPCR0_SPH BIT(7) @@ -92,8 +92,6 @@ struct ep93xx_spi { size_t fifo_level; struct dma_chan *dma_rx; struct dma_chan *dma_tx; - struct ep93xx_dma_data dma_rx_data; - struct ep93xx_dma_data dma_tx_data; struct sg_table rx_sgt; struct sg_table tx_sgt; void *zeropage; @@ -575,46 +573,23 @@ static int ep93xx_spi_unprepare_hardware(struct spi_controller *host) return 0; } -static bool ep93xx_spi_dma_filter(struct dma_chan *chan, void *filter_param) +static int ep93xx_spi_setup_dma(struct device *dev, struct ep93xx_spi *espi) { - if (ep93xx_dma_chan_is_m2p(chan)) - return false; - - chan->private = filter_param; - return true; -} - -static int ep93xx_spi_setup_dma(struct ep93xx_spi *espi) -{ - dma_cap_mask_t mask; int ret; espi->zeropage = (void *)get_zeroed_page(GFP_KERNEL); if (!espi->zeropage) return -ENOMEM; - dma_cap_zero(mask); - dma_cap_set(DMA_SLAVE, mask); - - espi->dma_rx_data.port = EP93XX_DMA_SSP; - espi->dma_rx_data.direction = DMA_DEV_TO_MEM; - espi->dma_rx_data.name = "ep93xx-spi-rx"; - - espi->dma_rx = dma_request_channel(mask, ep93xx_spi_dma_filter, - &espi->dma_rx_data); - if (!espi->dma_rx) { - ret = -ENODEV; + espi->dma_rx = dma_request_chan(dev, "rx"); + if (IS_ERR(espi->dma_rx)) { + ret = dev_err_probe(dev, PTR_ERR(espi->dma_rx), "rx DMA setup failed"); goto fail_free_page; } - espi->dma_tx_data.port = EP93XX_DMA_SSP; - espi->dma_tx_data.direction = DMA_MEM_TO_DEV; - espi->dma_tx_data.name = "ep93xx-spi-tx"; - - espi->dma_tx = dma_request_channel(mask, ep93xx_spi_dma_filter, - &espi->dma_tx_data); - if (!espi->dma_tx) { - ret = -ENODEV; + espi->dma_tx = dma_request_chan(dev, "tx"); + if (IS_ERR(espi->dma_tx)) { + ret = dev_err_probe(dev, PTR_ERR(espi->dma_tx), "tx DMA setup failed"); goto fail_release_rx; } @@ -647,18 +622,11 @@ static void ep93xx_spi_release_dma(struct ep93xx_spi *espi) static int ep93xx_spi_probe(struct platform_device *pdev) { struct spi_controller *host; - struct ep93xx_spi_info *info; struct ep93xx_spi *espi; struct resource *res; int irq; int error; - info = dev_get_platdata(&pdev->dev); - if (!info) { - dev_err(&pdev->dev, "missing platform data\n"); - return -EINVAL; - } - irq = platform_get_irq(pdev, 0); if (irq < 0) return irq; @@ -713,12 +681,17 @@ static int ep93xx_spi_probe(struct platform_device *pdev) goto fail_release_host; } - if (info->use_dma && ep93xx_spi_setup_dma(espi)) + error = ep93xx_spi_setup_dma(&pdev->dev, espi); + if (error == -EPROBE_DEFER) + goto fail_release_host; + + if (error) dev_warn(&pdev->dev, "DMA setup failed. Falling back to PIO\n"); /* make sure that the hardware is disabled */ writel(0, espi->mmio + SSPCR1); + device_set_node(&host->dev, dev_fwnode(&pdev->dev)); error = devm_spi_register_controller(&pdev->dev, host); if (error) { dev_err(&pdev->dev, "failed to register SPI host\n"); @@ -746,9 +719,16 @@ static void ep93xx_spi_remove(struct platform_device *pdev) ep93xx_spi_release_dma(espi); } +static const struct of_device_id ep93xx_spi_of_ids[] = { + { .compatible = "cirrus,ep9301-spi" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, ep93xx_spi_of_ids); + static struct platform_driver ep93xx_spi_driver = { .driver = { .name = "ep93xx-spi", + .of_match_table = ep93xx_spi_of_ids, }, .probe = ep93xx_spi_probe, .remove_new = ep93xx_spi_remove, From 099747ceb0226a3e99fceba16ee4b3f49e598bcc Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:41 +0300 Subject: [PATCH 225/655] dt-bindings: net: Add Cirrus EP93xx Add YAML bindings for ep93xx SoC Ethernet Controller. Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Reviewed-by: Krzysztof Kozlowski Reviewed-by: Andy Shevchenko Reviewed-by: Guenter Roeck Reviewed-by: Mark Brown Reviewed-by: Andy Shevchenko Acked-by: Miquel Raynal Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- .../bindings/net/cirrus,ep9301-eth.yaml | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/cirrus,ep9301-eth.yaml diff --git a/Documentation/devicetree/bindings/net/cirrus,ep9301-eth.yaml b/Documentation/devicetree/bindings/net/cirrus,ep9301-eth.yaml new file mode 100644 index 000000000000..ad0915307095 --- /dev/null +++ b/Documentation/devicetree/bindings/net/cirrus,ep9301-eth.yaml @@ -0,0 +1,59 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/cirrus,ep9301-eth.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: EP93xx SoC Ethernet Controller + +maintainers: + - Alexander Sverdlin + - Nikita Shubin + +allOf: + - $ref: ethernet-controller.yaml# + +properties: + compatible: + oneOf: + - const: cirrus,ep9301-eth + - items: + - enum: + - cirrus,ep9302-eth + - cirrus,ep9307-eth + - cirrus,ep9312-eth + - cirrus,ep9315-eth + - const: cirrus,ep9301-eth + + reg: + items: + - description: The physical base address and size of IO range + + interrupts: + items: + - description: Combined signal for various interrupt events + + phy-handle: true + + mdio: + $ref: mdio.yaml# + unevaluatedProperties: false + description: optional node for embedded MDIO controller + +required: + - compatible + - reg + - interrupts + - phy-handle + +additionalProperties: false + +examples: + - | + ethernet@80010000 { + compatible = "cirrus,ep9301-eth"; + reg = <0x80010000 0x10000>; + interrupt-parent = <&vic1>; + interrupts = <7>; + phy-handle = <&phy0>; + }; From 770e709e38bf90d3144d82218550730290bc1918 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:42 +0300 Subject: [PATCH 226/655] net: cirrus: add DT support for Cirrus EP93xx - add OF ID match table - get phy_id from the device tree, as part of mdio - copy_addr is now always used, as there is no SoC/board that aren't - dropped platform header Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Reviewed-by: Andrew Lunn Reviewed-by: Linus Walleij Reviewed-by: Andy Shevchenko Reviewed-by: Krzysztof Kozlowski Reviewed-by: Guenter Roeck Acked-by: Miquel Raynal Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- drivers/net/ethernet/cirrus/ep93xx_eth.c | 63 ++++++++++++------------ 1 file changed, 32 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/cirrus/ep93xx_eth.c b/drivers/net/ethernet/cirrus/ep93xx_eth.c index 1f495cfd7959..2523d9c9d1b8 100644 --- a/drivers/net/ethernet/cirrus/ep93xx_eth.c +++ b/drivers/net/ethernet/cirrus/ep93xx_eth.c @@ -16,13 +16,12 @@ #include #include #include +#include #include #include #include #include -#include - #define DRV_MODULE_NAME "ep93xx-eth" #define RX_QUEUE_ENTRIES 64 @@ -738,25 +737,6 @@ static const struct net_device_ops ep93xx_netdev_ops = { .ndo_set_mac_address = eth_mac_addr, }; -static struct net_device *ep93xx_dev_alloc(struct ep93xx_eth_data *data) -{ - struct net_device *dev; - - dev = alloc_etherdev(sizeof(struct ep93xx_priv)); - if (dev == NULL) - return NULL; - - eth_hw_addr_set(dev, data->dev_addr); - - dev->ethtool_ops = &ep93xx_ethtool_ops; - dev->netdev_ops = &ep93xx_netdev_ops; - - dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM; - - return dev; -} - - static void ep93xx_eth_remove(struct platform_device *pdev) { struct net_device *dev; @@ -786,27 +766,47 @@ static void ep93xx_eth_remove(struct platform_device *pdev) static int ep93xx_eth_probe(struct platform_device *pdev) { - struct ep93xx_eth_data *data; struct net_device *dev; struct ep93xx_priv *ep; struct resource *mem; + void __iomem *base_addr; + struct device_node *np; + u32 phy_id; int irq; int err; if (pdev == NULL) return -ENODEV; - data = dev_get_platdata(&pdev->dev); mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); irq = platform_get_irq(pdev, 0); if (!mem || irq < 0) return -ENXIO; - dev = ep93xx_dev_alloc(data); + base_addr = ioremap(mem->start, resource_size(mem)); + if (!base_addr) + return dev_err_probe(&pdev->dev, -EIO, "Failed to ioremap ethernet registers\n"); + + np = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0); + if (!np) + return dev_err_probe(&pdev->dev, -ENODEV, "Please provide \"phy-handle\"\n"); + + err = of_property_read_u32(np, "reg", &phy_id); + of_node_put(np); + if (err) + return dev_err_probe(&pdev->dev, -ENOENT, "Failed to locate \"phy_id\"\n"); + + dev = alloc_etherdev(sizeof(struct ep93xx_priv)); if (dev == NULL) { err = -ENOMEM; goto err_out; } + + eth_hw_addr_set(dev, base_addr + 0x50); + dev->ethtool_ops = &ep93xx_ethtool_ops; + dev->netdev_ops = &ep93xx_netdev_ops; + dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM; + ep = netdev_priv(dev); ep->dev = dev; SET_NETDEV_DEV(dev, &pdev->dev); @@ -822,15 +822,10 @@ static int ep93xx_eth_probe(struct platform_device *pdev) goto err_out; } - ep->base_addr = ioremap(mem->start, resource_size(mem)); - if (ep->base_addr == NULL) { - dev_err(&pdev->dev, "Failed to ioremap ethernet registers\n"); - err = -EIO; - goto err_out; - } + ep->base_addr = base_addr; ep->irq = irq; - ep->mii.phy_id = data->phy_id; + ep->mii.phy_id = phy_id; ep->mii.phy_id_mask = 0x1f; ep->mii.reg_num_mask = 0x1f; ep->mii.dev = dev; @@ -857,12 +852,18 @@ static int ep93xx_eth_probe(struct platform_device *pdev) return err; } +static const struct of_device_id ep93xx_eth_of_ids[] = { + { .compatible = "cirrus,ep9301-eth" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, ep93xx_eth_of_ids); static struct platform_driver ep93xx_eth_driver = { .probe = ep93xx_eth_probe, .remove_new = ep93xx_eth_remove, .driver = { .name = "ep93xx-eth", + .of_match_table = ep93xx_eth_of_ids, }, }; From 1d4f2ff1bbed825c5dc3fb47800639a6856a2ebb Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:43 +0300 Subject: [PATCH 227/655] dt-bindings: mtd: Add ts7200 nand-controller Add YAML bindings for ts7200 NAND Controller. Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Reviewed-by: Krzysztof Kozlowski Reviewed-by: Andy Shevchenko Reviewed-by: Guenter Roeck Reviewed-by: Mark Brown Reviewed-by: Andy Shevchenko Acked-by: Miquel Raynal Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- .../bindings/mtd/technologic,nand.yaml | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 Documentation/devicetree/bindings/mtd/technologic,nand.yaml diff --git a/Documentation/devicetree/bindings/mtd/technologic,nand.yaml b/Documentation/devicetree/bindings/mtd/technologic,nand.yaml new file mode 100644 index 000000000000..f9d87c46094b --- /dev/null +++ b/Documentation/devicetree/bindings/mtd/technologic,nand.yaml @@ -0,0 +1,45 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/mtd/technologic,nand.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Technologic Systems NAND controller + +maintainers: + - Nikita Shubin + +allOf: + - $ref: nand-controller.yaml + +properties: + compatible: + oneOf: + - const: technologic,ts7200-nand + - items: + - enum: + - technologic,ts7300-nand + - technologic,ts7260-nand + - technologic,ts7250-nand + - const: technologic,ts7200-nand + + reg: + maxItems: 1 + +required: + - compatible + - reg + +unevaluatedProperties: false + +examples: + - | + nand-controller@60000000 { + compatible = "technologic,ts7200-nand"; + reg = <0x60000000 0x8000000>; + #address-cells = <1>; + #size-cells = <0>; + nand@0 { + reg = <0>; + }; + }; From 853034c7d8c0022f0cf84cf041572975eb3c2dba Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:44 +0300 Subject: [PATCH 228/655] mtd: rawnand: add support for ts72xx Technologic Systems has it's own nand controller implementation in CPLD. Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Acked-by: Miquel Raynal Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- drivers/mtd/nand/raw/Kconfig | 6 + drivers/mtd/nand/raw/Makefile | 1 + .../nand/raw/technologic-nand-controller.c | 222 ++++++++++++++++++ 3 files changed, 229 insertions(+) create mode 100644 drivers/mtd/nand/raw/technologic-nand-controller.c diff --git a/drivers/mtd/nand/raw/Kconfig b/drivers/mtd/nand/raw/Kconfig index 614257308516..d0aaccf72d78 100644 --- a/drivers/mtd/nand/raw/Kconfig +++ b/drivers/mtd/nand/raw/Kconfig @@ -448,6 +448,12 @@ config MTD_NAND_RENESAS Enables support for the NAND controller found on Renesas R-Car Gen3 and RZ/N1 SoC families. +config MTD_NAND_TS72XX + tristate "ts72xx NAND controller" + depends on ARCH_EP93XX && HAS_IOMEM + help + Enables support for NAND controller on ts72xx SBCs. + comment "Misc" config MTD_SM_COMMON diff --git a/drivers/mtd/nand/raw/Makefile b/drivers/mtd/nand/raw/Makefile index 25120a4afada..d0b0e6b83568 100644 --- a/drivers/mtd/nand/raw/Makefile +++ b/drivers/mtd/nand/raw/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_MTD_NAND_MLC_LPC32XX) += lpc32xx_mlc.o obj-$(CONFIG_MTD_NAND_SH_FLCTL) += sh_flctl.o obj-$(CONFIG_MTD_NAND_MXC) += mxc_nand.o obj-$(CONFIG_MTD_NAND_SOCRATES) += socrates_nand.o +obj-$(CONFIG_MTD_NAND_TS72XX) += technologic-nand-controller.o obj-$(CONFIG_MTD_NAND_TXX9NDFMC) += txx9ndfmc.o obj-$(CONFIG_MTD_NAND_MPC5121_NFC) += mpc5121_nfc.o obj-$(CONFIG_MTD_NAND_VF610_NFC) += vf610_nfc.o diff --git a/drivers/mtd/nand/raw/technologic-nand-controller.c b/drivers/mtd/nand/raw/technologic-nand-controller.c new file mode 100644 index 000000000000..0e45a6fd91dd --- /dev/null +++ b/drivers/mtd/nand/raw/technologic-nand-controller.c @@ -0,0 +1,222 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Technologic Systems TS72xx NAND controller driver + * + * Copyright (C) 2023 Nikita Shubin + * + * Derived from: plat_nand.c + * Author: Vitaly Wool + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define TS72XX_NAND_CONTROL_ADDR_LINE BIT(22) /* 0xN0400000 */ +#define TS72XX_NAND_BUSY_ADDR_LINE BIT(23) /* 0xN0800000 */ + +#define TS72XX_NAND_ALE BIT(0) +#define TS72XX_NAND_CLE BIT(1) +#define TS72XX_NAND_NCE BIT(2) + +#define TS72XX_NAND_CTRL_CLE (TS72XX_NAND_NCE | TS72XX_NAND_CLE) +#define TS72XX_NAND_CTRL_ALE (TS72XX_NAND_NCE | TS72XX_NAND_ALE) + +struct ts72xx_nand_data { + struct nand_controller controller; + struct nand_chip chip; + void __iomem *base; + void __iomem *ctrl; + void __iomem *busy; +}; + +static inline struct ts72xx_nand_data *chip_to_ts72xx(struct nand_chip *chip) +{ + return container_of(chip, struct ts72xx_nand_data, chip); +} + +static int ts72xx_nand_attach_chip(struct nand_chip *chip) +{ + switch (chip->ecc.engine_type) { + case NAND_ECC_ENGINE_TYPE_ON_HOST: + return -EINVAL; + case NAND_ECC_ENGINE_TYPE_SOFT: + if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN) + chip->ecc.algo = NAND_ECC_ALGO_HAMMING; + chip->ecc.algo = NAND_ECC_ALGO_HAMMING; + fallthrough; + default: + return 0; + } +} + +static void ts72xx_nand_ctrl(struct nand_chip *chip, u8 value) +{ + struct ts72xx_nand_data *data = chip_to_ts72xx(chip); + unsigned char bits = ioread8(data->ctrl) & ~GENMASK(2, 0); + + iowrite8(bits | value, data->ctrl); +} + +static int ts72xx_nand_exec_instr(struct nand_chip *chip, + const struct nand_op_instr *instr) +{ + struct ts72xx_nand_data *data = chip_to_ts72xx(chip); + unsigned int timeout_us; + u32 status; + int ret; + + switch (instr->type) { + case NAND_OP_CMD_INSTR: + ts72xx_nand_ctrl(chip, TS72XX_NAND_CTRL_CLE); + iowrite8(instr->ctx.cmd.opcode, data->base); + ts72xx_nand_ctrl(chip, TS72XX_NAND_NCE); + break; + + case NAND_OP_ADDR_INSTR: + ts72xx_nand_ctrl(chip, TS72XX_NAND_CTRL_ALE); + iowrite8_rep(data->base, instr->ctx.addr.addrs, instr->ctx.addr.naddrs); + ts72xx_nand_ctrl(chip, TS72XX_NAND_NCE); + break; + + case NAND_OP_DATA_IN_INSTR: + ioread8_rep(data->base, instr->ctx.data.buf.in, instr->ctx.data.len); + break; + + case NAND_OP_DATA_OUT_INSTR: + iowrite8_rep(data->base, instr->ctx.data.buf.in, instr->ctx.data.len); + break; + + case NAND_OP_WAITRDY_INSTR: + timeout_us = instr->ctx.waitrdy.timeout_ms * 1000; + ret = readb_poll_timeout(data->busy, status, status & BIT(5), 0, timeout_us); + if (ret) + return ret; + + break; + } + + if (instr->delay_ns) + ndelay(instr->delay_ns); + + return 0; +} + +static int ts72xx_nand_exec_op(struct nand_chip *chip, + const struct nand_operation *op, bool check_only) +{ + unsigned int i; + int ret; + + if (check_only) + return 0; + + for (i = 0; i < op->ninstrs; i++) { + ret = ts72xx_nand_exec_instr(chip, &op->instrs[i]); + if (ret) + return ret; + } + + return 0; +} + +static const struct nand_controller_ops ts72xx_nand_ops = { + .attach_chip = ts72xx_nand_attach_chip, + .exec_op = ts72xx_nand_exec_op, +}; + +static int ts72xx_nand_probe(struct platform_device *pdev) +{ + struct ts72xx_nand_data *data; + struct fwnode_handle *child; + struct mtd_info *mtd; + int err; + + data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + nand_controller_init(&data->controller); + data->controller.ops = &ts72xx_nand_ops; + data->chip.controller = &data->controller; + + data->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(data->base)) + return PTR_ERR(data->base); + data->ctrl = data->base + TS72XX_NAND_CONTROL_ADDR_LINE; + data->busy = data->base + TS72XX_NAND_BUSY_ADDR_LINE; + + child = fwnode_get_next_child_node(dev_fwnode(&pdev->dev), NULL); + if (!child) + return dev_err_probe(&pdev->dev, -ENXIO, + "ts72xx controller node should have exactly one child\n"); + + nand_set_flash_node(&data->chip, to_of_node(child)); + mtd = nand_to_mtd(&data->chip); + mtd->dev.parent = &pdev->dev; + platform_set_drvdata(pdev, data); + + /* + * This driver assumes that the default ECC engine should be TYPE_SOFT. + * Set ->engine_type before registering the NAND devices in order to + * provide a driver specific default value. + */ + data->chip.ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; + + /* Scan to find existence of the device */ + err = nand_scan(&data->chip, 1); + if (err) + goto err_handle_put; + + err = mtd_device_parse_register(mtd, NULL, NULL, NULL, 0); + if (err) + goto err_clean_nand; + + return 0; + +err_clean_nand: + nand_cleanup(&data->chip); +err_handle_put: + fwnode_handle_put(child); + return err; +} + +static void ts72xx_nand_remove(struct platform_device *pdev) +{ + struct ts72xx_nand_data *data = platform_get_drvdata(pdev); + struct fwnode_handle *fwnode = dev_fwnode(&pdev->dev); + struct nand_chip *chip = &data->chip; + int ret; + + ret = mtd_device_unregister(nand_to_mtd(chip)); + WARN_ON(ret); + nand_cleanup(chip); + fwnode_handle_put(fwnode); +} + +static const struct of_device_id ts72xx_id_table[] = { + { .compatible = "technologic,ts7200-nand" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, ts72xx_id_table); + +static struct platform_driver ts72xx_nand_driver = { + .driver = { + .name = "ts72xx-nand", + .of_match_table = ts72xx_id_table, + }, + .probe = ts72xx_nand_probe, + .remove_new = ts72xx_nand_remove, +}; +module_platform_driver(ts72xx_nand_driver); + +MODULE_AUTHOR("Nikita Shubin "); +MODULE_DESCRIPTION("Technologic Systems TS72xx NAND controller driver"); +MODULE_LICENSE("GPL"); From f4da2b6055635738e5ce5ac9352cdb7d5bfd7ce3 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:45 +0300 Subject: [PATCH 229/655] dt-bindings: ata: Add Cirrus EP93xx Add YAML bindings for ep93xx SoC PATA. Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Reviewed-by: Krzysztof Kozlowski Reviewed-by: Andy Shevchenko Reviewed-by: Guenter Roeck Reviewed-by: Mark Brown Reviewed-by: Andy Shevchenko Acked-by: Damien Le Moal Acked-by: Miquel Raynal Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- .../bindings/ata/cirrus,ep9312-pata.yaml | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 Documentation/devicetree/bindings/ata/cirrus,ep9312-pata.yaml diff --git a/Documentation/devicetree/bindings/ata/cirrus,ep9312-pata.yaml b/Documentation/devicetree/bindings/ata/cirrus,ep9312-pata.yaml new file mode 100644 index 000000000000..8130923fdc72 --- /dev/null +++ b/Documentation/devicetree/bindings/ata/cirrus,ep9312-pata.yaml @@ -0,0 +1,42 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/ata/cirrus,ep9312-pata.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Cirrus Logic EP9312 PATA controller + +maintainers: + - Damien Le Moal + +properties: + compatible: + oneOf: + - const: cirrus,ep9312-pata + - items: + - const: cirrus,ep9315-pata + - const: cirrus,ep9312-pata + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + +required: + - compatible + - reg + - interrupts + +additionalProperties: false + +examples: + - | + ide@800a0000 { + compatible = "cirrus,ep9312-pata"; + reg = <0x800a0000 0x38>; + interrupt-parent = <&vic1>; + interrupts = <8>; + pinctrl-names = "default"; + pinctrl-0 = <&ide_default_pins>; + }; From 9963113e3a9248785518ef8add920a4acb8c3ca4 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:46 +0300 Subject: [PATCH 230/655] ata: pata_ep93xx: add device tree support - add OF ID match table - drop platform DMA and filters - change DMA setup to OF, so we can defer probe Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Reviewed-by: Sergey Shtylyov Acked-by: Damien Le Moal Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- drivers/ata/pata_ep93xx.c | 80 ++++++++++++++++++++------------------- 1 file changed, 42 insertions(+), 38 deletions(-) diff --git a/drivers/ata/pata_ep93xx.c b/drivers/ata/pata_ep93xx.c index c84a20892f1b..13246a92e29f 100644 --- a/drivers/ata/pata_ep93xx.c +++ b/drivers/ata/pata_ep93xx.c @@ -44,8 +44,8 @@ #include #include #include +#include -#include #include #define DRV_NAME "ep93xx-ide" @@ -126,7 +126,7 @@ enum { }; struct ep93xx_pata_data { - const struct platform_device *pdev; + struct platform_device *pdev; void __iomem *ide_base; struct ata_timing t; bool iordy; @@ -135,9 +135,7 @@ struct ep93xx_pata_data { unsigned long udma_out_phys; struct dma_chan *dma_rx_channel; - struct ep93xx_dma_data dma_rx_data; struct dma_chan *dma_tx_channel; - struct ep93xx_dma_data dma_tx_data; }; static void ep93xx_pata_clear_regs(void __iomem *base) @@ -637,20 +635,13 @@ static void ep93xx_pata_release_dma(struct ep93xx_pata_data *drv_data) } } -static bool ep93xx_pata_dma_filter(struct dma_chan *chan, void *filter_param) +static int ep93xx_pata_dma_init(struct ep93xx_pata_data *drv_data) { - if (ep93xx_dma_chan_is_m2p(chan)) - return false; - - chan->private = filter_param; - return true; -} - -static void ep93xx_pata_dma_init(struct ep93xx_pata_data *drv_data) -{ - const struct platform_device *pdev = drv_data->pdev; + struct platform_device *pdev = drv_data->pdev; + struct device *dev = &pdev->dev; dma_cap_mask_t mask; struct dma_slave_config conf; + int ret; dma_cap_zero(mask); dma_cap_set(DMA_SLAVE, mask); @@ -660,22 +651,16 @@ static void ep93xx_pata_dma_init(struct ep93xx_pata_data *drv_data) * to request only one channel, and reprogram it's direction at * start of new transfer. */ - drv_data->dma_rx_data.port = EP93XX_DMA_IDE; - drv_data->dma_rx_data.direction = DMA_DEV_TO_MEM; - drv_data->dma_rx_data.name = "ep93xx-pata-rx"; - drv_data->dma_rx_channel = dma_request_channel(mask, - ep93xx_pata_dma_filter, &drv_data->dma_rx_data); - if (!drv_data->dma_rx_channel) - return; + drv_data->dma_rx_channel = dma_request_chan(dev, "rx"); + if (IS_ERR(drv_data->dma_rx_channel)) + return dev_err_probe(dev, PTR_ERR(drv_data->dma_rx_channel), + "rx DMA setup failed\n"); - drv_data->dma_tx_data.port = EP93XX_DMA_IDE; - drv_data->dma_tx_data.direction = DMA_MEM_TO_DEV; - drv_data->dma_tx_data.name = "ep93xx-pata-tx"; - drv_data->dma_tx_channel = dma_request_channel(mask, - ep93xx_pata_dma_filter, &drv_data->dma_tx_data); - if (!drv_data->dma_tx_channel) { - dma_release_channel(drv_data->dma_rx_channel); - return; + drv_data->dma_tx_channel = dma_request_chan(&pdev->dev, "tx"); + if (IS_ERR(drv_data->dma_tx_channel)) { + ret = dev_err_probe(dev, PTR_ERR(drv_data->dma_tx_channel), + "tx DMA setup failed\n"); + goto fail_release_rx; } /* Configure receive channel direction and source address */ @@ -683,10 +668,10 @@ static void ep93xx_pata_dma_init(struct ep93xx_pata_data *drv_data) conf.direction = DMA_DEV_TO_MEM; conf.src_addr = drv_data->udma_in_phys; conf.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; - if (dmaengine_slave_config(drv_data->dma_rx_channel, &conf)) { - dev_err(&pdev->dev, "failed to configure rx dma channel\n"); - ep93xx_pata_release_dma(drv_data); - return; + ret = dmaengine_slave_config(drv_data->dma_rx_channel, &conf); + if (ret) { + dev_err_probe(dev, ret, "failed to configure rx dma channel"); + goto fail_release_dma; } /* Configure transmit channel direction and destination address */ @@ -694,10 +679,20 @@ static void ep93xx_pata_dma_init(struct ep93xx_pata_data *drv_data) conf.direction = DMA_MEM_TO_DEV; conf.dst_addr = drv_data->udma_out_phys; conf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; - if (dmaengine_slave_config(drv_data->dma_tx_channel, &conf)) { - dev_err(&pdev->dev, "failed to configure tx dma channel\n"); - ep93xx_pata_release_dma(drv_data); + ret = dmaengine_slave_config(drv_data->dma_tx_channel, &conf); + if (ret) { + dev_err_probe(dev, ret, "failed to configure tx dma channel"); + goto fail_release_dma; } + + return 0; + +fail_release_rx: + dma_release_channel(drv_data->dma_rx_channel); +fail_release_dma: + ep93xx_pata_release_dma(drv_data); + + return ret; } static void ep93xx_pata_dma_start(struct ata_queued_cmd *qc) @@ -954,7 +949,9 @@ static int ep93xx_pata_probe(struct platform_device *pdev) drv_data->ide_base = ide_base; drv_data->udma_in_phys = mem_res->start + IDEUDMADATAIN; drv_data->udma_out_phys = mem_res->start + IDEUDMADATAOUT; - ep93xx_pata_dma_init(drv_data); + err = ep93xx_pata_dma_init(drv_data); + if (err) + return err; /* allocate host */ host = ata_host_alloc(&pdev->dev, 1); @@ -1021,9 +1018,16 @@ static void ep93xx_pata_remove(struct platform_device *pdev) ep93xx_ide_release_gpio(pdev); } +static const struct of_device_id ep93xx_pata_of_ids[] = { + { .compatible = "cirrus,ep9312-pata" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, ep93xx_pata_of_ids); + static struct platform_driver ep93xx_pata_platform_driver = { .driver = { .name = DRV_NAME, + .of_match_table = ep93xx_pata_of_ids, }, .probe = ep93xx_pata_probe, .remove_new = ep93xx_pata_remove, From 9cefdd1a952aa1d103ce5f14f4b54e0011cf11e5 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:47 +0300 Subject: [PATCH 231/655] dt-bindings: input: Add Cirrus EP93xx keypad Add YAML bindings for ep93xx SoC keypad. Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Reviewed-by: Krzysztof Kozlowski Reviewed-by: Andy Shevchenko Reviewed-by: Guenter Roeck Acked-by: Miquel Raynal Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- .../bindings/input/cirrus,ep9307-keypad.yaml | 87 +++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 Documentation/devicetree/bindings/input/cirrus,ep9307-keypad.yaml diff --git a/Documentation/devicetree/bindings/input/cirrus,ep9307-keypad.yaml b/Documentation/devicetree/bindings/input/cirrus,ep9307-keypad.yaml new file mode 100644 index 000000000000..a0d2460c55ab --- /dev/null +++ b/Documentation/devicetree/bindings/input/cirrus,ep9307-keypad.yaml @@ -0,0 +1,87 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/input/cirrus,ep9307-keypad.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Cirrus ep93xx keypad + +maintainers: + - Alexander Sverdlin + +allOf: + - $ref: /schemas/input/matrix-keymap.yaml# + +description: + The KPP is designed to interface with a keypad matrix with 2-point contact + or 3-point contact keys. The KPP is designed to simplify the software task + of scanning a keypad matrix. The KPP is capable of detecting, debouncing, + and decoding one or multiple keys pressed simultaneously on a keypad. + +properties: + compatible: + oneOf: + - const: cirrus,ep9307-keypad + - items: + - enum: + - cirrus,ep9312-keypad + - cirrus,ep9315-keypad + - const: cirrus,ep9307-keypad + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + maxItems: 1 + + debounce-delay-ms: + description: | + Time in microseconds that key must be pressed or + released for state change interrupt to trigger. + + cirrus,prescale: + description: row/column counter pre-scaler load value + $ref: /schemas/types.yaml#/definitions/uint16 + maximum: 1023 + +required: + - compatible + - reg + - interrupts + - clocks + - linux,keymap + +unevaluatedProperties: false + +examples: + - | + #include + #include + keypad@800f0000 { + compatible = "cirrus,ep9307-keypad"; + reg = <0x800f0000 0x0c>; + interrupt-parent = <&vic0>; + interrupts = <29>; + clocks = <&eclk EP93XX_CLK_KEYPAD>; + pinctrl-names = "default"; + pinctrl-0 = <&keypad_default_pins>; + linux,keymap = , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + ; + }; From b3ab5787e7acb02874ae86cbe13969d4dd01a585 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:48 +0300 Subject: [PATCH 232/655] input: keypad: ep93xx: add DT support for Cirrus EP93xx - drop flags, they were not used anyway - add OF ID match table - process "autorepeat", "debounce-delay-ms", prescale from device tree - drop platform data usage and it's header - keymap goes from device tree now on Signed-off-by: Nikita Shubin Acked-by: Dmitry Torokhov Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- arch/arm/mach-ep93xx/core.c | 46 ---------------- drivers/input/keyboard/ep93xx_keypad.c | 74 ++++++++------------------ include/linux/soc/cirrus/ep93xx.h | 4 -- 3 files changed, 22 insertions(+), 102 deletions(-) diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c index 03bce5e9d1f1..b99c46d22c4d 100644 --- a/arch/arm/mach-ep93xx/core.c +++ b/arch/arm/mach-ep93xx/core.c @@ -697,52 +697,6 @@ void __init ep93xx_register_keypad(struct ep93xx_keypad_platform_data *data) platform_device_register(&ep93xx_keypad_device); } -int ep93xx_keypad_acquire_gpio(struct platform_device *pdev) -{ - int err; - int i; - - for (i = 0; i < 8; i++) { - err = gpio_request(EP93XX_GPIO_LINE_C(i), dev_name(&pdev->dev)); - if (err) - goto fail_gpio_c; - err = gpio_request(EP93XX_GPIO_LINE_D(i), dev_name(&pdev->dev)); - if (err) - goto fail_gpio_d; - } - - /* Enable the keypad controller; GPIO ports C and D used for keypad */ - ep93xx_devcfg_clear_bits(EP93XX_SYSCON_DEVCFG_KEYS | - EP93XX_SYSCON_DEVCFG_GONK); - - return 0; - -fail_gpio_d: - gpio_free(EP93XX_GPIO_LINE_C(i)); -fail_gpio_c: - for (--i; i >= 0; --i) { - gpio_free(EP93XX_GPIO_LINE_C(i)); - gpio_free(EP93XX_GPIO_LINE_D(i)); - } - return err; -} -EXPORT_SYMBOL(ep93xx_keypad_acquire_gpio); - -void ep93xx_keypad_release_gpio(struct platform_device *pdev) -{ - int i; - - for (i = 0; i < 8; i++) { - gpio_free(EP93XX_GPIO_LINE_C(i)); - gpio_free(EP93XX_GPIO_LINE_D(i)); - } - - /* Disable the keypad controller; GPIO ports C and D used for GPIO */ - ep93xx_devcfg_set_bits(EP93XX_SYSCON_DEVCFG_KEYS | - EP93XX_SYSCON_DEVCFG_GONK); -} -EXPORT_SYMBOL(ep93xx_keypad_release_gpio); - /************************************************************************* * EP93xx I2S audio peripheral handling *************************************************************************/ diff --git a/drivers/input/keyboard/ep93xx_keypad.c b/drivers/input/keyboard/ep93xx_keypad.c index 6b811d6bf625..dcbc50304a5a 100644 --- a/drivers/input/keyboard/ep93xx_keypad.c +++ b/drivers/input/keyboard/ep93xx_keypad.c @@ -6,20 +6,13 @@ * * Based on the pxa27x matrix keypad controller by Rodolfo Giometti. * - * NOTE: - * - * The 3-key reset is triggered by pressing the 3 keys in - * Row 0, Columns 2, 4, and 7 at the same time. This action can - * be disabled by setting the EP93XX_KEYPAD_DISABLE_3_KEY flag. - * - * Normal operation for the matrix does not autorepeat the key press. - * This action can be enabled by setting the EP93XX_KEYPAD_AUTOREPEAT - * flag. */ #include +#include #include #include +#include #include #include #include @@ -27,7 +20,6 @@ #include #include #include -#include #include /* @@ -61,12 +53,16 @@ #define KEY_REG_KEY1_MASK GENMASK(5, 0) #define KEY_REG_KEY1_SHIFT 0 +#define EP93XX_MATRIX_ROWS (8) +#define EP93XX_MATRIX_COLS (8) + #define EP93XX_MATRIX_SIZE (EP93XX_MATRIX_ROWS * EP93XX_MATRIX_COLS) struct ep93xx_keypad { - struct ep93xx_keypad_platform_data *pdata; struct input_dev *input_dev; struct clk *clk; + unsigned int debounce; + u16 prescale; void __iomem *mmio_base; @@ -133,23 +129,11 @@ static irqreturn_t ep93xx_keypad_irq_handler(int irq, void *dev_id) static void ep93xx_keypad_config(struct ep93xx_keypad *keypad) { - struct ep93xx_keypad_platform_data *pdata = keypad->pdata; unsigned int val = 0; - clk_set_rate(keypad->clk, pdata->clk_rate); + val |= (keypad->debounce << KEY_INIT_DBNC_SHIFT) & KEY_INIT_DBNC_MASK; - if (pdata->flags & EP93XX_KEYPAD_DISABLE_3_KEY) - val |= KEY_INIT_DIS3KY; - if (pdata->flags & EP93XX_KEYPAD_DIAG_MODE) - val |= KEY_INIT_DIAG; - if (pdata->flags & EP93XX_KEYPAD_BACK_DRIVE) - val |= KEY_INIT_BACK; - if (pdata->flags & EP93XX_KEYPAD_TEST_MODE) - val |= KEY_INIT_T2; - - val |= ((pdata->debounce << KEY_INIT_DBNC_SHIFT) & KEY_INIT_DBNC_MASK); - - val |= ((pdata->prescale << KEY_INIT_PRSCL_SHIFT) & KEY_INIT_PRSCL_MASK); + val |= (keypad->prescale << KEY_INIT_PRSCL_SHIFT) & KEY_INIT_PRSCL_MASK; __raw_writel(val, keypad->mmio_base + KEY_INIT); } @@ -220,17 +204,10 @@ static int ep93xx_keypad_resume(struct device *dev) static DEFINE_SIMPLE_DEV_PM_OPS(ep93xx_keypad_pm_ops, ep93xx_keypad_suspend, ep93xx_keypad_resume); -static void ep93xx_keypad_release_gpio_action(void *_pdev) -{ - struct platform_device *pdev = _pdev; - - ep93xx_keypad_release_gpio(pdev); -} - static int ep93xx_keypad_probe(struct platform_device *pdev) { + struct device *dev = &pdev->dev; struct ep93xx_keypad *keypad; - const struct matrix_keymap_data *keymap_data; struct input_dev *input_dev; int err; @@ -238,14 +215,6 @@ static int ep93xx_keypad_probe(struct platform_device *pdev) if (!keypad) return -ENOMEM; - keypad->pdata = dev_get_platdata(&pdev->dev); - if (!keypad->pdata) - return -EINVAL; - - keymap_data = keypad->pdata->keymap_data; - if (!keymap_data) - return -EINVAL; - keypad->irq = platform_get_irq(pdev, 0); if (keypad->irq < 0) return keypad->irq; @@ -254,19 +223,13 @@ static int ep93xx_keypad_probe(struct platform_device *pdev) if (IS_ERR(keypad->mmio_base)) return PTR_ERR(keypad->mmio_base); - err = ep93xx_keypad_acquire_gpio(pdev); - if (err) - return err; - - err = devm_add_action_or_reset(&pdev->dev, - ep93xx_keypad_release_gpio_action, pdev); - if (err) - return err; - keypad->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(keypad->clk)) return PTR_ERR(keypad->clk); + device_property_read_u32(dev, "debounce-delay-ms", &keypad->debounce); + device_property_read_u16(dev, "cirrus,prescale", &keypad->prescale); + input_dev = devm_input_allocate_device(&pdev->dev); if (!input_dev) return -ENOMEM; @@ -278,13 +241,13 @@ static int ep93xx_keypad_probe(struct platform_device *pdev) input_dev->open = ep93xx_keypad_open; input_dev->close = ep93xx_keypad_close; - err = matrix_keypad_build_keymap(keymap_data, NULL, + err = matrix_keypad_build_keymap(NULL, NULL, EP93XX_MATRIX_ROWS, EP93XX_MATRIX_COLS, keypad->keycodes, input_dev); if (err) return err; - if (keypad->pdata->flags & EP93XX_KEYPAD_AUTOREPEAT) + if (device_property_read_bool(&pdev->dev, "autorepeat")) __set_bit(EV_REP, input_dev->evbit); input_set_drvdata(input_dev, keypad); @@ -313,10 +276,17 @@ static void ep93xx_keypad_remove(struct platform_device *pdev) dev_pm_clear_wake_irq(&pdev->dev); } +static const struct of_device_id ep93xx_keypad_of_ids[] = { + { .compatible = "cirrus,ep9307-keypad" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, ep93xx_keypad_of_ids); + static struct platform_driver ep93xx_keypad_driver = { .driver = { .name = "ep93xx-keypad", .pm = pm_sleep_ptr(&ep93xx_keypad_pm_ops), + .of_match_table = ep93xx_keypad_of_ids, }, .probe = ep93xx_keypad_probe, .remove_new = ep93xx_keypad_remove, diff --git a/include/linux/soc/cirrus/ep93xx.h b/include/linux/soc/cirrus/ep93xx.h index a27447971302..8942bfaf1545 100644 --- a/include/linux/soc/cirrus/ep93xx.h +++ b/include/linux/soc/cirrus/ep93xx.h @@ -41,8 +41,6 @@ int ep93xx_pwm_acquire_gpio(struct platform_device *pdev); void ep93xx_pwm_release_gpio(struct platform_device *pdev); int ep93xx_ide_acquire_gpio(struct platform_device *pdev); void ep93xx_ide_release_gpio(struct platform_device *pdev); -int ep93xx_keypad_acquire_gpio(struct platform_device *pdev); -void ep93xx_keypad_release_gpio(struct platform_device *pdev); int ep93xx_i2s_acquire(void); void ep93xx_i2s_release(void); unsigned int ep93xx_chip_revision(void); @@ -52,8 +50,6 @@ static inline int ep93xx_pwm_acquire_gpio(struct platform_device *pdev) { return static inline void ep93xx_pwm_release_gpio(struct platform_device *pdev) {} static inline int ep93xx_ide_acquire_gpio(struct platform_device *pdev) { return 0; } static inline void ep93xx_ide_release_gpio(struct platform_device *pdev) {} -static inline int ep93xx_keypad_acquire_gpio(struct platform_device *pdev) { return 0; } -static inline void ep93xx_keypad_release_gpio(struct platform_device *pdev) {} static inline int ep93xx_i2s_acquire(void) { return 0; } static inline void ep93xx_i2s_release(void) {} static inline unsigned int ep93xx_chip_revision(void) { return 0; } From 177c20d761c538fd3c8e7f35c8036c0e551f5e0e Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:49 +0300 Subject: [PATCH 233/655] wdt: ts72xx: add DT support for ts72xx Add OF ID match table. Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Reviewed-by: Guenter Roeck Reviewed-by: Andy Shevchenko Reviewed-by: Krzysztof Kozlowski Reviewed-by: Mark Brown Reviewed-by: Andy Shevchenko Acked-by: Miquel Raynal Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- drivers/watchdog/ts72xx_wdt.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/watchdog/ts72xx_wdt.c b/drivers/watchdog/ts72xx_wdt.c index 3d57670befe1..ac709dc31a65 100644 --- a/drivers/watchdog/ts72xx_wdt.c +++ b/drivers/watchdog/ts72xx_wdt.c @@ -12,6 +12,7 @@ */ #include +#include #include #include #include @@ -160,10 +161,17 @@ static int ts72xx_wdt_probe(struct platform_device *pdev) return 0; } +static const struct of_device_id ts72xx_wdt_of_ids[] = { + { .compatible = "technologic,ts7200-wdt" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, ts72xx_wdt_of_ids); + static struct platform_driver ts72xx_wdt_driver = { .probe = ts72xx_wdt_probe, .driver = { .name = "ts72xx-wdt", + .of_match_table = ts72xx_wdt_of_ids, }, }; From 8f67b1f028190d679a2ad3254cf8da41c8b41f49 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:50 +0300 Subject: [PATCH 234/655] gpio: ep93xx: add DT support for gpio-ep93xx Add OF ID match table. Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Reviewed-by: Andy Shevchenko Reviewed-by: Linus Walleij Reviewed-by: Andy Shevchenko Reviewed-by: Krzysztof Kozlowski Reviewed-by: Guenter Roeck Reviewed-by: Mark Brown Reviewed-by: Andy Shevchenko Acked-by: Bartosz Golaszewski Acked-by: Miquel Raynal Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- drivers/gpio/gpio-ep93xx.c | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/drivers/gpio/gpio-ep93xx.c b/drivers/gpio/gpio-ep93xx.c index a55f635585f4..ab798c848215 100644 --- a/drivers/gpio/gpio-ep93xx.c +++ b/drivers/gpio/gpio-ep93xx.c @@ -12,13 +12,13 @@ #include #include #include +#include #include #include #include #include #include #include -#include struct ep93xx_gpio_irq_chip { void __iomem *base; @@ -138,7 +138,8 @@ static void ep93xx_gpio_irq_mask_ack(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct ep93xx_gpio_irq_chip *eic = to_ep93xx_gpio_irq_chip(gc); - int port_mask = BIT(irqd_to_hwirq(d)); + irq_hw_number_t hwirq = irqd_to_hwirq(d); + int port_mask = BIT(hwirq); if (irqd_get_trigger_type(d) == IRQ_TYPE_EDGE_BOTH) eic->int_type2 ^= port_mask; /* switch edge direction */ @@ -147,26 +148,28 @@ static void ep93xx_gpio_irq_mask_ack(struct irq_data *d) ep93xx_gpio_update_int_params(eic); writeb(port_mask, eic->base + EP93XX_INT_EOI_OFFSET); - gpiochip_disable_irq(gc, irqd_to_hwirq(d)); + gpiochip_disable_irq(gc, hwirq); } static void ep93xx_gpio_irq_mask(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct ep93xx_gpio_irq_chip *eic = to_ep93xx_gpio_irq_chip(gc); + irq_hw_number_t hwirq = irqd_to_hwirq(d); - eic->int_unmasked &= ~BIT(irqd_to_hwirq(d)); + eic->int_unmasked &= ~BIT(hwirq); ep93xx_gpio_update_int_params(eic); - gpiochip_disable_irq(gc, irqd_to_hwirq(d)); + gpiochip_disable_irq(gc, hwirq); } static void ep93xx_gpio_irq_unmask(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct ep93xx_gpio_irq_chip *eic = to_ep93xx_gpio_irq_chip(gc); + irq_hw_number_t hwirq = irqd_to_hwirq(d); - gpiochip_enable_irq(gc, irqd_to_hwirq(d)); - eic->int_unmasked |= BIT(irqd_to_hwirq(d)); + gpiochip_enable_irq(gc, hwirq); + eic->int_unmasked |= BIT(hwirq); ep93xx_gpio_update_int_params(eic); } @@ -179,11 +182,11 @@ static int ep93xx_gpio_irq_type(struct irq_data *d, unsigned int type) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct ep93xx_gpio_irq_chip *eic = to_ep93xx_gpio_irq_chip(gc); - irq_hw_number_t offset = irqd_to_hwirq(d); - int port_mask = BIT(offset); + irq_hw_number_t hwirq = irqd_to_hwirq(d); + int port_mask = BIT(hwirq); irq_flow_handler_t handler; - gc->direction_input(gc, offset); + gc->direction_input(gc, hwirq); switch (type) { case IRQ_TYPE_EDGE_RISING: @@ -209,7 +212,7 @@ static int ep93xx_gpio_irq_type(struct irq_data *d, unsigned int type) case IRQ_TYPE_EDGE_BOTH: eic->int_type1 |= port_mask; /* set initial polarity based on current input level */ - if (gc->get(gc, offset)) + if (gc->get(gc, hwirq)) eic->int_type2 &= ~port_mask; /* falling */ else eic->int_type2 |= port_mask; /* rising */ @@ -285,9 +288,8 @@ static int ep93xx_setup_irqs(struct platform_device *pdev, if (girq->num_parents == 0) return -EINVAL; - girq->parents = devm_kcalloc(dev, girq->num_parents, - sizeof(*girq->parents), - GFP_KERNEL); + girq->parents = devm_kcalloc(dev, girq->num_parents, sizeof(*girq->parents), + GFP_KERNEL); if (!girq->parents) return -ENOMEM; @@ -306,7 +308,7 @@ static int ep93xx_setup_irqs(struct platform_device *pdev, girq->parent_handler = ep93xx_gpio_f_irq_handler; for (i = 0; i < girq->num_parents; i++) { - irq = platform_get_irq(pdev, i); + irq = platform_get_irq_optional(pdev, i); if (irq < 0) continue; @@ -359,9 +361,15 @@ static int ep93xx_gpio_probe(struct platform_device *pdev) return devm_gpiochip_add_data(&pdev->dev, gc, egc); } +static const struct of_device_id ep93xx_gpio_match[] = { + { .compatible = "cirrus,ep9301-gpio" }, + { /* sentinel */ } +}; + static struct platform_driver ep93xx_gpio_driver = { .driver = { .name = "gpio-ep93xx", + .of_match_table = ep93xx_gpio_match, }, .probe = ep93xx_gpio_probe, }; From bae9f789b6c4c365e6a6daa2dfd64a51d2c712de Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:51 +0300 Subject: [PATCH 235/655] ASoC: dt-bindings: ep93xx: Document DMA support Document DMA support in binding document. Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Reviewed-by: Krzysztof Kozlowski Reviewed-by: Mark Brown Reviewed-by: Andy Shevchenko Reviewed-by: Guenter Roeck Reviewed-by: Andy Shevchenko Acked-by: Miquel Raynal Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- .../devicetree/bindings/sound/cirrus,ep9301-i2s.yaml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Documentation/devicetree/bindings/sound/cirrus,ep9301-i2s.yaml b/Documentation/devicetree/bindings/sound/cirrus,ep9301-i2s.yaml index 453d493c941f..36a320ddf534 100644 --- a/Documentation/devicetree/bindings/sound/cirrus,ep9301-i2s.yaml +++ b/Documentation/devicetree/bindings/sound/cirrus,ep9301-i2s.yaml @@ -40,6 +40,16 @@ properties: - const: sclk - const: lrclk + dmas: + items: + - description: out DMA channel + - description: in DMA channel + + dma-names: + items: + - const: tx + - const: rx + required: - compatible - '#sound-dai-cells' @@ -61,6 +71,8 @@ examples: <&syscon 30>, <&syscon 31>; clock-names = "mclk", "sclk", "lrclk"; + dmas = <&dma0 0 1>, <&dma0 0 2>; + dma-names = "tx", "rx"; }; ... From fb37c3a9c20c2b9650f283c2f4e9b656182dfee8 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:52 +0300 Subject: [PATCH 236/655] ASoC: dt-bindings: ep93xx: Document Audio Port support Document Audio Graph Port support in binding document. Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Reviewed-by: Krzysztof Kozlowski Reviewed-by: Andy Shevchenko Reviewed-by: Guenter Roeck Reviewed-by: Mark Brown Reviewed-by: Andy Shevchenko Acked-by: Miquel Raynal Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- .../devicetree/bindings/sound/cirrus,ep9301-i2s.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/sound/cirrus,ep9301-i2s.yaml b/Documentation/devicetree/bindings/sound/cirrus,ep9301-i2s.yaml index 36a320ddf534..4693e85aed37 100644 --- a/Documentation/devicetree/bindings/sound/cirrus,ep9301-i2s.yaml +++ b/Documentation/devicetree/bindings/sound/cirrus,ep9301-i2s.yaml @@ -50,6 +50,10 @@ properties: - const: tx - const: rx + port: + $ref: audio-graph-port.yaml# + unevaluatedProperties: false + required: - compatible - '#sound-dai-cells' From fae4d65a042d488c2234352e6a10e1d5ab499bcc Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Mon, 9 Sep 2024 11:10:53 +0300 Subject: [PATCH 237/655] ASoC: ep93xx: Drop legacy DMA support And rely on OF DMA. Signed-off-by: Alexander Sverdlin Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Reviewed-by: Andy Shevchenko Reviewed-by: Krzysztof Kozlowski Reviewed-by: Guenter Roeck Reviewed-by: Mark Brown Reviewed-by: Andy Shevchenko Acked-by: Miquel Raynal Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- sound/soc/cirrus/ep93xx-i2s.c | 19 ------------------- sound/soc/cirrus/ep93xx-pcm.c | 19 +------------------ 2 files changed, 1 insertion(+), 37 deletions(-) diff --git a/sound/soc/cirrus/ep93xx-i2s.c b/sound/soc/cirrus/ep93xx-i2s.c index 522de4b80293..9e83cdf8a943 100644 --- a/sound/soc/cirrus/ep93xx-i2s.c +++ b/sound/soc/cirrus/ep93xx-i2s.c @@ -24,7 +24,6 @@ #include #include -#include #include #include "ep93xx-pcm.h" @@ -80,19 +79,6 @@ struct ep93xx_i2s_info { struct snd_dmaengine_dai_dma_data dma_params_tx; }; -static struct ep93xx_dma_data ep93xx_i2s_dma_data[] = { - [SNDRV_PCM_STREAM_PLAYBACK] = { - .name = "i2s-pcm-out", - .port = EP93XX_DMA_I2S1, - .direction = DMA_MEM_TO_DEV, - }, - [SNDRV_PCM_STREAM_CAPTURE] = { - .name = "i2s-pcm-in", - .port = EP93XX_DMA_I2S1, - .direction = DMA_DEV_TO_MEM, - }, -}; - static inline void ep93xx_i2s_write_reg(struct ep93xx_i2s_info *info, unsigned reg, unsigned val) { @@ -198,11 +184,6 @@ static int ep93xx_i2s_dai_probe(struct snd_soc_dai *dai) { struct ep93xx_i2s_info *info = snd_soc_dai_get_drvdata(dai); - info->dma_params_tx.filter_data = - &ep93xx_i2s_dma_data[SNDRV_PCM_STREAM_PLAYBACK]; - info->dma_params_rx.filter_data = - &ep93xx_i2s_dma_data[SNDRV_PCM_STREAM_CAPTURE]; - snd_soc_dai_init_dma_data(dai, &info->dma_params_tx, &info->dma_params_rx); diff --git a/sound/soc/cirrus/ep93xx-pcm.c b/sound/soc/cirrus/ep93xx-pcm.c index fa72acd8d334..5ecb4671cbba 100644 --- a/sound/soc/cirrus/ep93xx-pcm.c +++ b/sound/soc/cirrus/ep93xx-pcm.c @@ -18,8 +18,6 @@ #include #include -#include - #include "ep93xx-pcm.h" static const struct snd_pcm_hardware ep93xx_pcm_hardware = { @@ -35,30 +33,15 @@ static const struct snd_pcm_hardware ep93xx_pcm_hardware = { .fifo_size = 32, }; -static bool ep93xx_pcm_dma_filter(struct dma_chan *chan, void *filter_param) -{ - struct ep93xx_dma_data *data = filter_param; - - if (data->direction == ep93xx_dma_chan_direction(chan)) { - chan->private = data; - return true; - } - - return false; -} - static const struct snd_dmaengine_pcm_config ep93xx_dmaengine_pcm_config = { .pcm_hardware = &ep93xx_pcm_hardware, - .compat_filter_fn = ep93xx_pcm_dma_filter, .prealloc_buffer_size = 131072, }; int devm_ep93xx_pcm_platform_register(struct device *dev) { return devm_snd_dmaengine_pcm_register(dev, - &ep93xx_dmaengine_pcm_config, - SND_DMAENGINE_PCM_FLAG_NO_DT | - SND_DMAENGINE_PCM_FLAG_COMPAT); + &ep93xx_dmaengine_pcm_config, 0); } EXPORT_SYMBOL_GPL(devm_ep93xx_pcm_platform_register); From ed5244a1d63796deb56ca1822637b94e8c7cd424 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:54 +0300 Subject: [PATCH 238/655] ARM: dts: add Cirrus EP93XX SoC .dtsi Add support for Cirrus Logic EP93XX SoC's family. Co-developed-by: Alexander Sverdlin Signed-off-by: Alexander Sverdlin Signed-off-by: Nikita Shubin Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/cirrus/ep93xx.dtsi | 444 +++++++++++++++++++++++++++ 1 file changed, 444 insertions(+) create mode 100644 arch/arm/boot/dts/cirrus/ep93xx.dtsi diff --git a/arch/arm/boot/dts/cirrus/ep93xx.dtsi b/arch/arm/boot/dts/cirrus/ep93xx.dtsi new file mode 100644 index 000000000000..0dd1eee346ca --- /dev/null +++ b/arch/arm/boot/dts/cirrus/ep93xx.dtsi @@ -0,0 +1,444 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Device Tree file for Cirrus Logic systems EP93XX SoC + */ +#include +#include +#include +#include +/ { + soc: soc { + compatible = "simple-bus"; + ranges; + #address-cells = <1>; + #size-cells = <1>; + + syscon: syscon@80930000 { + compatible = "cirrus,ep9301-syscon", "syscon"; + reg = <0x80930000 0x1000>; + + #clock-cells = <1>; + clocks = <&xtali>; + + spi_default_pins: pins-spi { + function = "spi"; + groups = "ssp"; + }; + + ac97_default_pins: pins-ac97 { + function = "ac97"; + groups = "ac97"; + }; + + i2s_on_ssp_pins: pins-i2sonssp { + function = "i2s"; + groups = "i2s_on_ssp"; + }; + + i2s_on_ac97_pins: pins-i2sonac97 { + function = "i2s"; + groups = "i2s_on_ac97"; + }; + + gpio1_default_pins: pins-gpio1 { + function = "gpio"; + groups = "gpio1agrp"; + }; + + pwm1_default_pins: pins-pwm1 { + function = "pwm"; + groups = "pwm1"; + }; + + gpio2_default_pins: pins-gpio2 { + function = "gpio"; + groups = "gpio2agrp"; + }; + + gpio3_default_pins: pins-gpio3 { + function = "gpio"; + groups = "gpio3agrp"; + }; + + keypad_default_pins: pins-keypad { + function = "keypad"; + groups = "keypadgrp"; + }; + + gpio4_default_pins: pins-gpio4 { + function = "gpio"; + groups = "gpio4agrp"; + }; + + gpio6_default_pins: pins-gpio6 { + function = "gpio"; + groups = "gpio6agrp"; + }; + + gpio7_default_pins: pins-gpio7 { + function = "gpio"; + groups = "gpio7agrp"; + }; + + ide_default_pins: pins-ide { + function = "pata"; + groups = "idegrp"; + }; + + lcd_on_dram0_pins: pins-rasteronsdram0 { + function = "lcd"; + groups = "rasteronsdram0grp"; + }; + + lcd_on_dram3_pins: pins-rasteronsdram3 { + function = "lcd"; + groups = "rasteronsdram3grp"; + }; + }; + + adc: adc@80900000 { + compatible = "cirrus,ep9301-adc"; + reg = <0x80900000 0x28>; + clocks = <&syscon EP93XX_CLK_ADC>; + interrupt-parent = <&vic0>; + interrupts = <30>; + status = "disabled"; + }; + + /* + * The EP93XX expansion bus is a set of up to 7 each up to 16MB + * windows in the 256MB space from 0x50000000 to 0x5fffffff. + * But since we don't require to setup it in any way, we can + * represent it as a simple-bus. + */ + ebi: bus@80080000 { + compatible = "simple-bus"; + reg = <0x80080000 0x20>; + native-endian; + #address-cells = <1>; + #size-cells = <1>; + ranges; + }; + + dma0: dma-controller@80000000 { + compatible = "cirrus,ep9301-dma-m2p"; + reg = <0x80000000 0x0040>, + <0x80000040 0x0040>, + <0x80000080 0x0040>, + <0x800000c0 0x0040>, + <0x80000240 0x0040>, + <0x80000200 0x0040>, + <0x800002c0 0x0040>, + <0x80000280 0x0040>, + <0x80000340 0x0040>, + <0x80000300 0x0040>; + clocks = <&syscon EP93XX_CLK_M2P0>, + <&syscon EP93XX_CLK_M2P1>, + <&syscon EP93XX_CLK_M2P2>, + <&syscon EP93XX_CLK_M2P3>, + <&syscon EP93XX_CLK_M2P4>, + <&syscon EP93XX_CLK_M2P5>, + <&syscon EP93XX_CLK_M2P6>, + <&syscon EP93XX_CLK_M2P7>, + <&syscon EP93XX_CLK_M2P8>, + <&syscon EP93XX_CLK_M2P9>; + clock-names = "m2p0", "m2p1", + "m2p2", "m2p3", + "m2p4", "m2p5", + "m2p6", "m2p7", + "m2p8", "m2p9"; + interrupt-parent = <&vic0>; + interrupts = <7>, <8>, <9>, <10>, <11>, + <12>, <13>, <14>, <15>, <16>; + #dma-cells = <2>; + }; + + dma1: dma-controller@80000100 { + compatible = "cirrus,ep9301-dma-m2m"; + reg = <0x80000100 0x0040>, + <0x80000140 0x0040>; + clocks = <&syscon EP93XX_CLK_M2M0>, + <&syscon EP93XX_CLK_M2M1>; + clock-names = "m2m0", "m2m1"; + interrupt-parent = <&vic0>; + interrupts = <17>, <18>; + #dma-cells = <2>; + }; + + eth0: ethernet@80010000 { + compatible = "cirrus,ep9301-eth"; + reg = <0x80010000 0x10000>; + interrupt-parent = <&vic1>; + interrupts = <7>; + mdio0: mdio { + #address-cells = <1>; + #size-cells = <0>; + }; + }; + + gpio0: gpio@80840000 { + compatible = "cirrus,ep9301-gpio"; + reg = <0x80840000 0x04>, + <0x80840010 0x04>, + <0x80840090 0x1c>; + reg-names = "data", "dir", "intr"; + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + interrupt-parent = <&vic1>; + interrupts = <27>; + }; + + gpio1: gpio@80840004 { + compatible = "cirrus,ep9301-gpio"; + reg = <0x80840004 0x04>, + <0x80840014 0x04>, + <0x808400ac 0x1c>; + reg-names = "data", "dir", "intr"; + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + interrupt-parent = <&vic1>; + interrupts = <27>; + }; + + gpio2: gpio@80840008 { + compatible = "cirrus,ep9301-gpio"; + reg = <0x80840008 0x04>, + <0x80840018 0x04>; + reg-names = "data", "dir"; + gpio-controller; + #gpio-cells = <2>; + pinctrl-names = "default"; + pinctrl-0 = <&gpio2_default_pins>; + }; + + gpio3: gpio@8084000c { + compatible = "cirrus,ep9301-gpio"; + reg = <0x8084000c 0x04>, + <0x8084001c 0x04>; + reg-names = "data", "dir"; + gpio-controller; + #gpio-cells = <2>; + pinctrl-names = "default"; + pinctrl-0 = <&gpio3_default_pins>; + }; + + gpio4: gpio@80840020 { + compatible = "cirrus,ep9301-gpio"; + reg = <0x80840020 0x04>, + <0x80840024 0x04>; + reg-names = "data", "dir"; + gpio-controller; + #gpio-cells = <2>; + pinctrl-names = "default"; + pinctrl-0 = <&gpio4_default_pins>; + }; + + gpio5: gpio@80840030 { + compatible = "cirrus,ep9301-gpio"; + reg = <0x80840030 0x04>, + <0x80840034 0x04>, + <0x8084004c 0x1c>; + reg-names = "data", "dir", "intr"; + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + interrupts-extended = <&vic0 19>, <&vic0 20>, + <&vic0 21>, <&vic0 22>, + <&vic1 15>, <&vic1 16>, + <&vic1 17>, <&vic1 18>; + }; + + gpio6: gpio@80840038 { + compatible = "cirrus,ep9301-gpio"; + reg = <0x80840038 0x04>, + <0x8084003c 0x04>; + reg-names = "data", "dir"; + gpio-controller; + #gpio-cells = <2>; + pinctrl-names = "default"; + pinctrl-0 = <&gpio6_default_pins>; + }; + + gpio7: gpio@80840040 { + compatible = "cirrus,ep9301-gpio"; + reg = <0x80840040 0x04>, + <0x80840044 0x04>; + reg-names = "data", "dir"; + gpio-controller; + #gpio-cells = <2>; + pinctrl-names = "default"; + pinctrl-0 = <&gpio7_default_pins>; + }; + + i2s: i2s@80820000 { + compatible = "cirrus,ep9301-i2s"; + reg = <0x80820000 0x100>; + #sound-dai-cells = <0>; + interrupt-parent = <&vic1>; + interrupts = <28>; + clocks = <&syscon EP93XX_CLK_I2S_MCLK>, + <&syscon EP93XX_CLK_I2S_SCLK>, + <&syscon EP93XX_CLK_I2S_LRCLK>; + clock-names = "mclk", "sclk", "lrclk"; + dmas = <&dma0 0 1>, <&dma0 0 2>; + dma-names = "tx", "rx"; + status = "disabled"; + }; + + ide: ide@800a0000 { + compatible = "cirrus,ep9312-pata"; + reg = <0x800a0000 0x38>; + interrupt-parent = <&vic1>; + interrupts = <8>; + pinctrl-names = "default"; + pinctrl-0 = <&ide_default_pins>; + status = "disabled"; + }; + + vic0: interrupt-controller@800b0000 { + compatible = "arm,pl192-vic"; + reg = <0x800b0000 0x1000>; + interrupt-controller; + #interrupt-cells = <1>; + valid-mask = <0x7ffffffc>; + valid-wakeup-mask = <0x0>; + }; + + vic1: interrupt-controller@800c0000 { + compatible = "arm,pl192-vic"; + reg = <0x800c0000 0x1000>; + interrupt-controller; + #interrupt-cells = <1>; + valid-mask = <0x1fffffff>; + valid-wakeup-mask = <0x0>; + }; + + keypad: keypad@800f0000 { + compatible = "cirrus,ep9307-keypad"; + reg = <0x800f0000 0x0c>; + interrupt-parent = <&vic0>; + interrupts = <29>; + clocks = <&syscon EP93XX_CLK_KEYPAD>; + pinctrl-names = "default"; + pinctrl-0 = <&keypad_default_pins>; + linux,keymap = , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + ; + }; + + pwm0: pwm@80910000 { + compatible = "cirrus,ep9301-pwm"; + reg = <0x80910000 0x10>; + clocks = <&syscon EP93XX_CLK_PWM>; + #pwm-cells = <3>; + status = "disabled"; + }; + + pwm1: pwm@80910020 { + compatible = "cirrus,ep9301-pwm"; + reg = <0x80910020 0x10>; + clocks = <&syscon EP93XX_CLK_PWM>; + #pwm-cells = <3>; + pinctrl-names = "default"; + pinctrl-0 = <&pwm1_default_pins>; + status = "disabled"; + }; + + rtc0: rtc@80920000 { + compatible = "cirrus,ep9301-rtc"; + reg = <0x80920000 0x100>; + }; + + spi0: spi@808a0000 { + compatible = "cirrus,ep9301-spi"; + reg = <0x808a0000 0x18>; + #address-cells = <1>; + #size-cells = <0>; + interrupt-parent = <&vic1>; + interrupts = <21>; + clocks = <&syscon EP93XX_CLK_SPI>; + pinctrl-names = "default"; + pinctrl-0 = <&spi_default_pins>; + status = "disabled"; + }; + + timer: timer@80810000 { + compatible = "cirrus,ep9301-timer"; + reg = <0x80810000 0x100>; + interrupt-parent = <&vic1>; + interrupts = <19>; + }; + + uart0: serial@808c0000 { + compatible = "arm,pl011", "arm,primecell"; + reg = <0x808c0000 0x1000>; + arm,primecell-periphid = <0x00041010>; + clocks = <&syscon EP93XX_CLK_UART1>, <&syscon EP93XX_CLK_UART>; + clock-names = "uartclk", "apb_pclk"; + interrupt-parent = <&vic1>; + interrupts = <20>; + status = "disabled"; + }; + + uart1: uart@808d0000 { + compatible = "arm,primecell"; + reg = <0x808d0000 0x1000>; + arm,primecell-periphid = <0x00041010>; + clocks = <&syscon EP93XX_CLK_UART2>, <&syscon EP93XX_CLK_UART>; + clock-names = "apb:uart2", "apb_pclk"; + interrupt-parent = <&vic1>; + interrupts = <22>; + status = "disabled"; + }; + + uart2: uart@808b0000 { + compatible = "arm,primecell"; + reg = <0x808b0000 0x1000>; + arm,primecell-periphid = <0x00041010>; + clocks = <&syscon EP93XX_CLK_UART3>, <&syscon EP93XX_CLK_UART>; + clock-names = "apb:uart3", "apb_pclk"; + interrupt-parent = <&vic1>; + interrupts = <23>; + status = "disabled"; + }; + + usb0: usb@80020000 { + compatible = "generic-ohci"; + reg = <0x80020000 0x10000>; + interrupt-parent = <&vic1>; + interrupts = <24>; + clocks = <&syscon EP93XX_CLK_USB>; + status = "disabled"; + }; + + watchdog0: watchdog@80940000 { + compatible = "cirrus,ep9301-wdt"; + reg = <0x80940000 0x08>; + }; + }; + + xtali: oscillator { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <14745600>; + clock-output-names = "xtali"; + }; +}; From 454b61d84484a5f761c86b89966eaac93177ca6f Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:55 +0300 Subject: [PATCH 239/655] ARM: dts: ep93xx: add ts7250 board Add device tree file for Technologic Systems ts7250 board and Liebherr bk3 board which have many in common, both are based on ep9302 SoC variant. Signed-off-by: Nikita Shubin Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/cirrus/Makefile | 3 + arch/arm/boot/dts/cirrus/ep93xx-bk3.dts | 125 ++++++++++++++++++ arch/arm/boot/dts/cirrus/ep93xx-ts7250.dts | 145 +++++++++++++++++++++ 3 files changed, 273 insertions(+) create mode 100644 arch/arm/boot/dts/cirrus/ep93xx-bk3.dts create mode 100644 arch/arm/boot/dts/cirrus/ep93xx-ts7250.dts diff --git a/arch/arm/boot/dts/cirrus/Makefile b/arch/arm/boot/dts/cirrus/Makefile index e944d3e2129d..211a7e2f2115 100644 --- a/arch/arm/boot/dts/cirrus/Makefile +++ b/arch/arm/boot/dts/cirrus/Makefile @@ -3,3 +3,6 @@ dtb-$(CONFIG_ARCH_CLPS711X) += \ ep7211-edb7211.dtb dtb-$(CONFIG_ARCH_CLPS711X) += \ ep7211-edb7211.dtb +dtb-$(CONFIG_ARCH_EP93XX) += \ + ep93xx-bk3.dtb \ + ep93xx-ts7250.dtb diff --git a/arch/arm/boot/dts/cirrus/ep93xx-bk3.dts b/arch/arm/boot/dts/cirrus/ep93xx-bk3.dts new file mode 100644 index 000000000000..40bc9b2a6ba8 --- /dev/null +++ b/arch/arm/boot/dts/cirrus/ep93xx-bk3.dts @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Device Tree file for Liebherr controller BK3.1 based on Cirrus EP9302 SoC + */ +/dts-v1/; +#include "ep93xx.dtsi" + +/ { + model = "Liebherr controller BK3.1"; + compatible = "liebherr,bk3", "cirrus,ep9301"; + #address-cells = <1>; + #size-cells = <1>; + + chosen { + }; + + memory@0 { + device_type = "memory"; + /* should be set from ATAGS */ + reg = <0x00000000 0x02000000>, + <0x000530c0 0x01fdd000>; + }; + + leds { + compatible = "gpio-leds"; + led-0 { + label = "grled"; + gpios = <&gpio4 0 GPIO_ACTIVE_HIGH>; + linux,default-trigger = "heartbeat"; + function = LED_FUNCTION_HEARTBEAT; + }; + + led-1 { + label = "rdled"; + gpios = <&gpio4 1 GPIO_ACTIVE_HIGH>; + function = LED_FUNCTION_FAULT; + }; + }; +}; + +&ebi { + nand-controller@60000000 { + compatible = "technologic,ts7200-nand"; + reg = <0x60000000 0x8000000>; + #address-cells = <1>; + #size-cells = <0>; + + nand@0 { + reg = <0>; + partitions { + compatible = "fixed-partitions"; + #address-cells = <1>; + #size-cells = <1>; + + partition@0 { + label = "System"; + reg = <0x00000000 0x01e00000>; + read-only; + }; + + partition@1e00000 { + label = "Data"; + reg = <0x01e00000 0x05f20000>; + }; + + partition@7d20000 { + label = "RedBoot"; + reg = <0x07d20000 0x002e0000>; + read-only; + }; + }; + }; + }; +}; + +ð0 { + phy-handle = <&phy0>; +}; + +&i2s { + dmas = <&dma0 0 1>, <&dma0 0 2>; + dma-names = "tx", "rx"; + pinctrl-names = "default"; + pinctrl-0 = <&i2s_on_ac97_pins>; + status = "okay"; +}; + +&gpio1 { + /* PWM */ + gpio-ranges = <&syscon 6 163 1>; +}; + +&gpio4 { + gpio-ranges = <&syscon 0 97 2>; + status = "okay"; +}; + +&gpio6 { + gpio-ranges = <&syscon 0 87 2>; + status = "okay"; +}; + +&gpio7 { + gpio-ranges = <&syscon 2 199 4>; + status = "okay"; +}; + +&mdio0 { + phy0: ethernet-phy@1 { + reg = <1>; + device_type = "ethernet-phy"; + }; +}; + +&uart0 { + status = "okay"; +}; + +&uart1 { + status = "okay"; +}; + +&usb0 { + status = "okay"; +}; diff --git a/arch/arm/boot/dts/cirrus/ep93xx-ts7250.dts b/arch/arm/boot/dts/cirrus/ep93xx-ts7250.dts new file mode 100644 index 000000000000..9e03f93d9fc8 --- /dev/null +++ b/arch/arm/boot/dts/cirrus/ep93xx-ts7250.dts @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Device Tree file for Technologic Systems ts7250 board based on Cirrus EP9302 SoC + */ +/dts-v1/; +#include "ep93xx.dtsi" + +/ { + compatible = "technologic,ts7250", "cirrus,ep9301"; + model = "TS-7250 SBC"; + #address-cells = <1>; + #size-cells = <1>; + + chosen { + }; + + memory@0 { + device_type = "memory"; + /* should be set from ATAGS */ + reg = <0x00000000 0x02000000>, + <0x000530c0 0x01fdd000>; + }; + + leds { + compatible = "gpio-leds"; + led-0 { + label = "grled"; + gpios = <&gpio4 0 GPIO_ACTIVE_HIGH>; + linux,default-trigger = "heartbeat"; + function = LED_FUNCTION_HEARTBEAT; + }; + + led-1 { + label = "rdled"; + gpios = <&gpio4 1 GPIO_ACTIVE_HIGH>; + function = LED_FUNCTION_FAULT; + }; + }; +}; + +&ebi { + nand-controller@60000000 { + compatible = "technologic,ts7200-nand"; + reg = <0x60000000 0x8000000>; + #address-cells = <1>; + #size-cells = <0>; + + nand@0 { + reg = <0>; + partitions { + compatible = "fixed-partitions"; + #address-cells = <1>; + #size-cells = <1>; + + partition@0 { + label = "TS-BOOTROM"; + reg = <0x00000000 0x00020000>; + read-only; + }; + + partition@20000 { + label = "Linux"; + reg = <0x00020000 0x07d00000>; + }; + + partition@7d20000 { + label = "RedBoot"; + reg = <0x07d20000 0x002e0000>; + read-only; + }; + }; + }; + }; + + rtc@10800000 { + compatible = "st,m48t86"; + reg = <0x10800000 0x1>, + <0x11700000 0x1>; + }; + + watchdog@23800000 { + compatible = "technologic,ts7200-wdt"; + reg = <0x23800000 0x01>, + <0x23c00000 0x01>; + timeout-sec = <30>; + }; +}; + +ð0 { + phy-handle = <&phy0>; +}; + +&gpio1 { + /* PWM */ + gpio-ranges = <&syscon 6 163 1>; +}; + +/* ts7250 doesn't have GPIO Port D present */ +&gpio3 { + status = "disabled"; +}; + +&gpio4 { + gpio-ranges = <&syscon 0 97 2>; +}; + +&gpio6 { + gpio-ranges = <&syscon 0 87 2>; +}; + +&gpio7 { + gpio-ranges = <&syscon 2 199 4>; +}; + +&spi0 { + cs-gpios = <&gpio5 2 GPIO_ACTIVE_HIGH>; + dmas = <&dma1 10 2>, <&dma1 10 1>; + dma-names = "rx", "tx"; + status = "okay"; + + tmp122: temperature-sensor@0 { + compatible = "ti,tmp122"; + reg = <0>; + spi-max-frequency = <2000000>; + }; +}; + +&mdio0 { + phy0: ethernet-phy@1 { + reg = <1>; + device_type = "ethernet-phy"; + }; +}; + +&uart0 { + status = "okay"; +}; + +&uart1 { + status = "okay"; +}; + +&usb0 { + status = "okay"; +}; From bd8511fba09ae399fc6812bcf4f8d60a803a6871 Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Mon, 9 Sep 2024 11:10:56 +0300 Subject: [PATCH 240/655] ARM: dts: ep93xx: Add EDB9302 DT Add device tree for Cirrus EDB9302. Signed-off-by: Alexander Sverdlin Signed-off-by: Nikita Shubin Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/cirrus/Makefile | 1 + arch/arm/boot/dts/cirrus/ep93xx-edb9302.dts | 181 ++++++++++++++++++++ 2 files changed, 182 insertions(+) create mode 100644 arch/arm/boot/dts/cirrus/ep93xx-edb9302.dts diff --git a/arch/arm/boot/dts/cirrus/Makefile b/arch/arm/boot/dts/cirrus/Makefile index 211a7e2f2115..e6015983e464 100644 --- a/arch/arm/boot/dts/cirrus/Makefile +++ b/arch/arm/boot/dts/cirrus/Makefile @@ -4,5 +4,6 @@ dtb-$(CONFIG_ARCH_CLPS711X) += \ dtb-$(CONFIG_ARCH_CLPS711X) += \ ep7211-edb7211.dtb dtb-$(CONFIG_ARCH_EP93XX) += \ + ep93xx-edb9302.dtb \ ep93xx-bk3.dtb \ ep93xx-ts7250.dtb diff --git a/arch/arm/boot/dts/cirrus/ep93xx-edb9302.dts b/arch/arm/boot/dts/cirrus/ep93xx-edb9302.dts new file mode 100644 index 000000000000..312b2be1c638 --- /dev/null +++ b/arch/arm/boot/dts/cirrus/ep93xx-edb9302.dts @@ -0,0 +1,181 @@ +// SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +/* + * Device Tree file for Cirrus Logic EDB9302 board based on EP9302 SoC + */ +/dts-v1/; +#include "ep93xx.dtsi" + +/ { + #address-cells = <1>; + #size-cells = <1>; + compatible = "cirrus,edb9302", "cirrus,ep9301"; + model = "cirrus,edb9302"; + + chosen { + }; + + memory@0 { + device_type = "memory"; + /* should be set from ATAGS */ + reg = <0x0000000 0x800000>, + <0x1000000 0x800000>, + <0x4000000 0x800000>, + <0x5000000 0x800000>; + }; + + sound { + compatible = "audio-graph-card2"; + label = "EDB93XX"; + links = <&i2s_port>; + }; + + leds { + compatible = "gpio-leds"; + led-0 { + label = "grled"; + gpios = <&gpio4 0 GPIO_ACTIVE_HIGH>; + linux,default-trigger = "heartbeat"; + function = LED_FUNCTION_HEARTBEAT; + }; + + led-1 { + label = "rdled"; + gpios = <&gpio4 1 GPIO_ACTIVE_HIGH>; + function = LED_FUNCTION_FAULT; + }; + }; +}; + +&adc { + status = "okay"; +}; + +&ebi { + flash@60000000 { + compatible = "cfi-flash"; + reg = <0x60000000 0x1000000>; + bank-width = <2>; + }; +}; + +ð0 { + phy-handle = <&phy0>; +}; + +&gpio0 { + gpio-ranges = <&syscon 0 153 1>, + <&syscon 1 152 1>, + <&syscon 2 151 1>, + <&syscon 3 148 1>, + <&syscon 4 147 1>, + <&syscon 5 146 1>, + <&syscon 6 145 1>, + <&syscon 7 144 1>; +}; + +&gpio1 { + gpio-ranges = <&syscon 0 143 1>, + <&syscon 1 142 1>, + <&syscon 2 141 1>, + <&syscon 3 140 1>, + <&syscon 4 165 1>, + <&syscon 5 164 1>, + <&syscon 6 163 1>, + <&syscon 7 160 1>; +}; + +&gpio2 { + gpio-ranges = <&syscon 0 115 1>; +}; + +/* edb9302 doesn't have GPIO Port D present */ +&gpio3 { + status = "disabled"; +}; + +&gpio4 { + gpio-ranges = <&syscon 0 97 2>; +}; + +&gpio5 { + gpio-ranges = <&syscon 1 170 1>, + <&syscon 2 169 1>, + <&syscon 3 168 1>; +}; + +&gpio6 { + gpio-ranges = <&syscon 0 87 2>; +}; + +&gpio7 { + gpio-ranges = <&syscon 2 199 4>; +}; + +&i2s { + pinctrl-names = "default"; + pinctrl-0 = <&i2s_on_ac97_pins>; + status = "okay"; + i2s_port: port { + i2s_ep: endpoint { + system-clock-direction-out; + frame-master; + bitclock-master; + mclk-fs = <256>; + dai-format = "i2s"; + convert-channels = <2>; + convert-sample-format = "s32_le"; + remote-endpoint = <&codec_ep>; + }; + }; +}; + +&mdio0 { + phy0: ethernet-phy@1 { + reg = <1>; + device_type = "ethernet-phy"; + }; +}; + +&spi0 { + cs-gpios = <&gpio0 6 GPIO_ACTIVE_LOW + &gpio0 7 GPIO_ACTIVE_LOW>; + dmas = <&dma1 10 2>, <&dma1 10 1>; + dma-names = "rx", "tx"; + status = "okay"; + + cs4271: codec@0 { + compatible = "cirrus,cs4271"; + reg = <0>; + #sound-dai-cells = <0>; + spi-max-frequency = <6000000>; + spi-cpol; + spi-cpha; + reset-gpios = <&gpio0 1 GPIO_ACTIVE_LOW>; + port { + codec_ep: endpoint { + remote-endpoint = <&i2s_ep>; + }; + }; + }; + + at25f1024: eeprom@1 { + compatible = "atmel,at25"; + reg = <1>; + address-width = <8>; + size = <0x20000>; + pagesize = <256>; + spi-max-frequency = <20000000>; + }; +}; + +&uart0 { + status = "okay"; +}; + +&uart1 { + status = "okay"; +}; + +&usb0 { + status = "okay"; +}; From 046322f1e1d9879b8a598a0c57fcb81f87fd3f59 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:57 +0300 Subject: [PATCH 241/655] ARM: ep93xx: DT for the Cirrus ep93xx SoC platforms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add compulsory device tree support to the Cirrus ep93xx ARMv4 platform. - select PINCTRL_EP93xx - select COMMON_CLK_EP93XX, as clock driver moved out of platform code - select ARCH_HAS_RESET_CONTROLLER Select ARM_ATAG_DTB_COMPAT to update device tree with information about memory passed from bootloader. We have to leave all MACH options as they are used for board checking before decomp, to turn off watchdog and ethernet DMA. Tested-by: Alexander Sverdlin Signed-off-by: Nikita Shubin Tested-by: Michael Peters Reviewed-by: Andy Shevchenko Reviewed-by: Krzysztof Kozlowski Reviewed-by: Guenter Roeck Reviewed-by: Mark Brown Reviewed-by: Andy Shevchenko Reviewed-by: Linus Walleij Reviewed-by: Kris Bahnsen Reviewed-by: Andrew Lunn Reviewed-by: Sergey Shtylyov Acked-by: Miquel Raynal Acked-by: Alexander Sverdlin Acked-by: Uwe Kleine-König Acked-by: Damien Le Moal Acked-by: Sebastian Reichel Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- arch/arm/Makefile | 1 - arch/arm/mach-ep93xx/Kconfig | 20 ++++++++++---------- arch/arm/mach-ep93xx/Makefile | 11 ----------- 3 files changed, 10 insertions(+), 22 deletions(-) delete mode 100644 arch/arm/mach-ep93xx/Makefile diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 71afdd98ddf2..aafebf145738 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -183,7 +183,6 @@ machine-$(CONFIG_ARCH_CLPS711X) += clps711x machine-$(CONFIG_ARCH_DAVINCI) += davinci machine-$(CONFIG_ARCH_DIGICOLOR) += digicolor machine-$(CONFIG_ARCH_DOVE) += dove -machine-$(CONFIG_ARCH_EP93XX) += ep93xx machine-$(CONFIG_ARCH_EXYNOS) += exynos machine-$(CONFIG_ARCH_FOOTBRIDGE) += footbridge machine-$(CONFIG_ARCH_GEMINI) += gemini diff --git a/arch/arm/mach-ep93xx/Kconfig b/arch/arm/mach-ep93xx/Kconfig index 703f3d232a60..812b71dcf60e 100644 --- a/arch/arm/mach-ep93xx/Kconfig +++ b/arch/arm/mach-ep93xx/Kconfig @@ -3,27 +3,27 @@ menuconfig ARCH_EP93XX bool "EP93xx-based" depends on ATAGS depends on ARCH_MULTI_V4T + # CONFIG_ARCH_MULTI_V7 is not set depends on CPU_LITTLE_ENDIAN + select ARCH_HAS_RESET_CONTROLLER select ARCH_SPARSEMEM_ENABLE select ARM_AMBA select ARM_VIC + select ARM_APPENDED_DTB # Old Redboot bootloaders deployed + select ARM_ATAG_DTB_COMPAT # we need this to update dt memory node + select COMMON_CLK_EP93XX + select EP93XX_TIMER select CLKSRC_MMIO select CPU_ARM920T select GPIOLIB + select PINCTRL + select PINCTRL_EP93XX help This enables support for the Cirrus EP93xx series of CPUs. if ARCH_EP93XX -menu "Cirrus EP93xx Implementation Options" - -config EP93XX_SOC_COMMON - bool - default y - select SOC_BUS - select LEDS_GPIO_REGISTER - -comment "EP93xx Platforms" +# menu "EP93xx Platforms" config MACH_BK3 bool "Support Liebherr BK3.1" @@ -103,6 +103,6 @@ config MACH_VISION_EP9307 Say 'Y' here if you want your kernel to support the Vision Engraving Systems EP9307 SoM. -endmenu +# endmenu endif diff --git a/arch/arm/mach-ep93xx/Makefile b/arch/arm/mach-ep93xx/Makefile deleted file mode 100644 index 62e37403df14..000000000000 --- a/arch/arm/mach-ep93xx/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# Makefile for the linux kernel. -# -obj-y := core.o clock.o timer-ep93xx.o - -obj-$(CONFIG_EP93XX_DMA) += dma.o - -obj-$(CONFIG_MACH_EDB93XX) += edb93xx.o -obj-$(CONFIG_MACH_TS72XX) += ts72xx.o -obj-$(CONFIG_MACH_VISION_EP9307)+= vision_ep9307.o From a48ac3dc569771c18fcafbc8351d820cc343c54a Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:58 +0300 Subject: [PATCH 242/655] pwm: ep93xx: drop legacy pinctrl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop legacy gpio request/free since we are using pinctrl for this now. Signed-off-by: Nikita Shubin Acked-by: Uwe Kleine-König Acked-by: Thierry Reding Acked-by: Linus Walleij Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- arch/arm/mach-ep93xx/core.c | 42 ------------------------------- drivers/pwm/pwm-ep93xx.c | 18 ------------- include/linux/soc/cirrus/ep93xx.h | 4 --- 3 files changed, 64 deletions(-) diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c index b99c46d22c4d..4ddf1a4cba33 100644 --- a/arch/arm/mach-ep93xx/core.c +++ b/arch/arm/mach-ep93xx/core.c @@ -577,48 +577,6 @@ void __init ep93xx_register_pwm(int pwm0, int pwm1) platform_device_register(&ep93xx_pwm1_device); } -int ep93xx_pwm_acquire_gpio(struct platform_device *pdev) -{ - int err; - - if (pdev->id == 0) { - err = 0; - } else if (pdev->id == 1) { - err = gpio_request(EP93XX_GPIO_LINE_EGPIO14, - dev_name(&pdev->dev)); - if (err) - return err; - err = gpio_direction_output(EP93XX_GPIO_LINE_EGPIO14, 0); - if (err) - goto fail; - - /* PWM 1 output on EGPIO[14] */ - ep93xx_devcfg_set_bits(EP93XX_SYSCON_DEVCFG_PONG); - } else { - err = -ENODEV; - } - - return err; - -fail: - gpio_free(EP93XX_GPIO_LINE_EGPIO14); - return err; -} -EXPORT_SYMBOL(ep93xx_pwm_acquire_gpio); - -void ep93xx_pwm_release_gpio(struct platform_device *pdev) -{ - if (pdev->id == 1) { - gpio_direction_input(EP93XX_GPIO_LINE_EGPIO14); - gpio_free(EP93XX_GPIO_LINE_EGPIO14); - - /* EGPIO[14] used for GPIO */ - ep93xx_devcfg_clear_bits(EP93XX_SYSCON_DEVCFG_PONG); - } -} -EXPORT_SYMBOL(ep93xx_pwm_release_gpio); - - /************************************************************************* * EP93xx video peripheral handling *************************************************************************/ diff --git a/drivers/pwm/pwm-ep93xx.c b/drivers/pwm/pwm-ep93xx.c index 67c3fdbf7ae3..994f89ac43b4 100644 --- a/drivers/pwm/pwm-ep93xx.c +++ b/drivers/pwm/pwm-ep93xx.c @@ -27,8 +27,6 @@ #include -#include /* for ep93xx_pwm_{acquire,release}_gpio() */ - #define EP93XX_PWMx_TERM_COUNT 0x00 #define EP93XX_PWMx_DUTY_CYCLE 0x04 #define EP93XX_PWMx_ENABLE 0x08 @@ -44,20 +42,6 @@ static inline struct ep93xx_pwm *to_ep93xx_pwm(struct pwm_chip *chip) return pwmchip_get_drvdata(chip); } -static int ep93xx_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm) -{ - struct platform_device *pdev = to_platform_device(pwmchip_parent(chip)); - - return ep93xx_pwm_acquire_gpio(pdev); -} - -static void ep93xx_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm) -{ - struct platform_device *pdev = to_platform_device(pwmchip_parent(chip)); - - ep93xx_pwm_release_gpio(pdev); -} - static int ep93xx_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, const struct pwm_state *state) { @@ -156,8 +140,6 @@ static int ep93xx_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, } static const struct pwm_ops ep93xx_pwm_ops = { - .request = ep93xx_pwm_request, - .free = ep93xx_pwm_free, .apply = ep93xx_pwm_apply, }; diff --git a/include/linux/soc/cirrus/ep93xx.h b/include/linux/soc/cirrus/ep93xx.h index 8942bfaf1545..f6376edc1b33 100644 --- a/include/linux/soc/cirrus/ep93xx.h +++ b/include/linux/soc/cirrus/ep93xx.h @@ -37,8 +37,6 @@ struct ep93xx_regmap_adev { container_of((_adev), struct ep93xx_regmap_adev, adev) #ifdef CONFIG_ARCH_EP93XX -int ep93xx_pwm_acquire_gpio(struct platform_device *pdev); -void ep93xx_pwm_release_gpio(struct platform_device *pdev); int ep93xx_ide_acquire_gpio(struct platform_device *pdev); void ep93xx_ide_release_gpio(struct platform_device *pdev); int ep93xx_i2s_acquire(void); @@ -46,8 +44,6 @@ void ep93xx_i2s_release(void); unsigned int ep93xx_chip_revision(void); #else -static inline int ep93xx_pwm_acquire_gpio(struct platform_device *pdev) { return 0; } -static inline void ep93xx_pwm_release_gpio(struct platform_device *pdev) {} static inline int ep93xx_ide_acquire_gpio(struct platform_device *pdev) { return 0; } static inline void ep93xx_ide_release_gpio(struct platform_device *pdev) {} static inline int ep93xx_i2s_acquire(void) { return 0; } From a632229be268dde8f6d407638b5cfba8b78201d6 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:59 +0300 Subject: [PATCH 243/655] ata: pata_ep93xx: remove legacy pinctrl use Drop legacy acquire/release since we are using pinctrl for this now. Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Reviewed-by: Sergey Shtylyov Reviewed-by: Linus Walleij Reviewed-by: Andy Shevchenko Reviewed-by: Mark Brown Reviewed-by: Krzysztof Kozlowski Reviewed-by: Andy Shevchenko Acked-by: Damien Le Moal Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- arch/arm/mach-ep93xx/core.c | 72 ------------------------------- drivers/ata/pata_ep93xx.c | 25 +++-------- include/linux/soc/cirrus/ep93xx.h | 4 -- 3 files changed, 6 insertions(+), 95 deletions(-) diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c index 4ddf1a4cba33..9c6154bb37b5 100644 --- a/arch/arm/mach-ep93xx/core.c +++ b/arch/arm/mach-ep93xx/core.c @@ -779,78 +779,6 @@ void __init ep93xx_register_ide(void) platform_device_register(&ep93xx_ide_device); } -int ep93xx_ide_acquire_gpio(struct platform_device *pdev) -{ - int err; - int i; - - err = gpio_request(EP93XX_GPIO_LINE_EGPIO2, dev_name(&pdev->dev)); - if (err) - return err; - err = gpio_request(EP93XX_GPIO_LINE_EGPIO15, dev_name(&pdev->dev)); - if (err) - goto fail_egpio15; - for (i = 2; i < 8; i++) { - err = gpio_request(EP93XX_GPIO_LINE_E(i), dev_name(&pdev->dev)); - if (err) - goto fail_gpio_e; - } - for (i = 4; i < 8; i++) { - err = gpio_request(EP93XX_GPIO_LINE_G(i), dev_name(&pdev->dev)); - if (err) - goto fail_gpio_g; - } - for (i = 0; i < 8; i++) { - err = gpio_request(EP93XX_GPIO_LINE_H(i), dev_name(&pdev->dev)); - if (err) - goto fail_gpio_h; - } - - /* GPIO ports E[7:2], G[7:4] and H used by IDE */ - ep93xx_devcfg_clear_bits(EP93XX_SYSCON_DEVCFG_EONIDE | - EP93XX_SYSCON_DEVCFG_GONIDE | - EP93XX_SYSCON_DEVCFG_HONIDE); - return 0; - -fail_gpio_h: - for (--i; i >= 0; --i) - gpio_free(EP93XX_GPIO_LINE_H(i)); - i = 8; -fail_gpio_g: - for (--i; i >= 4; --i) - gpio_free(EP93XX_GPIO_LINE_G(i)); - i = 8; -fail_gpio_e: - for (--i; i >= 2; --i) - gpio_free(EP93XX_GPIO_LINE_E(i)); - gpio_free(EP93XX_GPIO_LINE_EGPIO15); -fail_egpio15: - gpio_free(EP93XX_GPIO_LINE_EGPIO2); - return err; -} -EXPORT_SYMBOL(ep93xx_ide_acquire_gpio); - -void ep93xx_ide_release_gpio(struct platform_device *pdev) -{ - int i; - - for (i = 2; i < 8; i++) - gpio_free(EP93XX_GPIO_LINE_E(i)); - for (i = 4; i < 8; i++) - gpio_free(EP93XX_GPIO_LINE_G(i)); - for (i = 0; i < 8; i++) - gpio_free(EP93XX_GPIO_LINE_H(i)); - gpio_free(EP93XX_GPIO_LINE_EGPIO15); - gpio_free(EP93XX_GPIO_LINE_EGPIO2); - - - /* GPIO ports E[7:2], G[7:4] and H used by GPIO */ - ep93xx_devcfg_set_bits(EP93XX_SYSCON_DEVCFG_EONIDE | - EP93XX_SYSCON_DEVCFG_GONIDE | - EP93XX_SYSCON_DEVCFG_HONIDE); -} -EXPORT_SYMBOL(ep93xx_ide_release_gpio); - /************************************************************************* * EP93xx ADC *************************************************************************/ diff --git a/drivers/ata/pata_ep93xx.c b/drivers/ata/pata_ep93xx.c index 13246a92e29f..a8555f630097 100644 --- a/drivers/ata/pata_ep93xx.c +++ b/drivers/ata/pata_ep93xx.c @@ -922,28 +922,18 @@ static int ep93xx_pata_probe(struct platform_device *pdev) void __iomem *ide_base; int err; - err = ep93xx_ide_acquire_gpio(pdev); - if (err) - return err; - /* INT[3] (IRQ_EP93XX_EXT3) line connected as pull down */ irq = platform_get_irq(pdev, 0); - if (irq < 0) { - err = irq; - goto err_rel_gpio; - } + if (irq < 0) + return irq; ide_base = devm_platform_get_and_ioremap_resource(pdev, 0, &mem_res); - if (IS_ERR(ide_base)) { - err = PTR_ERR(ide_base); - goto err_rel_gpio; - } + if (IS_ERR(ide_base)) + return PTR_ERR(ide_base); drv_data = devm_kzalloc(&pdev->dev, sizeof(*drv_data), GFP_KERNEL); - if (!drv_data) { - err = -ENOMEM; - goto err_rel_gpio; - } + if (!drv_data) + return -ENOMEM; drv_data->pdev = pdev; drv_data->ide_base = ide_base; @@ -1002,8 +992,6 @@ static int ep93xx_pata_probe(struct platform_device *pdev) err_rel_dma: ep93xx_pata_release_dma(drv_data); -err_rel_gpio: - ep93xx_ide_release_gpio(pdev); return err; } @@ -1015,7 +1003,6 @@ static void ep93xx_pata_remove(struct platform_device *pdev) ata_host_detach(host); ep93xx_pata_release_dma(drv_data); ep93xx_pata_clear_regs(drv_data->ide_base); - ep93xx_ide_release_gpio(pdev); } static const struct of_device_id ep93xx_pata_of_ids[] = { diff --git a/include/linux/soc/cirrus/ep93xx.h b/include/linux/soc/cirrus/ep93xx.h index f6376edc1b33..142c33a2d7db 100644 --- a/include/linux/soc/cirrus/ep93xx.h +++ b/include/linux/soc/cirrus/ep93xx.h @@ -37,15 +37,11 @@ struct ep93xx_regmap_adev { container_of((_adev), struct ep93xx_regmap_adev, adev) #ifdef CONFIG_ARCH_EP93XX -int ep93xx_ide_acquire_gpio(struct platform_device *pdev); -void ep93xx_ide_release_gpio(struct platform_device *pdev); int ep93xx_i2s_acquire(void); void ep93xx_i2s_release(void); unsigned int ep93xx_chip_revision(void); #else -static inline int ep93xx_ide_acquire_gpio(struct platform_device *pdev) { return 0; } -static inline void ep93xx_ide_release_gpio(struct platform_device *pdev) {} static inline int ep93xx_i2s_acquire(void) { return 0; } static inline void ep93xx_i2s_release(void) {} static inline unsigned int ep93xx_chip_revision(void) { return 0; } From e5ef574dda702e62081d5c7991949cbdb7f65c08 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:11:00 +0300 Subject: [PATCH 244/655] ARM: ep93xx: delete all boardfiles Delete the ep93xx board files. Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- arch/arm/mach-ep93xx/clock.c | 733 ----------------- arch/arm/mach-ep93xx/core.c | 955 ----------------------- arch/arm/mach-ep93xx/dma.c | 114 --- arch/arm/mach-ep93xx/edb93xx.c | 368 --------- arch/arm/mach-ep93xx/ep93xx-regs.h | 38 - arch/arm/mach-ep93xx/gpio-ep93xx.h | 111 --- arch/arm/mach-ep93xx/hardware.h | 25 - arch/arm/mach-ep93xx/irqs.h | 76 -- arch/arm/mach-ep93xx/platform.h | 42 - arch/arm/mach-ep93xx/soc.h | 212 ----- arch/arm/mach-ep93xx/timer-ep93xx.c | 143 ---- arch/arm/mach-ep93xx/ts72xx.c | 422 ---------- arch/arm/mach-ep93xx/ts72xx.h | 94 --- arch/arm/mach-ep93xx/vision_ep9307.c | 321 -------- include/linux/platform_data/spi-ep93xx.h | 15 - 15 files changed, 3669 deletions(-) delete mode 100644 arch/arm/mach-ep93xx/clock.c delete mode 100644 arch/arm/mach-ep93xx/core.c delete mode 100644 arch/arm/mach-ep93xx/dma.c delete mode 100644 arch/arm/mach-ep93xx/edb93xx.c delete mode 100644 arch/arm/mach-ep93xx/ep93xx-regs.h delete mode 100644 arch/arm/mach-ep93xx/gpio-ep93xx.h delete mode 100644 arch/arm/mach-ep93xx/hardware.h delete mode 100644 arch/arm/mach-ep93xx/irqs.h delete mode 100644 arch/arm/mach-ep93xx/platform.h delete mode 100644 arch/arm/mach-ep93xx/soc.h delete mode 100644 arch/arm/mach-ep93xx/timer-ep93xx.c delete mode 100644 arch/arm/mach-ep93xx/ts72xx.c delete mode 100644 arch/arm/mach-ep93xx/ts72xx.h delete mode 100644 arch/arm/mach-ep93xx/vision_ep9307.c delete mode 100644 include/linux/platform_data/spi-ep93xx.h diff --git a/arch/arm/mach-ep93xx/clock.c b/arch/arm/mach-ep93xx/clock.c deleted file mode 100644 index e9f72a529b50..000000000000 --- a/arch/arm/mach-ep93xx/clock.c +++ /dev/null @@ -1,733 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * arch/arm/mach-ep93xx/clock.c - * Clock control for Cirrus EP93xx chips. - * - * Copyright (C) 2006 Lennert Buytenhek - */ - -#define pr_fmt(fmt) "ep93xx " KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "hardware.h" - -#include - -#include "soc.h" - -static DEFINE_SPINLOCK(clk_lock); - -static char fclk_divisors[] = { 1, 2, 4, 8, 16, 1, 1, 1 }; -static char hclk_divisors[] = { 1, 2, 4, 5, 6, 8, 16, 32 }; -static char pclk_divisors[] = { 1, 2, 4, 8 }; - -static char adc_divisors[] = { 16, 4 }; -static char sclk_divisors[] = { 2, 4 }; -static char lrclk_divisors[] = { 32, 64, 128 }; - -static const char * const mux_parents[] = { - "xtali", - "pll1", - "pll2" -}; - -/* - * PLL rate = 14.7456 MHz * (X1FBD + 1) * (X2FBD + 1) / (X2IPD + 1) / 2^PS - */ -static unsigned long calc_pll_rate(unsigned long long rate, u32 config_word) -{ - int i; - - rate *= ((config_word >> 11) & 0x1f) + 1; /* X1FBD */ - rate *= ((config_word >> 5) & 0x3f) + 1; /* X2FBD */ - do_div(rate, (config_word & 0x1f) + 1); /* X2IPD */ - for (i = 0; i < ((config_word >> 16) & 3); i++) /* PS */ - rate >>= 1; - - return (unsigned long)rate; -} - -struct clk_psc { - struct clk_hw hw; - void __iomem *reg; - u8 bit_idx; - u32 mask; - u8 shift; - u8 width; - char *div; - u8 num_div; - spinlock_t *lock; -}; - -#define to_clk_psc(_hw) container_of(_hw, struct clk_psc, hw) - -static int ep93xx_clk_is_enabled(struct clk_hw *hw) -{ - struct clk_psc *psc = to_clk_psc(hw); - u32 val = readl(psc->reg); - - return (val & BIT(psc->bit_idx)) ? 1 : 0; -} - -static int ep93xx_clk_enable(struct clk_hw *hw) -{ - struct clk_psc *psc = to_clk_psc(hw); - unsigned long flags = 0; - u32 val; - - if (psc->lock) - spin_lock_irqsave(psc->lock, flags); - - val = __raw_readl(psc->reg); - val |= BIT(psc->bit_idx); - - ep93xx_syscon_swlocked_write(val, psc->reg); - - if (psc->lock) - spin_unlock_irqrestore(psc->lock, flags); - - return 0; -} - -static void ep93xx_clk_disable(struct clk_hw *hw) -{ - struct clk_psc *psc = to_clk_psc(hw); - unsigned long flags = 0; - u32 val; - - if (psc->lock) - spin_lock_irqsave(psc->lock, flags); - - val = __raw_readl(psc->reg); - val &= ~BIT(psc->bit_idx); - - ep93xx_syscon_swlocked_write(val, psc->reg); - - if (psc->lock) - spin_unlock_irqrestore(psc->lock, flags); -} - -static const struct clk_ops clk_ep93xx_gate_ops = { - .enable = ep93xx_clk_enable, - .disable = ep93xx_clk_disable, - .is_enabled = ep93xx_clk_is_enabled, -}; - -static struct clk_hw *ep93xx_clk_register_gate(const char *name, - const char *parent_name, - void __iomem *reg, - u8 bit_idx) -{ - struct clk_init_data init; - struct clk_psc *psc; - struct clk *clk; - - psc = kzalloc(sizeof(*psc), GFP_KERNEL); - if (!psc) - return ERR_PTR(-ENOMEM); - - init.name = name; - init.ops = &clk_ep93xx_gate_ops; - init.flags = CLK_SET_RATE_PARENT; - init.parent_names = (parent_name ? &parent_name : NULL); - init.num_parents = (parent_name ? 1 : 0); - - psc->reg = reg; - psc->bit_idx = bit_idx; - psc->hw.init = &init; - psc->lock = &clk_lock; - - clk = clk_register(NULL, &psc->hw); - if (IS_ERR(clk)) { - kfree(psc); - return ERR_CAST(clk); - } - - return &psc->hw; -} - -static u8 ep93xx_mux_get_parent(struct clk_hw *hw) -{ - struct clk_psc *psc = to_clk_psc(hw); - u32 val = __raw_readl(psc->reg); - - if (!(val & EP93XX_SYSCON_CLKDIV_ESEL)) - return 0; - - if (!(val & EP93XX_SYSCON_CLKDIV_PSEL)) - return 1; - - return 2; -} - -static int ep93xx_mux_set_parent_lock(struct clk_hw *hw, u8 index) -{ - struct clk_psc *psc = to_clk_psc(hw); - unsigned long flags = 0; - u32 val; - - if (index >= ARRAY_SIZE(mux_parents)) - return -EINVAL; - - if (psc->lock) - spin_lock_irqsave(psc->lock, flags); - - val = __raw_readl(psc->reg); - val &= ~(EP93XX_SYSCON_CLKDIV_ESEL | EP93XX_SYSCON_CLKDIV_PSEL); - - - if (index != 0) { - val |= EP93XX_SYSCON_CLKDIV_ESEL; - val |= (index - 1) ? EP93XX_SYSCON_CLKDIV_PSEL : 0; - } - - ep93xx_syscon_swlocked_write(val, psc->reg); - - if (psc->lock) - spin_unlock_irqrestore(psc->lock, flags); - - return 0; -} - -static bool is_best(unsigned long rate, unsigned long now, - unsigned long best) -{ - return abs(rate - now) < abs(rate - best); -} - -static int ep93xx_mux_determine_rate(struct clk_hw *hw, - struct clk_rate_request *req) -{ - unsigned long rate = req->rate; - struct clk *best_parent = NULL; - unsigned long __parent_rate; - unsigned long best_rate = 0, actual_rate, mclk_rate; - unsigned long best_parent_rate; - int __div = 0, __pdiv = 0; - int i; - - /* - * Try the two pll's and the external clock - * Because the valid predividers are 2, 2.5 and 3, we multiply - * all the clocks by 2 to avoid floating point math. - * - * This is based on the algorithm in the ep93xx raster guide: - * http://be-a-maverick.com/en/pubs/appNote/AN269REV1.pdf - * - */ - for (i = 0; i < ARRAY_SIZE(mux_parents); i++) { - struct clk *parent = clk_get_sys(mux_parents[i], NULL); - - __parent_rate = clk_get_rate(parent); - mclk_rate = __parent_rate * 2; - - /* Try each predivider value */ - for (__pdiv = 4; __pdiv <= 6; __pdiv++) { - __div = mclk_rate / (rate * __pdiv); - if (__div < 2 || __div > 127) - continue; - - actual_rate = mclk_rate / (__pdiv * __div); - if (is_best(rate, actual_rate, best_rate)) { - best_rate = actual_rate; - best_parent_rate = __parent_rate; - best_parent = parent; - } - } - } - - if (!best_parent) - return -EINVAL; - - req->best_parent_rate = best_parent_rate; - req->best_parent_hw = __clk_get_hw(best_parent); - req->rate = best_rate; - - return 0; -} - -static unsigned long ep93xx_ddiv_recalc_rate(struct clk_hw *hw, - unsigned long parent_rate) -{ - struct clk_psc *psc = to_clk_psc(hw); - unsigned long rate = 0; - u32 val = __raw_readl(psc->reg); - int __pdiv = ((val >> EP93XX_SYSCON_CLKDIV_PDIV_SHIFT) & 0x03); - int __div = val & 0x7f; - - if (__div > 0) - rate = (parent_rate * 2) / ((__pdiv + 3) * __div); - - return rate; -} - -static int ep93xx_ddiv_set_rate(struct clk_hw *hw, unsigned long rate, - unsigned long parent_rate) -{ - struct clk_psc *psc = to_clk_psc(hw); - int pdiv = 0, div = 0; - unsigned long best_rate = 0, actual_rate, mclk_rate; - int __div = 0, __pdiv = 0; - u32 val; - - mclk_rate = parent_rate * 2; - - for (__pdiv = 4; __pdiv <= 6; __pdiv++) { - __div = mclk_rate / (rate * __pdiv); - if (__div < 2 || __div > 127) - continue; - - actual_rate = mclk_rate / (__pdiv * __div); - if (is_best(rate, actual_rate, best_rate)) { - pdiv = __pdiv - 3; - div = __div; - best_rate = actual_rate; - } - } - - if (!best_rate) - return -EINVAL; - - val = __raw_readl(psc->reg); - - /* Clear old dividers */ - val &= ~0x37f; - - /* Set the new pdiv and div bits for the new clock rate */ - val |= (pdiv << EP93XX_SYSCON_CLKDIV_PDIV_SHIFT) | div; - ep93xx_syscon_swlocked_write(val, psc->reg); - - return 0; -} - -static const struct clk_ops clk_ddiv_ops = { - .enable = ep93xx_clk_enable, - .disable = ep93xx_clk_disable, - .is_enabled = ep93xx_clk_is_enabled, - .get_parent = ep93xx_mux_get_parent, - .set_parent = ep93xx_mux_set_parent_lock, - .determine_rate = ep93xx_mux_determine_rate, - .recalc_rate = ep93xx_ddiv_recalc_rate, - .set_rate = ep93xx_ddiv_set_rate, -}; - -static struct clk_hw *clk_hw_register_ddiv(const char *name, - void __iomem *reg, - u8 bit_idx) -{ - struct clk_init_data init; - struct clk_psc *psc; - struct clk *clk; - - psc = kzalloc(sizeof(*psc), GFP_KERNEL); - if (!psc) - return ERR_PTR(-ENOMEM); - - init.name = name; - init.ops = &clk_ddiv_ops; - init.flags = 0; - init.parent_names = mux_parents; - init.num_parents = ARRAY_SIZE(mux_parents); - - psc->reg = reg; - psc->bit_idx = bit_idx; - psc->lock = &clk_lock; - psc->hw.init = &init; - - clk = clk_register(NULL, &psc->hw); - if (IS_ERR(clk)) { - kfree(psc); - return ERR_CAST(clk); - } - return &psc->hw; -} - -static unsigned long ep93xx_div_recalc_rate(struct clk_hw *hw, - unsigned long parent_rate) -{ - struct clk_psc *psc = to_clk_psc(hw); - u32 val = __raw_readl(psc->reg); - u8 index = (val & psc->mask) >> psc->shift; - - if (index >= psc->num_div) - return 0; - - return DIV_ROUND_UP_ULL(parent_rate, psc->div[index]); -} - -static long ep93xx_div_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *parent_rate) -{ - struct clk_psc *psc = to_clk_psc(hw); - unsigned long best = 0, now, maxdiv; - int i; - - maxdiv = psc->div[psc->num_div - 1]; - - for (i = 0; i < psc->num_div; i++) { - if ((rate * psc->div[i]) == *parent_rate) - return DIV_ROUND_UP_ULL((u64)*parent_rate, psc->div[i]); - - now = DIV_ROUND_UP_ULL((u64)*parent_rate, psc->div[i]); - - if (is_best(rate, now, best)) - best = now; - } - - if (!best) - best = DIV_ROUND_UP_ULL(*parent_rate, maxdiv); - - return best; -} - -static int ep93xx_div_set_rate(struct clk_hw *hw, unsigned long rate, - unsigned long parent_rate) -{ - struct clk_psc *psc = to_clk_psc(hw); - u32 val = __raw_readl(psc->reg) & ~psc->mask; - int i; - - for (i = 0; i < psc->num_div; i++) - if (rate == parent_rate / psc->div[i]) { - val |= i << psc->shift; - break; - } - - if (i == psc->num_div) - return -EINVAL; - - ep93xx_syscon_swlocked_write(val, psc->reg); - - return 0; -} - -static const struct clk_ops ep93xx_div_ops = { - .enable = ep93xx_clk_enable, - .disable = ep93xx_clk_disable, - .is_enabled = ep93xx_clk_is_enabled, - .recalc_rate = ep93xx_div_recalc_rate, - .round_rate = ep93xx_div_round_rate, - .set_rate = ep93xx_div_set_rate, -}; - -static struct clk_hw *clk_hw_register_div(const char *name, - const char *parent_name, - void __iomem *reg, - u8 enable_bit, - u8 shift, - u8 width, - char *clk_divisors, - u8 num_div) -{ - struct clk_init_data init; - struct clk_psc *psc; - struct clk *clk; - - psc = kzalloc(sizeof(*psc), GFP_KERNEL); - if (!psc) - return ERR_PTR(-ENOMEM); - - init.name = name; - init.ops = &ep93xx_div_ops; - init.flags = 0; - init.parent_names = (parent_name ? &parent_name : NULL); - init.num_parents = 1; - - psc->reg = reg; - psc->bit_idx = enable_bit; - psc->mask = GENMASK(shift + width - 1, shift); - psc->shift = shift; - psc->div = clk_divisors; - psc->num_div = num_div; - psc->lock = &clk_lock; - psc->hw.init = &init; - - clk = clk_register(NULL, &psc->hw); - if (IS_ERR(clk)) { - kfree(psc); - return ERR_CAST(clk); - } - return &psc->hw; -} - -struct ep93xx_gate { - unsigned int bit; - const char *dev_id; - const char *con_id; -}; - -static struct ep93xx_gate ep93xx_uarts[] = { - {EP93XX_SYSCON_DEVCFG_U1EN, "apb:uart1", NULL}, - {EP93XX_SYSCON_DEVCFG_U2EN, "apb:uart2", NULL}, - {EP93XX_SYSCON_DEVCFG_U3EN, "apb:uart3", NULL}, -}; - -static void __init ep93xx_uart_clock_init(void) -{ - unsigned int i; - struct clk_hw *hw; - u32 value; - unsigned int clk_uart_div; - - value = __raw_readl(EP93XX_SYSCON_PWRCNT); - if (value & EP93XX_SYSCON_PWRCNT_UARTBAUD) - clk_uart_div = 1; - else - clk_uart_div = 2; - - hw = clk_hw_register_fixed_factor(NULL, "uart", "xtali", 0, 1, clk_uart_div); - - /* parenting uart gate clocks to uart clock */ - for (i = 0; i < ARRAY_SIZE(ep93xx_uarts); i++) { - hw = ep93xx_clk_register_gate(ep93xx_uarts[i].dev_id, - "uart", - EP93XX_SYSCON_DEVCFG, - ep93xx_uarts[i].bit); - - clk_hw_register_clkdev(hw, NULL, ep93xx_uarts[i].dev_id); - } -} - -static struct ep93xx_gate ep93xx_dmas[] = { - {EP93XX_SYSCON_PWRCNT_DMA_M2P0, NULL, "m2p0"}, - {EP93XX_SYSCON_PWRCNT_DMA_M2P1, NULL, "m2p1"}, - {EP93XX_SYSCON_PWRCNT_DMA_M2P2, NULL, "m2p2"}, - {EP93XX_SYSCON_PWRCNT_DMA_M2P3, NULL, "m2p3"}, - {EP93XX_SYSCON_PWRCNT_DMA_M2P4, NULL, "m2p4"}, - {EP93XX_SYSCON_PWRCNT_DMA_M2P5, NULL, "m2p5"}, - {EP93XX_SYSCON_PWRCNT_DMA_M2P6, NULL, "m2p6"}, - {EP93XX_SYSCON_PWRCNT_DMA_M2P7, NULL, "m2p7"}, - {EP93XX_SYSCON_PWRCNT_DMA_M2P8, NULL, "m2p8"}, - {EP93XX_SYSCON_PWRCNT_DMA_M2P9, NULL, "m2p9"}, - {EP93XX_SYSCON_PWRCNT_DMA_M2M0, NULL, "m2m0"}, - {EP93XX_SYSCON_PWRCNT_DMA_M2M1, NULL, "m2m1"}, -}; - -static void __init ep93xx_dma_clock_init(void) -{ - unsigned int i; - struct clk_hw *hw; - int ret; - - for (i = 0; i < ARRAY_SIZE(ep93xx_dmas); i++) { - hw = clk_hw_register_gate(NULL, ep93xx_dmas[i].con_id, - "hclk", 0, - EP93XX_SYSCON_PWRCNT, - ep93xx_dmas[i].bit, - 0, - &clk_lock); - - ret = clk_hw_register_clkdev(hw, ep93xx_dmas[i].con_id, NULL); - if (ret) - pr_err("%s: failed to register lookup %s\n", - __func__, ep93xx_dmas[i].con_id); - } -} - -static int __init ep93xx_clock_init(void) -{ - u32 value; - struct clk_hw *hw; - unsigned long clk_pll1_rate; - unsigned long clk_f_rate; - unsigned long clk_h_rate; - unsigned long clk_p_rate; - unsigned long clk_pll2_rate; - unsigned int clk_f_div; - unsigned int clk_h_div; - unsigned int clk_p_div; - unsigned int clk_usb_div; - unsigned long clk_spi_div; - - hw = clk_hw_register_fixed_rate(NULL, "xtali", NULL, 0, EP93XX_EXT_CLK_RATE); - clk_hw_register_clkdev(hw, NULL, "xtali"); - - /* Determine the bootloader configured pll1 rate */ - value = __raw_readl(EP93XX_SYSCON_CLKSET1); - if (!(value & EP93XX_SYSCON_CLKSET1_NBYP1)) - clk_pll1_rate = EP93XX_EXT_CLK_RATE; - else - clk_pll1_rate = calc_pll_rate(EP93XX_EXT_CLK_RATE, value); - - hw = clk_hw_register_fixed_rate(NULL, "pll1", "xtali", 0, clk_pll1_rate); - clk_hw_register_clkdev(hw, NULL, "pll1"); - - /* Initialize the pll1 derived clocks */ - clk_f_div = fclk_divisors[(value >> 25) & 0x7]; - clk_h_div = hclk_divisors[(value >> 20) & 0x7]; - clk_p_div = pclk_divisors[(value >> 18) & 0x3]; - - hw = clk_hw_register_fixed_factor(NULL, "fclk", "pll1", 0, 1, clk_f_div); - clk_f_rate = clk_get_rate(hw->clk); - hw = clk_hw_register_fixed_factor(NULL, "hclk", "pll1", 0, 1, clk_h_div); - clk_h_rate = clk_get_rate(hw->clk); - hw = clk_hw_register_fixed_factor(NULL, "pclk", "hclk", 0, 1, clk_p_div); - clk_p_rate = clk_get_rate(hw->clk); - - clk_hw_register_clkdev(hw, "apb_pclk", NULL); - - ep93xx_dma_clock_init(); - - /* Determine the bootloader configured pll2 rate */ - value = __raw_readl(EP93XX_SYSCON_CLKSET2); - if (!(value & EP93XX_SYSCON_CLKSET2_NBYP2)) - clk_pll2_rate = EP93XX_EXT_CLK_RATE; - else if (value & EP93XX_SYSCON_CLKSET2_PLL2_EN) - clk_pll2_rate = calc_pll_rate(EP93XX_EXT_CLK_RATE, value); - else - clk_pll2_rate = 0; - - hw = clk_hw_register_fixed_rate(NULL, "pll2", "xtali", 0, clk_pll2_rate); - clk_hw_register_clkdev(hw, NULL, "pll2"); - - /* Initialize the pll2 derived clocks */ - /* - * These four bits set the divide ratio between the PLL2 - * output and the USB clock. - * 0000 - Divide by 1 - * 0001 - Divide by 2 - * 0010 - Divide by 3 - * 0011 - Divide by 4 - * 0100 - Divide by 5 - * 0101 - Divide by 6 - * 0110 - Divide by 7 - * 0111 - Divide by 8 - * 1000 - Divide by 9 - * 1001 - Divide by 10 - * 1010 - Divide by 11 - * 1011 - Divide by 12 - * 1100 - Divide by 13 - * 1101 - Divide by 14 - * 1110 - Divide by 15 - * 1111 - Divide by 1 - * On power-on-reset these bits are reset to 0000b. - */ - clk_usb_div = (((value >> 28) & 0xf) + 1); - hw = clk_hw_register_fixed_factor(NULL, "usb_clk", "pll2", 0, 1, clk_usb_div); - hw = clk_hw_register_gate(NULL, "ohci-platform", - "usb_clk", 0, - EP93XX_SYSCON_PWRCNT, - EP93XX_SYSCON_PWRCNT_USH_EN, - 0, - &clk_lock); - clk_hw_register_clkdev(hw, NULL, "ohci-platform"); - - /* - * EP93xx SSP clock rate was doubled in version E2. For more information - * see: - * http://www.cirrus.com/en/pubs/appNote/AN273REV4.pdf - */ - clk_spi_div = 1; - if (ep93xx_chip_revision() < EP93XX_CHIP_REV_E2) - clk_spi_div = 2; - hw = clk_hw_register_fixed_factor(NULL, "ep93xx-spi.0", "xtali", 0, 1, clk_spi_div); - clk_hw_register_clkdev(hw, NULL, "ep93xx-spi.0"); - - /* pwm clock */ - hw = clk_hw_register_fixed_factor(NULL, "pwm_clk", "xtali", 0, 1, 1); - clk_hw_register_clkdev(hw, "pwm_clk", NULL); - - pr_info("PLL1 running at %ld MHz, PLL2 at %ld MHz\n", - clk_pll1_rate / 1000000, clk_pll2_rate / 1000000); - pr_info("FCLK %ld MHz, HCLK %ld MHz, PCLK %ld MHz\n", - clk_f_rate / 1000000, clk_h_rate / 1000000, - clk_p_rate / 1000000); - - ep93xx_uart_clock_init(); - - /* touchscreen/adc clock */ - hw = clk_hw_register_div("ep93xx-adc", - "xtali", - EP93XX_SYSCON_KEYTCHCLKDIV, - EP93XX_SYSCON_KEYTCHCLKDIV_TSEN, - EP93XX_SYSCON_KEYTCHCLKDIV_ADIV, - 1, - adc_divisors, - ARRAY_SIZE(adc_divisors)); - - clk_hw_register_clkdev(hw, NULL, "ep93xx-adc"); - - /* keypad clock */ - hw = clk_hw_register_div("ep93xx-keypad", - "xtali", - EP93XX_SYSCON_KEYTCHCLKDIV, - EP93XX_SYSCON_KEYTCHCLKDIV_KEN, - EP93XX_SYSCON_KEYTCHCLKDIV_KDIV, - 1, - adc_divisors, - ARRAY_SIZE(adc_divisors)); - - clk_hw_register_clkdev(hw, NULL, "ep93xx-keypad"); - - /* On reset PDIV and VDIV is set to zero, while PDIV zero - * means clock disable, VDIV shouldn't be zero. - * So i set both dividers to minimum. - */ - /* ENA - Enable CLK divider. */ - /* PDIV - 00 - Disable clock */ - /* VDIV - at least 2 */ - /* Check and enable video clk registers */ - value = __raw_readl(EP93XX_SYSCON_VIDCLKDIV); - value |= (1 << EP93XX_SYSCON_CLKDIV_PDIV_SHIFT) | 2; - ep93xx_syscon_swlocked_write(value, EP93XX_SYSCON_VIDCLKDIV); - - /* check and enable i2s clk registers */ - value = __raw_readl(EP93XX_SYSCON_I2SCLKDIV); - value |= (1 << EP93XX_SYSCON_CLKDIV_PDIV_SHIFT) | 2; - ep93xx_syscon_swlocked_write(value, EP93XX_SYSCON_I2SCLKDIV); - - /* video clk */ - hw = clk_hw_register_ddiv("ep93xx-fb", - EP93XX_SYSCON_VIDCLKDIV, - EP93XX_SYSCON_CLKDIV_ENABLE); - - clk_hw_register_clkdev(hw, NULL, "ep93xx-fb"); - - /* i2s clk */ - hw = clk_hw_register_ddiv("mclk", - EP93XX_SYSCON_I2SCLKDIV, - EP93XX_SYSCON_CLKDIV_ENABLE); - - clk_hw_register_clkdev(hw, "mclk", "ep93xx-i2s"); - - /* i2s sclk */ -#define EP93XX_I2SCLKDIV_SDIV_SHIFT 16 -#define EP93XX_I2SCLKDIV_SDIV_WIDTH 1 - hw = clk_hw_register_div("sclk", - "mclk", - EP93XX_SYSCON_I2SCLKDIV, - EP93XX_SYSCON_I2SCLKDIV_SENA, - EP93XX_I2SCLKDIV_SDIV_SHIFT, - EP93XX_I2SCLKDIV_SDIV_WIDTH, - sclk_divisors, - ARRAY_SIZE(sclk_divisors)); - - clk_hw_register_clkdev(hw, "sclk", "ep93xx-i2s"); - - /* i2s lrclk */ -#define EP93XX_I2SCLKDIV_LRDIV32_SHIFT 17 -#define EP93XX_I2SCLKDIV_LRDIV32_WIDTH 3 - hw = clk_hw_register_div("lrclk", - "sclk", - EP93XX_SYSCON_I2SCLKDIV, - EP93XX_SYSCON_I2SCLKDIV_SENA, - EP93XX_I2SCLKDIV_LRDIV32_SHIFT, - EP93XX_I2SCLKDIV_LRDIV32_WIDTH, - lrclk_divisors, - ARRAY_SIZE(lrclk_divisors)); - - clk_hw_register_clkdev(hw, "lrclk", "ep93xx-i2s"); - - return 0; -} -postcore_initcall(ep93xx_clock_init); diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c deleted file mode 100644 index 9c6154bb37b5..000000000000 --- a/arch/arm/mach-ep93xx/core.c +++ /dev/null @@ -1,955 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * arch/arm/mach-ep93xx/core.c - * Core routines for Cirrus EP93xx chips. - * - * Copyright (C) 2006 Lennert Buytenhek - * Copyright (C) 2007 Herbert Valerio Riedel - * - * Thanks go to Michael Burian and Ray Lehtiniemi for their key - * role in the ep93xx linux community. - */ - -#define pr_fmt(fmt) "ep93xx " KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "hardware.h" -#include -#include -#include -#include - -#include "gpio-ep93xx.h" - -#include -#include - -#include "soc.h" -#include "irqs.h" - -/************************************************************************* - * Static I/O mappings that are needed for all EP93xx platforms - *************************************************************************/ -static struct map_desc ep93xx_io_desc[] __initdata = { - { - .virtual = EP93XX_AHB_VIRT_BASE, - .pfn = __phys_to_pfn(EP93XX_AHB_PHYS_BASE), - .length = EP93XX_AHB_SIZE, - .type = MT_DEVICE, - }, { - .virtual = EP93XX_APB_VIRT_BASE, - .pfn = __phys_to_pfn(EP93XX_APB_PHYS_BASE), - .length = EP93XX_APB_SIZE, - .type = MT_DEVICE, - }, -}; - -void __init ep93xx_map_io(void) -{ - iotable_init(ep93xx_io_desc, ARRAY_SIZE(ep93xx_io_desc)); -} - -/************************************************************************* - * EP93xx IRQ handling - *************************************************************************/ -void __init ep93xx_init_irq(void) -{ - vic_init(EP93XX_VIC1_BASE, IRQ_EP93XX_VIC0, EP93XX_VIC1_VALID_IRQ_MASK, 0); - vic_init(EP93XX_VIC2_BASE, IRQ_EP93XX_VIC1, EP93XX_VIC2_VALID_IRQ_MASK, 0); -} - - -/************************************************************************* - * EP93xx System Controller Software Locked register handling - *************************************************************************/ - -/* - * syscon_swlock prevents anything else from writing to the syscon - * block while a software locked register is being written. - */ -static DEFINE_SPINLOCK(syscon_swlock); - -void ep93xx_syscon_swlocked_write(unsigned int val, void __iomem *reg) -{ - unsigned long flags; - - spin_lock_irqsave(&syscon_swlock, flags); - - __raw_writel(0xaa, EP93XX_SYSCON_SWLOCK); - __raw_writel(val, reg); - - spin_unlock_irqrestore(&syscon_swlock, flags); -} - -void ep93xx_devcfg_set_clear(unsigned int set_bits, unsigned int clear_bits) -{ - unsigned long flags; - unsigned int val; - - spin_lock_irqsave(&syscon_swlock, flags); - - val = __raw_readl(EP93XX_SYSCON_DEVCFG); - val &= ~clear_bits; - val |= set_bits; - __raw_writel(0xaa, EP93XX_SYSCON_SWLOCK); - __raw_writel(val, EP93XX_SYSCON_DEVCFG); - - spin_unlock_irqrestore(&syscon_swlock, flags); -} - -/** - * ep93xx_chip_revision() - returns the EP93xx chip revision - * - * See "platform.h" for more information. - */ -unsigned int ep93xx_chip_revision(void) -{ - unsigned int v; - - v = __raw_readl(EP93XX_SYSCON_SYSCFG); - v &= EP93XX_SYSCON_SYSCFG_REV_MASK; - v >>= EP93XX_SYSCON_SYSCFG_REV_SHIFT; - return v; -} -EXPORT_SYMBOL_GPL(ep93xx_chip_revision); - -/************************************************************************* - * EP93xx GPIO - *************************************************************************/ -/* port A */ -static struct resource ep93xx_a_gpio_resources[] = { - DEFINE_RES_MEM_NAMED(EP93XX_GPIO_PHYS_BASE, 0x04, "data"), - DEFINE_RES_MEM_NAMED(EP93XX_GPIO_PHYS_BASE + 0x10, 0x04, "dir"), - DEFINE_RES_MEM_NAMED(EP93XX_GPIO_PHYS_BASE + 0x90, 0x1c, "intr"), - DEFINE_RES_IRQ(IRQ_EP93XX_GPIO_AB), -}; - -static struct platform_device ep93xx_a_gpio = { - .name = "gpio-ep93xx", - .id = 0, - .num_resources = ARRAY_SIZE(ep93xx_a_gpio_resources), - .resource = ep93xx_a_gpio_resources, -}; - -/* port B */ -static struct resource ep93xx_b_gpio_resources[] = { - DEFINE_RES_MEM_NAMED(EP93XX_GPIO_PHYS_BASE + 0x04, 0x04, "data"), - DEFINE_RES_MEM_NAMED(EP93XX_GPIO_PHYS_BASE + 0x14, 0x04, "dir"), - DEFINE_RES_MEM_NAMED(EP93XX_GPIO_PHYS_BASE + 0xac, 0x1c, "intr"), - DEFINE_RES_IRQ(IRQ_EP93XX_GPIO_AB), -}; - -static struct platform_device ep93xx_b_gpio = { - .name = "gpio-ep93xx", - .id = 1, - .num_resources = ARRAY_SIZE(ep93xx_b_gpio_resources), - .resource = ep93xx_b_gpio_resources, -}; - -/* port C */ -static struct resource ep93xx_c_gpio_resources[] = { - DEFINE_RES_MEM_NAMED(EP93XX_GPIO_PHYS_BASE + 0x08, 0x04, "data"), - DEFINE_RES_MEM_NAMED(EP93XX_GPIO_PHYS_BASE + 0x18, 0x04, "dir"), -}; - -static struct platform_device ep93xx_c_gpio = { - .name = "gpio-ep93xx", - .id = 2, - .num_resources = ARRAY_SIZE(ep93xx_c_gpio_resources), - .resource = ep93xx_c_gpio_resources, -}; - -/* port D */ -static struct resource ep93xx_d_gpio_resources[] = { - DEFINE_RES_MEM_NAMED(EP93XX_GPIO_PHYS_BASE + 0x0c, 0x04, "data"), - DEFINE_RES_MEM_NAMED(EP93XX_GPIO_PHYS_BASE + 0x1c, 0x04, "dir"), -}; - -static struct platform_device ep93xx_d_gpio = { - .name = "gpio-ep93xx", - .id = 3, - .num_resources = ARRAY_SIZE(ep93xx_d_gpio_resources), - .resource = ep93xx_d_gpio_resources, -}; - -/* port E */ -static struct resource ep93xx_e_gpio_resources[] = { - DEFINE_RES_MEM_NAMED(EP93XX_GPIO_PHYS_BASE + 0x20, 0x04, "data"), - DEFINE_RES_MEM_NAMED(EP93XX_GPIO_PHYS_BASE + 0x24, 0x04, "dir"), -}; - -static struct platform_device ep93xx_e_gpio = { - .name = "gpio-ep93xx", - .id = 4, - .num_resources = ARRAY_SIZE(ep93xx_e_gpio_resources), - .resource = ep93xx_e_gpio_resources, -}; - -/* port F */ -static struct resource ep93xx_f_gpio_resources[] = { - DEFINE_RES_MEM_NAMED(EP93XX_GPIO_PHYS_BASE + 0x30, 0x04, "data"), - DEFINE_RES_MEM_NAMED(EP93XX_GPIO_PHYS_BASE + 0x34, 0x04, "dir"), - DEFINE_RES_MEM_NAMED(EP93XX_GPIO_PHYS_BASE + 0x4c, 0x1c, "intr"), - DEFINE_RES_IRQ(IRQ_EP93XX_GPIO0MUX), - DEFINE_RES_IRQ(IRQ_EP93XX_GPIO1MUX), - DEFINE_RES_IRQ(IRQ_EP93XX_GPIO2MUX), - DEFINE_RES_IRQ(IRQ_EP93XX_GPIO3MUX), - DEFINE_RES_IRQ(IRQ_EP93XX_GPIO4MUX), - DEFINE_RES_IRQ(IRQ_EP93XX_GPIO5MUX), - DEFINE_RES_IRQ(IRQ_EP93XX_GPIO6MUX), - DEFINE_RES_IRQ(IRQ_EP93XX_GPIO7MUX), -}; - -static struct platform_device ep93xx_f_gpio = { - .name = "gpio-ep93xx", - .id = 5, - .num_resources = ARRAY_SIZE(ep93xx_f_gpio_resources), - .resource = ep93xx_f_gpio_resources, -}; - -/* port G */ -static struct resource ep93xx_g_gpio_resources[] = { - DEFINE_RES_MEM_NAMED(EP93XX_GPIO_PHYS_BASE + 0x38, 0x04, "data"), - DEFINE_RES_MEM_NAMED(EP93XX_GPIO_PHYS_BASE + 0x3c, 0x04, "dir"), -}; - -static struct platform_device ep93xx_g_gpio = { - .name = "gpio-ep93xx", - .id = 6, - .num_resources = ARRAY_SIZE(ep93xx_g_gpio_resources), - .resource = ep93xx_g_gpio_resources, -}; - -static struct platform_device *ep93xx_gpio_device[] __initdata = { - &ep93xx_a_gpio, - &ep93xx_b_gpio, - &ep93xx_c_gpio, - &ep93xx_d_gpio, - &ep93xx_e_gpio, - &ep93xx_f_gpio, - &ep93xx_g_gpio, -}; - -/************************************************************************* - * EP93xx peripheral handling - *************************************************************************/ -#define EP93XX_UART_MCR_OFFSET (0x0100) - -static void ep93xx_uart_set_mctrl(struct amba_device *dev, - void __iomem *base, unsigned int mctrl) -{ - unsigned int mcr; - - mcr = 0; - if (mctrl & TIOCM_RTS) - mcr |= 2; - if (mctrl & TIOCM_DTR) - mcr |= 1; - - __raw_writel(mcr, base + EP93XX_UART_MCR_OFFSET); -} - -static struct amba_pl010_data ep93xx_uart_data = { - .set_mctrl = ep93xx_uart_set_mctrl, -}; - -static AMBA_APB_DEVICE(uart1, "apb:uart1", 0x00041010, EP93XX_UART1_PHYS_BASE, - { IRQ_EP93XX_UART1 }, &ep93xx_uart_data); - -static AMBA_APB_DEVICE(uart2, "apb:uart2", 0x00041010, EP93XX_UART2_PHYS_BASE, - { IRQ_EP93XX_UART2 }, NULL); - -static AMBA_APB_DEVICE(uart3, "apb:uart3", 0x00041010, EP93XX_UART3_PHYS_BASE, - { IRQ_EP93XX_UART3 }, &ep93xx_uart_data); - -static struct resource ep93xx_rtc_resource[] = { - DEFINE_RES_MEM(EP93XX_RTC_PHYS_BASE, 0x10c), -}; - -static struct platform_device ep93xx_rtc_device = { - .name = "ep93xx-rtc", - .id = -1, - .num_resources = ARRAY_SIZE(ep93xx_rtc_resource), - .resource = ep93xx_rtc_resource, -}; - -/************************************************************************* - * EP93xx OHCI USB Host - *************************************************************************/ - -static struct clk *ep93xx_ohci_host_clock; - -static int ep93xx_ohci_power_on(struct platform_device *pdev) -{ - if (!ep93xx_ohci_host_clock) { - ep93xx_ohci_host_clock = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(ep93xx_ohci_host_clock)) - return PTR_ERR(ep93xx_ohci_host_clock); - } - - return clk_prepare_enable(ep93xx_ohci_host_clock); -} - -static void ep93xx_ohci_power_off(struct platform_device *pdev) -{ - clk_disable(ep93xx_ohci_host_clock); -} - -static struct usb_ohci_pdata ep93xx_ohci_pdata = { - .power_on = ep93xx_ohci_power_on, - .power_off = ep93xx_ohci_power_off, - .power_suspend = ep93xx_ohci_power_off, -}; - -static struct resource ep93xx_ohci_resources[] = { - DEFINE_RES_MEM(EP93XX_USB_PHYS_BASE, 0x1000), - DEFINE_RES_IRQ(IRQ_EP93XX_USB), -}; - -static u64 ep93xx_ohci_dma_mask = DMA_BIT_MASK(32); - -static struct platform_device ep93xx_ohci_device = { - .name = "ohci-platform", - .id = -1, - .num_resources = ARRAY_SIZE(ep93xx_ohci_resources), - .resource = ep93xx_ohci_resources, - .dev = { - .dma_mask = &ep93xx_ohci_dma_mask, - .coherent_dma_mask = DMA_BIT_MASK(32), - .platform_data = &ep93xx_ohci_pdata, - }, -}; - -/************************************************************************* - * EP93xx physmap'ed flash - *************************************************************************/ -static struct physmap_flash_data ep93xx_flash_data; - -static struct resource ep93xx_flash_resource = { - .flags = IORESOURCE_MEM, -}; - -static struct platform_device ep93xx_flash = { - .name = "physmap-flash", - .id = 0, - .dev = { - .platform_data = &ep93xx_flash_data, - }, - .num_resources = 1, - .resource = &ep93xx_flash_resource, -}; - -/** - * ep93xx_register_flash() - Register the external flash device. - * @width: bank width in octets - * @start: resource start address - * @size: resource size - */ -void __init ep93xx_register_flash(unsigned int width, - resource_size_t start, resource_size_t size) -{ - ep93xx_flash_data.width = width; - - ep93xx_flash_resource.start = start; - ep93xx_flash_resource.end = start + size - 1; - - platform_device_register(&ep93xx_flash); -} - - -/************************************************************************* - * EP93xx ethernet peripheral handling - *************************************************************************/ -static struct ep93xx_eth_data ep93xx_eth_data; - -static struct resource ep93xx_eth_resource[] = { - DEFINE_RES_MEM(EP93XX_ETHERNET_PHYS_BASE, 0x10000), - DEFINE_RES_IRQ(IRQ_EP93XX_ETHERNET), -}; - -static u64 ep93xx_eth_dma_mask = DMA_BIT_MASK(32); - -static struct platform_device ep93xx_eth_device = { - .name = "ep93xx-eth", - .id = -1, - .dev = { - .platform_data = &ep93xx_eth_data, - .coherent_dma_mask = DMA_BIT_MASK(32), - .dma_mask = &ep93xx_eth_dma_mask, - }, - .num_resources = ARRAY_SIZE(ep93xx_eth_resource), - .resource = ep93xx_eth_resource, -}; - -/** - * ep93xx_register_eth - Register the built-in ethernet platform device. - * @data: platform specific ethernet configuration (__initdata) - * @copy_addr: flag indicating that the MAC address should be copied - * from the IndAd registers (as programmed by the bootloader) - */ -void __init ep93xx_register_eth(struct ep93xx_eth_data *data, int copy_addr) -{ - if (copy_addr) - memcpy_fromio(data->dev_addr, EP93XX_ETHERNET_BASE + 0x50, 6); - - ep93xx_eth_data = *data; - platform_device_register(&ep93xx_eth_device); -} - - -/************************************************************************* - * EP93xx i2c peripheral handling - *************************************************************************/ - -/* All EP93xx devices use the same two GPIO pins for I2C bit-banging */ -static struct gpiod_lookup_table ep93xx_i2c_gpiod_table = { - .dev_id = "i2c-gpio.0", - .table = { - /* Use local offsets on gpiochip/port "G" */ - GPIO_LOOKUP_IDX("gpio-ep93xx.6", 1, NULL, 0, - GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), - GPIO_LOOKUP_IDX("gpio-ep93xx.6", 0, NULL, 1, - GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), - { } - }, -}; - -static struct platform_device ep93xx_i2c_device = { - .name = "i2c-gpio", - .id = 0, - .dev = { - .platform_data = NULL, - }, -}; - -/** - * ep93xx_register_i2c - Register the i2c platform device. - * @devices: platform specific i2c bus device information (__initdata) - * @num: the number of devices on the i2c bus - */ -void __init ep93xx_register_i2c(struct i2c_board_info *devices, int num) -{ - /* - * FIXME: this just sets the two pins as non-opendrain, as no - * platforms tries to do that anyway. Flag the applicable lines - * as open drain in the GPIO_LOOKUP above and the driver or - * gpiolib will handle open drain/open drain emulation as need - * be. Right now i2c-gpio emulates open drain which is not - * optimal. - */ - __raw_writel((0 << 1) | (0 << 0), - EP93XX_GPIO_EEDRIVE); - - i2c_register_board_info(0, devices, num); - gpiod_add_lookup_table(&ep93xx_i2c_gpiod_table); - platform_device_register(&ep93xx_i2c_device); -} - -/************************************************************************* - * EP93xx SPI peripheral handling - *************************************************************************/ -static struct ep93xx_spi_info ep93xx_spi_master_data; - -static struct resource ep93xx_spi_resources[] = { - DEFINE_RES_MEM(EP93XX_SPI_PHYS_BASE, 0x18), - DEFINE_RES_IRQ(IRQ_EP93XX_SSP), -}; - -static u64 ep93xx_spi_dma_mask = DMA_BIT_MASK(32); - -static struct platform_device ep93xx_spi_device = { - .name = "ep93xx-spi", - .id = 0, - .dev = { - .platform_data = &ep93xx_spi_master_data, - .coherent_dma_mask = DMA_BIT_MASK(32), - .dma_mask = &ep93xx_spi_dma_mask, - }, - .num_resources = ARRAY_SIZE(ep93xx_spi_resources), - .resource = ep93xx_spi_resources, -}; - -/** - * ep93xx_register_spi() - registers spi platform device - * @info: ep93xx board specific spi master info (__initdata) - * @devices: SPI devices to register (__initdata) - * @num: number of SPI devices to register - * - * This function registers platform device for the EP93xx SPI controller and - * also makes sure that SPI pins are muxed so that I2S is not using those pins. - */ -void __init ep93xx_register_spi(struct ep93xx_spi_info *info, - struct spi_board_info *devices, int num) -{ - /* - * When SPI is used, we need to make sure that I2S is muxed off from - * SPI pins. - */ - ep93xx_devcfg_clear_bits(EP93XX_SYSCON_DEVCFG_I2SONSSP); - - ep93xx_spi_master_data = *info; - spi_register_board_info(devices, num); - platform_device_register(&ep93xx_spi_device); -} - -/************************************************************************* - * EP93xx LEDs - *************************************************************************/ -static const struct gpio_led ep93xx_led_pins[] __initconst = { - { - .name = "platform:grled", - }, { - .name = "platform:rdled", - }, -}; - -static const struct gpio_led_platform_data ep93xx_led_data __initconst = { - .num_leds = ARRAY_SIZE(ep93xx_led_pins), - .leds = ep93xx_led_pins, -}; - -static struct gpiod_lookup_table ep93xx_leds_gpio_table = { - .dev_id = "leds-gpio", - .table = { - /* Use local offsets on gpiochip/port "E" */ - GPIO_LOOKUP_IDX("gpio-ep93xx.4", 0, NULL, 0, GPIO_ACTIVE_HIGH), - GPIO_LOOKUP_IDX("gpio-ep93xx.4", 1, NULL, 1, GPIO_ACTIVE_HIGH), - { } - }, -}; - -/************************************************************************* - * EP93xx pwm peripheral handling - *************************************************************************/ -static struct resource ep93xx_pwm0_resource[] = { - DEFINE_RES_MEM(EP93XX_PWM_PHYS_BASE, 0x10), -}; - -static struct platform_device ep93xx_pwm0_device = { - .name = "ep93xx-pwm", - .id = 0, - .num_resources = ARRAY_SIZE(ep93xx_pwm0_resource), - .resource = ep93xx_pwm0_resource, -}; - -static struct resource ep93xx_pwm1_resource[] = { - DEFINE_RES_MEM(EP93XX_PWM_PHYS_BASE + 0x20, 0x10), -}; - -static struct platform_device ep93xx_pwm1_device = { - .name = "ep93xx-pwm", - .id = 1, - .num_resources = ARRAY_SIZE(ep93xx_pwm1_resource), - .resource = ep93xx_pwm1_resource, -}; - -void __init ep93xx_register_pwm(int pwm0, int pwm1) -{ - if (pwm0) - platform_device_register(&ep93xx_pwm0_device); - - /* NOTE: EP9307 does not have PWMOUT1 (pin EGPIO14) */ - if (pwm1) - platform_device_register(&ep93xx_pwm1_device); -} - -/************************************************************************* - * EP93xx video peripheral handling - *************************************************************************/ -static struct ep93xxfb_mach_info ep93xxfb_data; - -static struct resource ep93xx_fb_resource[] = { - DEFINE_RES_MEM(EP93XX_RASTER_PHYS_BASE, 0x800), -}; - -static struct platform_device ep93xx_fb_device = { - .name = "ep93xx-fb", - .id = -1, - .dev = { - .platform_data = &ep93xxfb_data, - .coherent_dma_mask = DMA_BIT_MASK(32), - .dma_mask = &ep93xx_fb_device.dev.coherent_dma_mask, - }, - .num_resources = ARRAY_SIZE(ep93xx_fb_resource), - .resource = ep93xx_fb_resource, -}; - -/* The backlight use a single register in the framebuffer's register space */ -#define EP93XX_RASTER_REG_BRIGHTNESS 0x20 - -static struct resource ep93xx_bl_resources[] = { - DEFINE_RES_MEM(EP93XX_RASTER_PHYS_BASE + - EP93XX_RASTER_REG_BRIGHTNESS, 0x04), -}; - -static struct platform_device ep93xx_bl_device = { - .name = "ep93xx-bl", - .id = -1, - .num_resources = ARRAY_SIZE(ep93xx_bl_resources), - .resource = ep93xx_bl_resources, -}; - -/** - * ep93xx_register_fb - Register the framebuffer platform device. - * @data: platform specific framebuffer configuration (__initdata) - */ -void __init ep93xx_register_fb(struct ep93xxfb_mach_info *data) -{ - ep93xxfb_data = *data; - platform_device_register(&ep93xx_fb_device); - platform_device_register(&ep93xx_bl_device); -} - - -/************************************************************************* - * EP93xx matrix keypad peripheral handling - *************************************************************************/ -static struct ep93xx_keypad_platform_data ep93xx_keypad_data; - -static struct resource ep93xx_keypad_resource[] = { - DEFINE_RES_MEM(EP93XX_KEY_MATRIX_PHYS_BASE, 0x0c), - DEFINE_RES_IRQ(IRQ_EP93XX_KEY), -}; - -static struct platform_device ep93xx_keypad_device = { - .name = "ep93xx-keypad", - .id = -1, - .dev = { - .platform_data = &ep93xx_keypad_data, - }, - .num_resources = ARRAY_SIZE(ep93xx_keypad_resource), - .resource = ep93xx_keypad_resource, -}; - -/** - * ep93xx_register_keypad - Register the keypad platform device. - * @data: platform specific keypad configuration (__initdata) - */ -void __init ep93xx_register_keypad(struct ep93xx_keypad_platform_data *data) -{ - ep93xx_keypad_data = *data; - platform_device_register(&ep93xx_keypad_device); -} - -/************************************************************************* - * EP93xx I2S audio peripheral handling - *************************************************************************/ -static struct resource ep93xx_i2s_resource[] = { - DEFINE_RES_MEM(EP93XX_I2S_PHYS_BASE, 0x100), - DEFINE_RES_IRQ(IRQ_EP93XX_SAI), -}; - -static struct platform_device ep93xx_i2s_device = { - .name = "ep93xx-i2s", - .id = -1, - .num_resources = ARRAY_SIZE(ep93xx_i2s_resource), - .resource = ep93xx_i2s_resource, -}; - -static struct platform_device ep93xx_pcm_device = { - .name = "ep93xx-pcm-audio", - .id = -1, -}; - -void __init ep93xx_register_i2s(void) -{ - platform_device_register(&ep93xx_i2s_device); - platform_device_register(&ep93xx_pcm_device); -} - -#define EP93XX_SYSCON_DEVCFG_I2S_MASK (EP93XX_SYSCON_DEVCFG_I2SONSSP | \ - EP93XX_SYSCON_DEVCFG_I2SONAC97) - -#define EP93XX_I2SCLKDIV_MASK (EP93XX_SYSCON_I2SCLKDIV_ORIDE | \ - EP93XX_SYSCON_I2SCLKDIV_SPOL) - -int ep93xx_i2s_acquire(void) -{ - unsigned val; - - ep93xx_devcfg_set_clear(EP93XX_SYSCON_DEVCFG_I2SONAC97, - EP93XX_SYSCON_DEVCFG_I2S_MASK); - - /* - * This is potentially racy with the clock api for i2s_mclk, sclk and - * lrclk. Since the i2s driver is the only user of those clocks we - * rely on it to prevent parallel use of this function and the - * clock api for the i2s clocks. - */ - val = __raw_readl(EP93XX_SYSCON_I2SCLKDIV); - val &= ~EP93XX_I2SCLKDIV_MASK; - val |= EP93XX_SYSCON_I2SCLKDIV_ORIDE | EP93XX_SYSCON_I2SCLKDIV_SPOL; - ep93xx_syscon_swlocked_write(val, EP93XX_SYSCON_I2SCLKDIV); - - return 0; -} -EXPORT_SYMBOL(ep93xx_i2s_acquire); - -void ep93xx_i2s_release(void) -{ - ep93xx_devcfg_clear_bits(EP93XX_SYSCON_DEVCFG_I2S_MASK); -} -EXPORT_SYMBOL(ep93xx_i2s_release); - -/************************************************************************* - * EP93xx AC97 audio peripheral handling - *************************************************************************/ -static struct resource ep93xx_ac97_resources[] = { - DEFINE_RES_MEM(EP93XX_AAC_PHYS_BASE, 0xac), - DEFINE_RES_IRQ(IRQ_EP93XX_AACINTR), -}; - -static struct platform_device ep93xx_ac97_device = { - .name = "ep93xx-ac97", - .id = -1, - .num_resources = ARRAY_SIZE(ep93xx_ac97_resources), - .resource = ep93xx_ac97_resources, -}; - -void __init ep93xx_register_ac97(void) -{ - /* - * Make sure that the AC97 pins are not used by I2S. - */ - ep93xx_devcfg_clear_bits(EP93XX_SYSCON_DEVCFG_I2SONAC97); - - platform_device_register(&ep93xx_ac97_device); - platform_device_register(&ep93xx_pcm_device); -} - -/************************************************************************* - * EP93xx Watchdog - *************************************************************************/ -static struct resource ep93xx_wdt_resources[] = { - DEFINE_RES_MEM(EP93XX_WATCHDOG_PHYS_BASE, 0x08), -}; - -static struct platform_device ep93xx_wdt_device = { - .name = "ep93xx-wdt", - .id = -1, - .num_resources = ARRAY_SIZE(ep93xx_wdt_resources), - .resource = ep93xx_wdt_resources, -}; - -/************************************************************************* - * EP93xx IDE - *************************************************************************/ -static struct resource ep93xx_ide_resources[] = { - DEFINE_RES_MEM(EP93XX_IDE_PHYS_BASE, 0x38), - DEFINE_RES_IRQ(IRQ_EP93XX_EXT3), -}; - -static struct platform_device ep93xx_ide_device = { - .name = "ep93xx-ide", - .id = -1, - .dev = { - .dma_mask = &ep93xx_ide_device.dev.coherent_dma_mask, - .coherent_dma_mask = DMA_BIT_MASK(32), - }, - .num_resources = ARRAY_SIZE(ep93xx_ide_resources), - .resource = ep93xx_ide_resources, -}; - -void __init ep93xx_register_ide(void) -{ - platform_device_register(&ep93xx_ide_device); -} - -/************************************************************************* - * EP93xx ADC - *************************************************************************/ -static struct resource ep93xx_adc_resources[] = { - DEFINE_RES_MEM(EP93XX_ADC_PHYS_BASE, 0x28), - DEFINE_RES_IRQ(IRQ_EP93XX_TOUCH), -}; - -static struct platform_device ep93xx_adc_device = { - .name = "ep93xx-adc", - .id = -1, - .num_resources = ARRAY_SIZE(ep93xx_adc_resources), - .resource = ep93xx_adc_resources, -}; - -void __init ep93xx_register_adc(void) -{ - /* Power up ADC, deactivate Touch Screen Controller */ - ep93xx_devcfg_set_clear(EP93XX_SYSCON_DEVCFG_TIN, - EP93XX_SYSCON_DEVCFG_ADCPD); - - platform_device_register(&ep93xx_adc_device); -} - -/************************************************************************* - * EP93xx Security peripheral - *************************************************************************/ - -/* - * The Maverick Key is 256 bits of micro fuses blown at the factory during - * manufacturing to uniquely identify a part. - * - * See: http://arm.cirrus.com/forum/viewtopic.php?t=486&highlight=maverick+key - */ -#define EP93XX_SECURITY_REG(x) (EP93XX_SECURITY_BASE + (x)) -#define EP93XX_SECURITY_SECFLG EP93XX_SECURITY_REG(0x2400) -#define EP93XX_SECURITY_FUSEFLG EP93XX_SECURITY_REG(0x2410) -#define EP93XX_SECURITY_UNIQID EP93XX_SECURITY_REG(0x2440) -#define EP93XX_SECURITY_UNIQCHK EP93XX_SECURITY_REG(0x2450) -#define EP93XX_SECURITY_UNIQVAL EP93XX_SECURITY_REG(0x2460) -#define EP93XX_SECURITY_SECID1 EP93XX_SECURITY_REG(0x2500) -#define EP93XX_SECURITY_SECID2 EP93XX_SECURITY_REG(0x2504) -#define EP93XX_SECURITY_SECCHK1 EP93XX_SECURITY_REG(0x2520) -#define EP93XX_SECURITY_SECCHK2 EP93XX_SECURITY_REG(0x2524) -#define EP93XX_SECURITY_UNIQID2 EP93XX_SECURITY_REG(0x2700) -#define EP93XX_SECURITY_UNIQID3 EP93XX_SECURITY_REG(0x2704) -#define EP93XX_SECURITY_UNIQID4 EP93XX_SECURITY_REG(0x2708) -#define EP93XX_SECURITY_UNIQID5 EP93XX_SECURITY_REG(0x270c) - -static char ep93xx_soc_id[33]; - -static const char __init *ep93xx_get_soc_id(void) -{ - unsigned int id, id2, id3, id4, id5; - - if (__raw_readl(EP93XX_SECURITY_UNIQVAL) != 1) - return "bad Hamming code"; - - id = __raw_readl(EP93XX_SECURITY_UNIQID); - id2 = __raw_readl(EP93XX_SECURITY_UNIQID2); - id3 = __raw_readl(EP93XX_SECURITY_UNIQID3); - id4 = __raw_readl(EP93XX_SECURITY_UNIQID4); - id5 = __raw_readl(EP93XX_SECURITY_UNIQID5); - - if (id != id2) - return "invalid"; - - /* Toss the unique ID into the entropy pool */ - add_device_randomness(&id2, 4); - add_device_randomness(&id3, 4); - add_device_randomness(&id4, 4); - add_device_randomness(&id5, 4); - - snprintf(ep93xx_soc_id, sizeof(ep93xx_soc_id), - "%08x%08x%08x%08x", id2, id3, id4, id5); - - return ep93xx_soc_id; -} - -static const char __init *ep93xx_get_soc_rev(void) -{ - int rev = ep93xx_chip_revision(); - - switch (rev) { - case EP93XX_CHIP_REV_D0: - return "D0"; - case EP93XX_CHIP_REV_D1: - return "D1"; - case EP93XX_CHIP_REV_E0: - return "E0"; - case EP93XX_CHIP_REV_E1: - return "E1"; - case EP93XX_CHIP_REV_E2: - return "E2"; - default: - return "unknown"; - } -} - -static const char __init *ep93xx_get_machine_name(void) -{ - return kasprintf(GFP_KERNEL,"%s", machine_desc->name); -} - -static struct device __init *ep93xx_init_soc(void) -{ - struct soc_device_attribute *soc_dev_attr; - struct soc_device *soc_dev; - - soc_dev_attr = kzalloc(sizeof(*soc_dev_attr), GFP_KERNEL); - if (!soc_dev_attr) - return NULL; - - soc_dev_attr->machine = ep93xx_get_machine_name(); - soc_dev_attr->family = "Cirrus Logic EP93xx"; - soc_dev_attr->revision = ep93xx_get_soc_rev(); - soc_dev_attr->soc_id = ep93xx_get_soc_id(); - - soc_dev = soc_device_register(soc_dev_attr); - if (IS_ERR(soc_dev)) { - kfree(soc_dev_attr->machine); - kfree(soc_dev_attr); - return NULL; - } - - return soc_device_to_device(soc_dev); -} - -struct device __init *ep93xx_init_devices(void) -{ - struct device *parent; - unsigned int i; - - /* Disallow access to MaverickCrunch initially */ - ep93xx_devcfg_clear_bits(EP93XX_SYSCON_DEVCFG_CPENA); - - /* Default all ports to GPIO */ - ep93xx_devcfg_set_bits(EP93XX_SYSCON_DEVCFG_KEYS | - EP93XX_SYSCON_DEVCFG_GONK | - EP93XX_SYSCON_DEVCFG_EONIDE | - EP93XX_SYSCON_DEVCFG_GONIDE | - EP93XX_SYSCON_DEVCFG_HONIDE); - - parent = ep93xx_init_soc(); - - /* Get the GPIO working early, other devices need it */ - for (i = 0; i < ARRAY_SIZE(ep93xx_gpio_device); i++) - platform_device_register(ep93xx_gpio_device[i]); - - amba_device_register(&uart1_device, &iomem_resource); - amba_device_register(&uart2_device, &iomem_resource); - amba_device_register(&uart3_device, &iomem_resource); - - platform_device_register(&ep93xx_rtc_device); - platform_device_register(&ep93xx_ohci_device); - platform_device_register(&ep93xx_wdt_device); - - gpiod_add_lookup_table(&ep93xx_leds_gpio_table); - gpio_led_register_device(-1, &ep93xx_led_data); - - return parent; -} - -void ep93xx_restart(enum reboot_mode mode, const char *cmd) -{ - /* - * Set then clear the SWRST bit to initiate a software reset - */ - ep93xx_devcfg_set_bits(EP93XX_SYSCON_DEVCFG_SWRST); - ep93xx_devcfg_clear_bits(EP93XX_SYSCON_DEVCFG_SWRST); - - while (1) - ; -} diff --git a/arch/arm/mach-ep93xx/dma.c b/arch/arm/mach-ep93xx/dma.c deleted file mode 100644 index 74515acab8ef..000000000000 --- a/arch/arm/mach-ep93xx/dma.c +++ /dev/null @@ -1,114 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * arch/arm/mach-ep93xx/dma.c - * - * Platform support code for the EP93xx dmaengine driver. - * - * Copyright (C) 2011 Mika Westerberg - * - * This work is based on the original dma-m2p implementation with - * following copyrights: - * - * Copyright (C) 2006 Lennert Buytenhek - * Copyright (C) 2006 Applied Data Systems - * Copyright (C) 2009 Ryan Mallon - */ - -#include -#include -#include -#include -#include -#include - -#include -#include "hardware.h" - -#include "soc.h" - -#define DMA_CHANNEL(_name, _base, _irq) \ - { .name = (_name), .base = (_base), .irq = (_irq) } - -/* - * DMA M2P channels. - * - * On the EP93xx chip the following peripherals my be allocated to the 10 - * Memory to Internal Peripheral (M2P) channels (5 transmit + 5 receive). - * - * I2S contains 3 Tx and 3 Rx DMA Channels - * AAC contains 3 Tx and 3 Rx DMA Channels - * UART1 contains 1 Tx and 1 Rx DMA Channels - * UART2 contains 1 Tx and 1 Rx DMA Channels - * UART3 contains 1 Tx and 1 Rx DMA Channels - * IrDA contains 1 Tx and 1 Rx DMA Channels - * - * Registers are mapped statically in ep93xx_map_io(). - */ -static struct ep93xx_dma_chan_data ep93xx_dma_m2p_channels[] = { - DMA_CHANNEL("m2p0", EP93XX_DMA_BASE + 0x0000, IRQ_EP93XX_DMAM2P0), - DMA_CHANNEL("m2p1", EP93XX_DMA_BASE + 0x0040, IRQ_EP93XX_DMAM2P1), - DMA_CHANNEL("m2p2", EP93XX_DMA_BASE + 0x0080, IRQ_EP93XX_DMAM2P2), - DMA_CHANNEL("m2p3", EP93XX_DMA_BASE + 0x00c0, IRQ_EP93XX_DMAM2P3), - DMA_CHANNEL("m2p4", EP93XX_DMA_BASE + 0x0240, IRQ_EP93XX_DMAM2P4), - DMA_CHANNEL("m2p5", EP93XX_DMA_BASE + 0x0200, IRQ_EP93XX_DMAM2P5), - DMA_CHANNEL("m2p6", EP93XX_DMA_BASE + 0x02c0, IRQ_EP93XX_DMAM2P6), - DMA_CHANNEL("m2p7", EP93XX_DMA_BASE + 0x0280, IRQ_EP93XX_DMAM2P7), - DMA_CHANNEL("m2p8", EP93XX_DMA_BASE + 0x0340, IRQ_EP93XX_DMAM2P8), - DMA_CHANNEL("m2p9", EP93XX_DMA_BASE + 0x0300, IRQ_EP93XX_DMAM2P9), -}; - -static struct ep93xx_dma_platform_data ep93xx_dma_m2p_data = { - .channels = ep93xx_dma_m2p_channels, - .num_channels = ARRAY_SIZE(ep93xx_dma_m2p_channels), -}; - -static u64 ep93xx_dma_m2p_mask = DMA_BIT_MASK(32); - -static struct platform_device ep93xx_dma_m2p_device = { - .name = "ep93xx-dma-m2p", - .id = -1, - .dev = { - .platform_data = &ep93xx_dma_m2p_data, - .dma_mask = &ep93xx_dma_m2p_mask, - .coherent_dma_mask = DMA_BIT_MASK(32), - }, -}; - -/* - * DMA M2M channels. - * - * There are 2 M2M channels which support memcpy/memset and in addition simple - * hardware requests from/to SSP and IDE. We do not implement an external - * hardware requests. - * - * Registers are mapped statically in ep93xx_map_io(). - */ -static struct ep93xx_dma_chan_data ep93xx_dma_m2m_channels[] = { - DMA_CHANNEL("m2m0", EP93XX_DMA_BASE + 0x0100, IRQ_EP93XX_DMAM2M0), - DMA_CHANNEL("m2m1", EP93XX_DMA_BASE + 0x0140, IRQ_EP93XX_DMAM2M1), -}; - -static struct ep93xx_dma_platform_data ep93xx_dma_m2m_data = { - .channels = ep93xx_dma_m2m_channels, - .num_channels = ARRAY_SIZE(ep93xx_dma_m2m_channels), -}; - -static u64 ep93xx_dma_m2m_mask = DMA_BIT_MASK(32); - -static struct platform_device ep93xx_dma_m2m_device = { - .name = "ep93xx-dma-m2m", - .id = -1, - .dev = { - .platform_data = &ep93xx_dma_m2m_data, - .dma_mask = &ep93xx_dma_m2m_mask, - .coherent_dma_mask = DMA_BIT_MASK(32), - }, -}; - -static int __init ep93xx_dma_init(void) -{ - platform_device_register(&ep93xx_dma_m2p_device); - platform_device_register(&ep93xx_dma_m2m_device); - return 0; -} -arch_initcall(ep93xx_dma_init); diff --git a/arch/arm/mach-ep93xx/edb93xx.c b/arch/arm/mach-ep93xx/edb93xx.c deleted file mode 100644 index 356b0460c7ed..000000000000 --- a/arch/arm/mach-ep93xx/edb93xx.c +++ /dev/null @@ -1,368 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * arch/arm/mach-ep93xx/edb93xx.c - * Cirrus Logic EDB93xx Development Board support. - * - * EDB93XX, EDB9301, EDB9307A - * Copyright (C) 2008-2009 H Hartley Sweeten - * - * EDB9302 - * Copyright (C) 2006 George Kashperko - * - * EDB9302A, EDB9315, EDB9315A - * Copyright (C) 2006 Lennert Buytenhek - * - * EDB9307 - * Copyright (C) 2007 Herbert Valerio Riedel - * - * EDB9312 - * Copyright (C) 2006 Infosys Technologies Limited - * Toufeeq Hussain - */ - -#include -#include -#include -#include -#include -#include - -#include - -#include "hardware.h" -#include -#include -#include "gpio-ep93xx.h" - -#include -#include - -#include "soc.h" - -static void __init edb93xx_register_flash(void) -{ - if (machine_is_edb9307() || machine_is_edb9312() || - machine_is_edb9315()) { - ep93xx_register_flash(4, EP93XX_CS6_PHYS_BASE, SZ_32M); - } else { - ep93xx_register_flash(2, EP93XX_CS6_PHYS_BASE, SZ_16M); - } -} - -static struct ep93xx_eth_data __initdata edb93xx_eth_data = { - .phy_id = 1, -}; - - -/************************************************************************* - * EDB93xx i2c peripheral handling - *************************************************************************/ - -static struct i2c_board_info __initdata edb93xxa_i2c_board_info[] = { - { - I2C_BOARD_INFO("isl1208", 0x6f), - }, -}; - -static struct i2c_board_info __initdata edb93xx_i2c_board_info[] = { - { - I2C_BOARD_INFO("ds1337", 0x68), - }, -}; - -static void __init edb93xx_register_i2c(void) -{ - if (machine_is_edb9302a() || machine_is_edb9307a() || - machine_is_edb9315a()) { - ep93xx_register_i2c(edb93xxa_i2c_board_info, - ARRAY_SIZE(edb93xxa_i2c_board_info)); - } else if (machine_is_edb9302() || machine_is_edb9307() - || machine_is_edb9312() || machine_is_edb9315()) { - ep93xx_register_i2c(edb93xx_i2c_board_info, - ARRAY_SIZE(edb93xx_i2c_board_info)); - } -} - - -/************************************************************************* - * EDB93xx SPI peripheral handling - *************************************************************************/ -static struct cs4271_platform_data edb93xx_cs4271_data = { - /* Intentionally left blank */ -}; - -static struct spi_board_info edb93xx_spi_board_info[] __initdata = { - { - .modalias = "cs4271", - .platform_data = &edb93xx_cs4271_data, - .max_speed_hz = 6000000, - .bus_num = 0, - .chip_select = 0, - .mode = SPI_MODE_3, - }, -}; - -static struct gpiod_lookup_table edb93xx_spi_cs_gpio_table = { - .dev_id = "spi0", - .table = { - GPIO_LOOKUP("gpio-ep93xx.0", 6, "cs", GPIO_ACTIVE_LOW), - { }, - }, -}; - -static struct ep93xx_spi_info edb93xx_spi_info __initdata = { - /* Intentionally left blank */ -}; - -static struct gpiod_lookup_table edb93xx_cs4272_edb9301_gpio_table = { - .dev_id = "spi0.0", /* CS0 on SPI0 */ - .table = { - GPIO_LOOKUP("A", 1, "reset", GPIO_ACTIVE_LOW), - { }, - }, -}; - -static struct gpiod_lookup_table edb93xx_cs4272_edb9302_gpio_table = { - .dev_id = "spi0.0", /* CS0 on SPI0 */ - .table = { - GPIO_LOOKUP("H", 2, "reset", GPIO_ACTIVE_LOW), - { }, - }, -}; - -static struct gpiod_lookup_table edb93xx_cs4272_edb9315_gpio_table = { - .dev_id = "spi0.0", /* CS0 on SPI0 */ - .table = { - GPIO_LOOKUP("B", 6, "reset", GPIO_ACTIVE_LOW), - { }, - }, -}; - -static void __init edb93xx_register_spi(void) -{ - if (machine_is_edb9301() || machine_is_edb9302()) - gpiod_add_lookup_table(&edb93xx_cs4272_edb9301_gpio_table); - else if (machine_is_edb9302a() || machine_is_edb9307a()) - gpiod_add_lookup_table(&edb93xx_cs4272_edb9302_gpio_table); - else if (machine_is_edb9315a()) - gpiod_add_lookup_table(&edb93xx_cs4272_edb9315_gpio_table); - - gpiod_add_lookup_table(&edb93xx_spi_cs_gpio_table); - ep93xx_register_spi(&edb93xx_spi_info, edb93xx_spi_board_info, - ARRAY_SIZE(edb93xx_spi_board_info)); -} - - -/************************************************************************* - * EDB93xx I2S - *************************************************************************/ -static struct platform_device edb93xx_audio_device = { - .name = "edb93xx-audio", - .id = -1, -}; - -static int __init edb93xx_has_audio(void) -{ - return (machine_is_edb9301() || machine_is_edb9302() || - machine_is_edb9302a() || machine_is_edb9307a() || - machine_is_edb9315a()); -} - -static void __init edb93xx_register_i2s(void) -{ - if (edb93xx_has_audio()) { - ep93xx_register_i2s(); - platform_device_register(&edb93xx_audio_device); - } -} - - -/************************************************************************* - * EDB93xx pwm - *************************************************************************/ -static void __init edb93xx_register_pwm(void) -{ - if (machine_is_edb9301() || - machine_is_edb9302() || machine_is_edb9302a()) { - /* EP9301 and EP9302 only have pwm.1 (EGPIO14) */ - ep93xx_register_pwm(0, 1); - } else if (machine_is_edb9307() || machine_is_edb9307a()) { - /* EP9307 only has pwm.0 (PWMOUT) */ - ep93xx_register_pwm(1, 0); - } else { - /* EP9312 and EP9315 have both */ - ep93xx_register_pwm(1, 1); - } -} - - -/************************************************************************* - * EDB93xx framebuffer - *************************************************************************/ -static struct ep93xxfb_mach_info __initdata edb93xxfb_info = { - .flags = 0, -}; - -static int __init edb93xx_has_fb(void) -{ - /* These platforms have an ep93xx with video capability */ - return machine_is_edb9307() || machine_is_edb9307a() || - machine_is_edb9312() || machine_is_edb9315() || - machine_is_edb9315a(); -} - -static void __init edb93xx_register_fb(void) -{ - if (!edb93xx_has_fb()) - return; - - if (machine_is_edb9307a() || machine_is_edb9315a()) - edb93xxfb_info.flags |= EP93XXFB_USE_SDCSN0; - else - edb93xxfb_info.flags |= EP93XXFB_USE_SDCSN3; - - ep93xx_register_fb(&edb93xxfb_info); -} - - -/************************************************************************* - * EDB93xx IDE - *************************************************************************/ -static int __init edb93xx_has_ide(void) -{ - /* - * Although EDB9312 and EDB9315 do have IDE capability, they have - * INTRQ line wired as pull-up, which makes using IDE interface - * problematic. - */ - return machine_is_edb9312() || machine_is_edb9315() || - machine_is_edb9315a(); -} - -static void __init edb93xx_register_ide(void) -{ - if (!edb93xx_has_ide()) - return; - - ep93xx_register_ide(); -} - - -static void __init edb93xx_init_machine(void) -{ - ep93xx_init_devices(); - edb93xx_register_flash(); - ep93xx_register_eth(&edb93xx_eth_data, 1); - edb93xx_register_i2c(); - edb93xx_register_spi(); - edb93xx_register_i2s(); - edb93xx_register_pwm(); - edb93xx_register_fb(); - edb93xx_register_ide(); - ep93xx_register_adc(); -} - - -#ifdef CONFIG_MACH_EDB9301 -MACHINE_START(EDB9301, "Cirrus Logic EDB9301 Evaluation Board") - /* Maintainer: H Hartley Sweeten */ - .atag_offset = 0x100, - .nr_irqs = NR_EP93XX_IRQS, - .map_io = ep93xx_map_io, - .init_irq = ep93xx_init_irq, - .init_time = ep93xx_timer_init, - .init_machine = edb93xx_init_machine, - .restart = ep93xx_restart, -MACHINE_END -#endif - -#ifdef CONFIG_MACH_EDB9302 -MACHINE_START(EDB9302, "Cirrus Logic EDB9302 Evaluation Board") - /* Maintainer: George Kashperko */ - .atag_offset = 0x100, - .nr_irqs = NR_EP93XX_IRQS, - .map_io = ep93xx_map_io, - .init_irq = ep93xx_init_irq, - .init_time = ep93xx_timer_init, - .init_machine = edb93xx_init_machine, - .restart = ep93xx_restart, -MACHINE_END -#endif - -#ifdef CONFIG_MACH_EDB9302A -MACHINE_START(EDB9302A, "Cirrus Logic EDB9302A Evaluation Board") - /* Maintainer: Lennert Buytenhek */ - .atag_offset = 0x100, - .nr_irqs = NR_EP93XX_IRQS, - .map_io = ep93xx_map_io, - .init_irq = ep93xx_init_irq, - .init_time = ep93xx_timer_init, - .init_machine = edb93xx_init_machine, - .restart = ep93xx_restart, -MACHINE_END -#endif - -#ifdef CONFIG_MACH_EDB9307 -MACHINE_START(EDB9307, "Cirrus Logic EDB9307 Evaluation Board") - /* Maintainer: Herbert Valerio Riedel */ - .atag_offset = 0x100, - .nr_irqs = NR_EP93XX_IRQS, - .map_io = ep93xx_map_io, - .init_irq = ep93xx_init_irq, - .init_time = ep93xx_timer_init, - .init_machine = edb93xx_init_machine, - .restart = ep93xx_restart, -MACHINE_END -#endif - -#ifdef CONFIG_MACH_EDB9307A -MACHINE_START(EDB9307A, "Cirrus Logic EDB9307A Evaluation Board") - /* Maintainer: H Hartley Sweeten */ - .atag_offset = 0x100, - .nr_irqs = NR_EP93XX_IRQS, - .map_io = ep93xx_map_io, - .init_irq = ep93xx_init_irq, - .init_time = ep93xx_timer_init, - .init_machine = edb93xx_init_machine, - .restart = ep93xx_restart, -MACHINE_END -#endif - -#ifdef CONFIG_MACH_EDB9312 -MACHINE_START(EDB9312, "Cirrus Logic EDB9312 Evaluation Board") - /* Maintainer: Toufeeq Hussain */ - .atag_offset = 0x100, - .nr_irqs = NR_EP93XX_IRQS, - .map_io = ep93xx_map_io, - .init_irq = ep93xx_init_irq, - .init_time = ep93xx_timer_init, - .init_machine = edb93xx_init_machine, - .restart = ep93xx_restart, -MACHINE_END -#endif - -#ifdef CONFIG_MACH_EDB9315 -MACHINE_START(EDB9315, "Cirrus Logic EDB9315 Evaluation Board") - /* Maintainer: Lennert Buytenhek */ - .atag_offset = 0x100, - .nr_irqs = NR_EP93XX_IRQS, - .map_io = ep93xx_map_io, - .init_irq = ep93xx_init_irq, - .init_time = ep93xx_timer_init, - .init_machine = edb93xx_init_machine, - .restart = ep93xx_restart, -MACHINE_END -#endif - -#ifdef CONFIG_MACH_EDB9315A -MACHINE_START(EDB9315A, "Cirrus Logic EDB9315A Evaluation Board") - /* Maintainer: Lennert Buytenhek */ - .atag_offset = 0x100, - .nr_irqs = NR_EP93XX_IRQS, - .map_io = ep93xx_map_io, - .init_irq = ep93xx_init_irq, - .init_time = ep93xx_timer_init, - .init_machine = edb93xx_init_machine, - .restart = ep93xx_restart, -MACHINE_END -#endif diff --git a/arch/arm/mach-ep93xx/ep93xx-regs.h b/arch/arm/mach-ep93xx/ep93xx-regs.h deleted file mode 100644 index 8fa3646de0a4..000000000000 --- a/arch/arm/mach-ep93xx/ep93xx-regs.h +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_ARCH_EP93XX_REGS_H -#define __ASM_ARCH_EP93XX_REGS_H - -/* - * EP93xx linux memory map: - * - * virt phys size - * fe800000 5M per-platform mappings - * fed00000 80800000 2M APB - * fef00000 80000000 1M AHB - */ - -#define EP93XX_AHB_PHYS_BASE 0x80000000 -#define EP93XX_AHB_VIRT_BASE 0xfef00000 -#define EP93XX_AHB_SIZE 0x00100000 - -#define EP93XX_AHB_PHYS(x) (EP93XX_AHB_PHYS_BASE + (x)) -#define EP93XX_AHB_IOMEM(x) IOMEM(EP93XX_AHB_VIRT_BASE + (x)) - -#define EP93XX_APB_PHYS_BASE 0x80800000 -#define EP93XX_APB_VIRT_BASE 0xfed00000 -#define EP93XX_APB_SIZE 0x00200000 - -#define EP93XX_APB_PHYS(x) (EP93XX_APB_PHYS_BASE + (x)) -#define EP93XX_APB_IOMEM(x) IOMEM(EP93XX_APB_VIRT_BASE + (x)) - -/* APB UARTs */ -#define EP93XX_UART1_PHYS_BASE EP93XX_APB_PHYS(0x000c0000) -#define EP93XX_UART1_BASE EP93XX_APB_IOMEM(0x000c0000) - -#define EP93XX_UART2_PHYS_BASE EP93XX_APB_PHYS(0x000d0000) -#define EP93XX_UART2_BASE EP93XX_APB_IOMEM(0x000d0000) - -#define EP93XX_UART3_PHYS_BASE EP93XX_APB_PHYS(0x000e0000) -#define EP93XX_UART3_BASE EP93XX_APB_IOMEM(0x000e0000) - -#endif diff --git a/arch/arm/mach-ep93xx/gpio-ep93xx.h b/arch/arm/mach-ep93xx/gpio-ep93xx.h deleted file mode 100644 index 7b46eb7e5507..000000000000 --- a/arch/arm/mach-ep93xx/gpio-ep93xx.h +++ /dev/null @@ -1,111 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Include file for the EP93XX GPIO controller machine specifics */ - -#ifndef __GPIO_EP93XX_H -#define __GPIO_EP93XX_H - -#include "ep93xx-regs.h" - -#define EP93XX_GPIO_PHYS_BASE EP93XX_APB_PHYS(0x00040000) -#define EP93XX_GPIO_BASE EP93XX_APB_IOMEM(0x00040000) -#define EP93XX_GPIO_REG(x) (EP93XX_GPIO_BASE + (x)) -#define EP93XX_GPIO_F_INT_STATUS EP93XX_GPIO_REG(0x5c) -#define EP93XX_GPIO_A_INT_STATUS EP93XX_GPIO_REG(0xa0) -#define EP93XX_GPIO_B_INT_STATUS EP93XX_GPIO_REG(0xbc) -#define EP93XX_GPIO_EEDRIVE EP93XX_GPIO_REG(0xc8) - -/* GPIO port A. */ -#define EP93XX_GPIO_LINE_A(x) ((x) + 0) -#define EP93XX_GPIO_LINE_EGPIO0 EP93XX_GPIO_LINE_A(0) -#define EP93XX_GPIO_LINE_EGPIO1 EP93XX_GPIO_LINE_A(1) -#define EP93XX_GPIO_LINE_EGPIO2 EP93XX_GPIO_LINE_A(2) -#define EP93XX_GPIO_LINE_EGPIO3 EP93XX_GPIO_LINE_A(3) -#define EP93XX_GPIO_LINE_EGPIO4 EP93XX_GPIO_LINE_A(4) -#define EP93XX_GPIO_LINE_EGPIO5 EP93XX_GPIO_LINE_A(5) -#define EP93XX_GPIO_LINE_EGPIO6 EP93XX_GPIO_LINE_A(6) -#define EP93XX_GPIO_LINE_EGPIO7 EP93XX_GPIO_LINE_A(7) - -/* GPIO port B. */ -#define EP93XX_GPIO_LINE_B(x) ((x) + 8) -#define EP93XX_GPIO_LINE_EGPIO8 EP93XX_GPIO_LINE_B(0) -#define EP93XX_GPIO_LINE_EGPIO9 EP93XX_GPIO_LINE_B(1) -#define EP93XX_GPIO_LINE_EGPIO10 EP93XX_GPIO_LINE_B(2) -#define EP93XX_GPIO_LINE_EGPIO11 EP93XX_GPIO_LINE_B(3) -#define EP93XX_GPIO_LINE_EGPIO12 EP93XX_GPIO_LINE_B(4) -#define EP93XX_GPIO_LINE_EGPIO13 EP93XX_GPIO_LINE_B(5) -#define EP93XX_GPIO_LINE_EGPIO14 EP93XX_GPIO_LINE_B(6) -#define EP93XX_GPIO_LINE_EGPIO15 EP93XX_GPIO_LINE_B(7) - -/* GPIO port C. */ -#define EP93XX_GPIO_LINE_C(x) ((x) + 40) -#define EP93XX_GPIO_LINE_ROW0 EP93XX_GPIO_LINE_C(0) -#define EP93XX_GPIO_LINE_ROW1 EP93XX_GPIO_LINE_C(1) -#define EP93XX_GPIO_LINE_ROW2 EP93XX_GPIO_LINE_C(2) -#define EP93XX_GPIO_LINE_ROW3 EP93XX_GPIO_LINE_C(3) -#define EP93XX_GPIO_LINE_ROW4 EP93XX_GPIO_LINE_C(4) -#define EP93XX_GPIO_LINE_ROW5 EP93XX_GPIO_LINE_C(5) -#define EP93XX_GPIO_LINE_ROW6 EP93XX_GPIO_LINE_C(6) -#define EP93XX_GPIO_LINE_ROW7 EP93XX_GPIO_LINE_C(7) - -/* GPIO port D. */ -#define EP93XX_GPIO_LINE_D(x) ((x) + 24) -#define EP93XX_GPIO_LINE_COL0 EP93XX_GPIO_LINE_D(0) -#define EP93XX_GPIO_LINE_COL1 EP93XX_GPIO_LINE_D(1) -#define EP93XX_GPIO_LINE_COL2 EP93XX_GPIO_LINE_D(2) -#define EP93XX_GPIO_LINE_COL3 EP93XX_GPIO_LINE_D(3) -#define EP93XX_GPIO_LINE_COL4 EP93XX_GPIO_LINE_D(4) -#define EP93XX_GPIO_LINE_COL5 EP93XX_GPIO_LINE_D(5) -#define EP93XX_GPIO_LINE_COL6 EP93XX_GPIO_LINE_D(6) -#define EP93XX_GPIO_LINE_COL7 EP93XX_GPIO_LINE_D(7) - -/* GPIO port E. */ -#define EP93XX_GPIO_LINE_E(x) ((x) + 32) -#define EP93XX_GPIO_LINE_GRLED EP93XX_GPIO_LINE_E(0) -#define EP93XX_GPIO_LINE_RDLED EP93XX_GPIO_LINE_E(1) -#define EP93XX_GPIO_LINE_DIORn EP93XX_GPIO_LINE_E(2) -#define EP93XX_GPIO_LINE_IDECS1n EP93XX_GPIO_LINE_E(3) -#define EP93XX_GPIO_LINE_IDECS2n EP93XX_GPIO_LINE_E(4) -#define EP93XX_GPIO_LINE_IDEDA0 EP93XX_GPIO_LINE_E(5) -#define EP93XX_GPIO_LINE_IDEDA1 EP93XX_GPIO_LINE_E(6) -#define EP93XX_GPIO_LINE_IDEDA2 EP93XX_GPIO_LINE_E(7) - -/* GPIO port F. */ -#define EP93XX_GPIO_LINE_F(x) ((x) + 16) -#define EP93XX_GPIO_LINE_WP EP93XX_GPIO_LINE_F(0) -#define EP93XX_GPIO_LINE_MCCD1 EP93XX_GPIO_LINE_F(1) -#define EP93XX_GPIO_LINE_MCCD2 EP93XX_GPIO_LINE_F(2) -#define EP93XX_GPIO_LINE_MCBVD1 EP93XX_GPIO_LINE_F(3) -#define EP93XX_GPIO_LINE_MCBVD2 EP93XX_GPIO_LINE_F(4) -#define EP93XX_GPIO_LINE_VS1 EP93XX_GPIO_LINE_F(5) -#define EP93XX_GPIO_LINE_READY EP93XX_GPIO_LINE_F(6) -#define EP93XX_GPIO_LINE_VS2 EP93XX_GPIO_LINE_F(7) - -/* GPIO port G. */ -#define EP93XX_GPIO_LINE_G(x) ((x) + 48) -#define EP93XX_GPIO_LINE_EECLK EP93XX_GPIO_LINE_G(0) -#define EP93XX_GPIO_LINE_EEDAT EP93XX_GPIO_LINE_G(1) -#define EP93XX_GPIO_LINE_SLA0 EP93XX_GPIO_LINE_G(2) -#define EP93XX_GPIO_LINE_SLA1 EP93XX_GPIO_LINE_G(3) -#define EP93XX_GPIO_LINE_DD12 EP93XX_GPIO_LINE_G(4) -#define EP93XX_GPIO_LINE_DD13 EP93XX_GPIO_LINE_G(5) -#define EP93XX_GPIO_LINE_DD14 EP93XX_GPIO_LINE_G(6) -#define EP93XX_GPIO_LINE_DD15 EP93XX_GPIO_LINE_G(7) - -/* GPIO port H. */ -#define EP93XX_GPIO_LINE_H(x) ((x) + 56) -#define EP93XX_GPIO_LINE_DD0 EP93XX_GPIO_LINE_H(0) -#define EP93XX_GPIO_LINE_DD1 EP93XX_GPIO_LINE_H(1) -#define EP93XX_GPIO_LINE_DD2 EP93XX_GPIO_LINE_H(2) -#define EP93XX_GPIO_LINE_DD3 EP93XX_GPIO_LINE_H(3) -#define EP93XX_GPIO_LINE_DD4 EP93XX_GPIO_LINE_H(4) -#define EP93XX_GPIO_LINE_DD5 EP93XX_GPIO_LINE_H(5) -#define EP93XX_GPIO_LINE_DD6 EP93XX_GPIO_LINE_H(6) -#define EP93XX_GPIO_LINE_DD7 EP93XX_GPIO_LINE_H(7) - -/* maximum value for gpio line identifiers */ -#define EP93XX_GPIO_LINE_MAX EP93XX_GPIO_LINE_H(7) - -/* maximum value for irq capable line identifiers */ -#define EP93XX_GPIO_LINE_MAX_IRQ EP93XX_GPIO_LINE_F(7) - -#endif /* __GPIO_EP93XX_H */ diff --git a/arch/arm/mach-ep93xx/hardware.h b/arch/arm/mach-ep93xx/hardware.h deleted file mode 100644 index e7d850e04782..000000000000 --- a/arch/arm/mach-ep93xx/hardware.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/arm/mach-ep93xx/include/mach/hardware.h - */ - -#ifndef __ASM_ARCH_HARDWARE_H -#define __ASM_ARCH_HARDWARE_H - -#include "platform.h" - -/* - * The EP93xx has two external crystal oscillators. To generate the - * required high-frequency clocks, the processor uses two phase-locked- - * loops (PLLs) to multiply the incoming external clock signal to much - * higher frequencies that are then divided down by programmable dividers - * to produce the needed clocks. The PLLs operate independently of one - * another. - */ -#define EP93XX_EXT_CLK_RATE 14745600 -#define EP93XX_EXT_RTC_RATE 32768 - -#define EP93XX_KEYTCHCLK_DIV4 (EP93XX_EXT_CLK_RATE / 4) -#define EP93XX_KEYTCHCLK_DIV16 (EP93XX_EXT_CLK_RATE / 16) - -#endif diff --git a/arch/arm/mach-ep93xx/irqs.h b/arch/arm/mach-ep93xx/irqs.h deleted file mode 100644 index 353201b90c66..000000000000 --- a/arch/arm/mach-ep93xx/irqs.h +++ /dev/null @@ -1,76 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_ARCH_IRQS_H -#define __ASM_ARCH_IRQS_H - -#define IRQ_EP93XX_VIC0 1 - -#define IRQ_EP93XX_COMMRX (IRQ_EP93XX_VIC0 + 2) -#define IRQ_EP93XX_COMMTX (IRQ_EP93XX_VIC0 + 3) -#define IRQ_EP93XX_TIMER1 (IRQ_EP93XX_VIC0 + 4) -#define IRQ_EP93XX_TIMER2 (IRQ_EP93XX_VIC0 + 5) -#define IRQ_EP93XX_AACINTR (IRQ_EP93XX_VIC0 + 6) -#define IRQ_EP93XX_DMAM2P0 (IRQ_EP93XX_VIC0 + 7) -#define IRQ_EP93XX_DMAM2P1 (IRQ_EP93XX_VIC0 + 8) -#define IRQ_EP93XX_DMAM2P2 (IRQ_EP93XX_VIC0 + 9) -#define IRQ_EP93XX_DMAM2P3 (IRQ_EP93XX_VIC0 + 10) -#define IRQ_EP93XX_DMAM2P4 (IRQ_EP93XX_VIC0 + 11) -#define IRQ_EP93XX_DMAM2P5 (IRQ_EP93XX_VIC0 + 12) -#define IRQ_EP93XX_DMAM2P6 (IRQ_EP93XX_VIC0 + 13) -#define IRQ_EP93XX_DMAM2P7 (IRQ_EP93XX_VIC0 + 14) -#define IRQ_EP93XX_DMAM2P8 (IRQ_EP93XX_VIC0 + 15) -#define IRQ_EP93XX_DMAM2P9 (IRQ_EP93XX_VIC0 + 16) -#define IRQ_EP93XX_DMAM2M0 (IRQ_EP93XX_VIC0 + 17) -#define IRQ_EP93XX_DMAM2M1 (IRQ_EP93XX_VIC0 + 18) -#define IRQ_EP93XX_GPIO0MUX (IRQ_EP93XX_VIC0 + 19) -#define IRQ_EP93XX_GPIO1MUX (IRQ_EP93XX_VIC0 + 20) -#define IRQ_EP93XX_GPIO2MUX (IRQ_EP93XX_VIC0 + 21) -#define IRQ_EP93XX_GPIO3MUX (IRQ_EP93XX_VIC0 + 22) -#define IRQ_EP93XX_UART1RX (IRQ_EP93XX_VIC0 + 23) -#define IRQ_EP93XX_UART1TX (IRQ_EP93XX_VIC0 + 24) -#define IRQ_EP93XX_UART2RX (IRQ_EP93XX_VIC0 + 25) -#define IRQ_EP93XX_UART2TX (IRQ_EP93XX_VIC0 + 26) -#define IRQ_EP93XX_UART3RX (IRQ_EP93XX_VIC0 + 27) -#define IRQ_EP93XX_UART3TX (IRQ_EP93XX_VIC0 + 28) -#define IRQ_EP93XX_KEY (IRQ_EP93XX_VIC0 + 29) -#define IRQ_EP93XX_TOUCH (IRQ_EP93XX_VIC0 + 30) -#define EP93XX_VIC1_VALID_IRQ_MASK 0x7ffffffc - -#define IRQ_EP93XX_VIC1 (IRQ_EP93XX_VIC0 + 32) - -#define IRQ_EP93XX_EXT0 (IRQ_EP93XX_VIC1 + 0) -#define IRQ_EP93XX_EXT1 (IRQ_EP93XX_VIC1 + 1) -#define IRQ_EP93XX_EXT2 (IRQ_EP93XX_VIC1 + 2) -#define IRQ_EP93XX_64HZ (IRQ_EP93XX_VIC1 + 3) -#define IRQ_EP93XX_WATCHDOG (IRQ_EP93XX_VIC1 + 4) -#define IRQ_EP93XX_RTC (IRQ_EP93XX_VIC1 + 5) -#define IRQ_EP93XX_IRDA (IRQ_EP93XX_VIC1 + 6) -#define IRQ_EP93XX_ETHERNET (IRQ_EP93XX_VIC1 + 7) -#define IRQ_EP93XX_EXT3 (IRQ_EP93XX_VIC1 + 8) -#define IRQ_EP93XX_PROG (IRQ_EP93XX_VIC1 + 9) -#define IRQ_EP93XX_1HZ (IRQ_EP93XX_VIC1 + 10) -#define IRQ_EP93XX_VSYNC (IRQ_EP93XX_VIC1 + 11) -#define IRQ_EP93XX_VIDEO_FIFO (IRQ_EP93XX_VIC1 + 12) -#define IRQ_EP93XX_SSP1RX (IRQ_EP93XX_VIC1 + 13) -#define IRQ_EP93XX_SSP1TX (IRQ_EP93XX_VIC1 + 14) -#define IRQ_EP93XX_GPIO4MUX (IRQ_EP93XX_VIC1 + 15) -#define IRQ_EP93XX_GPIO5MUX (IRQ_EP93XX_VIC1 + 16) -#define IRQ_EP93XX_GPIO6MUX (IRQ_EP93XX_VIC1 + 17) -#define IRQ_EP93XX_GPIO7MUX (IRQ_EP93XX_VIC1 + 18) -#define IRQ_EP93XX_TIMER3 (IRQ_EP93XX_VIC1 + 19) -#define IRQ_EP93XX_UART1 (IRQ_EP93XX_VIC1 + 20) -#define IRQ_EP93XX_SSP (IRQ_EP93XX_VIC1 + 21) -#define IRQ_EP93XX_UART2 (IRQ_EP93XX_VIC1 + 22) -#define IRQ_EP93XX_UART3 (IRQ_EP93XX_VIC1 + 23) -#define IRQ_EP93XX_USB (IRQ_EP93XX_VIC1 + 24) -#define IRQ_EP93XX_ETHERNET_PME (IRQ_EP93XX_VIC1 + 25) -#define IRQ_EP93XX_DSP (IRQ_EP93XX_VIC1 + 26) -#define IRQ_EP93XX_GPIO_AB (IRQ_EP93XX_VIC1 + 27) -#define IRQ_EP93XX_SAI (IRQ_EP93XX_VIC1 + 28) -#define EP93XX_VIC2_VALID_IRQ_MASK 0x1fffffff - -#define NR_EP93XX_IRQS (IRQ_EP93XX_VIC1 + 32 + 24) - -#define EP93XX_BOARD_IRQ(x) (NR_EP93XX_IRQS + (x)) -#define EP93XX_BOARD_IRQS 32 - -#endif diff --git a/arch/arm/mach-ep93xx/platform.h b/arch/arm/mach-ep93xx/platform.h deleted file mode 100644 index 5fb1b919133f..000000000000 --- a/arch/arm/mach-ep93xx/platform.h +++ /dev/null @@ -1,42 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/arm/mach-ep93xx/include/mach/platform.h - */ - -#ifndef __ASSEMBLY__ - -#include -#include - -struct device; -struct i2c_board_info; -struct spi_board_info; -struct platform_device; -struct ep93xxfb_mach_info; -struct ep93xx_keypad_platform_data; -struct ep93xx_spi_info; - -void ep93xx_map_io(void); -void ep93xx_init_irq(void); - -void ep93xx_register_flash(unsigned int width, - resource_size_t start, resource_size_t size); - -void ep93xx_register_eth(struct ep93xx_eth_data *data, int copy_addr); -void ep93xx_register_i2c(struct i2c_board_info *devices, int num); -void ep93xx_register_spi(struct ep93xx_spi_info *info, - struct spi_board_info *devices, int num); -void ep93xx_register_fb(struct ep93xxfb_mach_info *data); -void ep93xx_register_pwm(int pwm0, int pwm1); -void ep93xx_register_keypad(struct ep93xx_keypad_platform_data *data); -void ep93xx_register_i2s(void); -void ep93xx_register_ac97(void); -void ep93xx_register_ide(void); -void ep93xx_register_adc(void); - -struct device *ep93xx_init_devices(void); -extern void ep93xx_timer_init(void); - -void ep93xx_restart(enum reboot_mode, const char *); - -#endif diff --git a/arch/arm/mach-ep93xx/soc.h b/arch/arm/mach-ep93xx/soc.h deleted file mode 100644 index 3245ebbd5069..000000000000 --- a/arch/arm/mach-ep93xx/soc.h +++ /dev/null @@ -1,212 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * arch/arm/mach-ep93xx/soc.h - * - * Copyright (C) 2012 Open Kernel Labs - * Copyright (C) 2012 Ryan Mallon - */ - -#ifndef _EP93XX_SOC_H -#define _EP93XX_SOC_H - -#include "ep93xx-regs.h" -#include "irqs.h" - -/* - * EP93xx Physical Memory Map: - * - * The ASDO pin is sampled at system reset to select a synchronous or - * asynchronous boot configuration. When ASDO is "1" (i.e. pulled-up) - * the synchronous boot mode is selected. When ASDO is "0" (i.e - * pulled-down) the asynchronous boot mode is selected. - * - * In synchronous boot mode nSDCE3 is decoded starting at physical address - * 0x00000000 and nCS0 is decoded starting at 0xf0000000. For asynchronous - * boot mode they are swapped with nCS0 decoded at 0x00000000 ann nSDCE3 - * decoded at 0xf0000000. - * - * There is known errata for the EP93xx dealing with External Memory - * Configurations. Please refer to "AN273: EP93xx Silicon Rev E Design - * Guidelines" for more information. This document can be found at: - * - * http://www.cirrus.com/en/pubs/appNote/AN273REV4.pdf - */ - -#define EP93XX_CS0_PHYS_BASE_ASYNC 0x00000000 /* ASDO Pin = 0 */ -#define EP93XX_SDCE3_PHYS_BASE_SYNC 0x00000000 /* ASDO Pin = 1 */ -#define EP93XX_CS1_PHYS_BASE 0x10000000 -#define EP93XX_CS2_PHYS_BASE 0x20000000 -#define EP93XX_CS3_PHYS_BASE 0x30000000 -#define EP93XX_PCMCIA_PHYS_BASE 0x40000000 -#define EP93XX_CS6_PHYS_BASE 0x60000000 -#define EP93XX_CS7_PHYS_BASE 0x70000000 -#define EP93XX_SDCE0_PHYS_BASE 0xc0000000 -#define EP93XX_SDCE1_PHYS_BASE 0xd0000000 -#define EP93XX_SDCE2_PHYS_BASE 0xe0000000 -#define EP93XX_SDCE3_PHYS_BASE_ASYNC 0xf0000000 /* ASDO Pin = 0 */ -#define EP93XX_CS0_PHYS_BASE_SYNC 0xf0000000 /* ASDO Pin = 1 */ - -/* AHB peripherals */ -#define EP93XX_DMA_BASE EP93XX_AHB_IOMEM(0x00000000) - -#define EP93XX_ETHERNET_PHYS_BASE EP93XX_AHB_PHYS(0x00010000) -#define EP93XX_ETHERNET_BASE EP93XX_AHB_IOMEM(0x00010000) - -#define EP93XX_USB_PHYS_BASE EP93XX_AHB_PHYS(0x00020000) -#define EP93XX_USB_BASE EP93XX_AHB_IOMEM(0x00020000) - -#define EP93XX_RASTER_PHYS_BASE EP93XX_AHB_PHYS(0x00030000) -#define EP93XX_RASTER_BASE EP93XX_AHB_IOMEM(0x00030000) - -#define EP93XX_GRAPHICS_ACCEL_BASE EP93XX_AHB_IOMEM(0x00040000) - -#define EP93XX_SDRAM_CONTROLLER_BASE EP93XX_AHB_IOMEM(0x00060000) - -#define EP93XX_PCMCIA_CONTROLLER_BASE EP93XX_AHB_IOMEM(0x00080000) - -#define EP93XX_BOOT_ROM_BASE EP93XX_AHB_IOMEM(0x00090000) - -#define EP93XX_IDE_PHYS_BASE EP93XX_AHB_PHYS(0x000a0000) -#define EP93XX_IDE_BASE EP93XX_AHB_IOMEM(0x000a0000) - -#define EP93XX_VIC1_BASE EP93XX_AHB_IOMEM(0x000b0000) - -#define EP93XX_VIC2_BASE EP93XX_AHB_IOMEM(0x000c0000) - -/* APB peripherals */ -#define EP93XX_TIMER_BASE EP93XX_APB_IOMEM(0x00010000) - -#define EP93XX_I2S_PHYS_BASE EP93XX_APB_PHYS(0x00020000) -#define EP93XX_I2S_BASE EP93XX_APB_IOMEM(0x00020000) - -#define EP93XX_SECURITY_BASE EP93XX_APB_IOMEM(0x00030000) - -#define EP93XX_AAC_PHYS_BASE EP93XX_APB_PHYS(0x00080000) -#define EP93XX_AAC_BASE EP93XX_APB_IOMEM(0x00080000) - -#define EP93XX_SPI_PHYS_BASE EP93XX_APB_PHYS(0x000a0000) -#define EP93XX_SPI_BASE EP93XX_APB_IOMEM(0x000a0000) - -#define EP93XX_IRDA_BASE EP93XX_APB_IOMEM(0x000b0000) - -#define EP93XX_KEY_MATRIX_PHYS_BASE EP93XX_APB_PHYS(0x000f0000) -#define EP93XX_KEY_MATRIX_BASE EP93XX_APB_IOMEM(0x000f0000) - -#define EP93XX_ADC_PHYS_BASE EP93XX_APB_PHYS(0x00100000) -#define EP93XX_ADC_BASE EP93XX_APB_IOMEM(0x00100000) -#define EP93XX_TOUCHSCREEN_BASE EP93XX_APB_IOMEM(0x00100000) - -#define EP93XX_PWM_PHYS_BASE EP93XX_APB_PHYS(0x00110000) -#define EP93XX_PWM_BASE EP93XX_APB_IOMEM(0x00110000) - -#define EP93XX_RTC_PHYS_BASE EP93XX_APB_PHYS(0x00120000) -#define EP93XX_RTC_BASE EP93XX_APB_IOMEM(0x00120000) - -#define EP93XX_WATCHDOG_PHYS_BASE EP93XX_APB_PHYS(0x00140000) -#define EP93XX_WATCHDOG_BASE EP93XX_APB_IOMEM(0x00140000) - -/* System controller */ -#define EP93XX_SYSCON_BASE EP93XX_APB_IOMEM(0x00130000) -#define EP93XX_SYSCON_REG(x) (EP93XX_SYSCON_BASE + (x)) -#define EP93XX_SYSCON_POWER_STATE EP93XX_SYSCON_REG(0x00) -#define EP93XX_SYSCON_PWRCNT EP93XX_SYSCON_REG(0x04) -#define EP93XX_SYSCON_PWRCNT_FIR_EN (1<<31) -#define EP93XX_SYSCON_PWRCNT_UARTBAUD (1<<29) -#define EP93XX_SYSCON_PWRCNT_USH_EN 28 -#define EP93XX_SYSCON_PWRCNT_DMA_M2M1 27 -#define EP93XX_SYSCON_PWRCNT_DMA_M2M0 26 -#define EP93XX_SYSCON_PWRCNT_DMA_M2P8 25 -#define EP93XX_SYSCON_PWRCNT_DMA_M2P9 24 -#define EP93XX_SYSCON_PWRCNT_DMA_M2P6 23 -#define EP93XX_SYSCON_PWRCNT_DMA_M2P7 22 -#define EP93XX_SYSCON_PWRCNT_DMA_M2P4 21 -#define EP93XX_SYSCON_PWRCNT_DMA_M2P5 20 -#define EP93XX_SYSCON_PWRCNT_DMA_M2P2 19 -#define EP93XX_SYSCON_PWRCNT_DMA_M2P3 18 -#define EP93XX_SYSCON_PWRCNT_DMA_M2P0 17 -#define EP93XX_SYSCON_PWRCNT_DMA_M2P1 16 -#define EP93XX_SYSCON_HALT EP93XX_SYSCON_REG(0x08) -#define EP93XX_SYSCON_STANDBY EP93XX_SYSCON_REG(0x0c) -#define EP93XX_SYSCON_CLKSET1 EP93XX_SYSCON_REG(0x20) -#define EP93XX_SYSCON_CLKSET1_NBYP1 (1<<23) -#define EP93XX_SYSCON_CLKSET2 EP93XX_SYSCON_REG(0x24) -#define EP93XX_SYSCON_CLKSET2_NBYP2 (1<<19) -#define EP93XX_SYSCON_CLKSET2_PLL2_EN (1<<18) -#define EP93XX_SYSCON_DEVCFG EP93XX_SYSCON_REG(0x80) -#define EP93XX_SYSCON_DEVCFG_SWRST (1<<31) -#define EP93XX_SYSCON_DEVCFG_D1ONG (1<<30) -#define EP93XX_SYSCON_DEVCFG_D0ONG (1<<29) -#define EP93XX_SYSCON_DEVCFG_IONU2 (1<<28) -#define EP93XX_SYSCON_DEVCFG_GONK (1<<27) -#define EP93XX_SYSCON_DEVCFG_TONG (1<<26) -#define EP93XX_SYSCON_DEVCFG_MONG (1<<25) -#define EP93XX_SYSCON_DEVCFG_U3EN 24 -#define EP93XX_SYSCON_DEVCFG_CPENA (1<<23) -#define EP93XX_SYSCON_DEVCFG_A2ONG (1<<22) -#define EP93XX_SYSCON_DEVCFG_A1ONG (1<<21) -#define EP93XX_SYSCON_DEVCFG_U2EN 20 -#define EP93XX_SYSCON_DEVCFG_EXVC (1<<19) -#define EP93XX_SYSCON_DEVCFG_U1EN 18 -#define EP93XX_SYSCON_DEVCFG_TIN (1<<17) -#define EP93XX_SYSCON_DEVCFG_HC3IN (1<<15) -#define EP93XX_SYSCON_DEVCFG_HC3EN (1<<14) -#define EP93XX_SYSCON_DEVCFG_HC1IN (1<<13) -#define EP93XX_SYSCON_DEVCFG_HC1EN (1<<12) -#define EP93XX_SYSCON_DEVCFG_HONIDE (1<<11) -#define EP93XX_SYSCON_DEVCFG_GONIDE (1<<10) -#define EP93XX_SYSCON_DEVCFG_PONG (1<<9) -#define EP93XX_SYSCON_DEVCFG_EONIDE (1<<8) -#define EP93XX_SYSCON_DEVCFG_I2SONSSP (1<<7) -#define EP93XX_SYSCON_DEVCFG_I2SONAC97 (1<<6) -#define EP93XX_SYSCON_DEVCFG_RASONP3 (1<<4) -#define EP93XX_SYSCON_DEVCFG_RAS (1<<3) -#define EP93XX_SYSCON_DEVCFG_ADCPD (1<<2) -#define EP93XX_SYSCON_DEVCFG_KEYS (1<<1) -#define EP93XX_SYSCON_DEVCFG_SHENA (1<<0) -#define EP93XX_SYSCON_VIDCLKDIV EP93XX_SYSCON_REG(0x84) -#define EP93XX_SYSCON_CLKDIV_ENABLE 15 -#define EP93XX_SYSCON_CLKDIV_ESEL (1<<14) -#define EP93XX_SYSCON_CLKDIV_PSEL (1<<13) -#define EP93XX_SYSCON_CLKDIV_PDIV_SHIFT 8 -#define EP93XX_SYSCON_I2SCLKDIV EP93XX_SYSCON_REG(0x8c) -#define EP93XX_SYSCON_I2SCLKDIV_SENA 31 -#define EP93XX_SYSCON_I2SCLKDIV_ORIDE (1<<29) -#define EP93XX_SYSCON_I2SCLKDIV_SPOL (1<<19) -#define EP93XX_I2SCLKDIV_SDIV (1 << 16) -#define EP93XX_I2SCLKDIV_LRDIV32 (0 << 17) -#define EP93XX_I2SCLKDIV_LRDIV64 (1 << 17) -#define EP93XX_I2SCLKDIV_LRDIV128 (2 << 17) -#define EP93XX_I2SCLKDIV_LRDIV_MASK (3 << 17) -#define EP93XX_SYSCON_KEYTCHCLKDIV EP93XX_SYSCON_REG(0x90) -#define EP93XX_SYSCON_KEYTCHCLKDIV_TSEN 31 -#define EP93XX_SYSCON_KEYTCHCLKDIV_ADIV 16 -#define EP93XX_SYSCON_KEYTCHCLKDIV_KEN 15 -#define EP93XX_SYSCON_KEYTCHCLKDIV_KDIV (1<<0) -#define EP93XX_SYSCON_SYSCFG EP93XX_SYSCON_REG(0x9c) -#define EP93XX_SYSCON_SYSCFG_REV_MASK (0xf0000000) -#define EP93XX_SYSCON_SYSCFG_REV_SHIFT (28) -#define EP93XX_SYSCON_SYSCFG_SBOOT (1<<8) -#define EP93XX_SYSCON_SYSCFG_LCSN7 (1<<7) -#define EP93XX_SYSCON_SYSCFG_LCSN6 (1<<6) -#define EP93XX_SYSCON_SYSCFG_LASDO (1<<5) -#define EP93XX_SYSCON_SYSCFG_LEEDA (1<<4) -#define EP93XX_SYSCON_SYSCFG_LEECLK (1<<3) -#define EP93XX_SYSCON_SYSCFG_LCSN2 (1<<1) -#define EP93XX_SYSCON_SYSCFG_LCSN1 (1<<0) -#define EP93XX_SYSCON_SWLOCK EP93XX_SYSCON_REG(0xc0) - -/* EP93xx System Controller software locked register write */ -void ep93xx_syscon_swlocked_write(unsigned int val, void __iomem *reg); -void ep93xx_devcfg_set_clear(unsigned int set_bits, unsigned int clear_bits); - -static inline void ep93xx_devcfg_set_bits(unsigned int bits) -{ - ep93xx_devcfg_set_clear(bits, 0x00); -} - -static inline void ep93xx_devcfg_clear_bits(unsigned int bits) -{ - ep93xx_devcfg_set_clear(0x00, bits); -} - -#endif /* _EP93XX_SOC_H */ diff --git a/arch/arm/mach-ep93xx/timer-ep93xx.c b/arch/arm/mach-ep93xx/timer-ep93xx.c deleted file mode 100644 index a9efa7bc2fa1..000000000000 --- a/arch/arm/mach-ep93xx/timer-ep93xx.c +++ /dev/null @@ -1,143 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "soc.h" -#include "platform.h" - -/************************************************************************* - * Timer handling for EP93xx - ************************************************************************* - * The ep93xx has four internal timers. Timers 1, 2 (both 16 bit) and - * 3 (32 bit) count down at 508 kHz, are self-reloading, and can generate - * an interrupt on underflow. Timer 4 (40 bit) counts down at 983.04 kHz, - * is free-running, and can't generate interrupts. - * - * The 508 kHz timers are ideal for use for the timer interrupt, as the - * most common values of HZ divide 508 kHz nicely. We pick the 32 bit - * timer (timer 3) to get as long sleep intervals as possible when using - * CONFIG_NO_HZ. - * - * The higher clock rate of timer 4 makes it a better choice than the - * other timers for use as clock source and for sched_clock(), providing - * a stable 40 bit time base. - ************************************************************************* - */ -#define EP93XX_TIMER_REG(x) (EP93XX_TIMER_BASE + (x)) -#define EP93XX_TIMER1_LOAD EP93XX_TIMER_REG(0x00) -#define EP93XX_TIMER1_VALUE EP93XX_TIMER_REG(0x04) -#define EP93XX_TIMER1_CONTROL EP93XX_TIMER_REG(0x08) -#define EP93XX_TIMER123_CONTROL_ENABLE (1 << 7) -#define EP93XX_TIMER123_CONTROL_MODE (1 << 6) -#define EP93XX_TIMER123_CONTROL_CLKSEL (1 << 3) -#define EP93XX_TIMER1_CLEAR EP93XX_TIMER_REG(0x0c) -#define EP93XX_TIMER2_LOAD EP93XX_TIMER_REG(0x20) -#define EP93XX_TIMER2_VALUE EP93XX_TIMER_REG(0x24) -#define EP93XX_TIMER2_CONTROL EP93XX_TIMER_REG(0x28) -#define EP93XX_TIMER2_CLEAR EP93XX_TIMER_REG(0x2c) -#define EP93XX_TIMER4_VALUE_LOW EP93XX_TIMER_REG(0x60) -#define EP93XX_TIMER4_VALUE_HIGH EP93XX_TIMER_REG(0x64) -#define EP93XX_TIMER4_VALUE_HIGH_ENABLE (1 << 8) -#define EP93XX_TIMER3_LOAD EP93XX_TIMER_REG(0x80) -#define EP93XX_TIMER3_VALUE EP93XX_TIMER_REG(0x84) -#define EP93XX_TIMER3_CONTROL EP93XX_TIMER_REG(0x88) -#define EP93XX_TIMER3_CLEAR EP93XX_TIMER_REG(0x8c) - -#define EP93XX_TIMER123_RATE 508469 -#define EP93XX_TIMER4_RATE 983040 - -static u64 notrace ep93xx_read_sched_clock(void) -{ - u64 ret; - - ret = readl(EP93XX_TIMER4_VALUE_LOW); - ret |= ((u64) (readl(EP93XX_TIMER4_VALUE_HIGH) & 0xff) << 32); - return ret; -} - -static u64 ep93xx_clocksource_read(struct clocksource *c) -{ - u64 ret; - - ret = readl(EP93XX_TIMER4_VALUE_LOW); - ret |= ((u64) (readl(EP93XX_TIMER4_VALUE_HIGH) & 0xff) << 32); - return (u64) ret; -} - -static int ep93xx_clkevt_set_next_event(unsigned long next, - struct clock_event_device *evt) -{ - /* Default mode: periodic, off, 508 kHz */ - u32 tmode = EP93XX_TIMER123_CONTROL_MODE | - EP93XX_TIMER123_CONTROL_CLKSEL; - - /* Clear timer */ - writel(tmode, EP93XX_TIMER3_CONTROL); - - /* Set next event */ - writel(next, EP93XX_TIMER3_LOAD); - writel(tmode | EP93XX_TIMER123_CONTROL_ENABLE, - EP93XX_TIMER3_CONTROL); - return 0; -} - - -static int ep93xx_clkevt_shutdown(struct clock_event_device *evt) -{ - /* Disable timer */ - writel(0, EP93XX_TIMER3_CONTROL); - - return 0; -} - -static struct clock_event_device ep93xx_clockevent = { - .name = "timer1", - .features = CLOCK_EVT_FEAT_ONESHOT, - .set_state_shutdown = ep93xx_clkevt_shutdown, - .set_state_oneshot = ep93xx_clkevt_shutdown, - .tick_resume = ep93xx_clkevt_shutdown, - .set_next_event = ep93xx_clkevt_set_next_event, - .rating = 300, -}; - -static irqreturn_t ep93xx_timer_interrupt(int irq, void *dev_id) -{ - struct clock_event_device *evt = dev_id; - - /* Writing any value clears the timer interrupt */ - writel(1, EP93XX_TIMER3_CLEAR); - - evt->event_handler(evt); - - return IRQ_HANDLED; -} - -void __init ep93xx_timer_init(void) -{ - int irq = IRQ_EP93XX_TIMER3; - unsigned long flags = IRQF_TIMER | IRQF_IRQPOLL; - - /* Enable and register clocksource and sched_clock on timer 4 */ - writel(EP93XX_TIMER4_VALUE_HIGH_ENABLE, - EP93XX_TIMER4_VALUE_HIGH); - clocksource_mmio_init(NULL, "timer4", - EP93XX_TIMER4_RATE, 200, 40, - ep93xx_clocksource_read); - sched_clock_register(ep93xx_read_sched_clock, 40, - EP93XX_TIMER4_RATE); - - /* Set up clockevent on timer 3 */ - if (request_irq(irq, ep93xx_timer_interrupt, flags, "ep93xx timer", - &ep93xx_clockevent)) - pr_err("Failed to request irq %d (ep93xx timer)\n", irq); - clockevents_config_and_register(&ep93xx_clockevent, - EP93XX_TIMER123_RATE, - 1, - 0xffffffffU); -} diff --git a/arch/arm/mach-ep93xx/ts72xx.c b/arch/arm/mach-ep93xx/ts72xx.c deleted file mode 100644 index 0bbdf587c685..000000000000 --- a/arch/arm/mach-ep93xx/ts72xx.c +++ /dev/null @@ -1,422 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * arch/arm/mach-ep93xx/ts72xx.c - * Technologic Systems TS72xx SBC support. - * - * Copyright (C) 2006 Lennert Buytenhek - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "gpio-ep93xx.h" -#include "hardware.h" - -#include -#include -#include - -#include "soc.h" -#include "ts72xx.h" - -/************************************************************************* - * IO map - *************************************************************************/ -static struct map_desc ts72xx_io_desc[] __initdata = { - { - .virtual = (unsigned long)TS72XX_MODEL_VIRT_BASE, - .pfn = __phys_to_pfn(TS72XX_MODEL_PHYS_BASE), - .length = TS72XX_MODEL_SIZE, - .type = MT_DEVICE, - }, { - .virtual = (unsigned long)TS72XX_OPTIONS_VIRT_BASE, - .pfn = __phys_to_pfn(TS72XX_OPTIONS_PHYS_BASE), - .length = TS72XX_OPTIONS_SIZE, - .type = MT_DEVICE, - }, { - .virtual = (unsigned long)TS72XX_OPTIONS2_VIRT_BASE, - .pfn = __phys_to_pfn(TS72XX_OPTIONS2_PHYS_BASE), - .length = TS72XX_OPTIONS2_SIZE, - .type = MT_DEVICE, - }, { - .virtual = (unsigned long)TS72XX_CPLDVER_VIRT_BASE, - .pfn = __phys_to_pfn(TS72XX_CPLDVER_PHYS_BASE), - .length = TS72XX_CPLDVER_SIZE, - .type = MT_DEVICE, - } -}; - -static void __init ts72xx_map_io(void) -{ - ep93xx_map_io(); - iotable_init(ts72xx_io_desc, ARRAY_SIZE(ts72xx_io_desc)); -} - - -/************************************************************************* - * NAND flash - *************************************************************************/ -#define TS72XX_NAND_CONTROL_ADDR_LINE 22 /* 0xN0400000 */ -#define TS72XX_NAND_BUSY_ADDR_LINE 23 /* 0xN0800000 */ - -static void ts72xx_nand_hwcontrol(struct nand_chip *chip, - int cmd, unsigned int ctrl) -{ - if (ctrl & NAND_CTRL_CHANGE) { - void __iomem *addr = chip->legacy.IO_ADDR_R; - unsigned char bits; - - addr += (1 << TS72XX_NAND_CONTROL_ADDR_LINE); - - bits = __raw_readb(addr) & ~0x07; - bits |= (ctrl & NAND_NCE) << 2; /* bit 0 -> bit 2 */ - bits |= (ctrl & NAND_CLE); /* bit 1 -> bit 1 */ - bits |= (ctrl & NAND_ALE) >> 2; /* bit 2 -> bit 0 */ - - __raw_writeb(bits, addr); - } - - if (cmd != NAND_CMD_NONE) - __raw_writeb(cmd, chip->legacy.IO_ADDR_W); -} - -static int ts72xx_nand_device_ready(struct nand_chip *chip) -{ - void __iomem *addr = chip->legacy.IO_ADDR_R; - - addr += (1 << TS72XX_NAND_BUSY_ADDR_LINE); - - return !!(__raw_readb(addr) & 0x20); -} - -#define TS72XX_BOOTROM_PART_SIZE (SZ_16K) -#define TS72XX_REDBOOT_PART_SIZE (SZ_2M + SZ_1M) - -static struct mtd_partition ts72xx_nand_parts[] = { - { - .name = "TS-BOOTROM", - .offset = 0, - .size = TS72XX_BOOTROM_PART_SIZE, - .mask_flags = MTD_WRITEABLE, /* force read-only */ - }, { - .name = "Linux", - .offset = MTDPART_OFS_RETAIN, - .size = TS72XX_REDBOOT_PART_SIZE, - /* leave so much for last partition */ - }, { - .name = "RedBoot", - .offset = MTDPART_OFS_APPEND, - .size = MTDPART_SIZ_FULL, - .mask_flags = MTD_WRITEABLE, /* force read-only */ - }, -}; - -static struct platform_nand_data ts72xx_nand_data = { - .chip = { - .nr_chips = 1, - .chip_offset = 0, - .chip_delay = 15, - }, - .ctrl = { - .cmd_ctrl = ts72xx_nand_hwcontrol, - .dev_ready = ts72xx_nand_device_ready, - }, -}; - -static struct resource ts72xx_nand_resource[] = { - { - .start = 0, /* filled in later */ - .end = 0, /* filled in later */ - .flags = IORESOURCE_MEM, - }, -}; - -static struct platform_device ts72xx_nand_flash = { - .name = "gen_nand", - .id = -1, - .dev.platform_data = &ts72xx_nand_data, - .resource = ts72xx_nand_resource, - .num_resources = ARRAY_SIZE(ts72xx_nand_resource), -}; - -static void __init ts72xx_register_flash(struct mtd_partition *parts, int n, - resource_size_t start) -{ - /* - * TS7200 has NOR flash all other TS72xx board have NAND flash. - */ - if (board_is_ts7200()) { - ep93xx_register_flash(2, EP93XX_CS6_PHYS_BASE, SZ_16M); - } else { - ts72xx_nand_resource[0].start = start; - ts72xx_nand_resource[0].end = start + SZ_16M - 1; - - ts72xx_nand_data.chip.partitions = parts; - ts72xx_nand_data.chip.nr_partitions = n; - - platform_device_register(&ts72xx_nand_flash); - } -} - -/************************************************************************* - * RTC M48T86 - *************************************************************************/ -#define TS72XX_RTC_INDEX_PHYS_BASE (EP93XX_CS1_PHYS_BASE + 0x00800000) -#define TS72XX_RTC_DATA_PHYS_BASE (EP93XX_CS1_PHYS_BASE + 0x01700000) - -static struct resource ts72xx_rtc_resources[] = { - DEFINE_RES_MEM(TS72XX_RTC_INDEX_PHYS_BASE, 0x01), - DEFINE_RES_MEM(TS72XX_RTC_DATA_PHYS_BASE, 0x01), -}; - -static struct platform_device ts72xx_rtc_device = { - .name = "rtc-m48t86", - .id = -1, - .resource = ts72xx_rtc_resources, - .num_resources = ARRAY_SIZE(ts72xx_rtc_resources), -}; - -/************************************************************************* - * Watchdog (in CPLD) - *************************************************************************/ -#define TS72XX_WDT_CONTROL_PHYS_BASE (EP93XX_CS2_PHYS_BASE + 0x03800000) -#define TS72XX_WDT_FEED_PHYS_BASE (EP93XX_CS2_PHYS_BASE + 0x03c00000) - -static struct resource ts72xx_wdt_resources[] = { - DEFINE_RES_MEM(TS72XX_WDT_CONTROL_PHYS_BASE, 0x01), - DEFINE_RES_MEM(TS72XX_WDT_FEED_PHYS_BASE, 0x01), -}; - -static struct platform_device ts72xx_wdt_device = { - .name = "ts72xx-wdt", - .id = -1, - .resource = ts72xx_wdt_resources, - .num_resources = ARRAY_SIZE(ts72xx_wdt_resources), -}; - -/************************************************************************* - * ETH - *************************************************************************/ -static struct ep93xx_eth_data __initdata ts72xx_eth_data = { - .phy_id = 1, -}; - -/************************************************************************* - * SPI SD/MMC host - *************************************************************************/ -#define BK3_EN_SDCARD_PHYS_BASE 0x12400000 -#define BK3_EN_SDCARD_PWR 0x0 -#define BK3_DIS_SDCARD_PWR 0x0C -static void bk3_mmc_spi_setpower(struct device *dev, unsigned int vdd) -{ - void __iomem *pwr_sd = ioremap(BK3_EN_SDCARD_PHYS_BASE, SZ_4K); - - if (!pwr_sd) { - pr_err("Failed to enable SD card power!"); - return; - } - - pr_debug("%s: SD card pwr %s VDD:0x%x\n", __func__, - !!vdd ? "ON" : "OFF", vdd); - - if (!!vdd) - __raw_writeb(BK3_EN_SDCARD_PWR, pwr_sd); - else - __raw_writeb(BK3_DIS_SDCARD_PWR, pwr_sd); - - iounmap(pwr_sd); -} - -static struct mmc_spi_platform_data bk3_spi_mmc_data = { - .detect_delay = 500, - .powerup_msecs = 100, - .ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34, - .caps = MMC_CAP_NONREMOVABLE, - .setpower = bk3_mmc_spi_setpower, -}; - -/************************************************************************* - * SPI Bus - SD card access - *************************************************************************/ -static struct spi_board_info bk3_spi_board_info[] __initdata = { - { - .modalias = "mmc_spi", - .platform_data = &bk3_spi_mmc_data, - .max_speed_hz = 7.4E6, - .bus_num = 0, - .chip_select = 0, - .mode = SPI_MODE_0, - }, -}; - -/* - * This is a stub -> the FGPIO[3] pin is not connected on the schematic - * The all work is performed automatically by !SPI_FRAME (SFRM1) and - * goes through CPLD - */ -static struct gpiod_lookup_table bk3_spi_cs_gpio_table = { - .dev_id = "spi0", - .table = { - GPIO_LOOKUP("gpio-ep93xx.5", 3, "cs", GPIO_ACTIVE_LOW), - { }, - }, -}; - -static struct ep93xx_spi_info bk3_spi_master __initdata = { - .use_dma = 1, -}; - -/************************************************************************* - * TS72XX support code - *************************************************************************/ -#if IS_ENABLED(CONFIG_FPGA_MGR_TS73XX) - -/* Relative to EP93XX_CS1_PHYS_BASE */ -#define TS73XX_FPGA_LOADER_BASE 0x03c00000 - -static struct resource ts73xx_fpga_resources[] = { - { - .start = EP93XX_CS1_PHYS_BASE + TS73XX_FPGA_LOADER_BASE, - .end = EP93XX_CS1_PHYS_BASE + TS73XX_FPGA_LOADER_BASE + 1, - .flags = IORESOURCE_MEM, - }, -}; - -static struct platform_device ts73xx_fpga_device = { - .name = "ts73xx-fpga-mgr", - .id = -1, - .resource = ts73xx_fpga_resources, - .num_resources = ARRAY_SIZE(ts73xx_fpga_resources), -}; - -#endif - -/************************************************************************* - * SPI Bus - *************************************************************************/ -static struct spi_board_info ts72xx_spi_devices[] __initdata = { - { - .modalias = "tmp122", - .max_speed_hz = 2 * 1000 * 1000, - .bus_num = 0, - .chip_select = 0, - }, -}; - -static struct gpiod_lookup_table ts72xx_spi_cs_gpio_table = { - .dev_id = "spi0", - .table = { - /* DIO_17 */ - GPIO_LOOKUP("gpio-ep93xx.5", 2, "cs", GPIO_ACTIVE_LOW), - { }, - }, -}; - -static struct ep93xx_spi_info ts72xx_spi_info __initdata = { - /* Intentionally left blank */ -}; - -static void __init ts72xx_init_machine(void) -{ - ep93xx_init_devices(); - ts72xx_register_flash(ts72xx_nand_parts, ARRAY_SIZE(ts72xx_nand_parts), - is_ts9420_installed() ? - EP93XX_CS7_PHYS_BASE : EP93XX_CS6_PHYS_BASE); - platform_device_register(&ts72xx_rtc_device); - platform_device_register(&ts72xx_wdt_device); - - ep93xx_register_eth(&ts72xx_eth_data, 1); -#if IS_ENABLED(CONFIG_FPGA_MGR_TS73XX) - if (board_is_ts7300()) - platform_device_register(&ts73xx_fpga_device); -#endif - gpiod_add_lookup_table(&ts72xx_spi_cs_gpio_table); - ep93xx_register_spi(&ts72xx_spi_info, ts72xx_spi_devices, - ARRAY_SIZE(ts72xx_spi_devices)); -} - -MACHINE_START(TS72XX, "Technologic Systems TS-72xx SBC") - /* Maintainer: Lennert Buytenhek */ - .atag_offset = 0x100, - .nr_irqs = NR_EP93XX_IRQS, - .map_io = ts72xx_map_io, - .init_irq = ep93xx_init_irq, - .init_time = ep93xx_timer_init, - .init_machine = ts72xx_init_machine, - .restart = ep93xx_restart, -MACHINE_END - -/************************************************************************* - * EP93xx I2S audio peripheral handling - *************************************************************************/ -static struct resource ep93xx_i2s_resource[] = { - DEFINE_RES_MEM(EP93XX_I2S_PHYS_BASE, 0x100), - DEFINE_RES_IRQ_NAMED(IRQ_EP93XX_SAI, "spilink i2s slave"), -}; - -static struct platform_device ep93xx_i2s_device = { - .name = "ep93xx-spilink-i2s", - .id = -1, - .num_resources = ARRAY_SIZE(ep93xx_i2s_resource), - .resource = ep93xx_i2s_resource, -}; - -/************************************************************************* - * BK3 support code - *************************************************************************/ -static struct mtd_partition bk3_nand_parts[] = { - { - .name = "System", - .offset = 0x00000000, - .size = 0x01e00000, - }, { - .name = "Data", - .offset = 0x01e00000, - .size = 0x05f20000 - }, { - .name = "RedBoot", - .offset = 0x07d20000, - .size = 0x002e0000, - .mask_flags = MTD_WRITEABLE, /* force RO */ - }, -}; - -static void __init bk3_init_machine(void) -{ - ep93xx_init_devices(); - - ts72xx_register_flash(bk3_nand_parts, ARRAY_SIZE(bk3_nand_parts), - EP93XX_CS6_PHYS_BASE); - - ep93xx_register_eth(&ts72xx_eth_data, 1); - - gpiod_add_lookup_table(&bk3_spi_cs_gpio_table); - ep93xx_register_spi(&bk3_spi_master, bk3_spi_board_info, - ARRAY_SIZE(bk3_spi_board_info)); - - /* Configure ep93xx's I2S to use AC97 pins */ - ep93xx_devcfg_set_bits(EP93XX_SYSCON_DEVCFG_I2SONAC97); - platform_device_register(&ep93xx_i2s_device); -} - -MACHINE_START(BK3, "Liebherr controller BK3.1") - /* Maintainer: Lukasz Majewski */ - .atag_offset = 0x100, - .nr_irqs = NR_EP93XX_IRQS, - .map_io = ts72xx_map_io, - .init_irq = ep93xx_init_irq, - .init_time = ep93xx_timer_init, - .init_machine = bk3_init_machine, - .restart = ep93xx_restart, -MACHINE_END diff --git a/arch/arm/mach-ep93xx/ts72xx.h b/arch/arm/mach-ep93xx/ts72xx.h deleted file mode 100644 index 00b4941d29c9..000000000000 --- a/arch/arm/mach-ep93xx/ts72xx.h +++ /dev/null @@ -1,94 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/arm/mach-ep93xx/include/mach/ts72xx.h - */ - -/* - * TS72xx memory map: - * - * virt phys size - * febff000 22000000 4K model number register (bits 0-2) - * febfe000 22400000 4K options register - * febfd000 22800000 4K options register #2 - * febfc000 23400000 4K CPLD version register - */ - -#ifndef __TS72XX_H_ -#define __TS72XX_H_ - -#define TS72XX_MODEL_PHYS_BASE 0x22000000 -#define TS72XX_MODEL_VIRT_BASE IOMEM(0xfebff000) -#define TS72XX_MODEL_SIZE 0x00001000 - -#define TS72XX_MODEL_TS7200 0x00 -#define TS72XX_MODEL_TS7250 0x01 -#define TS72XX_MODEL_TS7260 0x02 -#define TS72XX_MODEL_TS7300 0x03 -#define TS72XX_MODEL_TS7400 0x04 -#define TS72XX_MODEL_MASK 0x07 - - -#define TS72XX_OPTIONS_PHYS_BASE 0x22400000 -#define TS72XX_OPTIONS_VIRT_BASE IOMEM(0xfebfe000) -#define TS72XX_OPTIONS_SIZE 0x00001000 - -#define TS72XX_OPTIONS_COM2_RS485 0x02 -#define TS72XX_OPTIONS_MAX197 0x01 - - -#define TS72XX_OPTIONS2_PHYS_BASE 0x22800000 -#define TS72XX_OPTIONS2_VIRT_BASE IOMEM(0xfebfd000) -#define TS72XX_OPTIONS2_SIZE 0x00001000 - -#define TS72XX_OPTIONS2_TS9420 0x04 -#define TS72XX_OPTIONS2_TS9420_BOOT 0x02 - -#define TS72XX_CPLDVER_PHYS_BASE 0x23400000 -#define TS72XX_CPLDVER_VIRT_BASE IOMEM(0xfebfc000) -#define TS72XX_CPLDVER_SIZE 0x00001000 - -#ifndef __ASSEMBLY__ - -static inline int ts72xx_model(void) -{ - return __raw_readb(TS72XX_MODEL_VIRT_BASE) & TS72XX_MODEL_MASK; -} - -static inline int board_is_ts7200(void) -{ - return ts72xx_model() == TS72XX_MODEL_TS7200; -} - -static inline int board_is_ts7250(void) -{ - return ts72xx_model() == TS72XX_MODEL_TS7250; -} - -static inline int board_is_ts7260(void) -{ - return ts72xx_model() == TS72XX_MODEL_TS7260; -} - -static inline int board_is_ts7300(void) -{ - return ts72xx_model() == TS72XX_MODEL_TS7300; -} - -static inline int board_is_ts7400(void) -{ - return ts72xx_model() == TS72XX_MODEL_TS7400; -} - -static inline int is_max197_installed(void) -{ - return !!(__raw_readb(TS72XX_OPTIONS_VIRT_BASE) & - TS72XX_OPTIONS_MAX197); -} - -static inline int is_ts9420_installed(void) -{ - return !!(__raw_readb(TS72XX_OPTIONS2_VIRT_BASE) & - TS72XX_OPTIONS2_TS9420); -} -#endif -#endif /* __TS72XX_H_ */ diff --git a/arch/arm/mach-ep93xx/vision_ep9307.c b/arch/arm/mach-ep93xx/vision_ep9307.c deleted file mode 100644 index b3087b8eed3f..000000000000 --- a/arch/arm/mach-ep93xx/vision_ep9307.c +++ /dev/null @@ -1,321 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * arch/arm/mach-ep93xx/vision_ep9307.c - * Vision Engraving Systems EP9307 SoM support. - * - * Copyright (C) 2008-2011 Vision Engraving Systems - * H Hartley Sweeten - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "hardware.h" -#include -#include -#include "gpio-ep93xx.h" - -#include -#include -#include - -#include "soc.h" - -/************************************************************************* - * Static I/O mappings for the FPGA - *************************************************************************/ -#define VISION_PHYS_BASE EP93XX_CS7_PHYS_BASE -#define VISION_VIRT_BASE 0xfebff000 - -static struct map_desc vision_io_desc[] __initdata = { - { - .virtual = VISION_VIRT_BASE, - .pfn = __phys_to_pfn(VISION_PHYS_BASE), - .length = SZ_4K, - .type = MT_DEVICE, - }, -}; - -static void __init vision_map_io(void) -{ - ep93xx_map_io(); - - iotable_init(vision_io_desc, ARRAY_SIZE(vision_io_desc)); -} - -/************************************************************************* - * Ethernet - *************************************************************************/ -static struct ep93xx_eth_data vision_eth_data __initdata = { - .phy_id = 1, -}; - -/************************************************************************* - * Framebuffer - *************************************************************************/ -#define VISION_LCD_ENABLE EP93XX_GPIO_LINE_EGPIO1 - -static int vision_lcd_setup(struct platform_device *pdev) -{ - int err; - - err = gpio_request_one(VISION_LCD_ENABLE, GPIOF_INIT_HIGH, - dev_name(&pdev->dev)); - if (err) - return err; - - ep93xx_devcfg_clear_bits(EP93XX_SYSCON_DEVCFG_RAS | - EP93XX_SYSCON_DEVCFG_RASONP3 | - EP93XX_SYSCON_DEVCFG_EXVC); - - return 0; -} - -static void vision_lcd_teardown(struct platform_device *pdev) -{ - gpio_free(VISION_LCD_ENABLE); -} - -static void vision_lcd_blank(int blank_mode, struct fb_info *info) -{ - if (blank_mode) - gpio_set_value(VISION_LCD_ENABLE, 0); - else - gpio_set_value(VISION_LCD_ENABLE, 1); -} - -static struct ep93xxfb_mach_info ep93xxfb_info __initdata = { - .flags = EP93XXFB_USE_SDCSN0 | EP93XXFB_PCLK_FALLING, - .setup = vision_lcd_setup, - .teardown = vision_lcd_teardown, - .blank = vision_lcd_blank, -}; - - -/************************************************************************* - * GPIO Expanders - *************************************************************************/ -#define PCA9539_74_GPIO_BASE (EP93XX_GPIO_LINE_MAX + 1) -#define PCA9539_75_GPIO_BASE (PCA9539_74_GPIO_BASE + 16) -#define PCA9539_76_GPIO_BASE (PCA9539_75_GPIO_BASE + 16) -#define PCA9539_77_GPIO_BASE (PCA9539_76_GPIO_BASE + 16) - -static struct pca953x_platform_data pca953x_74_gpio_data = { - .gpio_base = PCA9539_74_GPIO_BASE, - .irq_base = EP93XX_BOARD_IRQ(0), -}; - -static struct pca953x_platform_data pca953x_75_gpio_data = { - .gpio_base = PCA9539_75_GPIO_BASE, - .irq_base = -1, -}; - -static struct pca953x_platform_data pca953x_76_gpio_data = { - .gpio_base = PCA9539_76_GPIO_BASE, - .irq_base = -1, -}; - -static struct pca953x_platform_data pca953x_77_gpio_data = { - .gpio_base = PCA9539_77_GPIO_BASE, - .irq_base = -1, -}; - -/************************************************************************* - * I2C Bus - *************************************************************************/ - -static struct i2c_board_info vision_i2c_info[] __initdata = { - { - I2C_BOARD_INFO("isl1208", 0x6f), - .irq = IRQ_EP93XX_EXT1, - }, { - I2C_BOARD_INFO("pca9539", 0x74), - .platform_data = &pca953x_74_gpio_data, - }, { - I2C_BOARD_INFO("pca9539", 0x75), - .platform_data = &pca953x_75_gpio_data, - }, { - I2C_BOARD_INFO("pca9539", 0x76), - .platform_data = &pca953x_76_gpio_data, - }, { - I2C_BOARD_INFO("pca9539", 0x77), - .platform_data = &pca953x_77_gpio_data, - }, -}; - -/************************************************************************* - * SPI CS4271 Audio Codec - *************************************************************************/ -static struct cs4271_platform_data vision_cs4271_data = { - /* Intentionally left blank */ -}; - -/************************************************************************* - * SPI Flash - *************************************************************************/ -static struct mtd_partition vision_spi_flash_partitions[] = { - { - .name = "SPI bootstrap", - .offset = 0, - .size = SZ_4K, - }, { - .name = "Bootstrap config", - .offset = MTDPART_OFS_APPEND, - .size = SZ_4K, - }, { - .name = "System config", - .offset = MTDPART_OFS_APPEND, - .size = MTDPART_SIZ_FULL, - }, -}; - -static struct flash_platform_data vision_spi_flash_data = { - .name = "SPI Flash", - .parts = vision_spi_flash_partitions, - .nr_parts = ARRAY_SIZE(vision_spi_flash_partitions), -}; - -/************************************************************************* - * SPI SD/MMC host - *************************************************************************/ -static struct mmc_spi_platform_data vision_spi_mmc_data = { - .detect_delay = 100, - .powerup_msecs = 100, - .ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34, - .caps2 = MMC_CAP2_RO_ACTIVE_HIGH, -}; - -static struct gpiod_lookup_table vision_spi_mmc_gpio_table = { - .dev_id = "mmc_spi.2", /* "mmc_spi @ CS2 */ - .table = { - /* Card detect */ - GPIO_LOOKUP_IDX("gpio-ep93xx.1", 7, NULL, 0, GPIO_ACTIVE_LOW), - /* Write protect */ - GPIO_LOOKUP_IDX("gpio-ep93xx.5", 0, NULL, 1, GPIO_ACTIVE_HIGH), - { }, - }, -}; - -/************************************************************************* - * SPI Bus - *************************************************************************/ -static struct spi_board_info vision_spi_board_info[] __initdata = { - { - .modalias = "cs4271", - .platform_data = &vision_cs4271_data, - .max_speed_hz = 6000000, - .bus_num = 0, - .chip_select = 0, - .mode = SPI_MODE_3, - }, { - .modalias = "sst25l", - .platform_data = &vision_spi_flash_data, - .max_speed_hz = 20000000, - .bus_num = 0, - .chip_select = 1, - .mode = SPI_MODE_3, - }, { - .modalias = "mmc_spi", - .platform_data = &vision_spi_mmc_data, - .max_speed_hz = 20000000, - .bus_num = 0, - .chip_select = 2, - .mode = SPI_MODE_3, - }, -}; - -static struct gpiod_lookup_table vision_spi_cs4271_gpio_table = { - .dev_id = "spi0.0", /* cs4271 @ CS0 */ - .table = { - /* RESET */ - GPIO_LOOKUP_IDX("H", 2, NULL, 0, GPIO_ACTIVE_LOW), - { }, - }, -}; - -static struct gpiod_lookup_table vision_spi_cs_gpio_table = { - .dev_id = "spi0", - .table = { - GPIO_LOOKUP_IDX("gpio-ep93xx.0", 6, "cs", 0, GPIO_ACTIVE_LOW), - GPIO_LOOKUP_IDX("gpio-ep93xx.0", 7, "cs", 1, GPIO_ACTIVE_LOW), - GPIO_LOOKUP_IDX("gpio-ep93xx.6", 2, "cs", 2, GPIO_ACTIVE_LOW), - { }, - }, -}; - -static struct ep93xx_spi_info vision_spi_master __initdata = { - .use_dma = 1, -}; - -/************************************************************************* - * I2S Audio - *************************************************************************/ -static struct platform_device vision_audio_device = { - .name = "edb93xx-audio", - .id = -1, -}; - -static void __init vision_register_i2s(void) -{ - ep93xx_register_i2s(); - platform_device_register(&vision_audio_device); -} - -/************************************************************************* - * Machine Initialization - *************************************************************************/ -static void __init vision_init_machine(void) -{ - ep93xx_init_devices(); - ep93xx_register_flash(2, EP93XX_CS6_PHYS_BASE, SZ_64M); - ep93xx_register_eth(&vision_eth_data, 1); - ep93xx_register_fb(&ep93xxfb_info); - ep93xx_register_pwm(1, 0); - - /* - * Request the gpio expander's interrupt gpio line now to prevent - * the kernel from doing a WARN in gpiolib:gpio_ensure_requested(). - */ - if (gpio_request_one(EP93XX_GPIO_LINE_F(7), GPIOF_DIR_IN, - "pca9539:74")) - pr_warn("cannot request interrupt gpio for pca9539:74\n"); - - vision_i2c_info[1].irq = gpio_to_irq(EP93XX_GPIO_LINE_F(7)); - - ep93xx_register_i2c(vision_i2c_info, - ARRAY_SIZE(vision_i2c_info)); - gpiod_add_lookup_table(&vision_spi_cs4271_gpio_table); - gpiod_add_lookup_table(&vision_spi_mmc_gpio_table); - gpiod_add_lookup_table(&vision_spi_cs_gpio_table); - ep93xx_register_spi(&vision_spi_master, vision_spi_board_info, - ARRAY_SIZE(vision_spi_board_info)); - vision_register_i2s(); -} - -MACHINE_START(VISION_EP9307, "Vision Engraving Systems EP9307") - /* Maintainer: H Hartley Sweeten */ - .atag_offset = 0x100, - .nr_irqs = NR_EP93XX_IRQS + EP93XX_BOARD_IRQS, - .map_io = vision_map_io, - .init_irq = ep93xx_init_irq, - .init_time = ep93xx_timer_init, - .init_machine = vision_init_machine, - .restart = ep93xx_restart, -MACHINE_END diff --git a/include/linux/platform_data/spi-ep93xx.h b/include/linux/platform_data/spi-ep93xx.h deleted file mode 100644 index b439f2a896e0..000000000000 --- a/include/linux/platform_data/spi-ep93xx.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_MACH_EP93XX_SPI_H -#define __ASM_MACH_EP93XX_SPI_H - -struct spi_device; - -/** - * struct ep93xx_spi_info - EP93xx specific SPI descriptor - * @use_dma: use DMA for the transfers - */ -struct ep93xx_spi_info { - bool use_dma; -}; - -#endif /* __ASM_MACH_EP93XX_SPI_H */ From 43528a72526152f17a918973b3b8b6f383b90f98 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:11:01 +0300 Subject: [PATCH 245/655] ARM: ep93xx: soc: drop defines Remove unnecessary defines, as we dropped board files. Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- include/linux/platform_data/eth-ep93xx.h | 10 ------- include/linux/platform_data/keypad-ep93xx.h | 32 --------------------- include/linux/soc/cirrus/ep93xx.h | 13 --------- 3 files changed, 55 deletions(-) delete mode 100644 include/linux/platform_data/eth-ep93xx.h delete mode 100644 include/linux/platform_data/keypad-ep93xx.h diff --git a/include/linux/platform_data/eth-ep93xx.h b/include/linux/platform_data/eth-ep93xx.h deleted file mode 100644 index 8eef637a804d..000000000000 --- a/include/linux/platform_data/eth-ep93xx.h +++ /dev/null @@ -1,10 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_PLATFORM_DATA_ETH_EP93XX -#define _LINUX_PLATFORM_DATA_ETH_EP93XX - -struct ep93xx_eth_data { - unsigned char dev_addr[6]; - unsigned char phy_id; -}; - -#endif diff --git a/include/linux/platform_data/keypad-ep93xx.h b/include/linux/platform_data/keypad-ep93xx.h deleted file mode 100644 index 3054fced8509..000000000000 --- a/include/linux/platform_data/keypad-ep93xx.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __KEYPAD_EP93XX_H -#define __KEYPAD_EP93XX_H - -struct matrix_keymap_data; - -/* flags for the ep93xx_keypad driver */ -#define EP93XX_KEYPAD_DISABLE_3_KEY (1<<0) /* disable 3-key reset */ -#define EP93XX_KEYPAD_DIAG_MODE (1<<1) /* diagnostic mode */ -#define EP93XX_KEYPAD_BACK_DRIVE (1<<2) /* back driving mode */ -#define EP93XX_KEYPAD_TEST_MODE (1<<3) /* scan only column 0 */ -#define EP93XX_KEYPAD_AUTOREPEAT (1<<4) /* enable key autorepeat */ - -/** - * struct ep93xx_keypad_platform_data - platform specific device structure - * @keymap_data: pointer to &matrix_keymap_data - * @debounce: debounce start count; terminal count is 0xff - * @prescale: row/column counter pre-scaler load value - * @flags: see above - */ -struct ep93xx_keypad_platform_data { - struct matrix_keymap_data *keymap_data; - unsigned int debounce; - unsigned int prescale; - unsigned int flags; - unsigned int clk_rate; -}; - -#define EP93XX_MATRIX_ROWS (8) -#define EP93XX_MATRIX_COLS (8) - -#endif /* __KEYPAD_EP93XX_H */ diff --git a/include/linux/soc/cirrus/ep93xx.h b/include/linux/soc/cirrus/ep93xx.h index 142c33a2d7db..3e6cf2b25a97 100644 --- a/include/linux/soc/cirrus/ep93xx.h +++ b/include/linux/soc/cirrus/ep93xx.h @@ -2,7 +2,6 @@ #ifndef _SOC_EP93XX_H #define _SOC_EP93XX_H -struct platform_device; struct regmap; struct spinlock_t; @@ -36,16 +35,4 @@ struct ep93xx_regmap_adev { #define to_ep93xx_regmap_adev(_adev) \ container_of((_adev), struct ep93xx_regmap_adev, adev) -#ifdef CONFIG_ARCH_EP93XX -int ep93xx_i2s_acquire(void); -void ep93xx_i2s_release(void); -unsigned int ep93xx_chip_revision(void); - -#else -static inline int ep93xx_i2s_acquire(void) { return 0; } -static inline void ep93xx_i2s_release(void) {} -static inline unsigned int ep93xx_chip_revision(void) { return 0; } - -#endif - #endif From 29ed9cec87253eb3dcc597a681961b3cc9f52f40 Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Mon, 9 Sep 2024 11:11:02 +0300 Subject: [PATCH 246/655] ASoC: cirrus: edb93xx: Delete driver Can be replaced with "simple-audio-card" for the rates up to 50kHz, refer to commit "ARM: dts: ep93xx: Add EDB9302 DT". Signed-off-by: Alexander Sverdlin Signed-off-by: Nikita Shubin Acked-by: Mark Brown Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- sound/soc/cirrus/Kconfig | 9 --- sound/soc/cirrus/Makefile | 4 -- sound/soc/cirrus/edb93xx.c | 116 ------------------------------------- 3 files changed, 129 deletions(-) delete mode 100644 sound/soc/cirrus/edb93xx.c diff --git a/sound/soc/cirrus/Kconfig b/sound/soc/cirrus/Kconfig index 38a83c4dcc2d..97def4e53fbc 100644 --- a/sound/soc/cirrus/Kconfig +++ b/sound/soc/cirrus/Kconfig @@ -31,12 +31,3 @@ config SND_EP93XX_SOC_I2S_WATCHDOG endif # if SND_EP93XX_SOC_I2S -config SND_EP93XX_SOC_EDB93XX - tristate "SoC Audio support for Cirrus Logic EDB93xx boards" - depends on SND_EP93XX_SOC && (MACH_EDB9301 || MACH_EDB9302 || MACH_EDB9302A || MACH_EDB9307A || MACH_EDB9315A) - select SND_EP93XX_SOC_I2S - select SND_SOC_CS4271_I2C if I2C - select SND_SOC_CS4271_SPI if SPI_MASTER - help - Say Y or M here if you want to add support for I2S audio on the - Cirrus Logic EDB93xx boards. diff --git a/sound/soc/cirrus/Makefile b/sound/soc/cirrus/Makefile index ad606b293715..61d8cf64e859 100644 --- a/sound/soc/cirrus/Makefile +++ b/sound/soc/cirrus/Makefile @@ -6,7 +6,3 @@ snd-soc-ep93xx-i2s-y := ep93xx-i2s.o obj-$(CONFIG_SND_EP93XX_SOC) += snd-soc-ep93xx.o obj-$(CONFIG_SND_EP93XX_SOC_I2S) += snd-soc-ep93xx-i2s.o -# EP93XX Machine Support -snd-soc-edb93xx-y := edb93xx.o - -obj-$(CONFIG_SND_EP93XX_SOC_EDB93XX) += snd-soc-edb93xx.o diff --git a/sound/soc/cirrus/edb93xx.c b/sound/soc/cirrus/edb93xx.c deleted file mode 100644 index 8bb67d7d2b4b..000000000000 --- a/sound/soc/cirrus/edb93xx.c +++ /dev/null @@ -1,116 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * SoC audio for EDB93xx - * - * Copyright (c) 2010 Alexander Sverdlin - * - * This driver support CS4271 codec being master or slave, working - * in control port mode, connected either via SPI or I2C. - * The data format accepted is I2S or left-justified. - * DAPM support not implemented. - */ - -#include -#include -#include -#include -#include -#include -#include - -static int edb93xx_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) -{ - struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream); - struct snd_soc_dai *codec_dai = snd_soc_rtd_to_codec(rtd, 0); - struct snd_soc_dai *cpu_dai = snd_soc_rtd_to_cpu(rtd, 0); - int err; - unsigned int mclk_rate; - unsigned int rate = params_rate(params); - - /* - * According to CS4271 datasheet we use MCLK/LRCK=256 for - * rates below 50kHz and 128 for higher sample rates - */ - if (rate < 50000) - mclk_rate = rate * 64 * 4; - else - mclk_rate = rate * 64 * 2; - - err = snd_soc_dai_set_sysclk(codec_dai, 0, mclk_rate, - SND_SOC_CLOCK_IN); - if (err) - return err; - - return snd_soc_dai_set_sysclk(cpu_dai, 0, mclk_rate, - SND_SOC_CLOCK_OUT); -} - -static const struct snd_soc_ops edb93xx_ops = { - .hw_params = edb93xx_hw_params, -}; - -SND_SOC_DAILINK_DEFS(hifi, - DAILINK_COMP_ARRAY(COMP_CPU("ep93xx-i2s")), - DAILINK_COMP_ARRAY(COMP_CODEC("spi0.0", "cs4271-hifi")), - DAILINK_COMP_ARRAY(COMP_PLATFORM("ep93xx-i2s"))); - -static struct snd_soc_dai_link edb93xx_dai = { - .name = "CS4271", - .stream_name = "CS4271 HiFi", - .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | - SND_SOC_DAIFMT_CBC_CFC, - .ops = &edb93xx_ops, - SND_SOC_DAILINK_REG(hifi), -}; - -static struct snd_soc_card snd_soc_edb93xx = { - .name = "EDB93XX", - .owner = THIS_MODULE, - .dai_link = &edb93xx_dai, - .num_links = 1, -}; - -static int edb93xx_probe(struct platform_device *pdev) -{ - struct snd_soc_card *card = &snd_soc_edb93xx; - int ret; - - ret = ep93xx_i2s_acquire(); - if (ret) - return ret; - - card->dev = &pdev->dev; - - ret = snd_soc_register_card(card); - if (ret) { - dev_err(&pdev->dev, "snd_soc_register_card() failed: %d\n", - ret); - ep93xx_i2s_release(); - } - - return ret; -} - -static void edb93xx_remove(struct platform_device *pdev) -{ - struct snd_soc_card *card = platform_get_drvdata(pdev); - - snd_soc_unregister_card(card); - ep93xx_i2s_release(); -} - -static struct platform_driver edb93xx_driver = { - .driver = { - .name = "edb93xx-audio", - }, - .probe = edb93xx_probe, - .remove_new = edb93xx_remove, -}; - -module_platform_driver(edb93xx_driver); - -MODULE_AUTHOR("Alexander Sverdlin "); -MODULE_DESCRIPTION("ALSA SoC EDB93xx"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("platform:edb93xx-audio"); From a015b1828653b591de0aa5303c0dbc4235935f94 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:11:03 +0300 Subject: [PATCH 247/655] dmaengine: cirrus: remove platform code Remove DMA platform header, from now on we use device tree for DMA clients. Acked-by: Vinod Koul Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Signed-off-by: Arnd Bergmann --- drivers/dma/ep93xx_dma.c | 48 ++++++++++- include/linux/platform_data/dma-ep93xx.h | 100 ----------------------- 2 files changed, 46 insertions(+), 102 deletions(-) delete mode 100644 include/linux/platform_data/dma-ep93xx.h diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c index 17c8e2badee2..43c4241af7f5 100644 --- a/drivers/dma/ep93xx_dma.c +++ b/drivers/dma/ep93xx_dma.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -25,8 +26,6 @@ #include #include -#include - #include "dmaengine.h" /* M2P registers */ @@ -106,6 +105,26 @@ #define DMA_MAX_CHAN_BYTES 0xffff #define DMA_MAX_CHAN_DESCRIPTORS 32 +/* + * M2P channels. + * + * Note that these values are also directly used for setting the PPALLOC + * register. + */ +#define EP93XX_DMA_I2S1 0 +#define EP93XX_DMA_I2S2 1 +#define EP93XX_DMA_AAC1 2 +#define EP93XX_DMA_AAC2 3 +#define EP93XX_DMA_AAC3 4 +#define EP93XX_DMA_I2S3 5 +#define EP93XX_DMA_UART1 6 +#define EP93XX_DMA_UART2 7 +#define EP93XX_DMA_UART3 8 +#define EP93XX_DMA_IRDA 9 +/* M2M channels */ +#define EP93XX_DMA_SSP 10 +#define EP93XX_DMA_IDE 11 + enum ep93xx_dma_type { M2P_DMA, M2M_DMA, @@ -242,6 +261,31 @@ static struct ep93xx_dma_chan *to_ep93xx_dma_chan(struct dma_chan *chan) return container_of(chan, struct ep93xx_dma_chan, chan); } +static inline bool ep93xx_dma_chan_is_m2p(struct dma_chan *chan) +{ + if (device_is_compatible(chan->device->dev, "cirrus,ep9301-dma-m2p")) + return true; + + return !strcmp(dev_name(chan->device->dev), "ep93xx-dma-m2p"); +} + +/* + * ep93xx_dma_chan_direction - returns direction the channel can be used + * + * This function can be used in filter functions to find out whether the + * channel supports given DMA direction. Only M2P channels have such + * limitation, for M2M channels the direction is configurable. + */ +static inline enum dma_transfer_direction +ep93xx_dma_chan_direction(struct dma_chan *chan) +{ + if (!ep93xx_dma_chan_is_m2p(chan)) + return DMA_TRANS_NONE; + + /* even channels are for TX, odd for RX */ + return (chan->chan_id % 2 == 0) ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM; +} + /** * ep93xx_dma_set_active - set new active descriptor chain * @edmac: channel diff --git a/include/linux/platform_data/dma-ep93xx.h b/include/linux/platform_data/dma-ep93xx.h deleted file mode 100644 index 9ec5cdd5a1eb..000000000000 --- a/include/linux/platform_data/dma-ep93xx.h +++ /dev/null @@ -1,100 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_ARCH_DMA_H -#define __ASM_ARCH_DMA_H - -#include -#include -#include -#include -#include -#include - -/* - * M2P channels. - * - * Note that these values are also directly used for setting the PPALLOC - * register. - */ -#define EP93XX_DMA_I2S1 0 -#define EP93XX_DMA_I2S2 1 -#define EP93XX_DMA_AAC1 2 -#define EP93XX_DMA_AAC2 3 -#define EP93XX_DMA_AAC3 4 -#define EP93XX_DMA_I2S3 5 -#define EP93XX_DMA_UART1 6 -#define EP93XX_DMA_UART2 7 -#define EP93XX_DMA_UART3 8 -#define EP93XX_DMA_IRDA 9 -/* M2M channels */ -#define EP93XX_DMA_SSP 10 -#define EP93XX_DMA_IDE 11 - -/** - * struct ep93xx_dma_data - configuration data for the EP93xx dmaengine - * @port: peripheral which is requesting the channel - * @direction: TX/RX channel - * @name: optional name for the channel, this is displayed in /proc/interrupts - * - * This information is passed as private channel parameter in a filter - * function. Note that this is only needed for slave/cyclic channels. For - * memcpy channels %NULL data should be passed. - */ -struct ep93xx_dma_data { - int port; - enum dma_transfer_direction direction; - const char *name; -}; - -/** - * struct ep93xx_dma_chan_data - platform specific data for a DMA channel - * @name: name of the channel, used for getting the right clock for the channel - * @base: mapped registers - * @irq: interrupt number used by this channel - */ -struct ep93xx_dma_chan_data { - const char *name; - void __iomem *base; - int irq; -}; - -/** - * struct ep93xx_dma_platform_data - platform data for the dmaengine driver - * @channels: array of channels which are passed to the driver - * @num_channels: number of channels in the array - * - * This structure is passed to the DMA engine driver via platform data. For - * M2P channels, contract is that even channels are for TX and odd for RX. - * There is no requirement for the M2M channels. - */ -struct ep93xx_dma_platform_data { - struct ep93xx_dma_chan_data *channels; - size_t num_channels; -}; - -static inline bool ep93xx_dma_chan_is_m2p(struct dma_chan *chan) -{ - if (device_is_compatible(chan->device->dev, "cirrus,ep9301-dma-m2p")) - return true; - - return !strcmp(dev_name(chan->device->dev), "ep93xx-dma-m2p"); -} - -/** - * ep93xx_dma_chan_direction - returns direction the channel can be used - * @chan: channel - * - * This function can be used in filter functions to find out whether the - * channel supports given DMA direction. Only M2P channels have such - * limitation, for M2M channels the direction is configurable. - */ -static inline enum dma_transfer_direction -ep93xx_dma_chan_direction(struct dma_chan *chan) -{ - if (!ep93xx_dma_chan_is_m2p(chan)) - return DMA_TRANS_NONE; - - /* even channels are for TX, odd for RX */ - return (chan->chan_id % 2 == 0) ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM; -} - -#endif /* __ASM_ARCH_DMA_H */ From e2a79105903a9122c2717515454f9c05dd9081e4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Sep 2024 12:04:50 +0000 Subject: [PATCH 248/655] clk: ep93xx: add module license When configured as a lodable module, this driver produces a build time warning: ERROR: modpost: missing MODULE_LICENSE() in drivers/clk/clk-ep93xx.o All all three tags for license, author and description based on the header. Acked-by: Alexander Sverdlin Signed-off-by: Arnd Bergmann --- drivers/clk/clk-ep93xx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/clk/clk-ep93xx.c b/drivers/clk/clk-ep93xx.c index 4727c06a59ba..26317623d9d5 100644 --- a/drivers/clk/clk-ep93xx.c +++ b/drivers/clk/clk-ep93xx.c @@ -844,3 +844,7 @@ static struct auxiliary_driver ep93xx_clk_driver = { .id_table = ep93xx_clk_ids, }; module_auxiliary_driver(ep93xx_clk_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Nikita Shubin "); +MODULE_DESCRIPTION("Clock control for Cirrus EP93xx chips"); From 53cf1dc480a5bdebad457cf6754ed3018b2533ba Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 11 Sep 2024 10:39:15 +0300 Subject: [PATCH 249/655] clk: ep93xx: Fix off by one in ep93xx_div_recalc_rate() The psc->div[] array has psc->num_div elements. These values come from when we call clk_hw_register_div(). It's adc_divisors and ARRAY_SIZE(adc_divisors)) and so on. So this condition needs to be >= instead of > to prevent an out of bounds read. Fixes: 9645ccc7bd7a ("ep93xx: clock: convert in-place to COMMON_CLK") Signed-off-by: Dan Carpenter Acked-by: Alexander Sverdlin Reviewed-by: Nikita Shubin Signed-off-by: Alexander Sverdlin Link: https://lore.kernel.org/r/1caf01ad4c0a8069535813c26c7f0b8ea011155e.camel@linaro.org [arnd: the original patch was for arch/arm/mach-ep93xx/clock.c, but the same bug ended up in arch/arm/mach-ep93xx/clock.c. Signed-off-by: Arnd Bergmann --- drivers/clk/clk-ep93xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/clk-ep93xx.c b/drivers/clk/clk-ep93xx.c index 26317623d9d5..f888aed79b11 100644 --- a/drivers/clk/clk-ep93xx.c +++ b/drivers/clk/clk-ep93xx.c @@ -383,7 +383,7 @@ static unsigned long ep93xx_div_recalc_rate(struct clk_hw *hw, regmap_read(priv->map, clk->reg, &val); index = (val & clk->mask) >> clk->shift; - if (index > clk->num_div) + if (index >= clk->num_div) return 0; return DIV_ROUND_CLOSEST(parent_rate, clk->div[index]); From ba091a81f8237a6db1ccff37c2485791788107dd Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 12 Sep 2024 13:19:12 +0000 Subject: [PATCH 250/655] spi: ep93xx: update kerneldoc comments for ep93xx_spi Two fields got removed but are still documented: drivers/spi/spi-ep93xx.c:98: warning: Excess struct member 'dma_rx_data' description in 'ep93xx_spi' drivers/spi/spi-ep93xx.c:98: warning: Excess struct member 'dma_tx_data' description in 'ep93xx_spi' Fixes: 3cfe73256905 ("spi: ep93xx: add DT support for Cirrus EP93xx") Reported-by: kernel test robot Signed-off-by: Arnd Bergmann --- drivers/spi/spi-ep93xx.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/spi/spi-ep93xx.c b/drivers/spi/spi-ep93xx.c index ffbe0d522bce..dc6bdc74643d 100644 --- a/drivers/spi/spi-ep93xx.c +++ b/drivers/spi/spi-ep93xx.c @@ -76,8 +76,6 @@ * frame decreases this level and sending one frame increases it. * @dma_rx: RX DMA channel * @dma_tx: TX DMA channel - * @dma_rx_data: RX parameters passed to the DMA engine - * @dma_tx_data: TX parameters passed to the DMA engine * @rx_sgt: sg table for RX transfers * @tx_sgt: sg table for TX transfers * @zeropage: dummy page used as RX buffer when only TX buffer is passed in by From 8d8081cecfb9940beeb4a8a700db34e615a96056 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 5 Sep 2024 15:35:46 -0700 Subject: [PATCH 251/655] cxl: Move mailbox related bits to the same context Create a new 'struct cxl_mailbox' and move all mailbox related bits to it. This allows isolation of all CXL mailbox data in order to export some of the calls to external kernel callers and avoid exporting of CXL driver specific bits such has device states. The allocation of 'struct cxl_mailbox' is also split out with cxl_mailbox_init() so the mailbox can be created independently. Reviewed-by: Jonathan Cameron Reviewed-by: Alejandro Lucero Reviewed-by: Fan Ni Reviewed-by: Alison Schofield Reviewed-by: Ira Weiny Link: https://patch.msgid.link/20240905223711.1990186-3-dave.jiang@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/mbox.c | 57 ++++++++++++++++++-------- drivers/cxl/core/memdev.c | 18 +++++---- drivers/cxl/cxlmem.h | 21 +++++----- drivers/cxl/pci.c | 78 ++++++++++++++++++++++++------------ drivers/cxl/pmem.c | 4 +- include/cxl/mailbox.h | 28 +++++++++++++ tools/testing/cxl/test/mem.c | 44 +++++++++++++++----- 7 files changed, 177 insertions(+), 73 deletions(-) create mode 100644 include/cxl/mailbox.h diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 0a913b30c7fc..3d7ee6a12915 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -244,16 +244,17 @@ static const char *cxl_mem_opcode_to_name(u16 opcode) int cxl_internal_send_cmd(struct cxl_memdev_state *mds, struct cxl_mbox_cmd *mbox_cmd) { + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; size_t out_size, min_out; int rc; - if (mbox_cmd->size_in > mds->payload_size || - mbox_cmd->size_out > mds->payload_size) + if (mbox_cmd->size_in > cxl_mbox->payload_size || + mbox_cmd->size_out > cxl_mbox->payload_size) return -E2BIG; out_size = mbox_cmd->size_out; min_out = mbox_cmd->min_out; - rc = mds->mbox_send(mds, mbox_cmd); + rc = cxl_mbox->mbox_send(cxl_mbox, mbox_cmd); /* * EIO is reserved for a payload size mismatch and mbox_send() * may not return this error. @@ -353,6 +354,7 @@ static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox, struct cxl_memdev_state *mds, u16 opcode, size_t in_size, size_t out_size, u64 in_payload) { + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; *mbox = (struct cxl_mbox_cmd) { .opcode = opcode, .size_in = in_size, @@ -374,7 +376,7 @@ static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox, /* Prepare to handle a full payload for variable sized output */ if (out_size == CXL_VARIABLE_PAYLOAD) - mbox->size_out = mds->payload_size; + mbox->size_out = cxl_mbox->payload_size; else mbox->size_out = out_size; @@ -398,6 +400,8 @@ static int cxl_to_mem_cmd_raw(struct cxl_mem_command *mem_cmd, const struct cxl_send_command *send_cmd, struct cxl_memdev_state *mds) { + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; + if (send_cmd->raw.rsvd) return -EINVAL; @@ -406,7 +410,7 @@ static int cxl_to_mem_cmd_raw(struct cxl_mem_command *mem_cmd, * gets passed along without further checking, so it must be * validated here. */ - if (send_cmd->out.size > mds->payload_size) + if (send_cmd->out.size > cxl_mbox->payload_size) return -EINVAL; if (!cxl_mem_raw_command_allowed(send_cmd->raw.opcode)) @@ -494,6 +498,7 @@ static int cxl_validate_cmd_from_user(struct cxl_mbox_cmd *mbox_cmd, struct cxl_memdev_state *mds, const struct cxl_send_command *send_cmd) { + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; struct cxl_mem_command mem_cmd; int rc; @@ -505,7 +510,7 @@ static int cxl_validate_cmd_from_user(struct cxl_mbox_cmd *mbox_cmd, * supports, but output can be arbitrarily large (simply write out as * much data as the hardware provides). */ - if (send_cmd->in.size > mds->payload_size) + if (send_cmd->in.size > cxl_mbox->payload_size) return -EINVAL; /* Sanitize and construct a cxl_mem_command */ @@ -591,6 +596,7 @@ static int handle_mailbox_cmd_from_user(struct cxl_memdev_state *mds, u64 out_payload, s32 *size_out, u32 *retval) { + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; struct device *dev = mds->cxlds.dev; int rc; @@ -601,7 +607,7 @@ static int handle_mailbox_cmd_from_user(struct cxl_memdev_state *mds, cxl_mem_opcode_to_name(mbox_cmd->opcode), mbox_cmd->opcode, mbox_cmd->size_in); - rc = mds->mbox_send(mds, mbox_cmd); + rc = cxl_mbox->mbox_send(cxl_mbox, mbox_cmd); if (rc) goto out; @@ -659,11 +665,12 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s) static int cxl_xfer_log(struct cxl_memdev_state *mds, uuid_t *uuid, u32 *size, u8 *out) { + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; u32 remaining = *size; u32 offset = 0; while (remaining) { - u32 xfer_size = min_t(u32, remaining, mds->payload_size); + u32 xfer_size = min_t(u32, remaining, cxl_mbox->payload_size); struct cxl_mbox_cmd mbox_cmd; struct cxl_mbox_get_log log; int rc; @@ -752,17 +759,18 @@ static void cxl_walk_cel(struct cxl_memdev_state *mds, size_t size, u8 *cel) static struct cxl_mbox_get_supported_logs *cxl_get_gsl(struct cxl_memdev_state *mds) { + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; struct cxl_mbox_get_supported_logs *ret; struct cxl_mbox_cmd mbox_cmd; int rc; - ret = kvmalloc(mds->payload_size, GFP_KERNEL); + ret = kvmalloc(cxl_mbox->payload_size, GFP_KERNEL); if (!ret) return ERR_PTR(-ENOMEM); mbox_cmd = (struct cxl_mbox_cmd) { .opcode = CXL_MBOX_OP_GET_SUPPORTED_LOGS, - .size_out = mds->payload_size, + .size_out = cxl_mbox->payload_size, .payload_out = ret, /* At least the record number field must be valid */ .min_out = 2, @@ -910,6 +918,7 @@ static int cxl_clear_event_record(struct cxl_memdev_state *mds, enum cxl_event_log_type log, struct cxl_get_event_payload *get_pl) { + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; struct cxl_mbox_clear_event_payload *payload; u16 total = le16_to_cpu(get_pl->record_count); u8 max_handles = CXL_CLEAR_EVENT_MAX_HANDLES; @@ -920,8 +929,8 @@ static int cxl_clear_event_record(struct cxl_memdev_state *mds, int i; /* Payload size may limit the max handles */ - if (pl_size > mds->payload_size) { - max_handles = (mds->payload_size - sizeof(*payload)) / + if (pl_size > cxl_mbox->payload_size) { + max_handles = (cxl_mbox->payload_size - sizeof(*payload)) / sizeof(__le16); pl_size = struct_size(payload, handles, max_handles); } @@ -979,6 +988,7 @@ static int cxl_clear_event_record(struct cxl_memdev_state *mds, static void cxl_mem_get_records_log(struct cxl_memdev_state *mds, enum cxl_event_log_type type) { + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; struct cxl_memdev *cxlmd = mds->cxlds.cxlmd; struct device *dev = mds->cxlds.dev; struct cxl_get_event_payload *payload; @@ -995,7 +1005,7 @@ static void cxl_mem_get_records_log(struct cxl_memdev_state *mds, .payload_in = &log_type, .size_in = sizeof(log_type), .payload_out = payload, - .size_out = mds->payload_size, + .size_out = cxl_mbox->payload_size, .min_out = struct_size(payload, records, 0), }; @@ -1327,6 +1337,7 @@ int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, struct cxl_region *cxlr) { struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; struct cxl_mbox_poison_out *po; struct cxl_mbox_poison_in pi; int nr_records = 0; @@ -1345,7 +1356,7 @@ int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, .opcode = CXL_MBOX_OP_GET_POISON, .size_in = sizeof(pi), .payload_in = &pi, - .size_out = mds->payload_size, + .size_out = cxl_mbox->payload_size, .payload_out = po, .min_out = struct_size(po, record, 0), }; @@ -1381,7 +1392,9 @@ static void free_poison_buf(void *buf) /* Get Poison List output buffer is protected by mds->poison.lock */ static int cxl_poison_alloc_buf(struct cxl_memdev_state *mds) { - mds->poison.list_out = kvmalloc(mds->payload_size, GFP_KERNEL); + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; + + mds->poison.list_out = kvmalloc(cxl_mbox->payload_size, GFP_KERNEL); if (!mds->poison.list_out) return -ENOMEM; @@ -1407,6 +1420,19 @@ int cxl_poison_state_init(struct cxl_memdev_state *mds) } EXPORT_SYMBOL_NS_GPL(cxl_poison_state_init, CXL); +int cxl_mailbox_init(struct cxl_mailbox *cxl_mbox, struct device *host) +{ + if (!cxl_mbox || !host) + return -EINVAL; + + cxl_mbox->host = host; + mutex_init(&cxl_mbox->mbox_mutex); + rcuwait_init(&cxl_mbox->mbox_wait); + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_mailbox_init, CXL); + struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev) { struct cxl_memdev_state *mds; @@ -1417,7 +1443,6 @@ struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev) return ERR_PTR(-ENOMEM); } - mutex_init(&mds->mbox_mutex); mutex_init(&mds->event.log_lock); mds->cxlds.dev = dev; mds->cxlds.reg_map.host = dev; diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index 0277726afd04..05bb84cb1274 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -58,7 +58,7 @@ static ssize_t payload_max_show(struct device *dev, if (!mds) return sysfs_emit(buf, "\n"); - return sysfs_emit(buf, "%zu\n", mds->payload_size); + return sysfs_emit(buf, "%zu\n", cxlds->cxl_mbox.payload_size); } static DEVICE_ATTR_RO(payload_max); @@ -124,15 +124,16 @@ static ssize_t security_state_show(struct device *dev, { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox; struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); unsigned long state = mds->security.state; int rc = 0; /* sync with latest submission state */ - mutex_lock(&mds->mbox_mutex); + mutex_lock(&cxl_mbox->mbox_mutex); if (mds->security.sanitize_active) rc = sysfs_emit(buf, "sanitize\n"); - mutex_unlock(&mds->mbox_mutex); + mutex_unlock(&cxl_mbox->mbox_mutex); if (rc) return rc; @@ -829,12 +830,13 @@ static enum fw_upload_err cxl_fw_prepare(struct fw_upload *fwl, const u8 *data, { struct cxl_memdev_state *mds = fwl->dd_handle; struct cxl_mbox_transfer_fw *transfer; + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; if (!size) return FW_UPLOAD_ERR_INVALID_SIZE; mds->fw.oneshot = struct_size(transfer, data, size) < - mds->payload_size; + cxl_mbox->payload_size; if (cxl_mem_get_fw_info(mds)) return FW_UPLOAD_ERR_HW_ERROR; @@ -854,6 +856,7 @@ static enum fw_upload_err cxl_fw_write(struct fw_upload *fwl, const u8 *data, { struct cxl_memdev_state *mds = fwl->dd_handle; struct cxl_dev_state *cxlds = &mds->cxlds; + struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox; struct cxl_memdev *cxlmd = cxlds->cxlmd; struct cxl_mbox_transfer_fw *transfer; struct cxl_mbox_cmd mbox_cmd; @@ -877,7 +880,7 @@ static enum fw_upload_err cxl_fw_write(struct fw_upload *fwl, const u8 *data, * sizeof(*transfer) is 128. These constraints imply that @cur_size * will always be 128b aligned. */ - cur_size = min_t(size_t, size, mds->payload_size - sizeof(*transfer)); + cur_size = min_t(size_t, size, cxl_mbox->payload_size - sizeof(*transfer)); remaining = size - cur_size; size_in = struct_size(transfer, data, cur_size); @@ -1059,16 +1062,17 @@ EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, CXL); static void sanitize_teardown_notifier(void *data) { struct cxl_memdev_state *mds = data; + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; struct kernfs_node *state; /* * Prevent new irq triggered invocations of the workqueue and * flush inflight invocations. */ - mutex_lock(&mds->mbox_mutex); + mutex_lock(&cxl_mbox->mbox_mutex); state = mds->security.sanitize_node; mds->security.sanitize_node = NULL; - mutex_unlock(&mds->mbox_mutex); + mutex_unlock(&cxl_mbox->mbox_mutex); cancel_delayed_work_sync(&mds->security.poll_dwork); sysfs_put(state); diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index a81a8982bf93..4ead415f8971 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -3,11 +3,12 @@ #ifndef __CXL_MEM_H__ #define __CXL_MEM_H__ #include +#include #include #include -#include #include #include +#include #include "cxl.h" /* CXL 2.0 8.2.8.5.1.1 Memory Device Status Register */ @@ -424,6 +425,7 @@ struct cxl_dpa_perf { * @ram_res: Active Volatile memory capacity configuration * @serial: PCIe Device Serial Number * @type: Generic Memory Class device or Vendor Specific Memory device + * @cxl_mbox: CXL mailbox context */ struct cxl_dev_state { struct device *dev; @@ -438,8 +440,14 @@ struct cxl_dev_state { struct resource ram_res; u64 serial; enum cxl_devtype type; + struct cxl_mailbox cxl_mbox; }; +static inline struct cxl_dev_state *mbox_to_cxlds(struct cxl_mailbox *cxl_mbox) +{ + return dev_get_drvdata(cxl_mbox->host); +} + /** * struct cxl_memdev_state - Generic Type-3 Memory Device Class driver data * @@ -448,11 +456,8 @@ struct cxl_dev_state { * the functionality related to that like Identify Memory Device and Get * Partition Info * @cxlds: Core driver state common across Type-2 and Type-3 devices - * @payload_size: Size of space for payload - * (CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register) * @lsa_size: Size of Label Storage Area * (CXL 2.0 8.2.9.5.1.1 Identify Memory Device) - * @mbox_mutex: Mutex to synchronize mailbox access. * @firmware_version: Firmware version for the memory device. * @enabled_cmds: Hardware commands found enabled in CEL. * @exclusive_cmds: Commands that are kernel-internal only @@ -470,17 +475,13 @@ struct cxl_dev_state { * @poison: poison driver state info * @security: security driver state info * @fw: firmware upload / activation state - * @mbox_wait: RCU wait for mbox send completely - * @mbox_send: @dev specific transport for transmitting mailbox commands * * See CXL 3.0 8.2.9.8.2 Capacity Configuration and Label Storage for * details on capacity parameters. */ struct cxl_memdev_state { struct cxl_dev_state cxlds; - size_t payload_size; size_t lsa_size; - struct mutex mbox_mutex; /* Protects device mailbox and firmware */ char firmware_version[0x10]; DECLARE_BITMAP(enabled_cmds, CXL_MEM_COMMAND_ID_MAX); DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX); @@ -500,10 +501,6 @@ struct cxl_memdev_state { struct cxl_poison_state poison; struct cxl_security_state security; struct cxl_fw_state fw; - - struct rcuwait mbox_wait; - int (*mbox_send)(struct cxl_memdev_state *mds, - struct cxl_mbox_cmd *cmd); }; static inline struct cxl_memdev_state * diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 26e75499abdd..2fc7fd252c2b 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "cxlmem.h" #include "cxlpci.h" #include "cxl.h" @@ -124,6 +125,7 @@ static irqreturn_t cxl_pci_mbox_irq(int irq, void *id) u16 opcode; struct cxl_dev_id *dev_id = id; struct cxl_dev_state *cxlds = dev_id->cxlds; + struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox; struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); if (!cxl_mbox_background_complete(cxlds)) @@ -132,13 +134,13 @@ static irqreturn_t cxl_pci_mbox_irq(int irq, void *id) reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET); opcode = FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_OPCODE_MASK, reg); if (opcode == CXL_MBOX_OP_SANITIZE) { - mutex_lock(&mds->mbox_mutex); + mutex_lock(&cxl_mbox->mbox_mutex); if (mds->security.sanitize_node) mod_delayed_work(system_wq, &mds->security.poll_dwork, 0); - mutex_unlock(&mds->mbox_mutex); + mutex_unlock(&cxl_mbox->mbox_mutex); } else { /* short-circuit the wait in __cxl_pci_mbox_send_cmd() */ - rcuwait_wake_up(&mds->mbox_wait); + rcuwait_wake_up(&cxl_mbox->mbox_wait); } return IRQ_HANDLED; @@ -152,8 +154,9 @@ static void cxl_mbox_sanitize_work(struct work_struct *work) struct cxl_memdev_state *mds = container_of(work, typeof(*mds), security.poll_dwork.work); struct cxl_dev_state *cxlds = &mds->cxlds; + struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox; - mutex_lock(&mds->mbox_mutex); + mutex_lock(&cxl_mbox->mbox_mutex); if (cxl_mbox_background_complete(cxlds)) { mds->security.poll_tmo_secs = 0; if (mds->security.sanitize_node) @@ -167,12 +170,12 @@ static void cxl_mbox_sanitize_work(struct work_struct *work) mds->security.poll_tmo_secs = min(15 * 60, timeout); schedule_delayed_work(&mds->security.poll_dwork, timeout * HZ); } - mutex_unlock(&mds->mbox_mutex); + mutex_unlock(&cxl_mbox->mbox_mutex); } /** * __cxl_pci_mbox_send_cmd() - Execute a mailbox command - * @mds: The memory device driver data + * @cxl_mbox: CXL mailbox context * @mbox_cmd: Command to send to the memory device. * * Context: Any context. Expects mbox_mutex to be held. @@ -192,17 +195,18 @@ static void cxl_mbox_sanitize_work(struct work_struct *work) * not need to coordinate with each other. The driver only uses the primary * mailbox. */ -static int __cxl_pci_mbox_send_cmd(struct cxl_memdev_state *mds, +static int __cxl_pci_mbox_send_cmd(struct cxl_mailbox *cxl_mbox, struct cxl_mbox_cmd *mbox_cmd) { - struct cxl_dev_state *cxlds = &mds->cxlds; + struct cxl_dev_state *cxlds = mbox_to_cxlds(cxl_mbox); + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); void __iomem *payload = cxlds->regs.mbox + CXLDEV_MBOX_PAYLOAD_OFFSET; struct device *dev = cxlds->dev; u64 cmd_reg, status_reg; size_t out_len; int rc; - lockdep_assert_held(&mds->mbox_mutex); + lockdep_assert_held(&cxl_mbox->mbox_mutex); /* * Here are the steps from 8.2.8.4 of the CXL 2.0 spec. @@ -315,10 +319,10 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_memdev_state *mds, timeout = mbox_cmd->poll_interval_ms; for (i = 0; i < mbox_cmd->poll_count; i++) { - if (rcuwait_wait_event_timeout(&mds->mbox_wait, - cxl_mbox_background_complete(cxlds), - TASK_UNINTERRUPTIBLE, - msecs_to_jiffies(timeout)) > 0) + if (rcuwait_wait_event_timeout(&cxl_mbox->mbox_wait, + cxl_mbox_background_complete(cxlds), + TASK_UNINTERRUPTIBLE, + msecs_to_jiffies(timeout)) > 0) break; } @@ -360,7 +364,7 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_memdev_state *mds, */ size_t n; - n = min3(mbox_cmd->size_out, mds->payload_size, out_len); + n = min3(mbox_cmd->size_out, cxl_mbox->payload_size, out_len); memcpy_fromio(mbox_cmd->payload_out, payload, n); mbox_cmd->size_out = n; } else { @@ -370,14 +374,14 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_memdev_state *mds, return 0; } -static int cxl_pci_mbox_send(struct cxl_memdev_state *mds, +static int cxl_pci_mbox_send(struct cxl_mailbox *cxl_mbox, struct cxl_mbox_cmd *cmd) { int rc; - mutex_lock_io(&mds->mbox_mutex); - rc = __cxl_pci_mbox_send_cmd(mds, cmd); - mutex_unlock(&mds->mbox_mutex); + mutex_lock_io(&cxl_mbox->mbox_mutex); + rc = __cxl_pci_mbox_send_cmd(cxl_mbox, cmd); + mutex_unlock(&cxl_mbox->mbox_mutex); return rc; } @@ -385,6 +389,7 @@ static int cxl_pci_mbox_send(struct cxl_memdev_state *mds, static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds, bool irq_avail) { struct cxl_dev_state *cxlds = &mds->cxlds; + struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox; const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET); struct device *dev = cxlds->dev; unsigned long timeout; @@ -417,8 +422,8 @@ static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds, bool irq_avail) return -ETIMEDOUT; } - mds->mbox_send = cxl_pci_mbox_send; - mds->payload_size = + cxl_mbox->mbox_send = cxl_pci_mbox_send; + cxl_mbox->payload_size = 1 << FIELD_GET(CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK, cap); /* @@ -428,16 +433,15 @@ static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds, bool irq_avail) * there's no point in going forward. If the size is too large, there's * no harm is soft limiting it. */ - mds->payload_size = min_t(size_t, mds->payload_size, SZ_1M); - if (mds->payload_size < 256) { + cxl_mbox->payload_size = min_t(size_t, cxl_mbox->payload_size, SZ_1M); + if (cxl_mbox->payload_size < 256) { dev_err(dev, "Mailbox is too small (%zub)", - mds->payload_size); + cxl_mbox->payload_size); return -ENXIO; } - dev_dbg(dev, "Mailbox payload sized %zu", mds->payload_size); + dev_dbg(dev, "Mailbox payload sized %zu", cxl_mbox->payload_size); - rcuwait_init(&mds->mbox_wait); INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mbox_sanitize_work); /* background command interrupts are optional */ @@ -575,9 +579,10 @@ static void free_event_buf(void *buf) */ static int cxl_mem_alloc_event_buf(struct cxl_memdev_state *mds) { + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; struct cxl_get_event_payload *buf; - buf = kvmalloc(mds->payload_size, GFP_KERNEL); + buf = kvmalloc(cxl_mbox->payload_size, GFP_KERNEL); if (!buf) return -ENOMEM; mds->event.buf = buf; @@ -783,6 +788,23 @@ static int cxl_event_config(struct pci_host_bridge *host_bridge, return 0; } +static int cxl_pci_type3_init_mailbox(struct cxl_dev_state *cxlds) +{ + int rc; + + /* + * Fail the init if there's no mailbox. For a type3 this is out of spec. + */ + if (!cxlds->reg_map.device_map.mbox.valid) + return -ENODEV; + + rc = cxl_mailbox_init(&cxlds->cxl_mbox, cxlds->dev); + if (rc) + return rc; + + return 0; +} + static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus); @@ -843,6 +865,10 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) dev_dbg(&pdev->dev, "Failed to map RAS capability.\n"); + rc = cxl_pci_type3_init_mailbox(cxlds); + if (rc) + return rc; + rc = cxl_await_media_ready(cxlds); if (rc == 0) cxlds->media_ready = true; diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index 647c7e25ef3a..50c6d25ede89 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -102,13 +102,15 @@ static int cxl_pmem_get_config_size(struct cxl_memdev_state *mds, struct nd_cmd_get_config_size *cmd, unsigned int buf_len) { + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; + if (sizeof(*cmd) > buf_len) return -EINVAL; *cmd = (struct nd_cmd_get_config_size){ .config_size = mds->lsa_size, .max_xfer = - mds->payload_size - sizeof(struct cxl_mbox_set_lsa), + cxl_mbox->payload_size - sizeof(struct cxl_mbox_set_lsa), }; return 0; diff --git a/include/cxl/mailbox.h b/include/cxl/mailbox.h new file mode 100644 index 000000000000..bacd111e75f1 --- /dev/null +++ b/include/cxl/mailbox.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2024 Intel Corporation. */ +#ifndef __CXL_MBOX_H__ +#define __CXL_MBOX_H__ +#include + +struct cxl_mbox_cmd; + +/** + * struct cxl_mailbox - context for CXL mailbox operations + * @host: device that hosts the mailbox + * @payload_size: Size of space for payload + * (CXL 3.1 8.2.8.4.3 Mailbox Capabilities Register) + * @mbox_mutex: mutex protects device mailbox and firmware + * @mbox_wait: rcuwait for mailbox + * @mbox_send: @dev specific transport for transmitting mailbox commands + */ +struct cxl_mailbox { + struct device *host; + size_t payload_size; + struct mutex mbox_mutex; /* lock to protect mailbox context */ + struct rcuwait mbox_wait; + int (*mbox_send)(struct cxl_mailbox *cxl_mbox, struct cxl_mbox_cmd *cmd); +}; + +int cxl_mailbox_init(struct cxl_mailbox *cxl_mbox, struct device *host); + +#endif diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index 129f179b0ac5..ccdd6a504222 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -534,6 +535,7 @@ static int mock_gsl(struct cxl_mbox_cmd *cmd) static int mock_get_log(struct cxl_memdev_state *mds, struct cxl_mbox_cmd *cmd) { + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; struct cxl_mbox_get_log *gl = cmd->payload_in; u32 offset = le32_to_cpu(gl->offset); u32 length = le32_to_cpu(gl->length); @@ -542,7 +544,7 @@ static int mock_get_log(struct cxl_memdev_state *mds, struct cxl_mbox_cmd *cmd) if (cmd->size_in < sizeof(*gl)) return -EINVAL; - if (length > mds->payload_size) + if (length > cxl_mbox->payload_size) return -EINVAL; if (offset + length > sizeof(mock_cel)) return -EINVAL; @@ -617,12 +619,13 @@ void cxl_mockmem_sanitize_work(struct work_struct *work) { struct cxl_memdev_state *mds = container_of(work, typeof(*mds), security.poll_dwork.work); + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; - mutex_lock(&mds->mbox_mutex); + mutex_lock(&cxl_mbox->mbox_mutex); if (mds->security.sanitize_node) sysfs_notify_dirent(mds->security.sanitize_node); mds->security.sanitize_active = false; - mutex_unlock(&mds->mbox_mutex); + mutex_unlock(&cxl_mbox->mbox_mutex); dev_dbg(mds->cxlds.dev, "sanitize complete\n"); } @@ -631,6 +634,7 @@ static int mock_sanitize(struct cxl_mockmem_data *mdata, struct cxl_mbox_cmd *cmd) { struct cxl_memdev_state *mds = mdata->mds; + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; int rc = 0; if (cmd->size_in != 0) @@ -648,14 +652,14 @@ static int mock_sanitize(struct cxl_mockmem_data *mdata, return -ENXIO; } - mutex_lock(&mds->mbox_mutex); + mutex_lock(&cxl_mbox->mbox_mutex); if (schedule_delayed_work(&mds->security.poll_dwork, msecs_to_jiffies(mdata->sanitize_timeout))) { mds->security.sanitize_active = true; dev_dbg(mds->cxlds.dev, "sanitize issued\n"); } else rc = -EBUSY; - mutex_unlock(&mds->mbox_mutex); + mutex_unlock(&cxl_mbox->mbox_mutex); return rc; } @@ -1333,12 +1337,13 @@ static int mock_activate_fw(struct cxl_mockmem_data *mdata, return -EINVAL; } -static int cxl_mock_mbox_send(struct cxl_memdev_state *mds, +static int cxl_mock_mbox_send(struct cxl_mailbox *cxl_mbox, struct cxl_mbox_cmd *cmd) { - struct cxl_dev_state *cxlds = &mds->cxlds; - struct device *dev = cxlds->dev; + struct device *dev = cxl_mbox->host; struct cxl_mockmem_data *mdata = dev_get_drvdata(dev); + struct cxl_memdev_state *mds = mdata->mds; + struct cxl_dev_state *cxlds = &mds->cxlds; int rc = -EIO; switch (cmd->opcode) { @@ -1453,6 +1458,17 @@ static ssize_t event_trigger_store(struct device *dev, } static DEVICE_ATTR_WO(event_trigger); +static int cxl_mock_mailbox_create(struct cxl_dev_state *cxlds) +{ + int rc; + + rc = cxl_mailbox_init(&cxlds->cxl_mbox, cxlds->dev); + if (rc) + return rc; + + return 0; +} + static int cxl_mock_mem_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -1460,6 +1476,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) struct cxl_memdev_state *mds; struct cxl_dev_state *cxlds; struct cxl_mockmem_data *mdata; + struct cxl_mailbox *cxl_mbox; int rc; mdata = devm_kzalloc(dev, sizeof(*mdata), GFP_KERNEL); @@ -1487,13 +1504,18 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) if (IS_ERR(mds)) return PTR_ERR(mds); + cxlds = &mds->cxlds; + rc = cxl_mock_mailbox_create(cxlds); + if (rc) + return rc; + + cxl_mbox = &mds->cxlds.cxl_mbox; mdata->mds = mds; - mds->mbox_send = cxl_mock_mbox_send; - mds->payload_size = SZ_4K; + cxl_mbox->mbox_send = cxl_mock_mbox_send; + cxl_mbox->payload_size = SZ_4K; mds->event.buf = (struct cxl_get_event_payload *) mdata->event_buf; INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mockmem_sanitize_work); - cxlds = &mds->cxlds; cxlds->serial = pdev->id; if (is_rcd(pdev)) cxlds->rcd = true; From b5209da36b19b573cf25fe7e698e3a45b0f40a75 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 5 Sep 2024 15:35:47 -0700 Subject: [PATCH 252/655] cxl: Convert cxl_internal_send_cmd() to use 'struct cxl_mailbox' as input With the CXL mailbox context split out, cxl_internal_send_cmd() can take 'struct cxl_mailbox' as an input parameter rather than 'struct memdev_dev_state'. Change input parameter for cxl_internal_send_cmd() and fixup all impacted call sites. Reviewed-by: Fan Ni Reviewed-by: Jonathan Cameron Reviewed-by: Alison Schofield Reviewed-by: Ira Weiny Link: https://patch.msgid.link/20240905223711.1990186-4-dave.jiang@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/mbox.c | 38 ++++++++++++++++++++------------------ drivers/cxl/core/memdev.c | 23 +++++++++++++---------- drivers/cxl/cxlmem.h | 2 +- drivers/cxl/pci.c | 6 ++++-- drivers/cxl/pmem.c | 6 ++++-- drivers/cxl/security.c | 23 ++++++++++++----------- 6 files changed, 54 insertions(+), 44 deletions(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 3d7ee6a12915..915a93ced78f 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -225,7 +225,7 @@ static const char *cxl_mem_opcode_to_name(u16 opcode) /** * cxl_internal_send_cmd() - Kernel internal interface to send a mailbox command - * @mds: The driver data for the operation + * @cxl_mbox: CXL mailbox context * @mbox_cmd: initialized command to execute * * Context: Any context. @@ -241,10 +241,9 @@ static const char *cxl_mem_opcode_to_name(u16 opcode) * error. While this distinction can be useful for commands from userspace, the * kernel will only be able to use results when both are successful. */ -int cxl_internal_send_cmd(struct cxl_memdev_state *mds, +int cxl_internal_send_cmd(struct cxl_mailbox *cxl_mbox, struct cxl_mbox_cmd *mbox_cmd) { - struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; size_t out_size, min_out; int rc; @@ -689,7 +688,7 @@ static int cxl_xfer_log(struct cxl_memdev_state *mds, uuid_t *uuid, .payload_out = out, }; - rc = cxl_internal_send_cmd(mds, &mbox_cmd); + rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); /* * The output payload length that indicates the number @@ -775,7 +774,7 @@ static struct cxl_mbox_get_supported_logs *cxl_get_gsl(struct cxl_memdev_state * /* At least the record number field must be valid */ .min_out = 2, }; - rc = cxl_internal_send_cmd(mds, &mbox_cmd); + rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); if (rc < 0) { kvfree(ret); return ERR_PTR(rc); @@ -964,7 +963,7 @@ static int cxl_clear_event_record(struct cxl_memdev_state *mds, if (i == max_handles) { payload->nr_recs = i; - rc = cxl_internal_send_cmd(mds, &mbox_cmd); + rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); if (rc) goto free_pl; i = 0; @@ -975,7 +974,7 @@ static int cxl_clear_event_record(struct cxl_memdev_state *mds, if (i) { payload->nr_recs = i; mbox_cmd.size_in = struct_size(payload, handles, i); - rc = cxl_internal_send_cmd(mds, &mbox_cmd); + rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); if (rc) goto free_pl; } @@ -1009,7 +1008,7 @@ static void cxl_mem_get_records_log(struct cxl_memdev_state *mds, .min_out = struct_size(payload, records, 0), }; - rc = cxl_internal_send_cmd(mds, &mbox_cmd); + rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); if (rc) { dev_err_ratelimited(dev, "Event log '%d': Failed to query event records : %d", @@ -1080,6 +1079,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_mem_get_event_records, CXL); */ static int cxl_mem_get_partition_info(struct cxl_memdev_state *mds) { + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; struct cxl_mbox_get_partition_info pi; struct cxl_mbox_cmd mbox_cmd; int rc; @@ -1089,7 +1089,7 @@ static int cxl_mem_get_partition_info(struct cxl_memdev_state *mds) .size_out = sizeof(pi), .payload_out = &pi, }; - rc = cxl_internal_send_cmd(mds, &mbox_cmd); + rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); if (rc) return rc; @@ -1116,6 +1116,7 @@ static int cxl_mem_get_partition_info(struct cxl_memdev_state *mds) */ int cxl_dev_state_identify(struct cxl_memdev_state *mds) { + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; /* See CXL 2.0 Table 175 Identify Memory Device Output Payload */ struct cxl_mbox_identify id; struct cxl_mbox_cmd mbox_cmd; @@ -1130,7 +1131,7 @@ int cxl_dev_state_identify(struct cxl_memdev_state *mds) .size_out = sizeof(id), .payload_out = &id, }; - rc = cxl_internal_send_cmd(mds, &mbox_cmd); + rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); if (rc < 0) return rc; @@ -1158,6 +1159,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_dev_state_identify, CXL); static int __cxl_mem_sanitize(struct cxl_memdev_state *mds, u16 cmd) { + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; int rc; u32 sec_out = 0; struct cxl_get_security_output { @@ -1169,14 +1171,13 @@ static int __cxl_mem_sanitize(struct cxl_memdev_state *mds, u16 cmd) .size_out = sizeof(out), }; struct cxl_mbox_cmd mbox_cmd = { .opcode = cmd }; - struct cxl_dev_state *cxlds = &mds->cxlds; if (cmd != CXL_MBOX_OP_SANITIZE && cmd != CXL_MBOX_OP_SECURE_ERASE) return -EINVAL; - rc = cxl_internal_send_cmd(mds, &sec_cmd); + rc = cxl_internal_send_cmd(cxl_mbox, &sec_cmd); if (rc < 0) { - dev_err(cxlds->dev, "Failed to get security state : %d", rc); + dev_err(cxl_mbox->host, "Failed to get security state : %d", rc); return rc; } @@ -1193,9 +1194,9 @@ static int __cxl_mem_sanitize(struct cxl_memdev_state *mds, u16 cmd) sec_out & CXL_PMEM_SEC_STATE_LOCKED) return -EINVAL; - rc = cxl_internal_send_cmd(mds, &mbox_cmd); + rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); if (rc < 0) { - dev_err(cxlds->dev, "Failed to sanitize device : %d", rc); + dev_err(cxl_mbox->host, "Failed to sanitize device : %d", rc); return rc; } @@ -1309,6 +1310,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_mem_create_range_info, CXL); int cxl_set_timestamp(struct cxl_memdev_state *mds) { + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; struct cxl_mbox_cmd mbox_cmd; struct cxl_mbox_set_timestamp_in pi; int rc; @@ -1320,7 +1322,7 @@ int cxl_set_timestamp(struct cxl_memdev_state *mds) .payload_in = &pi, }; - rc = cxl_internal_send_cmd(mds, &mbox_cmd); + rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); /* * Command is optional. Devices may have another way of providing * a timestamp, or may return all 0s in timestamp fields. @@ -1337,7 +1339,7 @@ int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, struct cxl_region *cxlr) { struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); - struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; + struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox; struct cxl_mbox_poison_out *po; struct cxl_mbox_poison_in pi; int nr_records = 0; @@ -1361,7 +1363,7 @@ int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, .min_out = struct_size(po, record, 0), }; - rc = cxl_internal_send_cmd(mds, &mbox_cmd); + rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); if (rc) break; diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index 05bb84cb1274..84fefb76dafa 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -278,7 +278,7 @@ static int cxl_validate_poison_dpa(struct cxl_memdev *cxlmd, u64 dpa) int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa) { - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); + struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox; struct cxl_mbox_inject_poison inject; struct cxl_poison_record record; struct cxl_mbox_cmd mbox_cmd; @@ -308,13 +308,13 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa) .size_in = sizeof(inject), .payload_in = &inject, }; - rc = cxl_internal_send_cmd(mds, &mbox_cmd); + rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); if (rc) goto out; cxlr = cxl_dpa_to_region(cxlmd, dpa); if (cxlr) - dev_warn_once(mds->cxlds.dev, + dev_warn_once(cxl_mbox->host, "poison inject dpa:%#llx region: %s\n", dpa, dev_name(&cxlr->dev)); @@ -333,7 +333,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_inject_poison, CXL); int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa) { - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); + struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox; struct cxl_mbox_clear_poison clear; struct cxl_poison_record record; struct cxl_mbox_cmd mbox_cmd; @@ -372,13 +372,13 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa) .payload_in = &clear, }; - rc = cxl_internal_send_cmd(mds, &mbox_cmd); + rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); if (rc) goto out; cxlr = cxl_dpa_to_region(cxlmd, dpa); if (cxlr) - dev_warn_once(mds->cxlds.dev, + dev_warn_once(cxl_mbox->host, "poison clear dpa:%#llx region: %s\n", dpa, dev_name(&cxlr->dev)); @@ -715,6 +715,7 @@ static int cxl_memdev_release_file(struct inode *inode, struct file *file) */ static int cxl_mem_get_fw_info(struct cxl_memdev_state *mds) { + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; struct cxl_mbox_get_fw_info info; struct cxl_mbox_cmd mbox_cmd; int rc; @@ -725,7 +726,7 @@ static int cxl_mem_get_fw_info(struct cxl_memdev_state *mds) .payload_out = &info, }; - rc = cxl_internal_send_cmd(mds, &mbox_cmd); + rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); if (rc < 0) return rc; @@ -749,6 +750,7 @@ static int cxl_mem_get_fw_info(struct cxl_memdev_state *mds) */ static int cxl_mem_activate_fw(struct cxl_memdev_state *mds, int slot) { + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; struct cxl_mbox_activate_fw activate; struct cxl_mbox_cmd mbox_cmd; @@ -765,7 +767,7 @@ static int cxl_mem_activate_fw(struct cxl_memdev_state *mds, int slot) activate.action = CXL_FW_ACTIVATE_OFFLINE; activate.slot = slot; - return cxl_internal_send_cmd(mds, &mbox_cmd); + return cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); } /** @@ -780,6 +782,7 @@ static int cxl_mem_activate_fw(struct cxl_memdev_state *mds, int slot) */ static int cxl_mem_abort_fw_xfer(struct cxl_memdev_state *mds) { + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; struct cxl_mbox_transfer_fw *transfer; struct cxl_mbox_cmd mbox_cmd; int rc; @@ -799,7 +802,7 @@ static int cxl_mem_abort_fw_xfer(struct cxl_memdev_state *mds) transfer->action = CXL_FW_TRANSFER_ACTION_ABORT; - rc = cxl_internal_send_cmd(mds, &mbox_cmd); + rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); kfree(transfer); return rc; } @@ -924,7 +927,7 @@ static enum fw_upload_err cxl_fw_write(struct fw_upload *fwl, const u8 *data, .poll_count = 30, }; - rc = cxl_internal_send_cmd(mds, &mbox_cmd); + rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); if (rc < 0) { rc = FW_UPLOAD_ERR_RW_ERROR; goto out_free; diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 4ead415f8971..c7c423ae16ab 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -811,7 +811,7 @@ enum { CXL_PMEM_SEC_PASS_USER, }; -int cxl_internal_send_cmd(struct cxl_memdev_state *mds, +int cxl_internal_send_cmd(struct cxl_mailbox *cxl_mbox, struct cxl_mbox_cmd *cmd); int cxl_dev_state_identify(struct cxl_memdev_state *mds); int cxl_await_media_ready(struct cxl_dev_state *cxlds); diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 2fc7fd252c2b..37164174b5fb 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -655,6 +655,7 @@ static int cxl_event_req_irq(struct cxl_dev_state *cxlds, u8 setting) static int cxl_event_get_int_policy(struct cxl_memdev_state *mds, struct cxl_event_interrupt_policy *policy) { + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; struct cxl_mbox_cmd mbox_cmd = { .opcode = CXL_MBOX_OP_GET_EVT_INT_POLICY, .payload_out = policy, @@ -662,7 +663,7 @@ static int cxl_event_get_int_policy(struct cxl_memdev_state *mds, }; int rc; - rc = cxl_internal_send_cmd(mds, &mbox_cmd); + rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); if (rc < 0) dev_err(mds->cxlds.dev, "Failed to get event interrupt policy : %d", rc); @@ -673,6 +674,7 @@ static int cxl_event_get_int_policy(struct cxl_memdev_state *mds, static int cxl_event_config_msgnums(struct cxl_memdev_state *mds, struct cxl_event_interrupt_policy *policy) { + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; struct cxl_mbox_cmd mbox_cmd; int rc; @@ -689,7 +691,7 @@ static int cxl_event_config_msgnums(struct cxl_memdev_state *mds, .size_in = sizeof(*policy), }; - rc = cxl_internal_send_cmd(mds, &mbox_cmd); + rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); if (rc < 0) { dev_err(mds->cxlds.dev, "Failed to set event interrupt policy : %d", rc); diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index 50c6d25ede89..a6538a5f5c9f 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -120,6 +120,7 @@ static int cxl_pmem_get_config_data(struct cxl_memdev_state *mds, struct nd_cmd_get_config_data_hdr *cmd, unsigned int buf_len) { + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; struct cxl_mbox_get_lsa get_lsa; struct cxl_mbox_cmd mbox_cmd; int rc; @@ -141,7 +142,7 @@ static int cxl_pmem_get_config_data(struct cxl_memdev_state *mds, .payload_out = cmd->out_buf, }; - rc = cxl_internal_send_cmd(mds, &mbox_cmd); + rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); cmd->status = 0; return rc; @@ -151,6 +152,7 @@ static int cxl_pmem_set_config_data(struct cxl_memdev_state *mds, struct nd_cmd_set_config_hdr *cmd, unsigned int buf_len) { + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; struct cxl_mbox_set_lsa *set_lsa; struct cxl_mbox_cmd mbox_cmd; int rc; @@ -177,7 +179,7 @@ static int cxl_pmem_set_config_data(struct cxl_memdev_state *mds, .size_in = struct_size(set_lsa, data, cmd->in_length), }; - rc = cxl_internal_send_cmd(mds, &mbox_cmd); + rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); /* * Set "firmware" status (4-packed bytes at the end of the input diff --git a/drivers/cxl/security.c b/drivers/cxl/security.c index 21856a3f408e..452d1a9b9148 100644 --- a/drivers/cxl/security.c +++ b/drivers/cxl/security.c @@ -14,6 +14,7 @@ static unsigned long cxl_pmem_get_security_flags(struct nvdimm *nvdimm, { struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm); struct cxl_memdev *cxlmd = cxl_nvd->cxlmd; + struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox; struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); unsigned long security_flags = 0; struct cxl_get_security_output { @@ -29,7 +30,7 @@ static unsigned long cxl_pmem_get_security_flags(struct nvdimm *nvdimm, .payload_out = &out, }; - rc = cxl_internal_send_cmd(mds, &mbox_cmd); + rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); if (rc < 0) return 0; @@ -70,7 +71,7 @@ static int cxl_pmem_security_change_key(struct nvdimm *nvdimm, { struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm); struct cxl_memdev *cxlmd = cxl_nvd->cxlmd; - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); + struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox; struct cxl_mbox_cmd mbox_cmd; struct cxl_set_pass set_pass; @@ -87,7 +88,7 @@ static int cxl_pmem_security_change_key(struct nvdimm *nvdimm, .payload_in = &set_pass, }; - return cxl_internal_send_cmd(mds, &mbox_cmd); + return cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); } static int __cxl_pmem_security_disable(struct nvdimm *nvdimm, @@ -96,7 +97,7 @@ static int __cxl_pmem_security_disable(struct nvdimm *nvdimm, { struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm); struct cxl_memdev *cxlmd = cxl_nvd->cxlmd; - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); + struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox; struct cxl_disable_pass dis_pass; struct cxl_mbox_cmd mbox_cmd; @@ -112,7 +113,7 @@ static int __cxl_pmem_security_disable(struct nvdimm *nvdimm, .payload_in = &dis_pass, }; - return cxl_internal_send_cmd(mds, &mbox_cmd); + return cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); } static int cxl_pmem_security_disable(struct nvdimm *nvdimm, @@ -131,12 +132,12 @@ static int cxl_pmem_security_freeze(struct nvdimm *nvdimm) { struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm); struct cxl_memdev *cxlmd = cxl_nvd->cxlmd; - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); + struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox; struct cxl_mbox_cmd mbox_cmd = { .opcode = CXL_MBOX_OP_FREEZE_SECURITY, }; - return cxl_internal_send_cmd(mds, &mbox_cmd); + return cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); } static int cxl_pmem_security_unlock(struct nvdimm *nvdimm, @@ -144,7 +145,7 @@ static int cxl_pmem_security_unlock(struct nvdimm *nvdimm, { struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm); struct cxl_memdev *cxlmd = cxl_nvd->cxlmd; - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); + struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox; u8 pass[NVDIMM_PASSPHRASE_LEN]; struct cxl_mbox_cmd mbox_cmd; int rc; @@ -156,7 +157,7 @@ static int cxl_pmem_security_unlock(struct nvdimm *nvdimm, .payload_in = pass, }; - rc = cxl_internal_send_cmd(mds, &mbox_cmd); + rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); if (rc < 0) return rc; @@ -169,7 +170,7 @@ static int cxl_pmem_security_passphrase_erase(struct nvdimm *nvdimm, { struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm); struct cxl_memdev *cxlmd = cxl_nvd->cxlmd; - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); + struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox; struct cxl_mbox_cmd mbox_cmd; struct cxl_pass_erase erase; int rc; @@ -185,7 +186,7 @@ static int cxl_pmem_security_passphrase_erase(struct nvdimm *nvdimm, .payload_in = &erase, }; - rc = cxl_internal_send_cmd(mds, &mbox_cmd); + rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); if (rc < 0) return rc; From ec24b988eb26e21f37707d090ec3ab53c51fd386 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 4 Jul 2024 12:20:36 +0200 Subject: [PATCH 253/655] um: remove variable stack array in os_rcv_fd_msg() When generalizing this, I was in the mindset of this being "userspace" code, but even there we should not use variable arrays as the kernel is moving away from allowing that. Simply reserve (but not use) enough space for the maximum two descriptors we might need now, and return an error if attempting to receive more than that. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202407041459.3SYg4TEi-lkp@intel.com/ Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/um/os-Linux/file.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c index 5adf8f630049..f1d03cf3957f 100644 --- a/arch/um/os-Linux/file.c +++ b/arch/um/os-Linux/file.c @@ -528,7 +528,8 @@ int os_shutdown_socket(int fd, int r, int w) ssize_t os_rcv_fd_msg(int fd, int *fds, unsigned int n_fds, void *data, size_t data_len) { - char buf[CMSG_SPACE(sizeof(*fds) * n_fds)]; +#define MAX_RCV_FDS 2 + char buf[CMSG_SPACE(sizeof(*fds) * MAX_RCV_FDS)]; struct cmsghdr *cmsg; struct iovec iov = { .iov_base = data, @@ -538,10 +539,13 @@ ssize_t os_rcv_fd_msg(int fd, int *fds, unsigned int n_fds, .msg_iov = &iov, .msg_iovlen = 1, .msg_control = buf, - .msg_controllen = sizeof(buf), + .msg_controllen = CMSG_SPACE(sizeof(*fds) * n_fds), }; int n; + if (n_fds > MAX_RCV_FDS) + return -EINVAL; + n = recvmsg(fd, &msg, 0); if (n < 0) return -errno; From 612a8c8e0b43ba7e3d0e51f6f76a5fec4912d439 Mon Sep 17 00:00:00 2001 From: Anton Ivanov Date: Fri, 5 Jul 2024 11:53:31 +0100 Subject: [PATCH 254/655] um: vector: Replace locks guarding queue depth with atomics UML vector drivers use ring buffer structures which map preallocated skbs onto mmsg vectors for use with sendmmsg and recvmmsg. They are designed around a single consumer, single producer pattern allowing simultaneous enqueue and dequeue. Lock debugging with preemption showed possible races when locking the queue depth. This patch addresses this by removing extra locks, adding barriers and making queue depth inc/dec and access atomic. Signed-off-by: Anton Ivanov Signed-off-by: Richard Weinberger --- arch/um/drivers/vector_kern.c | 208 +++++++++++++++++----------------- arch/um/drivers/vector_kern.h | 4 +- 2 files changed, 110 insertions(+), 102 deletions(-) diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c index 2d473282ab51..0d7e7a25b674 100644 --- a/arch/um/drivers/vector_kern.c +++ b/arch/um/drivers/vector_kern.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -102,18 +103,33 @@ static const struct { static void vector_reset_stats(struct vector_private *vp) { + /* We reuse the existing queue locks for stats */ + + /* RX stats are modified with RX head_lock held + * in vector_poll. + */ + + spin_lock(&vp->rx_queue->head_lock); vp->estats.rx_queue_max = 0; vp->estats.rx_queue_running_average = 0; - vp->estats.tx_queue_max = 0; - vp->estats.tx_queue_running_average = 0; vp->estats.rx_encaps_errors = 0; + vp->estats.sg_ok = 0; + vp->estats.sg_linearized = 0; + spin_unlock(&vp->rx_queue->head_lock); + + /* TX stats are modified with TX head_lock held + * in vector_send. + */ + + spin_lock(&vp->tx_queue->head_lock); vp->estats.tx_timeout_count = 0; vp->estats.tx_restart_queue = 0; vp->estats.tx_kicks = 0; vp->estats.tx_flow_control_xon = 0; vp->estats.tx_flow_control_xoff = 0; - vp->estats.sg_ok = 0; - vp->estats.sg_linearized = 0; + vp->estats.tx_queue_max = 0; + vp->estats.tx_queue_running_average = 0; + spin_unlock(&vp->tx_queue->head_lock); } static int get_mtu(struct arglist *def) @@ -232,12 +248,6 @@ static int get_transport_options(struct arglist *def) static char *drop_buffer; -/* Array backed queues optimized for bulk enqueue/dequeue and - * 1:N (small values of N) or 1:1 enqueuer/dequeuer ratios. - * For more details and full design rationale see - * http://foswiki.cambridgegreys.com/Main/EatYourTailAndEnjoyIt - */ - /* * Advance the mmsg queue head by n = advance. Resets the queue to @@ -247,27 +257,13 @@ static char *drop_buffer; static int vector_advancehead(struct vector_queue *qi, int advance) { - int queue_depth; - qi->head = (qi->head + advance) % qi->max_depth; - spin_lock(&qi->tail_lock); - qi->queue_depth -= advance; - - /* we are at 0, use this to - * reset head and tail so we can use max size vectors - */ - - if (qi->queue_depth == 0) { - qi->head = 0; - qi->tail = 0; - } - queue_depth = qi->queue_depth; - spin_unlock(&qi->tail_lock); - return queue_depth; + atomic_sub(advance, &qi->queue_depth); + return atomic_read(&qi->queue_depth); } /* Advance the queue tail by n = advance. @@ -277,16 +273,11 @@ static int vector_advancehead(struct vector_queue *qi, int advance) static int vector_advancetail(struct vector_queue *qi, int advance) { - int queue_depth; - qi->tail = (qi->tail + advance) % qi->max_depth; - spin_lock(&qi->head_lock); - qi->queue_depth += advance; - queue_depth = qi->queue_depth; - spin_unlock(&qi->head_lock); - return queue_depth; + atomic_add(advance, &qi->queue_depth); + return atomic_read(&qi->queue_depth); } static int prep_msg(struct vector_private *vp, @@ -339,9 +330,7 @@ static int vector_enqueue(struct vector_queue *qi, struct sk_buff *skb) int iov_count; spin_lock(&qi->tail_lock); - spin_lock(&qi->head_lock); - queue_depth = qi->queue_depth; - spin_unlock(&qi->head_lock); + queue_depth = atomic_read(&qi->queue_depth); if (skb) packet_len = skb->len; @@ -360,6 +349,7 @@ static int vector_enqueue(struct vector_queue *qi, struct sk_buff *skb) mmsg_vector->msg_hdr.msg_iovlen = iov_count; mmsg_vector->msg_hdr.msg_name = vp->fds->remote_addr; mmsg_vector->msg_hdr.msg_namelen = vp->fds->remote_addr_size; + wmb(); /* Make the packet visible to the NAPI poll thread */ queue_depth = vector_advancetail(qi, 1); } else goto drop; @@ -398,7 +388,7 @@ static int consume_vector_skbs(struct vector_queue *qi, int count) } /* - * Generic vector deque via sendmmsg with support for forming headers + * Generic vector dequeue via sendmmsg with support for forming headers * using transport specific callback. Allows GRE, L2TPv3, RAW and * other transports to use a common dequeue procedure in vector mode */ @@ -408,69 +398,64 @@ static int vector_send(struct vector_queue *qi) { struct vector_private *vp = netdev_priv(qi->dev); struct mmsghdr *send_from; - int result = 0, send_len, queue_depth = qi->max_depth; + int result = 0, send_len; if (spin_trylock(&qi->head_lock)) { - if (spin_trylock(&qi->tail_lock)) { - /* update queue_depth to current value */ - queue_depth = qi->queue_depth; - spin_unlock(&qi->tail_lock); - while (queue_depth > 0) { - /* Calculate the start of the vector */ - send_len = queue_depth; - send_from = qi->mmsg_vector; - send_from += qi->head; - /* Adjust vector size if wraparound */ - if (send_len + qi->head > qi->max_depth) - send_len = qi->max_depth - qi->head; - /* Try to TX as many packets as possible */ - if (send_len > 0) { - result = uml_vector_sendmmsg( - vp->fds->tx_fd, - send_from, - send_len, - 0 - ); - vp->in_write_poll = - (result != send_len); - } - /* For some of the sendmmsg error scenarios - * we may end being unsure in the TX success - * for all packets. It is safer to declare - * them all TX-ed and blame the network. + /* update queue_depth to current value */ + while (atomic_read(&qi->queue_depth) > 0) { + /* Calculate the start of the vector */ + send_len = atomic_read(&qi->queue_depth); + send_from = qi->mmsg_vector; + send_from += qi->head; + /* Adjust vector size if wraparound */ + if (send_len + qi->head > qi->max_depth) + send_len = qi->max_depth - qi->head; + /* Try to TX as many packets as possible */ + if (send_len > 0) { + result = uml_vector_sendmmsg( + vp->fds->tx_fd, + send_from, + send_len, + 0 + ); + vp->in_write_poll = + (result != send_len); + } + /* For some of the sendmmsg error scenarios + * we may end being unsure in the TX success + * for all packets. It is safer to declare + * them all TX-ed and blame the network. + */ + if (result < 0) { + if (net_ratelimit()) + netdev_err(vp->dev, "sendmmsg err=%i\n", + result); + vp->in_error = true; + result = send_len; + } + if (result > 0) { + consume_vector_skbs(qi, result); + /* This is equivalent to an TX IRQ. + * Restart the upper layers to feed us + * more packets. */ - if (result < 0) { - if (net_ratelimit()) - netdev_err(vp->dev, "sendmmsg err=%i\n", - result); - vp->in_error = true; - result = send_len; - } - if (result > 0) { - queue_depth = - consume_vector_skbs(qi, result); - /* This is equivalent to an TX IRQ. - * Restart the upper layers to feed us - * more packets. - */ - if (result > vp->estats.tx_queue_max) - vp->estats.tx_queue_max = result; - vp->estats.tx_queue_running_average = - (vp->estats.tx_queue_running_average + result) >> 1; - } - netif_wake_queue(qi->dev); - /* if TX is busy, break out of the send loop, - * poll write IRQ will reschedule xmit for us - */ - if (result != send_len) { - vp->estats.tx_restart_queue++; - break; - } + if (result > vp->estats.tx_queue_max) + vp->estats.tx_queue_max = result; + vp->estats.tx_queue_running_average = + (vp->estats.tx_queue_running_average + result) >> 1; + } + netif_wake_queue(qi->dev); + /* if TX is busy, break out of the send loop, + * poll write IRQ will reschedule xmit for us. + */ + if (result != send_len) { + vp->estats.tx_restart_queue++; + break; } } spin_unlock(&qi->head_lock); } - return queue_depth; + return atomic_read(&qi->queue_depth); } /* Queue destructor. Deliberately stateless so we can use @@ -589,7 +574,7 @@ static struct vector_queue *create_queue( } spin_lock_init(&result->head_lock); spin_lock_init(&result->tail_lock); - result->queue_depth = 0; + atomic_set(&result->queue_depth, 0); result->head = 0; result->tail = 0; return result; @@ -668,18 +653,27 @@ static struct sk_buff *prep_skb( } -/* Prepare queue for recvmmsg one-shot rx - fill with fresh sk_buffs*/ +/* Prepare queue for recvmmsg one-shot rx - fill with fresh sk_buffs */ static void prep_queue_for_rx(struct vector_queue *qi) { struct vector_private *vp = netdev_priv(qi->dev); struct mmsghdr *mmsg_vector = qi->mmsg_vector; void **skbuff_vector = qi->skbuff_vector; - int i; + int i, queue_depth; - if (qi->queue_depth == 0) + queue_depth = atomic_read(&qi->queue_depth); + + if (queue_depth == 0) return; - for (i = 0; i < qi->queue_depth; i++) { + + /* RX is always emptied 100% during each cycle, so we do not + * have to do the tail wraparound math for it. + */ + + qi->head = qi->tail = 0; + + for (i = 0; i < queue_depth; i++) { /* it is OK if allocation fails - recvmmsg with NULL data in * iov argument still performs an RX, just drops the packet * This allows us stop faffing around with a "drop buffer" @@ -689,7 +683,7 @@ static void prep_queue_for_rx(struct vector_queue *qi) skbuff_vector++; mmsg_vector++; } - qi->queue_depth = 0; + atomic_set(&qi->queue_depth, 0); } static struct vector_device *find_device(int n) @@ -985,7 +979,7 @@ static int vector_mmsg_rx(struct vector_private *vp, int budget) * many do we need to prep the next time prep_queue_for_rx() is called. */ - qi->queue_depth = packet_count; + atomic_add(packet_count, &qi->queue_depth); for (i = 0; i < packet_count; i++) { skb = (*skbuff_vector); @@ -1172,6 +1166,7 @@ static int vector_poll(struct napi_struct *napi, int budget) if ((vp->options & VECTOR_TX) != 0) tx_enqueued = (vector_send(vp->tx_queue) > 0); + spin_lock(&vp->rx_queue->head_lock); if ((vp->options & VECTOR_RX) > 0) err = vector_mmsg_rx(vp, budget); else { @@ -1179,6 +1174,7 @@ static int vector_poll(struct napi_struct *napi, int budget) if (err > 0) err = 1; } + spin_unlock(&vp->rx_queue->head_lock); if (err > 0) work_done += err; @@ -1225,7 +1221,7 @@ static int vector_net_open(struct net_device *dev) vp->rx_header_size, MAX_IOV_SIZE ); - vp->rx_queue->queue_depth = get_depth(vp->parsed); + atomic_set(&vp->rx_queue->queue_depth, get_depth(vp->parsed)); } else { vp->header_rxbuffer = kmalloc( vp->rx_header_size, @@ -1467,7 +1463,17 @@ static void vector_get_ethtool_stats(struct net_device *dev, { struct vector_private *vp = netdev_priv(dev); + /* Stats are modified in the dequeue portions of + * rx/tx which are protected by the head locks + * grabbing these locks here ensures they are up + * to date. + */ + + spin_lock(&vp->tx_queue->head_lock); + spin_lock(&vp->rx_queue->head_lock); memcpy(tmp_stats, &vp->estats, sizeof(struct vector_estats)); + spin_unlock(&vp->rx_queue->head_lock); + spin_unlock(&vp->tx_queue->head_lock); } static int vector_get_coalesce(struct net_device *netdev, diff --git a/arch/um/drivers/vector_kern.h b/arch/um/drivers/vector_kern.h index 806df551be0b..417834793658 100644 --- a/arch/um/drivers/vector_kern.h +++ b/arch/um/drivers/vector_kern.h @@ -14,6 +14,7 @@ #include #include #include +#include #include "vector_user.h" @@ -44,7 +45,8 @@ struct vector_queue { struct net_device *dev; spinlock_t head_lock; spinlock_t tail_lock; - int queue_depth, head, tail, max_depth, max_iov_frags; + atomic_t queue_depth; + int head, tail, max_depth, max_iov_frags; short options; }; From 671cd5eed9db3415b42826747114a330bc303ae9 Mon Sep 17 00:00:00 2001 From: Anton Ivanov Date: Sat, 6 Jul 2024 10:12:00 +0100 Subject: [PATCH 255/655] um: vector: Fix NAPI budget handling Fix the handling of NAPI budget. Signed-off-by: Anton Ivanov Signed-off-by: Richard Weinberger --- arch/um/drivers/vector_kern.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c index 0d7e7a25b674..c992da83268d 100644 --- a/arch/um/drivers/vector_kern.c +++ b/arch/um/drivers/vector_kern.c @@ -966,7 +966,7 @@ static int vector_mmsg_rx(struct vector_private *vp, int budget) budget = qi->max_depth; packet_count = uml_vector_recvmmsg( - vp->fds->rx_fd, qi->mmsg_vector, qi->max_depth, 0); + vp->fds->rx_fd, qi->mmsg_vector, budget, 0); if (packet_count < 0) vp->in_error = true; @@ -1180,7 +1180,7 @@ static int vector_poll(struct napi_struct *napi, int budget) if (tx_enqueued || err > 0) napi_schedule(napi); - if (work_done < budget) + if (work_done <= budget) napi_complete_done(napi, work_done); return work_done; } From 64dcf0b8779363ca07dfb5649a4cc71f9fdf390b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 23 Jul 2024 14:24:56 +0200 Subject: [PATCH 256/655] um: remove ARCH_NO_PREEMPT_DYNAMIC There's no such symbol and we currently don't have any of the mechanisms to make boot-time selection cheap enough, so we can't have HAVE_PREEMPT_DYNAMIC_CALL or HAVE_PREEMPT_DYNAMIC_KEY. Remove the select statement. Reported-by: Lukas Bulwahn Fixes: cd01672d64a3 ("um: Enable preemption in UML") Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/um/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/um/Kconfig b/arch/um/Kconfig index dca84fd6d00a..c89575d05021 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -11,7 +11,6 @@ config UML select ARCH_HAS_KCOV select ARCH_HAS_STRNCPY_FROM_USER select ARCH_HAS_STRNLEN_USER - select ARCH_NO_PREEMPT_DYNAMIC select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_KASAN if X86_64 select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN From ab1d5895cf6cdd1df4b6ce0ac8763828e2bf7e62 Mon Sep 17 00:00:00 2001 From: Renzo Davoli Date: Tue, 30 Jul 2024 15:54:54 +0200 Subject: [PATCH 257/655] vector_user: add VDE support This is the actual implementation of VDE support as a vector transport. Signed-off-by: Renzo Davoli Acked-By: Anton Ivanov Signed-off-by: Richard Weinberger --- arch/um/drivers/vector_user.c | 83 +++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/arch/um/drivers/vector_user.c b/arch/um/drivers/vector_user.c index b16a5e5619d3..2ea67e6fd067 100644 --- a/arch/um/drivers/vector_user.c +++ b/arch/um/drivers/vector_user.c @@ -46,6 +46,9 @@ #define TRANS_FD "fd" #define TRANS_FD_LEN strlen(TRANS_FD) +#define TRANS_VDE "vde" +#define TRANS_VDE_LEN strlen(TRANS_VDE) + #define VNET_HDR_FAIL "could not enable vnet headers on fd %d" #define TUN_GET_F_FAIL "tapraw: TUNGETFEATURES failed: %s" #define L2TPV3_BIND_FAIL "l2tpv3_open : could not bind socket err=%i" @@ -434,6 +437,84 @@ static struct vector_fds *user_init_fd_fds(struct arglist *ifspec) return NULL; } +/* enough char to store an int type */ +#define ENOUGH(type) ((CHAR_BIT * sizeof(type) - 1) / 3 + 2) +#define ENOUGH_OCTAL(type) ((CHAR_BIT * sizeof(type) + 2) / 3) +/* vde_plug --descr xx --port2 xx --mod2 xx --group2 xx seqpacket://NN vnl (NULL) */ +#define VDE_MAX_ARGC 12 +#define VDE_SEQPACKET_HEAD "seqpacket://" +#define VDE_SEQPACKET_HEAD_LEN (sizeof(VDE_SEQPACKET_HEAD) - 1) +#define VDE_DEFAULT_DESCRIPTION "UML" + +static struct vector_fds *user_init_vde_fds(struct arglist *ifspec) +{ + char seqpacketvnl[VDE_SEQPACKET_HEAD_LEN + ENOUGH(int) + 1]; + char *argv[VDE_MAX_ARGC] = {"vde_plug"}; + int argc = 1; + int rv; + int sv[2]; + struct vector_fds *result = NULL; + + char *vnl = uml_vector_fetch_arg(ifspec,"vnl"); + char *descr = uml_vector_fetch_arg(ifspec,"descr"); + char *port = uml_vector_fetch_arg(ifspec,"port"); + char *mode = uml_vector_fetch_arg(ifspec,"mode"); + char *group = uml_vector_fetch_arg(ifspec,"group"); + if (descr == NULL) descr = VDE_DEFAULT_DESCRIPTION; + + argv[argc++] = "--descr"; + argv[argc++] = descr; + if (port != NULL) { + argv[argc++] = "--port2"; + argv[argc++] = port; + } + if (mode != NULL) { + argv[argc++] = "--mod2"; + argv[argc++] = mode; + } + if (group != NULL) { + argv[argc++] = "--group2"; + argv[argc++] = group; + } + argv[argc++] = seqpacketvnl; + argv[argc++] = vnl; + argv[argc++] = NULL; + + rv = socketpair(AF_UNIX, SOCK_SEQPACKET, 0, sv); + if (rv < 0) { + printk(UM_KERN_ERR "vde: seqpacket socketpair err %d", -errno); + return NULL; + } + rv = os_set_exec_close(sv[0]); + if (rv < 0) { + printk(UM_KERN_ERR "vde: seqpacket socketpair cloexec err %d", -errno); + goto vde_cleanup_sv; + } + snprintf(seqpacketvnl, sizeof(seqpacketvnl), VDE_SEQPACKET_HEAD "%d", sv[1]); + + run_helper(NULL, NULL, argv); + + close(sv[1]); + + result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL); + if (result == NULL) { + printk(UM_KERN_ERR "fd open: allocation failed"); + goto vde_cleanup; + } + + result->rx_fd = sv[0]; + result->tx_fd = sv[0]; + result->remote_addr_size = 0; + result->remote_addr = NULL; + return result; + +vde_cleanup_sv: + close(sv[1]); +vde_cleanup: + close(sv[0]); + return NULL; +} + static struct vector_fds *user_init_raw_fds(struct arglist *ifspec) { int rxfd = -1, txfd = -1; @@ -673,6 +754,8 @@ struct vector_fds *uml_vector_user_open( return user_init_unix_fds(parsed, ID_BESS); if (strncmp(transport, TRANS_FD, TRANS_FD_LEN) == 0) return user_init_fd_fds(parsed); + if (strncmp(transport, TRANS_VDE, TRANS_VDE_LEN) == 0) + return user_init_vde_fds(parsed); return NULL; } From cccf19f8b568a4edabb16a998a81bc5d4c4c7e01 Mon Sep 17 00:00:00 2001 From: Renzo Davoli Date: Tue, 30 Jul 2024 15:55:04 +0200 Subject: [PATCH 258/655] user_mode_linux_howto_v2: add VDE vector support in doc Add a description of the VDE vector transport in user_mode_linux_howto_v2.rst. Signed-off-by: Renzo Davoli Signed-off-by: Renzo Davoi Acked-by: Randy Dunlap Signed-off-by: Richard Weinberger --- .../virt/uml/user_mode_linux_howto_v2.rst | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/Documentation/virt/uml/user_mode_linux_howto_v2.rst b/Documentation/virt/uml/user_mode_linux_howto_v2.rst index 27942446f406..584000b743f3 100644 --- a/Documentation/virt/uml/user_mode_linux_howto_v2.rst +++ b/Documentation/virt/uml/user_mode_linux_howto_v2.rst @@ -217,6 +217,8 @@ remote UML and other VM instances. +-----------+--------+------------------------------------+------------+ | fd | vector | dependent on fd type | varies | +-----------+--------+------------------------------------+------------+ +| vde | vector | dep. on VDE VPN: Virt.Net Locator | varies | ++-----------+--------+------------------------------------+------------+ | tuntap | legacy | none | ~ 500Mbit | +-----------+--------+------------------------------------+------------+ | daemon | legacy | none | ~ 450Mbit | @@ -573,6 +575,41 @@ https://github.com/NetSys/bess/wiki/Built-In-Modules-and-Ports BESS transport does not require any special privileges. +VDE vector transport +-------------------- + +Virtual Distributed Ethernet (VDE) is a project whose main goal is to provide a +highly flexible support for virtual networking. + +http://wiki.virtualsquare.org/#/tutorials/vdebasics + +Common usages of VDE include fast prototyping and teaching. + +Examples: + + ``vecX:transport=vde,vnl=tap://tap0`` + +use tap0 + + ``vecX:transport=vde,vnl=slirp://`` + +use slirp + + ``vec0:transport=vde,vnl=vde:///tmp/switch`` + +connect to a vde switch + + ``vecX:transport=\"vde,vnl=cmd://ssh remote.host //tmp/sshlirp\"`` + +connect to a remote slirp (instant VPN: convert ssh to VPN, it uses sshlirp) +https://github.com/virtualsquare/sshlirp + + ``vec0:transport=vde,vnl=vxvde://234.0.0.1`` + +connect to a local area cloud (all the UML nodes using the same +multicast address running on hosts in the same multicast domain (LAN) +will be automagically connected together to a virtual LAN. + Configuring Legacy transports ============================= From 2df8c8d118c750054fdf2c047d2eb3c0ed854dc7 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Sat, 24 Aug 2024 20:04:49 +0800 Subject: [PATCH 259/655] um: Remove obsoleted declaration for execute_syscall_skas The execute_syscall_skas() have been removed since commit e32dacb9f481 ("[PATCH] uml: system call path cleanup"), and now it is useless, so remove it. Signed-off-by: Gaosheng Cui Reviewed-by: Geert Uytterhoeven Signed-off-by: Richard Weinberger --- arch/um/include/shared/skas/skas.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/um/include/shared/skas/skas.h b/arch/um/include/shared/skas/skas.h index ebaa116de30b..854cff8d6319 100644 --- a/arch/um/include/shared/skas/skas.h +++ b/arch/um/include/shared/skas/skas.h @@ -13,7 +13,6 @@ extern int userspace_pid[]; extern int user_thread(unsigned long stack, int flags); extern void new_thread_handler(void); extern void handle_syscall(struct uml_pt_regs *regs); -extern long execute_syscall_skas(void *r); extern unsigned long current_stub_stack(void); extern struct mm_id *current_mm_id(void); extern void current_mm_sync(void); From 2fcd16fbab9f448c7174bf4c3eeda53ef84e28ee Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Mon, 26 Aug 2024 18:08:09 +0800 Subject: [PATCH 260/655] um: Remove unused kpte_clear_flush macro This macro has no users, and __flush_tlb_one doesn't exist either. Signed-off-by: Tiwei Bie Signed-off-by: Richard Weinberger --- arch/um/include/asm/pgtable.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h index 5bb397b65efb..83373c9963e7 100644 --- a/arch/um/include/asm/pgtable.h +++ b/arch/um/include/asm/pgtable.h @@ -359,11 +359,4 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) return pte; } -/* Clear a kernel PTE and flush it from the TLB */ -#define kpte_clear_flush(ptep, vaddr) \ -do { \ - pte_clear(&init_mm, (vaddr), (ptep)); \ - __flush_tlb_one((vaddr)); \ -} while (0) - #endif From 669afa4e8715c5730fb353166f9aaaa14d4fed64 Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Mon, 26 Aug 2024 18:08:10 +0800 Subject: [PATCH 261/655] um: Remove the redundant newpage check in update_pte_range The two checks have been identical since commit ef714f15027c ("um: remove force_flush_all from fork_handler"). And the inner one isn't necessary anymore. Signed-off-by: Tiwei Bie Signed-off-by: Richard Weinberger --- arch/um/kernel/tlb.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index 44c6fc697f3a..548af31d4111 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -82,16 +82,12 @@ static inline int update_pte_range(pmd_t *pmd, unsigned long addr, (x ? UM_PROT_EXEC : 0)); if (pte_newpage(*pte)) { if (pte_present(*pte)) { - if (pte_newpage(*pte)) { - __u64 offset; - unsigned long phys = - pte_val(*pte) & PAGE_MASK; - int fd = phys_mapping(phys, &offset); + __u64 offset; + unsigned long phys = pte_val(*pte) & PAGE_MASK; + int fd = phys_mapping(phys, &offset); - ret = ops->mmap(ops->mm_idp, addr, - PAGE_SIZE, prot, fd, - offset); - } + ret = ops->mmap(ops->mm_idp, addr, PAGE_SIZE, + prot, fd, offset); } else ret = ops->unmap(ops->mm_idp, addr, PAGE_SIZE); } else if (pte_newprot(*pte)) From 94090f418fc80c50ca7ea3f8a6d7ff547260a801 Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Mon, 26 Aug 2024 18:08:11 +0800 Subject: [PATCH 262/655] um: Remove unused fields from thread_struct These fields are no longer used since the removal of tt mode. Signed-off-by: Tiwei Bie Signed-off-by: Richard Weinberger --- arch/um/include/asm/processor-generic.h | 20 +++++--------------- arch/um/kernel/process.c | 8 ++++---- arch/um/kernel/skas/process.c | 4 ++-- 3 files changed, 11 insertions(+), 21 deletions(-) diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h index 5a7c05275aa7..bce4595798da 100644 --- a/arch/um/include/asm/processor-generic.h +++ b/arch/um/include/asm/processor-generic.h @@ -28,20 +28,10 @@ struct thread_struct { struct arch_thread arch; jmp_buf switch_buf; struct { - int op; - union { - struct { - int pid; - } fork, exec; - struct { - int (*proc)(void *); - void *arg; - } thread; - struct { - void (*proc)(void *); - void *arg; - } cb; - } u; + struct { + int (*proc)(void *); + void *arg; + } thread; } request; }; @@ -51,7 +41,7 @@ struct thread_struct { .fault_addr = NULL, \ .prev_sched = NULL, \ .arch = INIT_ARCH_THREAD, \ - .request = { 0 } \ + .request = { } \ } /* diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index f36b63f53bab..be2856af6d4c 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -109,8 +109,8 @@ void new_thread_handler(void) schedule_tail(current->thread.prev_sched); current->thread.prev_sched = NULL; - fn = current->thread.request.u.thread.proc; - arg = current->thread.request.u.thread.arg; + fn = current->thread.request.thread.proc; + arg = current->thread.request.thread.arg; /* * callback returns only if the kernel thread execs a process @@ -158,8 +158,8 @@ int copy_thread(struct task_struct * p, const struct kernel_clone_args *args) arch_copy_thread(¤t->thread.arch, &p->thread.arch); } else { get_safe_registers(p->thread.regs.regs.gp, p->thread.regs.regs.fp); - p->thread.request.u.thread.proc = args->fn; - p->thread.request.u.thread.arg = args->fn_arg; + p->thread.request.thread.proc = args->fn; + p->thread.request.thread.arg = args->fn_arg; handler = new_thread_handler; } diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c index 5f9c1c5f36e2..68657988c8d1 100644 --- a/arch/um/kernel/skas/process.c +++ b/arch/um/kernel/skas/process.c @@ -39,8 +39,8 @@ int __init start_uml(void) init_new_thread_signals(); - init_task.thread.request.u.thread.proc = start_kernel_proc; - init_task.thread.request.u.thread.arg = NULL; + init_task.thread.request.thread.proc = start_kernel_proc; + init_task.thread.request.thread.arg = NULL; return start_idle_thread(task_stack_page(&init_task), &init_task.thread.switch_buf); } From 59376fb2a71b81dfc018db6fe9b6fa9cd3f41ce7 Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Mon, 26 Aug 2024 18:08:12 +0800 Subject: [PATCH 263/655] um: Remove unused mm_fd field from mm_id It's no longer used since the removal of the SKAS3/4 support. Signed-off-by: Tiwei Bie Signed-off-by: Richard Weinberger --- arch/um/include/shared/skas/mm_id.h | 5 +---- arch/um/kernel/reboot.c | 2 +- arch/um/kernel/skas/mmu.c | 12 ++++++------ arch/um/kernel/time.c | 2 +- arch/um/os-Linux/skas/mem.c | 2 +- arch/um/os-Linux/skas/process.c | 2 +- 6 files changed, 11 insertions(+), 14 deletions(-) diff --git a/arch/um/include/shared/skas/mm_id.h b/arch/um/include/shared/skas/mm_id.h index 1e76ba40feba..140388c282f6 100644 --- a/arch/um/include/shared/skas/mm_id.h +++ b/arch/um/include/shared/skas/mm_id.h @@ -7,10 +7,7 @@ #define __MM_ID_H struct mm_id { - union { - int mm_fd; - int pid; - } u; + int pid; unsigned long stack; int syscall_data_len; }; diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c index 3736bca626ba..680bce4bd8fa 100644 --- a/arch/um/kernel/reboot.c +++ b/arch/um/kernel/reboot.c @@ -29,7 +29,7 @@ static void kill_off_processes(void) t = find_lock_task_mm(p); if (!t) continue; - pid = t->mm->context.id.u.pid; + pid = t->mm->context.id.pid; task_unlock(t); os_kill_ptraced_process(pid, 1); } diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index 47f98d87ea3c..886ed5e65674 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -32,11 +32,11 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm) new_id->stack = stack; block_signals_trace(); - new_id->u.pid = start_userspace(stack); + new_id->pid = start_userspace(stack); unblock_signals_trace(); - if (new_id->u.pid < 0) { - ret = new_id->u.pid; + if (new_id->pid < 0) { + ret = new_id->pid; goto out_free; } @@ -83,12 +83,12 @@ void destroy_context(struct mm_struct *mm) * whole UML suddenly dying. Also, cover negative and * 1 cases, since they shouldn't happen either. */ - if (mmu->id.u.pid < 2) { + if (mmu->id.pid < 2) { printk(KERN_ERR "corrupt mm_context - pid = %d\n", - mmu->id.u.pid); + mmu->id.pid); return; } - os_kill_ptraced_process(mmu->id.u.pid, 1); + os_kill_ptraced_process(mmu->id.pid, 1); free_pages(mmu->id.stack, ilog2(STUB_DATA_PAGES)); } diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index 47b9f5e63566..29b27b90581f 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -839,7 +839,7 @@ static irqreturn_t um_timer(int irq, void *dev) if (get_current()->mm != NULL) { /* userspace - relay signal, results in correct userspace timers */ - os_alarm_process(get_current()->mm->context.id.u.pid); + os_alarm_process(get_current()->mm->context.id.pid); } (*timer_clockevent.event_handler)(&timer_clockevent); diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c index c55430775efd..9a13ac23c606 100644 --- a/arch/um/os-Linux/skas/mem.c +++ b/arch/um/os-Linux/skas/mem.c @@ -78,7 +78,7 @@ static inline long do_syscall_stub(struct mm_id *mm_idp) { struct stub_data *proc_data = (void *)mm_idp->stack; int n, i; - int err, pid = mm_idp->u.pid; + int err, pid = mm_idp->pid; n = ptrace_setregs(pid, syscall_regs); if (n < 0) { diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index f7088345b3fc..b6f656bcffb1 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -588,5 +588,5 @@ void reboot_skas(void) void __switch_mm(struct mm_id *mm_idp) { - userspace_pid[0] = mm_idp->u.pid; + userspace_pid[0] = mm_idp->pid; } From bf67dbf4f7c0fe61e4e94b30f5913ca1c539f433 Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Mon, 26 Aug 2024 18:08:13 +0800 Subject: [PATCH 264/655] um: Remove the call to SUBARCH_EXECVE1 macro This macro has never been defined by any supported sub-architectures in tree since it was introduced by commit 1d3468a6643a ("[PATCH uml: move _kern.c files"). Signed-off-by: Tiwei Bie Signed-off-by: Richard Weinberger --- arch/um/kernel/exec.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index 2c15bb2c104c..cb8b5cd9285c 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -35,8 +35,5 @@ void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp) PT_REGS_IP(regs) = eip; PT_REGS_SP(regs) = esp; clear_thread_flag(TIF_SINGLESTEP); -#ifdef SUBARCH_EXECVE1 - SUBARCH_EXECVE1(regs->regs); -#endif } EXPORT_SYMBOL(start_thread); From fe6abeba24996f826473630b2054699828fd9f18 Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Mon, 26 Aug 2024 18:08:14 +0800 Subject: [PATCH 265/655] um: Remove the declaration of user_thread function This function has never been defined since its declaration was introduced by commit 1da177e4c3f4 ("Linux-2.6.12-rc2"). Signed-off-by: Tiwei Bie Signed-off-by: Richard Weinberger --- arch/um/include/shared/skas/skas.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/um/include/shared/skas/skas.h b/arch/um/include/shared/skas/skas.h index 854cff8d6319..85c50122ab98 100644 --- a/arch/um/include/shared/skas/skas.h +++ b/arch/um/include/shared/skas/skas.h @@ -10,7 +10,6 @@ extern int userspace_pid[]; -extern int user_thread(unsigned long stack, int flags); extern void new_thread_handler(void); extern void handle_syscall(struct uml_pt_regs *regs); extern unsigned long current_stub_stack(void); From ae0dc67c2512e09fee26226e1b2d78b82ebebf66 Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Mon, 26 Aug 2024 18:08:15 +0800 Subject: [PATCH 266/655] um: Remove outdated asm/sysrq.h header This header no longer serves a purpose after show_trace was removed by commit 9d1ee8ce92e1 ("um: Rewrite show_stack()"). Signed-off-by: Tiwei Bie Signed-off-by: Richard Weinberger --- arch/um/include/asm/sysrq.h | 8 -------- arch/um/kernel/sysrq.c | 1 - arch/x86/um/sysrq_32.c | 1 - arch/x86/um/sysrq_64.c | 1 - 4 files changed, 11 deletions(-) delete mode 100644 arch/um/include/asm/sysrq.h diff --git a/arch/um/include/asm/sysrq.h b/arch/um/include/asm/sysrq.h deleted file mode 100644 index 8fc8c65cd357..000000000000 --- a/arch/um/include/asm/sysrq.h +++ /dev/null @@ -1,8 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __UM_SYSRQ_H -#define __UM_SYSRQ_H - -struct task_struct; -extern void show_trace(struct task_struct* task, unsigned long *stack); - -#endif diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c index 746715379f12..4bb8622dc512 100644 --- a/arch/um/kernel/sysrq.c +++ b/arch/um/kernel/sysrq.c @@ -11,7 +11,6 @@ #include #include -#include #include #include diff --git a/arch/x86/um/sysrq_32.c b/arch/x86/um/sysrq_32.c index f2383484840d..a1ee415c008d 100644 --- a/arch/x86/um/sysrq_32.c +++ b/arch/x86/um/sysrq_32.c @@ -9,7 +9,6 @@ #include #include #include -#include /* This is declared by */ void show_regs(struct pt_regs *regs) diff --git a/arch/x86/um/sysrq_64.c b/arch/x86/um/sysrq_64.c index 0bf6de40abff..340d8a243c8a 100644 --- a/arch/x86/um/sysrq_64.c +++ b/arch/x86/um/sysrq_64.c @@ -12,7 +12,6 @@ #include #include #include -#include void show_regs(struct pt_regs *regs) { From 381d2f95c8aa575d5d42bf1fe0ea9a70c4bec0cf Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 27 Aug 2024 16:05:01 +0200 Subject: [PATCH 267/655] um: fix time-travel syscall scheduling hack The schedule() call there really never did anything at least since the introduction of the EEVDF scheduler, but now I found a case where we permanently hang in a loop of -ERESTARTNOINTR (due to locking.) Work around it by making any syscalls with error return take time (and then schedule after) so we cannot hang in such a loop forever. Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/um/kernel/skas/syscall.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c index 9ee19e566da3..b09e85279d2b 100644 --- a/arch/um/kernel/skas/syscall.c +++ b/arch/um/kernel/skas/syscall.c @@ -12,23 +12,13 @@ #include #include #include +#include void handle_syscall(struct uml_pt_regs *r) { struct pt_regs *regs = container_of(r, struct pt_regs, regs); int syscall; - /* - * If we have infinite CPU resources, then make every syscall also a - * preemption point, since we don't have any other preemption in this - * case, and kernel threads would basically never run until userspace - * went to sleep, even if said userspace interacts with the kernel in - * various ways. - */ - if (time_travel_mode == TT_MODE_INFCPU || - time_travel_mode == TT_MODE_EXTERNAL) - schedule(); - /* Initialize the syscall number and default return value. */ UPT_SYSCALL_NR(r) = PT_SYSCALL_NR(r->gp); PT_REGS_SET_SYSCALL_RETURN(regs, -ENOSYS); @@ -41,9 +31,25 @@ void handle_syscall(struct uml_pt_regs *r) goto out; syscall = UPT_SYSCALL_NR(r); - if (syscall >= 0 && syscall < __NR_syscalls) - PT_REGS_SET_SYSCALL_RETURN(regs, - EXECUTE_SYSCALL(syscall, regs)); + if (syscall >= 0 && syscall < __NR_syscalls) { + unsigned long ret = EXECUTE_SYSCALL(syscall, regs); + + PT_REGS_SET_SYSCALL_RETURN(regs, ret); + + /* + * An error value here can be some form of -ERESTARTSYS + * and then we'd just loop. Make any error syscalls take + * some time, so that it won't just loop if something is + * not ready, and hopefully other things will make some + * progress. + */ + if (IS_ERR_VALUE(ret) && + (time_travel_mode == TT_MODE_INFCPU || + time_travel_mode == TT_MODE_EXTERNAL)) { + um_udelay(1); + schedule(); + } + } out: syscall_trace_leave(regs); From 3d882cca73be830549833517ddccb3ac4668c04e Mon Sep 17 00:00:00 2001 From: Rafael Rocha Date: Thu, 5 Sep 2024 12:39:21 -0500 Subject: [PATCH 268/655] scsi: st: Fix input/output error on empty drive reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A previous change was introduced to prevent data loss during a power-on reset when a tape is present inside the drive. This commit set the "pos_unknown" flag to true to avoid operations that could compromise data by performing actions from an untracked position. The relevant change is commit 9604eea5bd3a ("scsi: st: Add third party poweron reset handling") As a consequence of this change, a new issue has surfaced: the driver now returns an "Input/output error" even for empty drives when the drive, host, or bus is reset. This issue stems from the "flush_buffer" function, which first checks whether the "pos_unknown" flag is set. If the flag is set, the user will encounter an "Input/output error" until the tape position is known again. This behavior differs from the previous implementation, where empty drives were not affected at system start up time, allowing tape software to send commands to the driver to retrieve the drive's status and other information. The current behavior prioritizes the "pos_unknown" flag over the "ST_NO_TAPE" status, leading to issues for software that detects drives during system startup. This software will receive an "Input/output error" until a tape is loaded and its position is known. To resolve this, the "ST_NO_TAPE" status should take priority when the drive is empty, allowing communication with the drive following a power-on reset. At the same time, the change should continue to protect data by maintaining the "pos_unknown" flag when the drive contains a tape and its position is unknown. Signed-off-by: Rafael Rocha Link: https://lore.kernel.org/r/20240905173921.10944-1-rrochavi@fnal.gov Fixes: 9604eea5bd3a ("scsi: st: Add third party poweron reset handling") Acked-by: Kai Mäkisara Signed-off-by: Martin K. Petersen --- drivers/scsi/st.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 0d8ce1a92168..d50bad3a2ce9 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -834,6 +834,9 @@ static int flush_buffer(struct scsi_tape *STp, int seek_next) int backspace, result; struct st_partstat *STps; + if (STp->ready != ST_READY) + return 0; + /* * If there was a bus reset, block further access * to this device. @@ -841,8 +844,6 @@ static int flush_buffer(struct scsi_tape *STp, int seek_next) if (STp->pos_unknown) return (-EIO); - if (STp->ready != ST_READY) - return 0; STps = &(STp->ps[STp->partition]); if (STps->rw == ST_WRITING) /* Writing */ return st_flush_write_buffer(STp); From b112947ffc30e9632d5c2acd0e9081e3e6bee01e Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Fri, 30 Aug 2024 15:58:58 +0800 Subject: [PATCH 269/655] scsi: sd: Remove duplicate included header file linux/bio-integrity.h The header file linux/bio-integrity.h is included twice. Remove the last one. The compilation test has passed. Signed-off-by: Hongbo Li Link: https://lore.kernel.org/r/20240830075858.3541907-1-lihongbo22@huawei.com Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index e90a2e4ab318..b0e565402f6a 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include #include From e3684006945414153a33c5c4d1202dda2b80650f Mon Sep 17 00:00:00 2001 From: Brian King Date: Tue, 3 Sep 2024 08:47:09 -0500 Subject: [PATCH 270/655] scsi: ibmvfc: Add max_sectors module parameter There are some scenarios that can occur, such as performing an upgrade of the virtual I/O server, where the supported max transfer of the backing device for an ibmvfc HBA can change. If the max transfer of the backing device decreases, this can cause issues with previously discovered LUNs. This patch accomplishes two things. First, it changes the default ibmvfc max transfer value to 1MB. This is generally supported by all backing devices, which should mitigate this issue out of the box. Secondly, it adds a module parameter, enabling a user to increase the max transfer value to values that are larger than 1MB, as long as they have configured these larger values on the virtual I/O server as well. [mkp: fix checkpatch warnings] Signed-off-by: Brian King Link: https://lore.kernel.org/r/20240903134708.139645-2-brking@linux.ibm.com Reviewed-by: Martin Wilck Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi/ibmvfc.c | 21 ++++++++++++++++++--- drivers/scsi/ibmvscsi/ibmvfc.h | 2 +- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index a3d1013c8307..e66c3ef74267 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -37,6 +37,7 @@ static unsigned int default_timeout = IBMVFC_DEFAULT_TIMEOUT; static u64 max_lun = IBMVFC_MAX_LUN; static unsigned int max_targets = IBMVFC_MAX_TARGETS; static unsigned int max_requests = IBMVFC_MAX_REQUESTS_DEFAULT; +static u16 max_sectors = IBMVFC_MAX_SECTORS; static u16 scsi_qdepth = IBMVFC_SCSI_QDEPTH; static unsigned int disc_threads = IBMVFC_MAX_DISC_THREADS; static unsigned int ibmvfc_debug = IBMVFC_DEBUG; @@ -83,6 +84,9 @@ MODULE_PARM_DESC(default_timeout, module_param_named(max_requests, max_requests, uint, S_IRUGO); MODULE_PARM_DESC(max_requests, "Maximum requests for this adapter. " "[Default=" __stringify(IBMVFC_MAX_REQUESTS_DEFAULT) "]"); +module_param_named(max_sectors, max_sectors, ushort, S_IRUGO); +MODULE_PARM_DESC(max_sectors, "Maximum sectors for this adapter. " + "[Default=" __stringify(IBMVFC_MAX_SECTORS) "]"); module_param_named(scsi_qdepth, scsi_qdepth, ushort, S_IRUGO); MODULE_PARM_DESC(scsi_qdepth, "Maximum scsi command depth per adapter queue. " "[Default=" __stringify(IBMVFC_SCSI_QDEPTH) "]"); @@ -1494,7 +1498,7 @@ static void ibmvfc_set_login_info(struct ibmvfc_host *vhost) memset(login_info, 0, sizeof(*login_info)); login_info->ostype = cpu_to_be32(IBMVFC_OS_LINUX); - login_info->max_dma_len = cpu_to_be64(IBMVFC_MAX_SECTORS << 9); + login_info->max_dma_len = cpu_to_be64(max_sectors << 9); login_info->max_payload = cpu_to_be32(sizeof(struct ibmvfc_fcp_cmd_iu)); login_info->max_response = cpu_to_be32(sizeof(struct ibmvfc_fcp_rsp)); login_info->partition_num = cpu_to_be32(vhost->partition_number); @@ -5230,7 +5234,7 @@ static void ibmvfc_npiv_login_done(struct ibmvfc_event *evt) } vhost->logged_in = 1; - npiv_max_sectors = min((uint)(be64_to_cpu(rsp->max_dma_len) >> 9), IBMVFC_MAX_SECTORS); + npiv_max_sectors = min((uint)(be64_to_cpu(rsp->max_dma_len) >> 9), max_sectors); dev_info(vhost->dev, "Host partition: %s, device: %s %s %s max sectors %u\n", rsp->partition_name, rsp->device_name, rsp->port_loc_code, rsp->drc_name, npiv_max_sectors); @@ -6329,7 +6333,7 @@ static int ibmvfc_probe(struct vio_dev *vdev, const struct vio_device_id *id) shost->can_queue = scsi_qdepth; shost->max_lun = max_lun; shost->max_id = max_targets; - shost->max_sectors = IBMVFC_MAX_SECTORS; + shost->max_sectors = max_sectors; shost->max_cmd_len = IBMVFC_MAX_CDB_LEN; shost->unique_id = shost->host_no; shost->nr_hw_queues = mq_enabled ? min(max_scsi_queues, nr_scsi_hw_queues) : 1; @@ -6556,6 +6560,7 @@ static struct fc_function_template ibmvfc_transport_functions = { **/ static int __init ibmvfc_module_init(void) { + int min_max_sectors = PAGE_SIZE >> 9; int rc; if (!firmware_has_feature(FW_FEATURE_VIO)) @@ -6564,6 +6569,16 @@ static int __init ibmvfc_module_init(void) printk(KERN_INFO IBMVFC_NAME": IBM Virtual Fibre Channel Driver version: %s %s\n", IBMVFC_DRIVER_VERSION, IBMVFC_DRIVER_DATE); + /* + * Range check the max_sectors module parameter. The upper bounds is + * implicity checked since the parameter is a ushort. + */ + if (max_sectors < min_max_sectors) { + printk(KERN_ERR IBMVFC_NAME ": max_sectors must be at least %d.\n", + min_max_sectors); + max_sectors = min_max_sectors; + } + ibmvfc_transport_template = fc_attach_transport(&ibmvfc_transport_functions); if (!ibmvfc_transport_template) return -ENOMEM; diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h index 745ad5ac7251..c73ed2314ad0 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.h +++ b/drivers/scsi/ibmvscsi/ibmvfc.h @@ -32,7 +32,7 @@ #define IBMVFC_DEBUG 0 #define IBMVFC_MAX_TARGETS 1024 #define IBMVFC_MAX_LUN 0xffffffff -#define IBMVFC_MAX_SECTORS 0xffffu +#define IBMVFC_MAX_SECTORS 2048 #define IBMVFC_MAX_DISC_THREADS 4 #define IBMVFC_TGT_MEMPOOL_SZ 64 #define IBMVFC_MAX_CMDS_PER_LUN 64 From 45fad027df61f848fa9e036e8de6ba009cd1a134 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 1 Sep 2024 22:45:27 +0200 Subject: [PATCH 271/655] scsi: libcxgbi: Remove an unused field in struct cxgbi_device Usage of .dev_ddp_cleanup() in libcxgbi was removed by commit 5999299f1ce9 ("cxgb3i,cxgb4i,libcxgbi: remove iSCSI DDP support") on 2016-07. .csk_rx_pdu_ready() and debugfs_root have apparently never been used since introduction by commit 9ba682f01e2f ("[SCSI] libcxgbi: common library for cxgb3i and cxgb4i") Remove the now unused function pointer from struct cxgbi_device. Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/58f77f690d85e2c653447e3e3fc4f8d3c3ce8563.1725223504.git.christophe.jaillet@wanadoo.fr Signed-off-by: Martin K. Petersen --- drivers/scsi/cxgbi/libcxgbi.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/scsi/cxgbi/libcxgbi.h b/drivers/scsi/cxgbi/libcxgbi.h index d92cf1dccc2f..0909b03e2497 100644 --- a/drivers/scsi/cxgbi/libcxgbi.h +++ b/drivers/scsi/cxgbi/libcxgbi.h @@ -485,7 +485,6 @@ struct cxgbi_device { unsigned char nmtus; unsigned char nports; struct pci_dev *pdev; - struct dentry *debugfs_root; struct iscsi_transport *itp; struct module *owner; @@ -499,7 +498,6 @@ struct cxgbi_device { unsigned int rxq_idx_cntr; struct cxgbi_ports_map pmap; - void (*dev_ddp_cleanup)(struct cxgbi_device *); struct cxgbi_ppm* (*cdev2ppm)(struct cxgbi_device *); int (*csk_ddp_set_map)(struct cxgbi_ppm *, struct cxgbi_sock *, struct cxgbi_task_tag_info *); @@ -512,7 +510,6 @@ struct cxgbi_device { unsigned int, int); void (*csk_release_offload_resources)(struct cxgbi_sock *); - int (*csk_rx_pdu_ready)(struct cxgbi_sock *, struct sk_buff *); u32 (*csk_send_rx_credits)(struct cxgbi_sock *, u32); int (*csk_push_tx_frames)(struct cxgbi_sock *, int); void (*csk_send_abort_req)(struct cxgbi_sock *); From e88ed594328900959f8aae72c2e6240703a91f33 Mon Sep 17 00:00:00 2001 From: Yan Zhen Date: Mon, 2 Sep 2024 09:33:03 +0800 Subject: [PATCH 272/655] scsi: fusion: mptctl: Use min() macro Using the real macro is usually more intuitive and readable when the original file is guaranteed to contain the minmax.h header file and compile correctly. Signed-off-by: Yan Zhen Link: https://lore.kernel.org/r/20240902013303.909316-1-yanzhen@vivo.com Signed-off-by: Martin K. Petersen --- drivers/message/fusion/mptctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/message/fusion/mptctl.c b/drivers/message/fusion/mptctl.c index 9f3999750c23..087397ccca59 100644 --- a/drivers/message/fusion/mptctl.c +++ b/drivers/message/fusion/mptctl.c @@ -1609,7 +1609,7 @@ mptctl_eventreport (MPT_ADAPTER *ioc, unsigned long arg) maxEvents = numBytes/sizeof(MPT_IOCTL_EVENTS); - max = MPTCTL_EVENT_LOG_SIZE < maxEvents ? MPTCTL_EVENT_LOG_SIZE : maxEvents; + max = min(maxEvents, MPTCTL_EVENT_LOG_SIZE); /* If fewer than 1 event is requested, there must have * been some type of error. From 57bada8a5e69e6cb0668436b206db1fbdbf7ebfd Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 2 Sep 2024 15:12:02 +0100 Subject: [PATCH 273/655] scsi: zalon: Remove trailing space after \n newline There is a extraneous space after a newline in a dev_printk message, remove it. Also fix non-tabbed indentation of the statement. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20240902141202.308632-1-colin.i.king@gmail.com Signed-off-by: Martin K. Petersen --- drivers/scsi/zalon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/zalon.c b/drivers/scsi/zalon.c index 22d412cab91d..15602ec862e3 100644 --- a/drivers/scsi/zalon.c +++ b/drivers/scsi/zalon.c @@ -139,7 +139,7 @@ zalon_probe(struct parisc_device *dev) return -ENODEV; if (request_irq(dev->irq, ncr53c8xx_intr, IRQF_SHARED, "zalon", host)) { - dev_printk(KERN_ERR, &dev->dev, "irq problem with %d, detaching\n ", + dev_printk(KERN_ERR, &dev->dev, "irq problem with %d, detaching\n", dev->irq); goto fail; } From 34f04a9b6e39d0085df5a2a5e6e4e1878d808132 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 2 Sep 2024 15:15:37 +0100 Subject: [PATCH 274/655] scsi: pm8001: Remove trailing space after \n newline There is a extraneous space after a newline in a pm8001_dbg message. Remove it. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20240902141537.308914-1-colin.i.king@gmail.com Acked-by: Jack Wang Signed-off-by: Martin K. Petersen --- drivers/scsi/pm8001/pm80xx_hwi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c index 8fe886dc5e47..a9869cd8c4c0 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.c +++ b/drivers/scsi/pm8001/pm80xx_hwi.c @@ -2037,7 +2037,7 @@ mpi_ssp_completion(struct pm8001_hba_info *pm8001_ha, void *piomb) atomic_dec(&pm8001_dev->running_req); break; } - pm8001_dbg(pm8001_ha, IO, "scsi_status = 0x%x\n ", + pm8001_dbg(pm8001_ha, IO, "scsi_status = 0x%x\n", psspPayload->ssp_resp_iu.status); spin_lock_irqsave(&t->task_state_lock, flags); t->task_state_flags &= ~SAS_TASK_STATE_PENDING; From 571d81b482f00dfe8912ecbc3de090f99181ee7a Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 2 Sep 2024 15:22:52 +0100 Subject: [PATCH 275/655] scsi: megaraid_sas: Remove trailing space after \n newline There is a extraneous space after a newline in a dev_err message. Remove it. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20240902142252.309232-1-colin.i.king@gmail.com Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 6c79c350a4d5..4ecf5284c0fc 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -6380,7 +6380,7 @@ static int megasas_init_fw(struct megasas_instance *instance) GFP_KERNEL); if (!fusion->stream_detect_by_ld[i]) { dev_err(&instance->pdev->dev, - "unable to allocate stream detect by LD\n "); + "unable to allocate stream detect by LD\n"); for (j = 0; j < i; ++j) kfree(fusion->stream_detect_by_ld[j]); kfree(fusion->stream_detect_by_ld); From d2ce0e5ab505ddaf153c9c20b4f627f0ed034a1e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 2 Sep 2024 15:41:53 +0100 Subject: [PATCH 276/655] scsi: hisi_sas: Remove trailing space after \n newline There is a extraneous space after a newline in a dev_info message. Remove it. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20240902144153.309920-1-colin.i.king@gmail.com Signed-off-by: Martin K. Petersen --- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index feda9b54b443..4cd3a3eab6f1 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -2421,7 +2421,7 @@ static void slot_complete_v3_hw(struct hisi_hba *hisi_hba, spin_lock_irqsave(&device->done_lock, flags); if (test_bit(SAS_HA_FROZEN, &ha->state)) { spin_unlock_irqrestore(&device->done_lock, flags); - dev_info(dev, "slot complete: task(%pK) ignored\n ", + dev_info(dev, "slot complete: task(%pK) ignored\n", task); return; } From fa557da6b05034538ea4ecbf55bac4c23d391e2d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 2 Sep 2024 15:51:38 +0100 Subject: [PATCH 277/655] scsi: qedf: Remove trailing space after \n newline There is a extraneous space after a newline in a QEDF_INFO message. Remove it. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20240902145138.310883-1-colin.i.king@gmail.com Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qedf/qedf_io.c b/drivers/scsi/qedf/qedf_io.c index 054a51713d55..fcfc3bed02c6 100644 --- a/drivers/scsi/qedf/qedf_io.c +++ b/drivers/scsi/qedf/qedf_io.c @@ -310,7 +310,7 @@ struct qedf_ioreq *qedf_alloc_cmd(struct qedf_rport *fcport, u8 cmd_type) if (!free_sqes) { QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, - "Returning NULL, free_sqes=%d.\n ", + "Returning NULL, free_sqes=%d.\n", free_sqes); goto out_failed; } From c7c846fa94c9f71c4cfab3f15cffc5030cd01e39 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 2 Sep 2024 16:00:42 +0100 Subject: [PATCH 278/655] scsi: lpfc: Remove trailing space after \n newline There is a extraneous space after a newline in two lpfc_printf_log() messages. Remove the space. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20240902150042.311157-1-colin.i.king@gmail.com Reviewed-by: Justin Tee Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_sli.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 332b8d2348e9..0e60eebe53b5 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -8818,7 +8818,7 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) rc = lpfc_sli4_queue_setup(phba); if (unlikely(rc)) { lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT, - "0381 Error %d during queue setup.\n ", rc); + "0381 Error %d during queue setup.\n", rc); goto out_stop_timers; } /* Initialize the driver internal SLI layer lists. */ @@ -21149,7 +21149,7 @@ lpfc_drain_txq(struct lpfc_hba *phba) if (!piocbq) { spin_unlock_irqrestore(&pring->ring_lock, iflags); lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT, - "2823 txq empty and txq_cnt is %d\n ", + "2823 txq empty and txq_cnt is %d\n", txq_cnt); break; } From 0557f49870714c8c8cddfdc3c4b805aeae6c3e4e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 2 Sep 2024 18:27:08 +0100 Subject: [PATCH 279/655] scsi: mpt3sas: Remove trailing space after \n newline There is a extraneous space after a newline in an ioc_info message. Remove it and join to split literal strings into one. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20240902172708.369741-1-colin.i.king@gmail.com Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 2d3eeda5a6a0..fc013d7e3f2d 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -8882,9 +8882,8 @@ _base_check_ioc_facts_changes(struct MPT3SAS_ADAPTER *ioc) ioc->device_remove_in_progress, pd_handles_sz, GFP_KERNEL); if (!device_remove_in_progress) { ioc_info(ioc, - "Unable to allocate the memory for " - "device_remove_in_progress of sz: %d\n " - , pd_handles_sz); + "Unable to allocate the memory for device_remove_in_progress of sz: %d\n", + pd_handles_sz); return -ENOMEM; } memset(device_remove_in_progress + From 0c40f079f1c808e7e480c795a79009f200366eb1 Mon Sep 17 00:00:00 2001 From: Manish Pandey Date: Tue, 3 Sep 2024 12:07:09 +0530 Subject: [PATCH 280/655] scsi: ufs: qcom: Update MODE_MAX cfg_bw value Commit 8db8f6ce556a ("scsi: ufs: qcom: Add missing interconnect bandwidth values for Gear 5") updated the ufs_qcom_bw_table for Gear 5. However, it missed updating the cfg_bw value for the max mode. Hence update the cfg_bw value for the max mode for UFS 4.x devices. Fixes: 8db8f6ce556a ("scsi: ufs: qcom: Add missing interconnect bandwidth values for Gear 5") Cc: stable@vger.kernel.org Signed-off-by: Manish Pandey Link: https://lore.kernel.org/r/20240903063709.4335-1-quic_mapa@quicinc.com Reviewed-by: Manivannan Sadhasivam Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-qcom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c index 810e637047d0..f994cea05617 100644 --- a/drivers/ufs/host/ufs-qcom.c +++ b/drivers/ufs/host/ufs-qcom.c @@ -93,7 +93,7 @@ static const struct __ufs_qcom_bw_table { [MODE_HS_RB][UFS_HS_G3][UFS_LANE_2] = { 1492582, 204800 }, [MODE_HS_RB][UFS_HS_G4][UFS_LANE_2] = { 2915200, 409600 }, [MODE_HS_RB][UFS_HS_G5][UFS_LANE_2] = { 5836800, 819200 }, - [MODE_MAX][0][0] = { 7643136, 307200 }, + [MODE_MAX][0][0] = { 7643136, 819200 }, }; static void ufs_qcom_get_default_testbus_cfg(struct ufs_qcom_host *host); From 24d7071d964574cca41fa72a10c211221af37aec Mon Sep 17 00:00:00 2001 From: Tomas Henzl Date: Tue, 3 Sep 2024 16:47:29 +0200 Subject: [PATCH 281/655] scsi: mpi3mr: A performance fix Commit 0c52310f2600 ("hrtimer: Ignore slack time for RT tasks in schedule_hrtimeout_range()") effectivelly shortens a sleep in a polling function in the driver. That is causing a performance regression as the new value of just 2us is too low, in certain tests the perf drop is ~30%. Fix this by adjusting the sleep to 20us (close to the previous value). Reported-by: Jan Jurca Signed-off-by: Tomas Henzl Acked-by: Sumit Saxena Link: https://lore.kernel.org/r/20240903144729.37218-1-thenzl@redhat.com Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi3mr.h | 2 +- drivers/scsi/mpi3mr/mpi3mr_fw.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/mpi3mr/mpi3mr.h b/drivers/scsi/mpi3mr/mpi3mr.h index 1dc640de3efc..0f9101a17ce9 100644 --- a/drivers/scsi/mpi3mr/mpi3mr.h +++ b/drivers/scsi/mpi3mr/mpi3mr.h @@ -178,7 +178,7 @@ extern atomic64_t event_counter; #define MPI3MR_DEFAULT_SDEV_QD 32 /* Definitions for Threaded IRQ poll*/ -#define MPI3MR_IRQ_POLL_SLEEP 2 +#define MPI3MR_IRQ_POLL_SLEEP 20 #define MPI3MR_IRQ_POLL_TRIGGER_IOCOUNT 8 /* Definitions for the controller security status*/ diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c index 2e1a92d306b2..35ca54ba18e9 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_fw.c +++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c @@ -728,7 +728,7 @@ static irqreturn_t mpi3mr_isr_poll(int irq, void *privdata) mpi3mr_process_op_reply_q(mrioc, intr_info->op_reply_q); - usleep_range(MPI3MR_IRQ_POLL_SLEEP, 10 * MPI3MR_IRQ_POLL_SLEEP); + usleep_range(MPI3MR_IRQ_POLL_SLEEP, MPI3MR_IRQ_POLL_SLEEP + 1); } while (atomic_read(&intr_info->op_reply_q->pend_ios) && (num_op_reply < mrioc->max_host_ios)); From a8598aefae31f3bf27bad1a4cebf2b04a4cdf220 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 4 Sep 2024 14:03:04 -0700 Subject: [PATCH 282/655] scsi: sd: Retry START STOP UNIT commands During system resume, sd_start_stop_device() submits a START STOP UNIT command to the SCSI device that is being resumed. That command is not retried in case of a unit attention and hence may fail. An example: [16575.983359] sd 0:0:0:3: [sdd] Starting disk [16575.983693] sd 0:0:0:3: [sdd] Start/Stop Unit failed: Result: hostbyte=0x00 driverbyte=DRIVER_OK [16575.983712] sd 0:0:0:3: [sdd] Sense Key : 0x6 [16575.983730] sd 0:0:0:3: [sdd] ASC=0x29 ASCQ=0x0 [16575.983738] sd 0:0:0:3: PM: dpm_run_callback(): scsi_bus_resume+0x0/0xa0 returns -5 [16575.983783] sd 0:0:0:3: PM: failed to resume async: error -5 Make the SCSI core retry the START STOP UNIT command if the device reports that it has been powered on or that it has been reset. Cc: Damien Le Moal Cc: Mike Christie Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240904210304.2947789-1-bvanassche@acm.org Reviewed-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index b0e565402f6a..43a723164fc5 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -4082,9 +4082,38 @@ static int sd_start_stop_device(struct scsi_disk *sdkp, int start) { unsigned char cmd[6] = { START_STOP }; /* START_VALID */ struct scsi_sense_hdr sshdr; + struct scsi_failure failure_defs[] = { + { + /* Power on, reset, or bus device reset occurred */ + .sense = UNIT_ATTENTION, + .asc = 0x29, + .ascq = 0, + .result = SAM_STAT_CHECK_CONDITION, + }, + { + /* Power on occurred */ + .sense = UNIT_ATTENTION, + .asc = 0x29, + .ascq = 1, + .result = SAM_STAT_CHECK_CONDITION, + }, + { + /* SCSI bus reset */ + .sense = UNIT_ATTENTION, + .asc = 0x29, + .ascq = 2, + .result = SAM_STAT_CHECK_CONDITION, + }, + {} + }; + struct scsi_failures failures = { + .total_allowed = 3, + .failure_definitions = failure_defs, + }; const struct scsi_exec_args exec_args = { .sshdr = &sshdr, .req_flags = BLK_MQ_REQ_PM, + .failures = &failures, }; struct scsi_device *sdp = sdkp->device; int res; From 4708c9332d975aabc8498ddb85936631535fdc20 Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Thu, 5 Sep 2024 10:35:21 +0800 Subject: [PATCH 283/655] scsi: pmcraid: Convert comma to semicolon Replace comma between expressions with semicolons. Using a ',' in place of a ';' can have unintended side effects. Although that is not the case here, it is seems best to use ';' unless ',' is intended. Found by inspection. No functional change intended. Compile tested only. Signed-off-by: Chen Ni Link: https://lore.kernel.org/r/20240905023521.1642862-1-nichen@iscas.ac.cn Signed-off-by: Martin K. Petersen --- drivers/scsi/pmcraid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c index a2a084c8075e..8f7307c4d876 100644 --- a/drivers/scsi/pmcraid.c +++ b/drivers/scsi/pmcraid.c @@ -1946,7 +1946,7 @@ static void pmcraid_soft_reset(struct pmcraid_cmd *cmd) } iowrite32(doorbell, pinstance->int_regs.host_ioa_interrupt_reg); - ioread32(pinstance->int_regs.host_ioa_interrupt_reg), + ioread32(pinstance->int_regs.host_ioa_interrupt_reg); int_reg = ioread32(pinstance->int_regs.ioa_host_interrupt_reg); pmcraid_info("Waiting for IOA to become operational %x:%x\n", From bba20b894e3c2e20f1ac914561b9ac241e0e359e Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 7 Sep 2024 08:27:22 +0200 Subject: [PATCH 284/655] scsi: scsi_debug: Remove a useless memset() 'arr' is kzalloc()'ed, so there is no need to call memset(.., 0, ...) on it. It is already cleared. This is a follow up of commit b952eb270df3 ("scsi: scsi_debug: Allocate the MODE SENSE response from the heap"). Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/6296722174e39a51cac74b7fc68b0d75bd0db2a3.1725690433.git.christophe.jaillet@wanadoo.fr Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index a9d8a9c62663..d95f417e24c0 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -2760,7 +2760,6 @@ static int resp_mode_sense(struct scsi_cmnd *scp, else bd_len = 0; alloc_len = msense_6 ? cmd[4] : get_unaligned_be16(cmd + 7); - memset(arr, 0, SDEBUG_MAX_MSENSE_SZ); if (0x3 == pcontrol) { /* Saving values not supported */ mk_sense_buffer(scp, ILLEGAL_REQUEST, SAVING_PARAMS_UNSUP, 0); return check_condition_result; From a141c17a543332fc1238eb5cba562bfc66879126 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Thu, 12 Sep 2024 10:58:28 +0200 Subject: [PATCH 285/655] scsi: pm8001: Do not overwrite PCI queue mapping blk_mq_pci_map_queues() maps all queues but right after this, we overwrite these mappings by calling blk_mq_map_queues(). Just use one helper but not both. Fixes: 42f22fe36d51 ("scsi: pm8001: Expose hardware queues for pm80xx") Reviewed-by: Christoph Hellwig Reviewed-by: John Garry Signed-off-by: Daniel Wagner Link: https://lore.kernel.org/r/20240912-do-not-overwrite-pci-mapping-v1-1-85724b6cec49@suse.de Signed-off-by: Martin K. Petersen --- drivers/scsi/pm8001/pm8001_init.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/pm8001/pm8001_init.c b/drivers/scsi/pm8001/pm8001_init.c index 1e63cb6cd8e3..33e1eba62ca1 100644 --- a/drivers/scsi/pm8001/pm8001_init.c +++ b/drivers/scsi/pm8001/pm8001_init.c @@ -100,10 +100,12 @@ static void pm8001_map_queues(struct Scsi_Host *shost) struct pm8001_hba_info *pm8001_ha = sha->lldd_ha; struct blk_mq_queue_map *qmap = &shost->tag_set.map[HCTX_TYPE_DEFAULT]; - if (pm8001_ha->number_of_intr > 1) + if (pm8001_ha->number_of_intr > 1) { blk_mq_pci_map_queues(qmap, pm8001_ha->pdev, 1); + return; + } - return blk_mq_map_queues(qmap); + blk_mq_map_queues(qmap); } /* From f81eaf08385ddd474a2f41595a7757502870c0eb Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Thu, 12 Sep 2024 15:43:08 +0200 Subject: [PATCH 286/655] scsi: sd: Fix off-by-one error in sd_read_block_characteristics() Ff the device returns page 0xb1 with length 8 (happens with qemu v2.x, for example), sd_read_block_characteristics() may attempt an out-of-bounds memory access when accessing the zoned field at offset 8. Fixes: 7fb019c46eee ("scsi: sd: Switch to using scsi_device VPD pages") Cc: stable@vger.kernel.org Signed-off-by: Martin Wilck Link: https://lore.kernel.org/r/20240912134308.282824-1-mwilck@suse.com Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 43a723164fc5..64aaf24c6398 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3402,7 +3402,7 @@ static void sd_read_block_characteristics(struct scsi_disk *sdkp, rcu_read_lock(); vpd = rcu_dereference(sdkp->device->vpd_pgb1); - if (!vpd || vpd->len < 8) { + if (!vpd || vpd->len <= 8) { rcu_read_unlock(); return; } From 9634bb07083cfae38933d4e944709e06e4c30e74 Mon Sep 17 00:00:00 2001 From: Ranjan Kumar Date: Thu, 5 Sep 2024 15:57:49 +0530 Subject: [PATCH 287/655] scsi: mpi3mr: Enhance the Enable Controller retry logic When enabling the IOC request and polling for controller ready status, poll for controller fault and reset history bit. If the controller is faulted or the reset history bit is set, retry the initialization a maximum of three times (2 retries) or if the cumulative time taken for all retries exceeds 510 seconds. Signed-off-by: Prayas Patel Signed-off-by: Ranjan Kumar Link: https://lore.kernel.org/r/20240905102753.105310-2-ranjan.kumar@broadcom.com Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi3mr_fw.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c index 2e1a92d306b2..f589df62ead3 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_fw.c +++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c @@ -1362,6 +1362,10 @@ static int mpi3mr_bring_ioc_ready(struct mpi3mr_ioc *mrioc) int retval = 0; enum mpi3mr_iocstate ioc_state; u64 base_info; + u8 retry = 0; + u64 start_time, elapsed_time_sec; + +retry_bring_ioc_ready: ioc_status = readl(&mrioc->sysif_regs->ioc_status); ioc_config = readl(&mrioc->sysif_regs->ioc_configuration); @@ -1460,6 +1464,9 @@ static int mpi3mr_bring_ioc_ready(struct mpi3mr_ioc *mrioc) ioc_config |= MPI3_SYSIF_IOC_CONFIG_ENABLE_IOC; writel(ioc_config, &mrioc->sysif_regs->ioc_configuration); + if (retry == 0) + start_time = jiffies; + timeout = mrioc->ready_timeout * 10; do { ioc_state = mpi3mr_get_iocstate(mrioc); @@ -1469,6 +1476,12 @@ static int mpi3mr_bring_ioc_ready(struct mpi3mr_ioc *mrioc) mpi3mr_iocstate_name(ioc_state)); return 0; } + ioc_status = readl(&mrioc->sysif_regs->ioc_status); + if ((ioc_status & MPI3_SYSIF_IOC_STATUS_RESET_HISTORY) || + (ioc_status & MPI3_SYSIF_IOC_STATUS_FAULT)) { + mpi3mr_print_fault_info(mrioc); + goto out_failed; + } if (!pci_device_is_present(mrioc->pdev)) { mrioc->unrecoverable = 1; ioc_err(mrioc, @@ -1477,9 +1490,19 @@ static int mpi3mr_bring_ioc_ready(struct mpi3mr_ioc *mrioc) goto out_device_not_present; } msleep(100); - } while (--timeout); + elapsed_time_sec = jiffies_to_msecs(jiffies - start_time)/1000; + } while (elapsed_time_sec < mrioc->ready_timeout); out_failed: + elapsed_time_sec = jiffies_to_msecs(jiffies - start_time)/1000; + if ((retry < 2) && (elapsed_time_sec < (mrioc->ready_timeout - 60))) { + retry++; + + ioc_warn(mrioc, "retrying to bring IOC ready, retry_count:%d\n" + " elapsed time =%llu\n", retry, elapsed_time_sec); + + goto retry_bring_ioc_ready; + } ioc_state = mpi3mr_get_iocstate(mrioc); ioc_err(mrioc, "failed to bring to ready state, current state: %s\n", From fc1ddda330941b8a1571368bcbade16d377a5eaa Mon Sep 17 00:00:00 2001 From: Ranjan Kumar Date: Thu, 5 Sep 2024 15:57:50 +0530 Subject: [PATCH 288/655] scsi: mpi3mr: Use firmware-provided timestamp update interval Make driver use the timestamp update interval value provided by firmware in the driver page 1. If firmware fails to provide non-zero value, then the driver will fall back to the driver defined macro. Signed-off-by: Prayas Patel Signed-off-by: Ranjan Kumar Link: https://lore.kernel.org/r/20240905102753.105310-3-ranjan.kumar@broadcom.com Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi/mpi30_cnfg.h | 1 + drivers/scsi/mpi3mr/mpi3mr.h | 4 +++- drivers/scsi/mpi3mr/mpi3mr_fw.c | 27 ++++++++++++++++++++++++++- 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/mpi3mr/mpi/mpi30_cnfg.h b/drivers/scsi/mpi3mr/mpi/mpi30_cnfg.h index 4b7a8f6314a3..b46bd08eac99 100644 --- a/drivers/scsi/mpi3mr/mpi/mpi30_cnfg.h +++ b/drivers/scsi/mpi3mr/mpi/mpi30_cnfg.h @@ -1327,6 +1327,7 @@ struct mpi3_driver_page0 { struct mpi3_driver_page1 { struct mpi3_config_page_header header; __le32 flags; + u8 time_stamp_update; __le32 reserved0c; __le16 host_diag_trace_max_size; __le16 host_diag_trace_min_size; diff --git a/drivers/scsi/mpi3mr/mpi3mr.h b/drivers/scsi/mpi3mr/mpi3mr.h index 1dc640de3efc..4b5f43af9f60 100644 --- a/drivers/scsi/mpi3mr/mpi3mr.h +++ b/drivers/scsi/mpi3mr/mpi3mr.h @@ -1090,6 +1090,7 @@ struct scmd_priv { * @evtack_cmds_bitmap: Event Ack bitmap * @delayed_evtack_cmds_list: Delayed event acknowledgment list * @ts_update_counter: Timestamp update counter + * @ts_update_interval: Timestamp update interval * @reset_in_progress: Reset in progress flag * @unrecoverable: Controller unrecoverable flag * @prev_reset_result: Result of previous reset @@ -1277,7 +1278,8 @@ struct mpi3mr_ioc { unsigned long *evtack_cmds_bitmap; struct list_head delayed_evtack_cmds_list; - u32 ts_update_counter; + u16 ts_update_counter; + u16 ts_update_interval; u8 reset_in_progress; u8 unrecoverable; int prev_reset_result; diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c index f589df62ead3..ddb770fae9d3 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_fw.c +++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c @@ -2694,7 +2694,7 @@ static void mpi3mr_watchdog_work(struct work_struct *work) return; } - if (mrioc->ts_update_counter++ >= MPI3MR_TSUPDATE_INTERVAL) { + if (mrioc->ts_update_counter++ >= mrioc->ts_update_interval) { mrioc->ts_update_counter = 0; mpi3mr_sync_timestamp(mrioc); } @@ -3867,6 +3867,29 @@ static int mpi3mr_repost_diag_bufs(struct mpi3mr_ioc *mrioc) return retval; } +/** + * mpi3mr_read_tsu_interval - Update time stamp interval + * @mrioc: Adapter instance reference + * + * Update time stamp interval if its defined in driver page 1, + * otherwise use default value. + * + * Return: Nothing + */ +static void +mpi3mr_read_tsu_interval(struct mpi3mr_ioc *mrioc) +{ + struct mpi3_driver_page1 driver_pg1; + u16 pg_sz = sizeof(driver_pg1); + int retval = 0; + + mrioc->ts_update_interval = MPI3MR_TSUPDATE_INTERVAL; + + retval = mpi3mr_cfg_get_driver_pg1(mrioc, &driver_pg1, pg_sz); + if (!retval && driver_pg1.time_stamp_update) + mrioc->ts_update_interval = (driver_pg1.time_stamp_update * 60); +} + /** * mpi3mr_print_ioc_info - Display controller information * @mrioc: Adapter instance reference @@ -4163,6 +4186,7 @@ int mpi3mr_init_ioc(struct mpi3mr_ioc *mrioc) goto out_failed_noretry; } + mpi3mr_read_tsu_interval(mrioc); mpi3mr_print_ioc_info(mrioc); if (!mrioc->cfg_page) { @@ -4344,6 +4368,7 @@ int mpi3mr_reinit_ioc(struct mpi3mr_ioc *mrioc, u8 is_resume) goto out_failed_noretry; } + mpi3mr_read_tsu_interval(mrioc); mpi3mr_print_ioc_info(mrioc); if (is_resume) { From 6e4c825f267ed151596fe2554cfa457b9aaa7edf Mon Sep 17 00:00:00 2001 From: Ranjan Kumar Date: Thu, 5 Sep 2024 15:57:51 +0530 Subject: [PATCH 289/655] scsi: mpi3mr: Update MPI Headers to revision 34 Update MPI Headers to revision 34. Signed-off-by: Prayas Patel Signed-off-by: Ranjan Kumar Link: https://lore.kernel.org/r/20240905102753.105310-4-ranjan.kumar@broadcom.com Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi/mpi30_cnfg.h | 34 +++++++++++++++++++++-- drivers/scsi/mpi3mr/mpi/mpi30_image.h | 13 +++++++-- drivers/scsi/mpi3mr/mpi/mpi30_ioc.h | 8 ++++++ drivers/scsi/mpi3mr/mpi/mpi30_transport.h | 4 ++- 4 files changed, 52 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/mpi3mr/mpi/mpi30_cnfg.h b/drivers/scsi/mpi3mr/mpi/mpi30_cnfg.h index b46bd08eac99..00cd18edfad6 100644 --- a/drivers/scsi/mpi3mr/mpi/mpi30_cnfg.h +++ b/drivers/scsi/mpi3mr/mpi/mpi30_cnfg.h @@ -67,6 +67,7 @@ #define MPI3_SECURITY_PGAD_SLOT_GROUP_MASK (0x0000ff00) #define MPI3_SECURITY_PGAD_SLOT_GROUP_SHIFT (8) #define MPI3_SECURITY_PGAD_SLOT_MASK (0x000000ff) +#define MPI3_INSTANCE_PGAD_INSTANCE_MASK (0x0000ffff) struct mpi3_config_request { __le16 host_tag; u8 ioc_use_only02; @@ -75,7 +76,8 @@ struct mpi3_config_request { u8 ioc_use_only06; u8 msg_flags; __le16 change_count; - __le16 reserved0a; + u8 proxy_ioc_number; + u8 reserved0b; u8 page_version; u8 page_number; u8 page_type; @@ -206,6 +208,9 @@ struct mpi3_config_page_header { #define MPI3_MFGPAGE_DEVID_SAS5116_MPI_MGMT (0x00b5) #define MPI3_MFGPAGE_DEVID_SAS5116_NVME_MGMT (0x00b6) #define MPI3_MFGPAGE_DEVID_SAS5116_PCIE_SWITCH (0x00b8) +#define MPI3_MFGPAGE_DEVID_SAS5248_MPI (0x00f0) +#define MPI3_MFGPAGE_DEVID_SAS5248_MPI_NS (0x00f1) +#define MPI3_MFGPAGE_DEVID_SAS5248_PCIE_SWITCH (0x00f2) struct mpi3_man_page0 { struct mpi3_config_page_header header; u8 chip_revision[8]; @@ -1074,6 +1079,8 @@ struct mpi3_io_unit_page8 { #define MPI3_IOUNIT8_SBSTATE_SVN_UPDATE_PENDING (0x04) #define MPI3_IOUNIT8_SBSTATE_KEY_UPDATE_PENDING (0x02) #define MPI3_IOUNIT8_SBSTATE_SECURE_BOOT_ENABLED (0x01) +#define MPI3_IOUNIT8_SBMODE_CURRENT_KEY_IOUNIT17 (0x10) +#define MPI3_IOUNIT8_SBMODE_HARD_SECURE_RECERTIFIED (0x08) struct mpi3_io_unit_page9 { struct mpi3_config_page_header header; __le32 flags; @@ -1089,6 +1096,8 @@ struct mpi3_io_unit_page9 { #define MPI3_IOUNIT9_FLAGS_UBM_ENCLOSURE_ORDER_BACKPLANE_TYPE (0x00000004) #define MPI3_IOUNIT9_FLAGS_VDFIRST_ENABLED (0x00000001) #define MPI3_IOUNIT9_FIRSTDEVICE_UNKNOWN (0xffff) +#define MPI3_IOUNIT9_FIRSTDEVICE_IN_DRIVER_PAGE_0 (0xfffe) + struct mpi3_io_unit_page10 { struct mpi3_config_page_header header; u8 flags; @@ -1224,6 +1233,19 @@ struct mpi3_io_unit_page15 { #define MPI3_IOUNIT15_FLAGS_EPRSUPPORT_WITHOUT_POWER_BRAKE_GPIO (0x01) #define MPI3_IOUNIT15_FLAGS_EPRSUPPORT_WITH_POWER_BRAKE_GPIO (0x02) #define MPI3_IOUNIT15_NUMPOWERBUDGETDATA_POWER_BUDGETING_DISABLED (0x00) + +struct mpi3_io_unit_page17 { + struct mpi3_config_page_header header; + u8 num_instances; + u8 instance; + __le16 reserved0a; + __le32 reserved0c[4]; + __le16 key_length; + u8 encryption_algorithm; + u8 reserved1f; + __le32 current_key[]; +}; +#define MPI3_IOUNIT17_PAGEVERSION (0x00) struct mpi3_ioc_page0 { struct mpi3_config_page_header header; __le32 reserved08; @@ -1311,7 +1333,7 @@ struct mpi3_driver_page0 { u8 tur_interval; u8 reserved10; u8 security_key_timeout; - __le16 reserved12; + __le16 first_device; __le32 reserved14; __le32 reserved18; }; @@ -1324,11 +1346,13 @@ struct mpi3_driver_page0 { #define MPI3_DRIVER0_BSDOPTS_REGISTRATION_IOC_AND_DEVS (0x00000000) #define MPI3_DRIVER0_BSDOPTS_REGISTRATION_IOC_ONLY (0x00000001) #define MPI3_DRIVER0_BSDOPTS_REGISTRATION_IOC_AND_INTERNAL_DEVS (0x00000002) +#define MPI3_DRIVER0_FIRSTDEVICE_IGNORE1 (0x0000) +#define MPI3_DRIVER0_FIRSTDEVICE_IGNORE2 (0xffff) struct mpi3_driver_page1 { struct mpi3_config_page_header header; __le32 flags; u8 time_stamp_update; - __le32 reserved0c; + u8 reserved0d[3]; __le16 host_diag_trace_max_size; __le16 host_diag_trace_min_size; __le16 host_diag_trace_decrement_size; @@ -2348,6 +2372,10 @@ struct mpi3_device0_vd_format { #define MPI3_DEVICE0_VD_DEVICE_INFO_SAS (0x0001) #define MPI3_DEVICE0_VD_FLAGS_IO_THROTTLE_GROUP_QD_MASK (0xf000) #define MPI3_DEVICE0_VD_FLAGS_IO_THROTTLE_GROUP_QD_SHIFT (12) +#define MPI3_DEVICE0_VD_FLAGS_OSEXPOSURE_MASK (0x0003) +#define MPI3_DEVICE0_VD_FLAGS_OSEXPOSURE_HDD (0x0000) +#define MPI3_DEVICE0_VD_FLAGS_OSEXPOSURE_SSD (0x0001) +#define MPI3_DEVICE0_VD_FLAGS_OSEXPOSURE_NO_GUIDANCE (0x0002) union mpi3_device0_dev_spec_format { struct mpi3_device0_sas_sata_format sas_sata_format; struct mpi3_device0_pcie_format pcie_format; diff --git a/drivers/scsi/mpi3mr/mpi/mpi30_image.h b/drivers/scsi/mpi3mr/mpi/mpi30_image.h index 7df242190135..2c6e548cbd0f 100644 --- a/drivers/scsi/mpi3mr/mpi/mpi30_image.h +++ b/drivers/scsi/mpi3mr/mpi/mpi30_image.h @@ -205,13 +205,14 @@ struct mpi3_encrypted_hash_entry { u8 hash_image_type; u8 hash_algorithm; u8 encryption_algorithm; - u8 reserved03; + u8 flags; __le16 public_key_size; __le16 signature_size; __le32 public_key[MPI3_PUBLIC_KEY_MAX]; }; - -#define MPI3_HASH_IMAGE_TYPE_KEY_WITH_SIGNATURE (0x03) +#define MPI3_HASH_IMAGE_TYPE_KEY_WITH_HASH (0x03) +#define MPI3_HASH_IMAGE_TYPE_KEY_WITH_HASH_1_OF_2 (0x04) +#define MPI3_HASH_IMAGE_TYPE_KEY_WITH_HASH_2_OF_2 (0x05) #define MPI3_HASH_ALGORITHM_VERSION_MASK (0xe0) #define MPI3_HASH_ALGORITHM_VERSION_NONE (0x00) #define MPI3_HASH_ALGORITHM_VERSION_SHA1 (0x20) @@ -230,6 +231,12 @@ struct mpi3_encrypted_hash_entry { #define MPI3_ENCRYPTION_ALGORITHM_RSA4096 (0x05) #define MPI3_ENCRYPTION_ALGORITHM_RSA3072 (0x06) +/* hierarchical signature system (hss) */ +#define MPI3_ENCRYPTION_ALGORITHM_ML_DSA_87 (0x0b) +#define MPI3_ENCRYPTION_ALGORITHM_ML_DSA_65 (0x0c) +#define MPI3_ENCRYPTION_ALGORITHM_ML_DSA_44 (0x0d) +#define MPI3_ENCRYPTED_HASH_ENTRY_FLAGS_PAIRED_KEY_MASK (0x0f) + #ifndef MPI3_ENCRYPTED_HASH_ENTRY_MAX #define MPI3_ENCRYPTED_HASH_ENTRY_MAX (1) #endif diff --git a/drivers/scsi/mpi3mr/mpi/mpi30_ioc.h b/drivers/scsi/mpi3mr/mpi/mpi30_ioc.h index c9fa0d69b75f..c374867f9ba0 100644 --- a/drivers/scsi/mpi3mr/mpi/mpi30_ioc.h +++ b/drivers/scsi/mpi3mr/mpi/mpi30_ioc.h @@ -39,6 +39,12 @@ struct mpi3_ioc_init_request { #define MPI3_WHOINIT_HOST_DRIVER (0x03) #define MPI3_WHOINIT_MANUFACTURER (0x04) +#define MPI3_IOCINIT_DRIVERCAP_OSEXPOSURE_MASK (0x00000003) +#define MPI3_IOCINIT_DRIVERCAP_OSEXPOSURE_NO_GUIDANCE (0x00000000) +#define MPI3_IOCINIT_DRIVERCAP_OSEXPOSURE_NO_SPECIAL (0x00000001) +#define MPI3_IOCINIT_DRIVERCAP_OSEXPOSURE_REPORT_AS_HDD (0x00000002) +#define MPI3_IOCINIT_DRIVERCAP_OSEXPOSURE_REPORT_AS_SSD (0x00000003) + struct mpi3_ioc_facts_request { __le16 host_tag; u8 ioc_use_only02; @@ -140,6 +146,8 @@ struct mpi3_ioc_facts_data { #define MPI3_IOCFACTS_EXCEPT_MANUFACT_CHECKSUM_FAIL (0x0020) #define MPI3_IOCFACTS_EXCEPT_FW_CHECKSUM_FAIL (0x0010) #define MPI3_IOCFACTS_EXCEPT_CONFIG_CHECKSUM_FAIL (0x0008) +#define MPI3_IOCFACTS_EXCEPT_BLOCKING_BOOT_EVENT (0x0004) +#define MPI3_IOCFACTS_EXCEPT_SECURITY_SELFTEST_FAILURE (0x0002) #define MPI3_IOCFACTS_EXCEPT_BOOTSTAT_MASK (0x0001) #define MPI3_IOCFACTS_EXCEPT_BOOTSTAT_PRIMARY (0x0000) #define MPI3_IOCFACTS_EXCEPT_BOOTSTAT_SECONDARY (0x0001) diff --git a/drivers/scsi/mpi3mr/mpi/mpi30_transport.h b/drivers/scsi/mpi3mr/mpi/mpi30_transport.h index fdc3d1968e43..b2ab25a1cfeb 100644 --- a/drivers/scsi/mpi3mr/mpi/mpi30_transport.h +++ b/drivers/scsi/mpi3mr/mpi/mpi30_transport.h @@ -18,7 +18,7 @@ union mpi3_version_union { #define MPI3_VERSION_MAJOR (3) #define MPI3_VERSION_MINOR (0) -#define MPI3_VERSION_UNIT (31) +#define MPI3_VERSION_UNIT (34) #define MPI3_VERSION_DEV (0) #define MPI3_DEVHANDLE_INVALID (0xffff) struct mpi3_sysif_oper_queue_indexes { @@ -158,6 +158,7 @@ struct mpi3_sysif_registers { #define MPI3_SYSIF_FAULT_CODE_SOFT_RESET_NEEDED (0x0000f004) #define MPI3_SYSIF_FAULT_CODE_POWER_CYCLE_REQUIRED (0x0000f005) #define MPI3_SYSIF_FAULT_CODE_TEMP_THRESHOLD_EXCEEDED (0x0000f006) +#define MPI3_SYSIF_FAULT_CODE_INSUFFICIENT_PCI_SLOT_POWER (0x0000f007) #define MPI3_SYSIF_FAULT_INFO0_OFFSET (0x00001c14) #define MPI3_SYSIF_FAULT_INFO1_OFFSET (0x00001c18) #define MPI3_SYSIF_FAULT_INFO2_OFFSET (0x00001c1c) @@ -410,6 +411,7 @@ struct mpi3_default_reply { #define MPI3_IOCSTATUS_INSUFFICIENT_RESOURCES (0x0006) #define MPI3_IOCSTATUS_INVALID_FIELD (0x0007) #define MPI3_IOCSTATUS_INVALID_STATE (0x0008) +#define MPI3_IOCSTATUS_SHUTDOWN_ACTIVE (0x0009) #define MPI3_IOCSTATUS_INSUFFICIENT_POWER (0x000a) #define MPI3_IOCSTATUS_INVALID_CHANGE_COUNT (0x000b) #define MPI3_IOCSTATUS_ALLOWED_CMD_BLOCK (0x000c) From 4616a4b3cb8aa736882f8dc392cf146365ead2a4 Mon Sep 17 00:00:00 2001 From: Ranjan Kumar Date: Thu, 5 Sep 2024 15:57:52 +0530 Subject: [PATCH 290/655] scsi: mpi3mr: Improve wait logic while controller transitions to READY state During controller transitioning to READY state, if the controller is found in transient states ("becoming ready" or "reset requested"), driver waits for 510 secs even if the controller transitions out of these states early. This causes an unnecessary delay of 510 secs in the overall firmware initialization sequence. Poll the controller state periodically (every 100 milliseconds) while waiting for the controller to come out of the mentioned transient states. Once the controller transits out of the transient states, come out of the wait loop. Signed-off-by: Sumit Saxena Signed-off-by: Ranjan Kumar Link: https://lore.kernel.org/r/20240905102753.105310-5-ranjan.kumar@broadcom.com Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi3mr_fw.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c index ddb770fae9d3..86f7136cd5ae 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_fw.c +++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c @@ -1384,26 +1384,23 @@ static int mpi3mr_bring_ioc_ready(struct mpi3mr_ioc *mrioc) ioc_info(mrioc, "controller is in %s state during detection\n", mpi3mr_iocstate_name(ioc_state)); - if (ioc_state == MRIOC_STATE_BECOMING_READY || - ioc_state == MRIOC_STATE_RESET_REQUESTED) { - timeout = mrioc->ready_timeout * 10; - do { - msleep(100); - } while (--timeout); + timeout = mrioc->ready_timeout * 10; + + do { + ioc_state = mpi3mr_get_iocstate(mrioc); + + if (ioc_state != MRIOC_STATE_BECOMING_READY && + ioc_state != MRIOC_STATE_RESET_REQUESTED) + break; if (!pci_device_is_present(mrioc->pdev)) { mrioc->unrecoverable = 1; - ioc_err(mrioc, - "controller is not present while waiting to reset\n"); - retval = -1; + ioc_err(mrioc, "controller is not present while waiting to reset\n"); goto out_device_not_present; } - ioc_state = mpi3mr_get_iocstate(mrioc); - ioc_info(mrioc, - "controller is in %s state after waiting to reset\n", - mpi3mr_iocstate_name(ioc_state)); - } + msleep(100); + } while (--timeout); if (ioc_state == MRIOC_STATE_READY) { ioc_info(mrioc, "issuing message unit reset (MUR) to bring to reset state\n"); From e7d67f3f9f9c89726c2ebcf5fce48db7798a49df Mon Sep 17 00:00:00 2001 From: Ranjan Kumar Date: Thu, 5 Sep 2024 15:57:53 +0530 Subject: [PATCH 291/655] scsi: mpi3mr: Update driver version to 8.12.0.0.50 Update driver version to 8.12.0.0.50. Signed-off-by: Ranjan Kumar Link: https://lore.kernel.org/r/20240905102753.105310-6-ranjan.kumar@broadcom.com Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi3mr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/mpi3mr/mpi3mr.h b/drivers/scsi/mpi3mr/mpi3mr.h index 4b5f43af9f60..856c0928b13b 100644 --- a/drivers/scsi/mpi3mr/mpi3mr.h +++ b/drivers/scsi/mpi3mr/mpi3mr.h @@ -57,8 +57,8 @@ extern struct list_head mrioc_list; extern int prot_mask; extern atomic64_t event_counter; -#define MPI3MR_DRIVER_VERSION "8.10.0.5.50" -#define MPI3MR_DRIVER_RELDATE "08-Aug-2024" +#define MPI3MR_DRIVER_VERSION "8.12.0.0.50" +#define MPI3MR_DRIVER_RELDATE "05-Sept-2024" #define MPI3MR_DRIVER_NAME "mpi3mr" #define MPI3MR_DRIVER_LICENSE "GPL" From 93bcc5f3984bf4f51da1529700aec351872dbfff Mon Sep 17 00:00:00 2001 From: Justin Tee Date: Thu, 12 Sep 2024 16:24:40 -0700 Subject: [PATCH 292/655] scsi: lpfc: Add ELS_RSP cmd to the list of WQEs to flush in lpfc_els_flush_cmd() During HBA stress testing, a spam of received PLOGIs exposes a resource recovery bug causing leakage of lpfc_sqlq entries from the global phba->sli4_hba.lpfc_els_sgl_list. The issue is in lpfc_els_flush_cmd(), where the driver attempts to recover outstanding ELS sgls when walking the txcmplq. Only CMD_ELS_REQUEST64_CRs and CMD_GEN_REQUEST64_CRs are added to the abort and cancel lists. A check for CMD_XMIT_ELS_RSP64_WQE is missing in order to recover LS_ACC usages of the phba->sli4_hba.lpfc_els_sgl_list too. Fix by adding CMD_XMIT_ELS_RSP64_WQE as part of the txcmplq walk when adding WQEs to the abort and cancel list in lpfc_els_flush_cmd(). Also, update naming convention from CRs to WQEs. Signed-off-by: Justin Tee Link: https://lore.kernel.org/r/20240912232447.45607-2-justintee8345@gmail.com Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_els.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index de0ec945d2f1..7219b1ada1ea 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -9658,11 +9658,12 @@ lpfc_els_flush_cmd(struct lpfc_vport *vport) if (piocb->cmd_flag & LPFC_DRIVER_ABORTED && !mbx_tmo_err) continue; - /* On the ELS ring we can have ELS_REQUESTs or - * GEN_REQUESTs waiting for a response. + /* On the ELS ring we can have ELS_REQUESTs, ELS_RSPs, + * or GEN_REQUESTs waiting for a CQE response. */ ulp_command = get_job_cmnd(phba, piocb); - if (ulp_command == CMD_ELS_REQUEST64_CR) { + if (ulp_command == CMD_ELS_REQUEST64_WQE || + ulp_command == CMD_XMIT_ELS_RSP64_WQE) { list_add_tail(&piocb->dlist, &abort_list); /* If the link is down when flushing ELS commands From fc318cac66ac50398f9fc7cbec7b339e6d08a7e6 Mon Sep 17 00:00:00 2001 From: Justin Tee Date: Thu, 12 Sep 2024 16:24:41 -0700 Subject: [PATCH 293/655] scsi: lpfc: Update phba link state conditional before sending CMF_SYNC_WQE It's possible for the driver to send a CMF_SYNC_WQE to nonresponsive firmware during reset of the adapter. The phba link_state conditional check is currently a strict == LPFC_LINK_DOWN, which does not cover initialization states before reaching the LPFC_LINK_UP state. Update the phba->link_state conditional to < LPFC_LINK_UP so that all initialization states are covered before allowing sending CMF_SYNC_WQE. Update taking of the hbalock to be during this link_state check as well. Signed-off-by: Justin Tee Link: https://lore.kernel.org/r/20240912232447.45607-3-justintee8345@gmail.com Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_sli.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 332b8d2348e9..bb5fd3322273 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -1940,12 +1940,15 @@ lpfc_issue_cmf_sync_wqe(struct lpfc_hba *phba, u32 ms, u64 total) atot = atomic_xchg(&phba->cgn_sync_alarm_cnt, 0); wtot = atomic_xchg(&phba->cgn_sync_warn_cnt, 0); + spin_lock_irqsave(&phba->hbalock, iflags); + /* ONLY Managed mode will send the CMF_SYNC_WQE to the HBA */ if (phba->cmf_active_mode != LPFC_CFG_MANAGED || - phba->link_state == LPFC_LINK_DOWN) - return 0; + phba->link_state < LPFC_LINK_UP) { + ret_val = 0; + goto out_unlock; + } - spin_lock_irqsave(&phba->hbalock, iflags); sync_buf = __lpfc_sli_get_iocbq(phba); if (!sync_buf) { lpfc_printf_log(phba, KERN_ERR, LOG_CGN_MGMT, From 05ab4e7846f1103377133c00295a9a910cc6dfc2 Mon Sep 17 00:00:00 2001 From: Justin Tee Date: Thu, 12 Sep 2024 16:24:42 -0700 Subject: [PATCH 294/655] scsi: lpfc: Restrict support for 32 byte CDBs to specific HBAs An older generation of HBAs are failing FCP discovery due to usage of an outdated field in FCP command WQEs. Fix by checking the SLI Interface Type register for applicable support of 32 Byte CDB commands, and restore a setting for a WQE path using normal 16 byte CDBs. Fixes: af20bb73ac25 ("scsi: lpfc: Add support for 32 byte CDBs") Cc: stable@vger.kernel.org # v6.10+ Signed-off-by: Justin Tee Link: https://lore.kernel.org/r/20240912232447.45607-4-justintee8345@gmail.com Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_hw4.h | 3 +++ drivers/scsi/lpfc/lpfc_init.c | 21 ++++++++++++++++++--- drivers/scsi/lpfc/lpfc_scsi.c | 2 +- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index 500253007b1d..26e1313ebb21 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -4847,6 +4847,7 @@ struct fcp_iwrite64_wqe { #define cmd_buff_len_SHIFT 16 #define cmd_buff_len_MASK 0x00000ffff #define cmd_buff_len_WORD word3 +/* Note: payload_offset_len field depends on ASIC support */ #define payload_offset_len_SHIFT 0 #define payload_offset_len_MASK 0x0000ffff #define payload_offset_len_WORD word3 @@ -4863,6 +4864,7 @@ struct fcp_iread64_wqe { #define cmd_buff_len_SHIFT 16 #define cmd_buff_len_MASK 0x00000ffff #define cmd_buff_len_WORD word3 +/* Note: payload_offset_len field depends on ASIC support */ #define payload_offset_len_SHIFT 0 #define payload_offset_len_MASK 0x0000ffff #define payload_offset_len_WORD word3 @@ -4879,6 +4881,7 @@ struct fcp_icmnd64_wqe { #define cmd_buff_len_SHIFT 16 #define cmd_buff_len_MASK 0x00000ffff #define cmd_buff_len_WORD word3 +/* Note: payload_offset_len field depends on ASIC support */ #define payload_offset_len_SHIFT 0 #define payload_offset_len_MASK 0x0000ffff #define payload_offset_len_WORD word3 diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index e1dfa96c2a55..0c1404dc5f3b 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -4699,6 +4699,7 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev) uint64_t wwn; bool use_no_reset_hba = false; int rc; + u8 if_type; if (lpfc_no_hba_reset_cnt) { if (phba->sli_rev < LPFC_SLI_REV4 && @@ -4773,10 +4774,24 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev) shost->max_id = LPFC_MAX_TARGET; shost->max_lun = vport->cfg_max_luns; shost->this_id = -1; - if (phba->sli_rev == LPFC_SLI_REV4) - shost->max_cmd_len = LPFC_FCP_CDB_LEN_32; - else + + /* Set max_cmd_len applicable to ASIC support */ + if (phba->sli_rev == LPFC_SLI_REV4) { + if_type = bf_get(lpfc_sli_intf_if_type, + &phba->sli4_hba.sli_intf); + switch (if_type) { + case LPFC_SLI_INTF_IF_TYPE_2: + fallthrough; + case LPFC_SLI_INTF_IF_TYPE_6: + shost->max_cmd_len = LPFC_FCP_CDB_LEN_32; + break; + default: + shost->max_cmd_len = LPFC_FCP_CDB_LEN; + break; + } + } else { shost->max_cmd_len = LPFC_FCP_CDB_LEN; + } if (phba->sli_rev == LPFC_SLI_REV4) { if (!phba->cfg_fcp_mq_threshold || diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 60cd60ebff38..0eaede8275da 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -4760,7 +4760,7 @@ static int lpfc_scsi_prep_cmnd_buf_s4(struct lpfc_vport *vport, /* Word 3 */ bf_set(payload_offset_len, &wqe->fcp_icmd, - sizeof(struct fcp_cmnd32) + sizeof(struct fcp_rsp)); + sizeof(struct fcp_cmnd) + sizeof(struct fcp_rsp)); /* Word 6 */ bf_set(wqe_ctxt_tag, &wqe->generic.wqe_com, From d1a2ef63fc8b3e3dc5b74b7e08636896b32acbdc Mon Sep 17 00:00:00 2001 From: Justin Tee Date: Thu, 12 Sep 2024 16:24:43 -0700 Subject: [PATCH 295/655] scsi: lpfc: Fix kref imbalance on fabric ndlps from dev_loss_tmo handler With a FLOGI outstanding and loss of physical link connection to the fabric for the duration of dev_loss_tmo, there is a fabric ndlp kref imbalance that decrements the kref and sets the NLP_IN_RECOV_POST_DEV_LOSS flag at the same time. The issue is that when the FLOGI completion routine executes, the fabric ndlp could already be freed because of the final kref put from the dev_loss_tmo handler. Fix by early returning before the ndlp kref put if the ndlp is deemed a candidate for NLP_IN_RECOV_POST_DEV_LOSS in the FLOGI completion routine. Signed-off-by: Justin Tee Link: https://lore.kernel.org/r/20240912232447.45607-5-justintee8345@gmail.com Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_hbadisc.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 35c9181c6608..9241075f72fa 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -527,6 +527,9 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp) * the following lpfc_nlp_put is necessary after fabric node is * recovered. */ + spin_lock_irqsave(&ndlp->lock, iflags); + ndlp->nlp_flag &= ~NLP_IN_DEV_LOSS; + spin_unlock_irqrestore(&ndlp->lock, iflags); if (recovering) { lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY | LOG_NODE, @@ -539,6 +542,7 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp) spin_lock_irqsave(&ndlp->lock, iflags); ndlp->save_flags |= NLP_IN_RECOV_POST_DEV_LOSS; spin_unlock_irqrestore(&ndlp->lock, iflags); + return fcf_inuse; } else if (ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) { /* Fabric node fully recovered before this dev_loss_tmo * queue work is processed. Thus, ignore the @@ -552,15 +556,9 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp) ndlp->nlp_DID, kref_read(&ndlp->kref), ndlp, ndlp->nlp_flag, vport->port_state); - spin_lock_irqsave(&ndlp->lock, iflags); - ndlp->nlp_flag &= ~NLP_IN_DEV_LOSS; - spin_unlock_irqrestore(&ndlp->lock, iflags); return fcf_inuse; } - spin_lock_irqsave(&ndlp->lock, iflags); - ndlp->nlp_flag &= ~NLP_IN_DEV_LOSS; - spin_unlock_irqrestore(&ndlp->lock, iflags); lpfc_nlp_put(ndlp); return fcf_inuse; } From 0a3c84f71680684c1d41abb92db05f95c09111e8 Mon Sep 17 00:00:00 2001 From: Justin Tee Date: Thu, 12 Sep 2024 16:24:44 -0700 Subject: [PATCH 296/655] scsi: lpfc: Ensure DA_ID handling completion before deleting an NPIV instance Deleting an NPIV instance requires all fabric ndlps to be released before an NPIV's resources can be torn down. Failure to release fabric ndlps beforehand opens kref imbalance race conditions. Fix by forcing the DA_ID to complete synchronously with usage of wait_queue. Signed-off-by: Justin Tee Link: https://lore.kernel.org/r/20240912232447.45607-6-justintee8345@gmail.com Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_ct.c | 12 ++++++++++ drivers/scsi/lpfc/lpfc_disc.h | 7 ++++++ drivers/scsi/lpfc/lpfc_vport.c | 43 ++++++++++++++++++++++++++++------ 3 files changed, 55 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index 2dedd1493e5b..1e5db489a00c 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -1647,6 +1647,18 @@ lpfc_cmpl_ct(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, } out: + /* If the caller wanted a synchronous DA_ID completion, signal the + * wait obj and clear flag to reset the vport. + */ + if (ndlp->save_flags & NLP_WAIT_FOR_DA_ID) { + if (ndlp->da_id_waitq) + wake_up(ndlp->da_id_waitq); + } + + spin_lock_irq(&ndlp->lock); + ndlp->save_flags &= ~NLP_WAIT_FOR_DA_ID; + spin_unlock_irq(&ndlp->lock); + lpfc_ct_free_iocb(phba, cmdiocb); lpfc_nlp_put(ndlp); return; diff --git a/drivers/scsi/lpfc/lpfc_disc.h b/drivers/scsi/lpfc/lpfc_disc.h index f82615d87c4b..f5ae8cc15820 100644 --- a/drivers/scsi/lpfc/lpfc_disc.h +++ b/drivers/scsi/lpfc/lpfc_disc.h @@ -90,6 +90,8 @@ enum lpfc_nlp_save_flags { NLP_IN_RECOV_POST_DEV_LOSS = 0x1, /* wait for outstanding LOGO to cmpl */ NLP_WAIT_FOR_LOGO = 0x2, + /* wait for outstanding DA_ID to finish */ + NLP_WAIT_FOR_DA_ID = 0x4 }; struct lpfc_nodelist { @@ -159,7 +161,12 @@ struct lpfc_nodelist { uint32_t nvme_fb_size; /* NVME target's supported byte cnt */ #define NVME_FB_BIT_SHIFT 9 /* PRLI Rsp first burst in 512B units. */ uint32_t nlp_defer_did; + + /* These wait objects are NPIV specific. These IOs must complete + * synchronously. + */ wait_queue_head_t *logo_waitq; + wait_queue_head_t *da_id_waitq; }; struct lpfc_node_rrq { diff --git a/drivers/scsi/lpfc/lpfc_vport.c b/drivers/scsi/lpfc/lpfc_vport.c index 4439167a5188..7a4d4d8e2ad5 100644 --- a/drivers/scsi/lpfc/lpfc_vport.c +++ b/drivers/scsi/lpfc/lpfc_vport.c @@ -626,6 +626,7 @@ lpfc_vport_delete(struct fc_vport *fc_vport) struct Scsi_Host *shost = lpfc_shost_from_vport(vport); struct lpfc_hba *phba = vport->phba; int rc; + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(waitq); if (vport->port_type == LPFC_PHYSICAL_PORT) { lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT, @@ -679,21 +680,49 @@ lpfc_vport_delete(struct fc_vport *fc_vport) if (!ndlp) goto skip_logo; + /* Send the DA_ID and Fabric LOGO to cleanup the NPIV fabric entries. */ if (ndlp && ndlp->nlp_state == NLP_STE_UNMAPPED_NODE && phba->link_state >= LPFC_LINK_UP && phba->fc_topology != LPFC_TOPOLOGY_LOOP) { if (vport->cfg_enable_da_id) { - /* Send DA_ID and wait for a completion. */ + /* Send DA_ID and wait for a completion. This is best + * effort. If the DA_ID fails, likely the fabric will + * "leak" NportIDs but at least the driver issued the + * command. + */ + ndlp = lpfc_findnode_did(vport, NameServer_DID); + if (!ndlp) + goto issue_logo; + + spin_lock_irq(&ndlp->lock); + ndlp->da_id_waitq = &waitq; + ndlp->save_flags |= NLP_WAIT_FOR_DA_ID; + spin_unlock_irq(&ndlp->lock); + rc = lpfc_ns_cmd(vport, SLI_CTNS_DA_ID, 0, 0); - if (rc) { - lpfc_printf_log(vport->phba, KERN_WARNING, - LOG_VPORT, - "1829 CT command failed to " - "delete objects on fabric, " - "rc %d\n", rc); + if (!rc) { + wait_event_timeout(waitq, + !(ndlp->save_flags & NLP_WAIT_FOR_DA_ID), + msecs_to_jiffies(phba->fc_ratov * 2000)); } + + lpfc_printf_vlog(vport, KERN_INFO, LOG_VPORT | LOG_ELS, + "1829 DA_ID issue status %d. " + "SFlag x%x NState x%x, NFlag x%x " + "Rpi x%x\n", + rc, ndlp->save_flags, ndlp->nlp_state, + ndlp->nlp_flag, ndlp->nlp_rpi); + + /* Remove the waitq and save_flags. It no + * longer matters if the wake happened. + */ + spin_lock_irq(&ndlp->lock); + ndlp->da_id_waitq = NULL; + ndlp->save_flags &= ~NLP_WAIT_FOR_DA_ID; + spin_unlock_irq(&ndlp->lock); } +issue_logo: /* * If the vpi is not registered, then a valid FDISC doesn't * exist and there is no need for a ELS LOGO. Just cleanup From 1af9af1f8ab38f1285b27581a5e6920ec58296ba Mon Sep 17 00:00:00 2001 From: Justin Tee Date: Thu, 12 Sep 2024 16:24:45 -0700 Subject: [PATCH 297/655] scsi: lpfc: Revise TRACE_EVENT log flag severities from KERN_ERR to KERN_WARNING Revise certain log messages marked as KERN_ERR LOG_TRACE_EVENT to KERN_WARNING and use the lpfc_vlog_msg() macro to still log the event. The benefit is that events of interest are still logged and the entire trace buffer is not dumped with extraneous logging information when using default lpfc_log_verbose driver parameter settings. Also, delete the keyword "fail" from such log messages as they aren't really causes for concern. The log messages are more for warnings to a SAN admin about SAN activity. Signed-off-by: Justin Tee Link: https://lore.kernel.org/r/20240912232447.45607-7-justintee8345@gmail.com Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_ct.c | 10 +-- drivers/scsi/lpfc/lpfc_els.c | 125 +++++++++++++++++------------------ 2 files changed, 64 insertions(+), 71 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index 1e5db489a00c..134bc96dd134 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -1572,8 +1572,8 @@ lpfc_cmpl_ct_cmd_gft_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, } } } else - lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT, - "3065 GFT_ID failed x%08x\n", ulp_status); + lpfc_vlog_msg(vport, KERN_WARNING, LOG_DISCOVERY, + "3065 GFT_ID status x%08x\n", ulp_status); out: lpfc_ct_free_iocb(phba, cmdiocb); @@ -2258,7 +2258,7 @@ lpfc_cmpl_ct_disc_fdmi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, } lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, - "0229 FDMI cmd %04x failed, latt = %d " + "0229 FDMI cmd %04x latt = %d " "ulp_status: x%x, rid x%x\n", be16_to_cpu(fdmi_cmd), latt, ulp_status, ulp_word4); @@ -2275,9 +2275,9 @@ lpfc_cmpl_ct_disc_fdmi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, /* Check for a CT LS_RJT response */ cmd = be16_to_cpu(fdmi_cmd); if (be16_to_cpu(fdmi_rsp) == SLI_CT_RESPONSE_FS_RJT) { - /* FDMI rsp failed */ + /* Log FDMI reject */ lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY | LOG_ELS, - "0220 FDMI cmd failed FS_RJT Data: x%x", cmd); + "0220 FDMI cmd FS_RJT Data: x%x", cmd); /* Should we fallback to FDMI-2 / FDMI-1 ? */ switch (cmd) { diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 7219b1ada1ea..d737b897ddd8 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -979,7 +979,7 @@ lpfc_cmpl_els_flogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, phba->fcoe_cvl_eventtag_attn = phba->fcoe_cvl_eventtag; lpfc_printf_log(phba, KERN_WARNING, LOG_FIP | LOG_ELS, - "2611 FLOGI failed on FCF (x%x), " + "2611 FLOGI FCF (x%x), " "status:x%x/x%x, tmo:x%x, perform " "roundrobin FCF failover\n", phba->fcf.current_rec.fcf_indx, @@ -997,11 +997,11 @@ lpfc_cmpl_els_flogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, if (!(ulp_status == IOSTAT_LOCAL_REJECT && ((ulp_word4 & IOERR_PARAM_MASK) == IOERR_LOOP_OPEN_FAILURE))) - lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT, - "2858 FLOGI failure Status:x%x/x%x TMO" - ":x%x Data x%lx x%x\n", - ulp_status, ulp_word4, tmo, - phba->hba_flag, phba->fcf.fcf_flag); + lpfc_vlog_msg(vport, KERN_WARNING, LOG_ELS, + "2858 FLOGI Status:x%x/x%x TMO" + ":x%x Data x%lx x%x\n", + ulp_status, ulp_word4, tmo, + phba->hba_flag, phba->fcf.fcf_flag); /* Check for retry */ if (lpfc_els_retry(phba, cmdiocb, rspiocb)) { @@ -1023,7 +1023,7 @@ lpfc_cmpl_els_flogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, lpfc_nlp_put(ndlp); lpfc_printf_vlog(vport, KERN_WARNING, LOG_ELS, - "0150 FLOGI failure Status:x%x/x%x " + "0150 FLOGI Status:x%x/x%x " "xri x%x TMO:x%x refcnt %d\n", ulp_status, ulp_word4, cmdiocb->sli4_xritag, tmo, kref_read(&ndlp->kref)); @@ -1032,11 +1032,11 @@ lpfc_cmpl_els_flogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, if (!(ulp_status == IOSTAT_LOCAL_REJECT && ((ulp_word4 & IOERR_PARAM_MASK) == IOERR_LOOP_OPEN_FAILURE))) { - /* FLOGI failure */ - lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT, - "0100 FLOGI failure Status:x%x/x%x " - "TMO:x%x\n", - ulp_status, ulp_word4, tmo); + /* Warn FLOGI status */ + lpfc_vlog_msg(vport, KERN_WARNING, LOG_ELS, + "0100 FLOGI Status:x%x/x%x " + "TMO:x%x\n", + ulp_status, ulp_word4, tmo); goto flogifail; } @@ -1964,16 +1964,16 @@ lpfc_cmpl_els_rrq(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, if (ulp_status) { /* Check for retry */ - /* RRQ failed Don't print the vport to vport rjts */ + /* Warn RRQ status Don't print the vport to vport rjts */ if (ulp_status != IOSTAT_LS_RJT || (((ulp_word4) >> 16 != LSRJT_INVALID_CMD) && ((ulp_word4) >> 16 != LSRJT_UNABLE_TPC)) || (phba)->pport->cfg_log_verbose & LOG_ELS) - lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT, - "2881 RRQ failure DID:%06X Status:" - "x%x/x%x\n", - ndlp->nlp_DID, ulp_status, - ulp_word4); + lpfc_vlog_msg(vport, KERN_WARNING, LOG_ELS, + "2881 RRQ DID:%06X Status:" + "x%x/x%x\n", + ndlp->nlp_DID, ulp_status, + ulp_word4); } lpfc_clr_rrq_active(phba, rrq->xritag, rrq); @@ -2077,16 +2077,16 @@ lpfc_cmpl_els_plogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, } goto out; } - /* PLOGI failed Don't print the vport to vport rjts */ + /* Warn PLOGI status Don't print the vport to vport rjts */ if (ulp_status != IOSTAT_LS_RJT || (((ulp_word4) >> 16 != LSRJT_INVALID_CMD) && ((ulp_word4) >> 16 != LSRJT_UNABLE_TPC)) || (phba)->pport->cfg_log_verbose & LOG_ELS) - lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT, - "2753 PLOGI failure DID:%06X " - "Status:x%x/x%x\n", - ndlp->nlp_DID, ulp_status, - ulp_word4); + lpfc_vlog_msg(vport, KERN_WARNING, LOG_ELS, + "2753 PLOGI DID:%06X " + "Status:x%x/x%x\n", + ndlp->nlp_DID, ulp_status, + ulp_word4); /* Do not call DSM for lpfc_els_abort'ed ELS cmds */ if (!lpfc_error_lost_link(vport, ulp_status, ulp_word4)) @@ -2323,7 +2323,6 @@ lpfc_cmpl_els_prli(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, struct lpfc_vport *vport = cmdiocb->vport; struct lpfc_nodelist *ndlp; char *mode; - u32 loglevel; u32 ulp_status; u32 ulp_word4; bool release_node = false; @@ -2372,17 +2371,14 @@ lpfc_cmpl_els_prli(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, * could be expected. */ if (test_bit(FC_FABRIC, &vport->fc_flag) || - vport->cfg_enable_fc4_type != LPFC_ENABLE_BOTH) { - mode = KERN_ERR; - loglevel = LOG_TRACE_EVENT; - } else { + vport->cfg_enable_fc4_type != LPFC_ENABLE_BOTH) + mode = KERN_WARNING; + else mode = KERN_INFO; - loglevel = LOG_ELS; - } - /* PRLI failed */ - lpfc_printf_vlog(vport, mode, loglevel, - "2754 PRLI failure DID:%06X Status:x%x/x%x, " + /* Warn PRLI status */ + lpfc_printf_vlog(vport, mode, LOG_ELS, + "2754 PRLI DID:%06X Status:x%x/x%x, " "data: x%x x%x x%x\n", ndlp->nlp_DID, ulp_status, ulp_word4, ndlp->nlp_state, @@ -2854,11 +2850,11 @@ lpfc_cmpl_els_adisc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, } goto out; } - /* ADISC failed */ - lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT, - "2755 ADISC failure DID:%06X Status:x%x/x%x\n", - ndlp->nlp_DID, ulp_status, - ulp_word4); + /* Warn ADISC status */ + lpfc_vlog_msg(vport, KERN_WARNING, LOG_ELS, + "2755 ADISC DID:%06X Status:x%x/x%x\n", + ndlp->nlp_DID, ulp_status, + ulp_word4); lpfc_disc_state_machine(vport, ndlp, cmdiocb, NLP_EVT_CMPL_ADISC); @@ -3045,12 +3041,12 @@ lpfc_cmpl_els_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, * discovery. The PLOGI will retry. */ if (ulp_status) { - /* LOGO failed */ - lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT, - "2756 LOGO failure, No Retry DID:%06X " - "Status:x%x/x%x\n", - ndlp->nlp_DID, ulp_status, - ulp_word4); + /* Warn LOGO status */ + lpfc_vlog_msg(vport, KERN_WARNING, LOG_ELS, + "2756 LOGO, No Retry DID:%06X " + "Status:x%x/x%x\n", + ndlp->nlp_DID, ulp_status, + ulp_word4); if (lpfc_error_lost_link(vport, ulp_status, ulp_word4)) skip_recovery = 1; @@ -4837,11 +4833,10 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, if ((phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) && (cmd == ELS_CMD_FDISC) && (stat.un.b.lsRjtRsnCodeExp == LSEXP_OUT_OF_RESOURCE)){ - lpfc_printf_vlog(vport, KERN_ERR, - LOG_TRACE_EVENT, - "0125 FDISC Failed (x%x). " - "Fabric out of resources\n", - stat.un.lsRjtError); + lpfc_vlog_msg(vport, KERN_WARNING, LOG_ELS, + "0125 FDISC (x%x). " + "Fabric out of resources\n", + stat.un.lsRjtError); lpfc_vport_set_state(vport, FC_VPORT_NO_FABRIC_RSCS); } @@ -4877,11 +4872,10 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, LSEXP_NOTHING_MORE) { vport->fc_sparam.cmn.bbRcvSizeMsb &= 0xf; retry = 1; - lpfc_printf_vlog(vport, KERN_ERR, - LOG_TRACE_EVENT, - "0820 FLOGI Failed (x%x). " - "BBCredit Not Supported\n", - stat.un.lsRjtError); + lpfc_vlog_msg(vport, KERN_WARNING, LOG_ELS, + "0820 FLOGI (x%x). " + "BBCredit Not Supported\n", + stat.un.lsRjtError); } break; @@ -4891,11 +4885,10 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, ((stat.un.b.lsRjtRsnCodeExp == LSEXP_INVALID_PNAME) || (stat.un.b.lsRjtRsnCodeExp == LSEXP_INVALID_NPORT_ID)) ) { - lpfc_printf_vlog(vport, KERN_ERR, - LOG_TRACE_EVENT, - "0122 FDISC Failed (x%x). " - "Fabric Detected Bad WWN\n", - stat.un.lsRjtError); + lpfc_vlog_msg(vport, KERN_WARNING, LOG_ELS, + "0122 FDISC (x%x). " + "Fabric Detected Bad WWN\n", + stat.un.lsRjtError); lpfc_vport_set_state(vport, FC_VPORT_FABRIC_REJ_WWN); } @@ -5355,8 +5348,8 @@ lpfc_cmpl_els_rsp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, u32 ulp_status, ulp_word4, tmo, did, iotag; if (!vport) { - lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT, - "3177 ELS response failed\n"); + lpfc_printf_log(phba, KERN_WARNING, LOG_ELS, + "3177 null vport in ELS rsp\n"); goto out; } if (cmdiocb->context_un.mbox) @@ -11328,10 +11321,10 @@ lpfc_cmpl_els_fdisc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, /* Check for retry */ if (lpfc_els_retry(phba, cmdiocb, rspiocb)) goto out; - /* FDISC failed */ - lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT, - "0126 FDISC failed. (x%x/x%x)\n", - ulp_status, ulp_word4); + /* Warn FDISC status */ + lpfc_vlog_msg(vport, KERN_WARNING, LOG_ELS, + "0126 FDISC cmpl status: x%x/x%x)\n", + ulp_status, ulp_word4); goto fdisc_failed; } From eeb85c658e1bceaccb6ca3ffc1796741abd7b687 Mon Sep 17 00:00:00 2001 From: Justin Tee Date: Thu, 12 Sep 2024 16:24:46 -0700 Subject: [PATCH 298/655] scsi: lpfc: Support loopback tests with VMID enabled The VMID feature adds an extra application services header to each frame. As such, the loopback test path is updated to accommodate the extra application header. Changes include filling in APPID and WQES bit fields for XMIT_SEQUENCE64 commands, a special loopback source APPID for verifying received loopback data matches what is sent, and increasing ELS WQ size to accommodate the APPID field in loopback test mode. Signed-off-by: Justin Tee Link: https://lore.kernel.org/r/20240912232447.45607-8-justintee8345@gmail.com Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_bsg.c | 3 +++ drivers/scsi/lpfc/lpfc_hw.h | 21 +++++++++++++++++++ drivers/scsi/lpfc/lpfc_init.c | 11 ++++++++-- drivers/scsi/lpfc/lpfc_sli.c | 39 +++++++++++++++++++++++++++++++++-- 4 files changed, 70 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c index 4156419c52c7..8738c1b0e388 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.c +++ b/drivers/scsi/lpfc/lpfc_bsg.c @@ -3208,6 +3208,9 @@ lpfc_bsg_diag_loopback_run(struct bsg_job *job) cmdiocbq->num_bdes = num_bde; cmdiocbq->cmd_flag |= LPFC_IO_LIBDFC; cmdiocbq->cmd_flag |= LPFC_IO_LOOPBACK; + if (phba->cfg_vmid_app_header) + cmdiocbq->cmd_flag |= LPFC_IO_VMID; + cmdiocbq->vport = phba->pport; cmdiocbq->cmd_cmpl = NULL; cmdiocbq->bpl_dmabuf = txbmp; diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h index 2108b4cb7815..d5c15742f7f2 100644 --- a/drivers/scsi/lpfc/lpfc_hw.h +++ b/drivers/scsi/lpfc/lpfc_hw.h @@ -561,6 +561,27 @@ struct fc_vft_header { #include +/* + * Application Header + */ +struct fc_app_header { + uint32_t dst_app_id; + uint32_t src_app_id; +#define LOOPBACK_SRC_APPID 0x4321 + uint32_t word2; + uint32_t word3; +}; + +/* + * dfctl optional header definition + */ +enum lpfc_fc_dfctl { + LPFC_FC_NO_DEVICE_HEADER, + LPFC_FC_16B_DEVICE_HEADER, + LPFC_FC_32B_DEVICE_HEADER, + LPFC_FC_64B_DEVICE_HEADER, +}; + /* * Extended Link Service LS_COMMAND codes (Payload Word 0) */ diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 0c1404dc5f3b..48a3dfdd51d3 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -10451,6 +10451,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) struct lpfc_vector_map_info *cpup; struct lpfc_vector_map_info *eqcpup; struct lpfc_eq_intr_info *eqi; + u32 wqesize; /* * Create HBA Record arrays. @@ -10670,9 +10671,15 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) * Create ELS Work Queues */ - /* Create slow-path ELS Work Queue */ + /* + * Create slow-path ELS Work Queue. + * Increase the ELS WQ size when WQEs contain an embedded cdb + */ + wqesize = (phba->fcp_embed_io) ? + LPFC_WQE128_SIZE : phba->sli4_hba.wq_esize; + qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE, - phba->sli4_hba.wq_esize, + wqesize, phba->sli4_hba.wq_ecount, cpu); if (!qdesc) { lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT, diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index bb5fd3322273..a44afb84cd3d 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -11093,9 +11093,17 @@ __lpfc_sli_prep_xmit_seq64_s4(struct lpfc_iocbq *cmdiocbq, /* Word 9 */ bf_set(wqe_rcvoxid, &wqe->xmit_sequence.wqe_com, ox_id); - /* Word 12 */ - if (cmdiocbq->cmd_flag & (LPFC_IO_LIBDFC | LPFC_IO_LOOPBACK)) + if (cmdiocbq->cmd_flag & (LPFC_IO_LIBDFC | LPFC_IO_LOOPBACK)) { + /* Word 10 */ + if (cmdiocbq->cmd_flag & LPFC_IO_VMID) { + bf_set(wqe_appid, &wqe->xmit_sequence.wqe_com, 1); + bf_set(wqe_wqes, &wqe->xmit_sequence.wqe_com, 1); + wqe->words[31] = LOOPBACK_SRC_APPID; + } + + /* Word 12 */ wqe->xmit_sequence.xmit_len = full_size; + } else wqe->xmit_sequence.xmit_len = wqe->xmit_sequence.bde.tus.f.bdeSize; @@ -18434,6 +18442,7 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr) { /* make rctl_names static to save stack space */ struct fc_vft_header *fc_vft_hdr; + struct fc_app_header *fc_app_hdr; uint32_t *header = (uint32_t *) fc_hdr; #define FC_RCTL_MDS_DIAGS 0xF4 @@ -18489,6 +18498,32 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr) goto drop; } + if (unlikely(phba->link_flag == LS_LOOPBACK_MODE && + phba->cfg_vmid_app_header)) { + /* Application header is 16B device header */ + if (fc_hdr->fh_df_ctl & LPFC_FC_16B_DEVICE_HEADER) { + fc_app_hdr = (struct fc_app_header *) (fc_hdr + 1); + if (be32_to_cpu(fc_app_hdr->src_app_id) != + LOOPBACK_SRC_APPID) { + lpfc_printf_log(phba, KERN_WARNING, + LOG_ELS | LOG_LIBDFC, + "1932 Loopback src app id " + "not matched, app_id:x%x\n", + be32_to_cpu(fc_app_hdr->src_app_id)); + + goto drop; + } + } else { + lpfc_printf_log(phba, KERN_WARNING, + LOG_ELS | LOG_LIBDFC, + "1933 Loopback df_ctl bit not set, " + "df_ctl:x%x\n", + fc_hdr->fh_df_ctl); + + goto drop; + } + } + lpfc_printf_log(phba, KERN_INFO, LOG_ELS, "2538 Received frame rctl:x%x, type:x%x, " "frame Data:%08x %08x %08x %08x %08x %08x %08x\n", From b071c1a9099c7bc5fe24089117e7a15e52d4198f Mon Sep 17 00:00:00 2001 From: Justin Tee Date: Thu, 12 Sep 2024 16:24:47 -0700 Subject: [PATCH 299/655] scsi: lpfc: Update lpfc version to 14.4.0.5 Update lpfc version to 14.4.0.5 Signed-off-by: Justin Tee Link: https://lore.kernel.org/r/20240912232447.45607-9-justintee8345@gmail.com Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h index 2fe0386a1fee..e70f163fab90 100644 --- a/drivers/scsi/lpfc/lpfc_version.h +++ b/drivers/scsi/lpfc/lpfc_version.h @@ -20,7 +20,7 @@ * included with this package. * *******************************************************************/ -#define LPFC_DRIVER_VERSION "14.4.0.4" +#define LPFC_DRIVER_VERSION "14.4.0.5" #define LPFC_DRIVER_NAME "lpfc" /* Used for SLI 2/3 */ From 77d48d39e99170b528e4f2e9fc5d1d64cdedd386 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 12 Sep 2024 17:45:49 +0200 Subject: [PATCH 300/655] efistub/tpm: Use ACPI reclaim memory for event log to avoid corruption The TPM event log table is a Linux specific construct, where the data produced by the GetEventLog() boot service is cached in memory, and passed on to the OS using an EFI configuration table. The use of EFI_LOADER_DATA here results in the region being left unreserved in the E820 memory map constructed by the EFI stub, and this is the memory description that is passed on to the incoming kernel by kexec, which is therefore unaware that the region should be reserved. Even though the utility of the TPM2 event log after a kexec is questionable, any corruption might send the parsing code off into the weeds and crash the kernel. So let's use EFI_ACPI_RECLAIM_MEMORY instead, which is always treated as reserved by the E820 conversion logic. Cc: Reported-by: Breno Leitao Tested-by: Usama Arif Reviewed-by: Ilias Apalodimas Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/tpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/tpm.c b/drivers/firmware/efi/libstub/tpm.c index df3182f2e63a..1fd6823248ab 100644 --- a/drivers/firmware/efi/libstub/tpm.c +++ b/drivers/firmware/efi/libstub/tpm.c @@ -96,7 +96,7 @@ static void efi_retrieve_tcg2_eventlog(int version, efi_physical_addr_t log_loca } /* Allocate space for the logs and copy them. */ - status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, + status = efi_bs_call(allocate_pool, EFI_ACPI_RECLAIM_MEMORY, sizeof(*log_tbl) + log_size, (void **)&log_tbl); if (status != EFI_SUCCESS) { From 6a36d828bdef0e02b1e6c12e2160f5b83be6aab5 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 13 Sep 2024 02:09:55 +0100 Subject: [PATCH 301/655] driver core: attribute_container: Remove unused functions I can't find any use of 'attribute_container_add_class_device_adapter' or 'attribute_container_trigger' in git history. Their export decls went in 2006: commit 1740757e8f94 ("[PATCH] Driver Core: remove unused exports") and their docs disappeared in 2016: commit 47cb398dd75a ("Docs: sphinxify device-drivers.tmpl") Remove them. Signed-off-by: Dr. David Alan Gilbert Link: https://lore.kernel.org/r/20240913010955.1393995-1-linux@treblig.org Signed-off-by: Greg Kroah-Hartman --- drivers/base/attribute_container.c | 48 +---------------------------- include/linux/attribute_container.h | 6 ---- 2 files changed, 1 insertion(+), 53 deletions(-) diff --git a/drivers/base/attribute_container.c b/drivers/base/attribute_container.c index 01ef796c2055..b6f941a6ab69 100644 --- a/drivers/base/attribute_container.c +++ b/drivers/base/attribute_container.c @@ -346,8 +346,7 @@ attribute_container_device_trigger_safe(struct device *dev, * @fn: the function to execute for each classdev. * * This function is for executing a trigger when you need to know both - * the container and the classdev. If you only care about the - * container, then use attribute_container_trigger() instead. + * the container and the classdev. */ void attribute_container_device_trigger(struct device *dev, @@ -378,33 +377,6 @@ attribute_container_device_trigger(struct device *dev, mutex_unlock(&attribute_container_mutex); } -/** - * attribute_container_trigger - trigger a function for each matching container - * - * @dev: The generic device to activate the trigger for - * @fn: the function to trigger - * - * This routine triggers a function that only needs to know the - * matching containers (not the classdev) associated with a device. - * It is more lightweight than attribute_container_device_trigger, so - * should be used in preference unless the triggering function - * actually needs to know the classdev. - */ -void -attribute_container_trigger(struct device *dev, - int (*fn)(struct attribute_container *, - struct device *)) -{ - struct attribute_container *cont; - - mutex_lock(&attribute_container_mutex); - list_for_each_entry(cont, &attribute_container_list, node) { - if (cont->match(cont, dev)) - fn(cont, dev); - } - mutex_unlock(&attribute_container_mutex); -} - /** * attribute_container_add_attrs - add attributes * @@ -458,24 +430,6 @@ attribute_container_add_class_device(struct device *classdev) return attribute_container_add_attrs(classdev); } -/** - * attribute_container_add_class_device_adapter - simple adapter for triggers - * - * @cont: the container to register. - * @dev: the generic device to activate the trigger for - * @classdev: the class device to add - * - * This function is identical to attribute_container_add_class_device except - * that it is designed to be called from the triggers - */ -int -attribute_container_add_class_device_adapter(struct attribute_container *cont, - struct device *dev, - struct device *classdev) -{ - return attribute_container_add_class_device(classdev); -} - /** * attribute_container_remove_attrs - remove any attribute files * diff --git a/include/linux/attribute_container.h b/include/linux/attribute_container.h index e4004d1e6725..b3643de9931d 100644 --- a/include/linux/attribute_container.h +++ b/include/linux/attribute_container.h @@ -61,14 +61,8 @@ int attribute_container_device_trigger_safe(struct device *dev, int (*undo)(struct attribute_container *, struct device *, struct device *)); -void attribute_container_trigger(struct device *dev, - int (*fn)(struct attribute_container *, - struct device *)); int attribute_container_add_attrs(struct device *classdev); int attribute_container_add_class_device(struct device *classdev); -int attribute_container_add_class_device_adapter(struct attribute_container *cont, - struct device *dev, - struct device *classdev); void attribute_container_remove_attrs(struct device *classdev); void attribute_container_class_device_del(struct device *classdev); struct attribute_container *attribute_container_classdev_to_container(struct device *); From 04736f7d1945722117def1462fd3602c72c02272 Mon Sep 17 00:00:00 2001 From: Li Zetao Date: Tue, 3 Sep 2024 22:41:15 +0800 Subject: [PATCH 302/655] efi: Remove redundant null pointer checks in efi_debugfs_init() Since the debugfs_create_dir() never returns a null pointer, checking the return value for a null pointer is redundant, and using IS_ERR is safe enough. Signed-off-by: Li Zetao Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/efi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index fdf07dd6f459..70490bf2697b 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -349,7 +349,7 @@ static void __init efi_debugfs_init(void) int i = 0; efi_debugfs = debugfs_create_dir("efi", NULL); - if (IS_ERR_OR_NULL(efi_debugfs)) + if (IS_ERR(efi_debugfs)) return; for_each_efi_memory_desc(md) { From ce47f7cbbcadbc716325ccdd3be5d71f1e10a966 Mon Sep 17 00:00:00 2001 From: Chunhui Li Date: Wed, 11 Sep 2024 11:28:02 +0800 Subject: [PATCH 303/655] module: abort module loading when sysfs setup suffer errors When insmod a kernel module, if fails in add_notes_attrs or add_sysfs_attrs such as memory allocation fail, mod_sysfs_setup will still return success, but we can't access user interface on android device. Patch for make mod_sysfs_setup can check the error of add_notes_attrs and add_sysfs_attrs [mcgrof: the section stuff comes from linux history.git [0]] Fixes: 3f7b0672086b ("Module section offsets in /sys/module") [0] Fixes: 6d76013381ed ("Add /sys/module/name/notes") Acked-by: Luis Chamberlain Reviewed-by: Petr Pavlu Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202409010016.3XIFSmRA-lkp@intel.com/ Closes: https://lore.kernel.org/oe-kbuild-all/202409072018.qfEzZbO7-lkp@intel.com/ Link: https://git.kernel.org/pub/scm/linux/kernel/git/history/history.git/commit/?id=3f7b0672086b97b2d7f322bdc289cbfa203f10ef [0] Signed-off-by: Xion Wang Signed-off-by: Chunhui Li Signed-off-by: Luis Chamberlain --- kernel/module/sysfs.c | 63 ++++++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 21 deletions(-) diff --git a/kernel/module/sysfs.c b/kernel/module/sysfs.c index 26efe1305c12..456358e1fdc4 100644 --- a/kernel/module/sysfs.c +++ b/kernel/module/sysfs.c @@ -69,12 +69,13 @@ static void free_sect_attrs(struct module_sect_attrs *sect_attrs) kfree(sect_attrs); } -static void add_sect_attrs(struct module *mod, const struct load_info *info) +static int add_sect_attrs(struct module *mod, const struct load_info *info) { unsigned int nloaded = 0, i, size[2]; struct module_sect_attrs *sect_attrs; struct module_sect_attr *sattr; struct bin_attribute **gattr; + int ret; /* Count loaded sections and allocate structures */ for (i = 0; i < info->hdr->e_shnum; i++) @@ -85,7 +86,7 @@ static void add_sect_attrs(struct module *mod, const struct load_info *info) size[1] = (nloaded + 1) * sizeof(sect_attrs->grp.bin_attrs[0]); sect_attrs = kzalloc(size[0] + size[1], GFP_KERNEL); if (!sect_attrs) - return; + return -ENOMEM; /* Setup section attributes. */ sect_attrs->grp.name = "sections"; @@ -103,8 +104,10 @@ static void add_sect_attrs(struct module *mod, const struct load_info *info) sattr->address = sec->sh_addr; sattr->battr.attr.name = kstrdup(info->secstrings + sec->sh_name, GFP_KERNEL); - if (!sattr->battr.attr.name) + if (!sattr->battr.attr.name) { + ret = -ENOMEM; goto out; + } sect_attrs->nsections++; sattr->battr.read = module_sect_read; sattr->battr.size = MODULE_SECT_READ_SIZE; @@ -113,13 +116,15 @@ static void add_sect_attrs(struct module *mod, const struct load_info *info) } *gattr = NULL; - if (sysfs_create_group(&mod->mkobj.kobj, §_attrs->grp)) + ret = sysfs_create_group(&mod->mkobj.kobj, §_attrs->grp); + if (ret) goto out; mod->sect_attrs = sect_attrs; - return; + return 0; out: free_sect_attrs(sect_attrs); + return ret; } static void remove_sect_attrs(struct module *mod) @@ -158,15 +163,12 @@ static void free_notes_attrs(struct module_notes_attrs *notes_attrs, kfree(notes_attrs); } -static void add_notes_attrs(struct module *mod, const struct load_info *info) +static int add_notes_attrs(struct module *mod, const struct load_info *info) { unsigned int notes, loaded, i; struct module_notes_attrs *notes_attrs; struct bin_attribute *nattr; - - /* failed to create section attributes, so can't create notes */ - if (!mod->sect_attrs) - return; + int ret; /* Count notes sections and allocate structures. */ notes = 0; @@ -176,12 +178,12 @@ static void add_notes_attrs(struct module *mod, const struct load_info *info) ++notes; if (notes == 0) - return; + return 0; notes_attrs = kzalloc(struct_size(notes_attrs, attrs, notes), GFP_KERNEL); if (!notes_attrs) - return; + return -ENOMEM; notes_attrs->notes = notes; nattr = ¬es_attrs->attrs[0]; @@ -201,19 +203,23 @@ static void add_notes_attrs(struct module *mod, const struct load_info *info) } notes_attrs->dir = kobject_create_and_add("notes", &mod->mkobj.kobj); - if (!notes_attrs->dir) + if (!notes_attrs->dir) { + ret = -ENOMEM; goto out; + } - for (i = 0; i < notes; ++i) - if (sysfs_create_bin_file(notes_attrs->dir, - ¬es_attrs->attrs[i])) + for (i = 0; i < notes; ++i) { + ret = sysfs_create_bin_file(notes_attrs->dir, ¬es_attrs->attrs[i]); + if (ret) goto out; + } mod->notes_attrs = notes_attrs; - return; + return 0; out: free_notes_attrs(notes_attrs, i); + return ret; } static void remove_notes_attrs(struct module *mod) @@ -223,9 +229,15 @@ static void remove_notes_attrs(struct module *mod) } #else /* !CONFIG_KALLSYMS */ -static inline void add_sect_attrs(struct module *mod, const struct load_info *info) { } +static inline int add_sect_attrs(struct module *mod, const struct load_info *info) +{ + return 0; +} static inline void remove_sect_attrs(struct module *mod) { } -static inline void add_notes_attrs(struct module *mod, const struct load_info *info) { } +static inline int add_notes_attrs(struct module *mod, const struct load_info *info) +{ + return 0; +} static inline void remove_notes_attrs(struct module *mod) { } #endif /* CONFIG_KALLSYMS */ @@ -385,11 +397,20 @@ int mod_sysfs_setup(struct module *mod, if (err) goto out_unreg_modinfo_attrs; - add_sect_attrs(mod, info); - add_notes_attrs(mod, info); + err = add_sect_attrs(mod, info); + if (err) + goto out_del_usage_links; + + err = add_notes_attrs(mod, info); + if (err) + goto out_unreg_sect_attrs; return 0; +out_unreg_sect_attrs: + remove_sect_attrs(mod); +out_del_usage_links: + del_usage_links(mod); out_unreg_modinfo_attrs: module_remove_modinfo_attrs(mod, -1); out_unreg_param: From b319cea80539df9bea0ad98cb5e4b2fcb7e1a34b Mon Sep 17 00:00:00 2001 From: Vincent Donnefort Date: Tue, 10 Sep 2024 08:31:23 +0100 Subject: [PATCH 304/655] module: Refine kmemleak scanned areas commit ac3b43283923 ("module: replace module_layout with module_memory") introduced a set of memory regions for the module layout sharing the same attributes. However, it didn't update the kmemleak scanned areas which intended to limit kmemleak scan to sections containing writable data. This means sections such as .text and .rodata are scanned by kmemleak. Refine the scanned areas for modules by limiting it to MOD_TEXT and MOD_INIT_TEXT mod_mem regions. CC: Song Liu Reviewed-by: Catalin Marinas Signed-off-by: Vincent Donnefort Signed-off-by: Luis Chamberlain --- kernel/module/debug_kmemleak.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/kernel/module/debug_kmemleak.c b/kernel/module/debug_kmemleak.c index 12a569d361e8..b4cc03842d70 100644 --- a/kernel/module/debug_kmemleak.c +++ b/kernel/module/debug_kmemleak.c @@ -12,19 +12,9 @@ void kmemleak_load_module(const struct module *mod, const struct load_info *info) { - unsigned int i; - - /* only scan the sections containing data */ - kmemleak_scan_area(mod, sizeof(struct module), GFP_KERNEL); - - for (i = 1; i < info->hdr->e_shnum; i++) { - /* Scan all writable sections that's not executable */ - if (!(info->sechdrs[i].sh_flags & SHF_ALLOC) || - !(info->sechdrs[i].sh_flags & SHF_WRITE) || - (info->sechdrs[i].sh_flags & SHF_EXECINSTR)) - continue; - - kmemleak_scan_area((void *)info->sechdrs[i].sh_addr, - info->sechdrs[i].sh_size, GFP_KERNEL); + /* only scan writable, non-executable sections */ + for_each_mod_mem_type(type) { + if (type != MOD_DATA && type != MOD_INIT_DATA) + kmemleak_no_scan(mod->mem[type].base); } } From c8691cd0fc11197515ed148de0780d927bfca38b Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 13 Sep 2024 15:05:18 +0200 Subject: [PATCH 305/655] Revert "dm: requeue IO if mapping table not yet available" This reverts commit fa247089de9936a46e290d4724cb5f0b845600f5. The following sequence of commands causes a livelock - there will be workqueue process looping and consuming 100% CPU: dmsetup create --notable test truncate -s 1MiB testdata losetup /dev/loop0 testdata dmsetup load test --table '0 2048 linear /dev/loop0 0' dd if=/dev/zero of=/dev/dm-0 bs=16k count=1 conv=fdatasync The livelock is caused by the commit fa247089de99. The commit claims that it fixes a race condition, however, it is unknown what the actual race condition is and what program is involved in the race condition. When the inactive table is loaded, the nodes /dev/dm-0 and /sys/block/dm-0 are created. /dev/dm-0 has zero size at this point. When the device is suspended and resumed, the nodes /dev/mapper/test and /dev/disk/* are created. If some program opens a block device before it is created by dmsetup or lvm, the program is buggy, so dm could just report an error as it used to do before. Reported-by: Zdenek Kabelac Signed-off-by: Mikulas Patocka Fixes: fa247089de99 ("dm: requeue IO if mapping table not yet available") --- drivers/md/dm-rq.c | 4 +++- drivers/md/dm.c | 11 ++++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index f7e9a3632eb3..499f8cc8a39f 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -496,8 +496,10 @@ static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, map = dm_get_live_table(md, &srcu_idx); if (unlikely(!map)) { + DMERR_LIMIT("%s: mapping table unavailable, erroring io", + dm_device_name(md)); dm_put_live_table(md, srcu_idx); - return BLK_STS_RESOURCE; + return BLK_STS_IOERR; } ti = dm_table_find_target(map, 0); dm_put_live_table(md, srcu_idx); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 87bb90303435..ff4a6b570b76 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2030,10 +2030,15 @@ static void dm_submit_bio(struct bio *bio) struct dm_table *map; map = dm_get_live_table(md, &srcu_idx); + if (unlikely(!map)) { + DMERR_LIMIT("%s: mapping table unavailable, erroring io", + dm_device_name(md)); + bio_io_error(bio); + goto out; + } - /* If suspended, or map not yet available, queue this IO for later */ - if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) || - unlikely(!map)) { + /* If suspended, queue this IO for later */ + if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) { if (bio->bi_opf & REQ_NOWAIT) bio_wouldblock_error(bio); else if (bio->bi_opf & REQ_RAHEAD) From 9498f2e24ee0133d486667c9fa4c27ecdaadc272 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20H=C3=B6gander?= Date: Fri, 6 Sep 2024 10:00:33 +0300 Subject: [PATCH 306/655] drm/i915/psr: Do not wait for PSR being idle on on Panel Replay MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We do not have ALPM on DP Panel Replay. Due to this SRD_STATUS[SRD State] doesn't change from SRDENT_ON after Panel Replay is enabled until it gets disabled. On eDP Panel Replay DEEP_SLEEP is not reached. _psr2_ready_for_pipe_update_locked is waiting DEEP_SLEEP bit getting reset. Take these into account in Panel Replay code by not waiting PSR getting idle after enabling VBI. Fixes: 29fb595d4875 ("drm/i915/psr: Panel replay uses SRD_STATUS to track it's status") Cc: Animesh Manna Signed-off-by: Jouni Högander Reviewed-by: Animesh Manna Link: https://patchwork.freedesktop.org/patch/msgid/20240906070033.289015-5-jouni.hogander@intel.com (cherry picked from commit a2d98feb4b0013ef4f9db0d8f642a8ac1f5ecbb9) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_psr.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 1f83b3b67ea6..3d8037b5401e 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -2784,13 +2784,6 @@ static int _psr1_ready_for_pipe_update_locked(struct intel_dp *intel_dp) EDP_PSR_STATUS_STATE_MASK, 50); } -static int _panel_replay_ready_for_pipe_update_locked(struct intel_dp *intel_dp) -{ - return intel_dp_is_edp(intel_dp) ? - _psr2_ready_for_pipe_update_locked(intel_dp) : - _psr1_ready_for_pipe_update_locked(intel_dp); -} - /** * intel_psr_wait_for_idle_locked - wait for PSR be ready for a pipe update * @new_crtc_state: new CRTC state @@ -2813,12 +2806,10 @@ void intel_psr_wait_for_idle_locked(const struct intel_crtc_state *new_crtc_stat lockdep_assert_held(&intel_dp->psr.lock); - if (!intel_dp->psr.enabled) + if (!intel_dp->psr.enabled || intel_dp->psr.panel_replay_enabled) continue; - if (intel_dp->psr.panel_replay_enabled) - ret = _panel_replay_ready_for_pipe_update_locked(intel_dp); - else if (intel_dp->psr.sel_update_enabled) + if (intel_dp->psr.sel_update_enabled) ret = _psr2_ready_for_pipe_update_locked(intel_dp); else ret = _psr1_ready_for_pipe_update_locked(intel_dp); From fcd33d434d31a210bc9f209b5bfd92f3b91a2dda Mon Sep 17 00:00:00 2001 From: Arun R Murthy Date: Tue, 27 Aug 2024 13:42:05 +0530 Subject: [PATCH 307/655] drm/i915/display: BMG supports UHBR13.5 UHBR20 is not supported by battlemage and the maximum link rate supported is UHBR13.5 v2: Replace IS_DGFX with IS_BATTLEMAGE (Jani) HSD: 16023263677 Signed-off-by: Arun R Murthy Reviewed-by: Mika Kahola Fixes: 98b1c87a5e51 ("drm/i915/xe2hpd: Set maximum DP rate to UHBR13.5") Signed-off-by: Suraj Kandpal Link: https://patchwork.freedesktop.org/patch/msgid/20240827081205.136569-1-arun.r.murthy@intel.com (cherry picked from commit 9c2338ac4543e0fab3a1e0f9f025591e0f0d9f8f) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_dp.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index a1fcedfd404b..b1b512e258ab 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -531,6 +531,10 @@ static void intel_dp_set_source_rates(struct intel_dp *intel_dp) { /* The values must be in increasing order */ + static const int bmg_rates[] = { + 162000, 216000, 243000, 270000, 324000, 432000, 540000, 675000, + 810000, 1000000, 1350000, + }; static const int mtl_rates[] = { 162000, 216000, 243000, 270000, 324000, 432000, 540000, 675000, 810000, 1000000, 2000000, @@ -561,8 +565,13 @@ intel_dp_set_source_rates(struct intel_dp *intel_dp) intel_dp->source_rates || intel_dp->num_source_rates); if (DISPLAY_VER(dev_priv) >= 14) { - source_rates = mtl_rates; - size = ARRAY_SIZE(mtl_rates); + if (IS_BATTLEMAGE(dev_priv)) { + source_rates = bmg_rates; + size = ARRAY_SIZE(bmg_rates); + } else { + source_rates = mtl_rates; + size = ARRAY_SIZE(mtl_rates); + } max_rate = mtl_max_source_rate(intel_dp); } else if (DISPLAY_VER(dev_priv) >= 11) { source_rates = icl_rates; From ec2231b8dd2dc515912ff7816c420153b4a95e92 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 10 Sep 2024 14:18:47 +0300 Subject: [PATCH 308/655] drm/i915/dp: Fix AUX IO power enabling for eDP PSR Panel Self Refresh on eDP requires the AUX IO power to be enabled whenever the output (main link) is enabled. This is required by the AUX_PHY_WAKE/ML_PHY_LOCK signaling initiated by the HW automatically to re-enable the main link after it got disabled in power saving states (see eDP v1.4b, sections 5.1, 6.1.3.3.1.1). The Panel Replay mode on non-eDP outputs on the other hand is only supported by keeping the main link active, thus not requiring the above AUX_PHY_WAKE/ML_PHY_LOCK signaling (eDP v1.4b, section 6.1.3.3.1.2). Thus enabling the AUX IO power for this case is not required either. Based on the above enable the AUX IO power only for eDP/PSR outputs. Bspec: 49274, 53370 v2: - Add a TODO comment to adjust the requirement for AUX IO based on whether the ALPM/main-link off mode gets enabled. (Rodrigo) Cc: Animesh Manna Fixes: b8cf5b5d266e ("drm/i915/panelreplay: Initializaton and compute config for panel replay") Reviewed-by: Rodrigo Vivi Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240910111847.2995725-1-imre.deak@intel.com (cherry picked from commit f7c2ed9d4ce80a2570c492825de239dc8b500f2e) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_ddi.c | 2 +- drivers/gpu/drm/i915/display/intel_psr.c | 19 +++++++++++++++++++ drivers/gpu/drm/i915/display/intel_psr.h | 2 ++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 00fbe9f8c03a..b1c294236cc8 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -916,7 +916,7 @@ intel_ddi_main_link_aux_domain(struct intel_digital_port *dig_port, * instead of a specific AUX_IO_ reference without powering up any * extra wells. */ - if (intel_encoder_can_psr(&dig_port->base)) + if (intel_psr_needs_aux_io_power(&dig_port->base, crtc_state)) return intel_display_power_aux_io_domain(i915, dig_port->aux_ch); else if (DISPLAY_VER(i915) < 14 && (intel_crtc_has_dp_encoder(crtc_state) || diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 3d8037b5401e..136a0d6ca970 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -203,6 +203,25 @@ bool intel_encoder_can_psr(struct intel_encoder *encoder) return false; } +bool intel_psr_needs_aux_io_power(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) +{ + /* + * For PSR/PR modes only eDP requires the AUX IO power to be enabled whenever + * the output is enabled. For non-eDP outputs the main link is always + * on, hence it doesn't require the HW initiated AUX wake-up signaling used + * for eDP. + * + * TODO: + * - Consider leaving AUX IO disabled for eDP / PR as well, in case + * the ALPM with main-link off mode is not enabled. + * - Leave AUX IO enabled for DP / PR, once support for ALPM with + * main-link off mode is added for it and this mode gets enabled. + */ + return intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP) && + intel_encoder_can_psr(encoder); +} + static bool psr_global_enabled(struct intel_dp *intel_dp) { struct intel_display *display = to_intel_display(intel_dp); diff --git a/drivers/gpu/drm/i915/display/intel_psr.h b/drivers/gpu/drm/i915/display/intel_psr.h index 4e09c10908e4..6eb5f15f674f 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.h +++ b/drivers/gpu/drm/i915/display/intel_psr.h @@ -25,6 +25,8 @@ struct intel_plane_state; (intel_dp)->psr.source_panel_replay_support) bool intel_encoder_can_psr(struct intel_encoder *encoder); +bool intel_psr_needs_aux_io_power(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state); void intel_psr_init_dpcd(struct intel_dp *intel_dp); void intel_psr_enable_sink(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state); From 5bab087507ae99250579f1d36071eb6c867065b1 Mon Sep 17 00:00:00 2001 From: Christoph Schlameuss Date: Wed, 7 Aug 2024 17:45:08 +0200 Subject: [PATCH 309/655] selftests: kvm: s390: Add VM run test case Add test case running code interacting with registers within a ucontrol VM. * Add uc_gprs test case The test uses the same VM setup using the fixture and debug macros introduced in earlier patches in this series. Signed-off-by: Christoph Schlameuss Reviewed-by: Janosch Frank Link: https://lore.kernel.org/r/20240807154512.316936-7-schlameuss@linux.ibm.com [frankja@linux.ibm.com: Removed leftover comment line] Signed-off-by: Janosch Frank Message-ID: <20240807154512.316936-7-schlameuss@linux.ibm.com> --- .../selftests/kvm/s390x/ucontrol_test.c | 125 ++++++++++++++++++ 1 file changed, 125 insertions(+) diff --git a/tools/testing/selftests/kvm/s390x/ucontrol_test.c b/tools/testing/selftests/kvm/s390x/ucontrol_test.c index d103a92e7495..f257beec1430 100644 --- a/tools/testing/selftests/kvm/s390x/ucontrol_test.c +++ b/tools/testing/selftests/kvm/s390x/ucontrol_test.c @@ -7,6 +7,7 @@ * Authors: * Christoph Schlameuss */ +#include "debug_print.h" #include "kselftest_harness.h" #include "kvm_util.h" #include "processor.h" @@ -40,6 +41,23 @@ void require_ucontrol_admin(void) TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_UCONTROL)); } +/* Test program setting some registers and looping */ +extern char test_gprs_asm[]; +asm("test_gprs_asm:\n" + "xgr %r0, %r0\n" + "lgfi %r1,1\n" + "lgfi %r2,2\n" + "lgfi %r3,3\n" + "lgfi %r4,4\n" + "lgfi %r5,5\n" + "lgfi %r6,6\n" + "lgfi %r7,7\n" + "0:\n" + " diag 0,0,0x44\n" + " ahi %r0,1\n" + " j 0b\n" +); + FIXTURE(uc_kvm) { struct kvm_s390_sie_block *sie_block; @@ -204,4 +222,111 @@ TEST(uc_cap_hpage) close(kvm_fd); } +/* verify SIEIC exit + * * fail on codes not expected in the test cases + */ +static bool uc_handle_sieic(FIXTURE_DATA(uc_kvm) * self) +{ + struct kvm_s390_sie_block *sie_block = self->sie_block; + struct kvm_run *run = self->run; + + /* check SIE interception code */ + pr_info("sieic: 0x%.2x 0x%.4x 0x%.4x\n", + run->s390_sieic.icptcode, + run->s390_sieic.ipa, + run->s390_sieic.ipb); + switch (run->s390_sieic.icptcode) { + case ICPT_INST: + /* end execution in caller on intercepted instruction */ + pr_info("sie instruction interception\n"); + return false; + case ICPT_OPEREXC: + /* operation exception */ + TEST_FAIL("sie exception on %.4x%.8x", sie_block->ipa, sie_block->ipb); + default: + TEST_FAIL("UNEXPECTED SIEIC CODE %d", run->s390_sieic.icptcode); + } + return true; +} + +/* verify VM state on exit */ +static bool uc_handle_exit(FIXTURE_DATA(uc_kvm) * self) +{ + struct kvm_run *run = self->run; + + switch (run->exit_reason) { + case KVM_EXIT_S390_SIEIC: + return uc_handle_sieic(self); + default: + pr_info("exit_reason %2d not handled\n", run->exit_reason); + } + return true; +} + +/* run the VM until interrupted */ +static int uc_run_once(FIXTURE_DATA(uc_kvm) * self) +{ + int rc; + + rc = ioctl(self->vcpu_fd, KVM_RUN, NULL); + print_run(self->run, self->sie_block); + print_regs(self->run); + pr_debug("run %d / %d %s\n", rc, errno, strerror(errno)); + return rc; +} + +static void uc_assert_diag44(FIXTURE_DATA(uc_kvm) * self) +{ + struct kvm_s390_sie_block *sie_block = self->sie_block; + + /* assert vm was interrupted by diag 0x0044 */ + TEST_ASSERT_EQ(KVM_EXIT_S390_SIEIC, self->run->exit_reason); + TEST_ASSERT_EQ(ICPT_INST, sie_block->icptcode); + TEST_ASSERT_EQ(0x8300, sie_block->ipa); + TEST_ASSERT_EQ(0x440000, sie_block->ipb); +} + +TEST_F(uc_kvm, uc_gprs) +{ + struct kvm_sync_regs *sync_regs = &self->run->s.regs; + struct kvm_run *run = self->run; + struct kvm_regs regs = {}; + + /* Set registers to values that are different from the ones that we expect below */ + for (int i = 0; i < 8; i++) + sync_regs->gprs[i] = 8; + run->kvm_dirty_regs |= KVM_SYNC_GPRS; + + /* copy test_gprs_asm to code_hva / code_gpa */ + TH_LOG("copy code %p to vm mapped memory %p / %p", + &test_gprs_asm, (void *)self->code_hva, (void *)self->code_gpa); + memcpy((void *)self->code_hva, &test_gprs_asm, PAGE_SIZE); + + /* DAT disabled + 64 bit mode */ + run->psw_mask = 0x0000000180000000ULL; + run->psw_addr = self->code_gpa; + + /* run and expect interception of diag 44 */ + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(false, uc_handle_exit(self)); + uc_assert_diag44(self); + + /* Retrieve and check guest register values */ + ASSERT_EQ(0, ioctl(self->vcpu_fd, KVM_GET_REGS, ®s)); + for (int i = 0; i < 8; i++) { + ASSERT_EQ(i, regs.gprs[i]); + ASSERT_EQ(i, sync_regs->gprs[i]); + } + + /* run and expect interception of diag 44 again */ + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(false, uc_handle_exit(self)); + uc_assert_diag44(self); + + /* check continued increment of register 0 value */ + ASSERT_EQ(0, ioctl(self->vcpu_fd, KVM_GET_REGS, ®s)); + ASSERT_EQ(1, regs.gprs[0]); + ASSERT_EQ(1, sync_regs->gprs[0]); +} + TEST_HARNESS_MAIN From f9b56b2c31e5733c04464da1b73bafb9eff6569f Mon Sep 17 00:00:00 2001 From: Christoph Schlameuss Date: Wed, 7 Aug 2024 17:45:12 +0200 Subject: [PATCH 310/655] s390: Enable KVM_S390_UCONTROL config in debug_defconfig To simplify testing enable UCONTROL KVM by default in debug kernels. Signed-off-by: Christoph Schlameuss Reviewed-by: Janosch Frank Link: https://lore.kernel.org/r/20240807154512.316936-11-schlameuss@linux.ibm.com Signed-off-by: Janosch Frank Message-ID: <20240807154512.316936-11-schlameuss@linux.ibm.com> --- arch/s390/configs/debug_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index ea63a7342f5f..0c989caed19a 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -59,6 +59,7 @@ CONFIG_CMM=m CONFIG_APPLDATA_BASE=y CONFIG_S390_HYPFS_FS=y CONFIG_KVM=m +CONFIG_KVM_S390_UCONTROL=y CONFIG_S390_UNWIND_SELFTEST=m CONFIG_S390_KPROBES_SANITY_TEST=m CONFIG_S390_MODULES_SANITY_TEST=m From 7427c5b34fbe191451a48c69c392c85b648caa4f Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Fri, 13 Sep 2024 07:58:29 +0300 Subject: [PATCH 311/655] pinctrl: ep93xx: Fix raster pins typo Fix raster_on_sdram0_pins used in PMX_GROUP() instead of raster_on_sdram3_pins as it should be. Fixes: d1661439f5a3 ("pinctrl: add a Cirrus ep93xx SoC pin controller") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202409130426.1OzNAC9M-lkp@intel.com/ Signed-off-by: Nikita Shubin Acked-by: Alexander Sverdlin Link: https://lore.kernel.org/r/20240913045829.11904-1-nikita.shubin@maquefel.me Signed-off-by: Arnd Bergmann --- drivers/pinctrl/pinctrl-ep93xx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/pinctrl-ep93xx.c b/drivers/pinctrl/pinctrl-ep93xx.c index 458cb0e99c65..abafbbb8fd6a 100644 --- a/drivers/pinctrl/pinctrl-ep93xx.c +++ b/drivers/pinctrl/pinctrl-ep93xx.c @@ -714,7 +714,7 @@ static const struct ep93xx_pin_group ep9307_pin_groups[] = { PMX_GROUP("i2s_on_ac97", ac97_ep9301_pins, EP93XX_SYSCON_DEVCFG_I2SONAC97, EP93XX_SYSCON_DEVCFG_I2SONAC97), PMX_GROUP("rasteronsdram0grp", raster_on_sdram0_pins, EP93XX_SYSCON_DEVCFG_RASONP3, 0), - PMX_GROUP("rasteronsdram3grp", raster_on_sdram0_pins, EP93XX_SYSCON_DEVCFG_RASONP3, + PMX_GROUP("rasteronsdram3grp", raster_on_sdram3_pins, EP93XX_SYSCON_DEVCFG_RASONP3, EP93XX_SYSCON_DEVCFG_RASONP3), PMX_GROUP("gpio2agrp", gpio2a_9307_pins, EP93XX_SYSCON_DEVCFG_GONK, EP93XX_SYSCON_DEVCFG_GONK), @@ -1159,7 +1159,7 @@ static const struct ep93xx_pin_group ep9312_pin_groups[] = { PMX_GROUP("i2s_on_ac97", ac97_ep9312_pins, EP93XX_SYSCON_DEVCFG_I2SONAC97, EP93XX_SYSCON_DEVCFG_I2SONAC97), PMX_GROUP("rasteronsdram0grp", raster_on_sdram0_pins, EP93XX_SYSCON_DEVCFG_RASONP3, 0), - PMX_GROUP("rasteronsdram3grp", raster_on_sdram0_pins, EP93XX_SYSCON_DEVCFG_RASONP3, + PMX_GROUP("rasteronsdram3grp", raster_on_sdram3_pins, EP93XX_SYSCON_DEVCFG_RASONP3, EP93XX_SYSCON_DEVCFG_RASONP3), PMX_GROUP("gpio2agrp", gpio2a_9312_pins, EP93XX_SYSCON_DEVCFG_GONK, EP93XX_SYSCON_DEVCFG_GONK), From f3eeba0645dcb48c90f64ae6193148bf881429a8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 13 Sep 2024 17:35:11 +0300 Subject: [PATCH 312/655] dmaengine: ep93xx: Fix a NULL vs IS_ERR() check in probe() This was intended to be an IS_ERR() check, not a NULL check. The ep93xx_dma_of_probe() function doesn't return NULL pointers. Fixes: 4e8ad5ed845b ("dmaengine: cirrus: Convert to DT for Cirrus EP93xx") Signed-off-by: Dan Carpenter Reviewed-by: Alexander Sverdlin Signed-off-by: Arnd Bergmann --- drivers/dma/ep93xx_dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c index 43c4241af7f5..7de04e8ace44 100644 --- a/drivers/dma/ep93xx_dma.c +++ b/drivers/dma/ep93xx_dma.c @@ -1504,7 +1504,7 @@ static int ep93xx_dma_probe(struct platform_device *pdev) int ret; edma = ep93xx_dma_of_probe(pdev); - if (!edma) + if (IS_ERR(edma)) return PTR_ERR(edma); dma_dev = &edma->dma_dev; From d7333f9d33772ba93f0144b1e3969866f80fdb9a Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Thu, 12 Sep 2024 16:21:06 +0200 Subject: [PATCH 313/655] dmaengine: cirrus: use snprintf() to calm down gcc 13.3.0 Even though it's a false positive (highest channel number is "9"), refer to "struct ep93xx_edma_data edma_m2p", we can avoid new warning by using snprintf(). drivers/dma/ep93xx_dma.c: In function 'ep93xx_dma_of_probe': >> drivers/dma/ep93xx_dma.c:1365:51: warning: '%u' directive writing between 1 and 8 bytes into a region of size 2 [-Wformat-overflow=] 1365 | sprintf(dma_clk_name, "m2p%u", i); | ^~ drivers/dma/ep93xx_dma.c:1365:47: note: directive argument in the range [0, 16777216] 1365 | sprintf(dma_clk_name, "m2p%u", i); | ^~~~~~~ drivers/dma/ep93xx_dma.c:1365:25: note: 'sprintf' output between 5 and 12 bytes into a destination of size 5 1365 | sprintf(dma_clk_name, "m2p%u", i); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fixes: 4e8ad5ed845b ("dmaengine: cirrus: Convert to DT for Cirrus EP93xx") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202409122133.NctarRoK-lkp@intel.com/ Signed-off-by: Alexander Sverdlin Signed-off-by: Arnd Bergmann --- drivers/dma/ep93xx_dma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c index 7de04e8ace44..8514b9ab8517 100644 --- a/drivers/dma/ep93xx_dma.c +++ b/drivers/dma/ep93xx_dma.c @@ -1404,9 +1404,9 @@ static struct ep93xx_dma_engine *ep93xx_dma_of_probe(struct platform_device *pde edmac->edma = edma; if (edma->m2m) - sprintf(dma_clk_name, "m2m%u", i); + snprintf(dma_clk_name, sizeof(dma_clk_name), "m2m%u", i); else - sprintf(dma_clk_name, "m2p%u", i); + snprintf(dma_clk_name, sizeof(dma_clk_name), "m2p%u", i); edmac->clk = devm_clk_get(dev, dma_clk_name); if (IS_ERR(edmac->clk)) { From 21d1d72475809734a5149ecfffdc7551503b042b Mon Sep 17 00:00:00 2001 From: Kent Russell Date: Tue, 10 Sep 2024 09:21:25 -0400 Subject: [PATCH 314/655] drm/amdkfd: Move queue fs deletion after destroy check We were removing the kernfs entry for queue info before checking if the queue could be destroyed. If it failed to get destroyed (e.g. during some GPU resets), then we would try to delete it later during pqm teardown, but the file was already removed. This led to a kernel WARN trying to remove size, gpuid and type. Move the remove to after the destroy check. Signed-off-by: Kent Russell Reviewed-by: Jonathan Kim Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index b439d4d0bd84..01b960b15274 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -517,7 +517,6 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) if (retval) goto err_destroy_queue; - kfd_procfs_del_queue(pqn->q); dqm = pqn->q->device->dqm; retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q); if (retval) { @@ -527,6 +526,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) if (retval != -ETIME) goto err_destroy_queue; } + kfd_procfs_del_queue(pqn->q); kfd_queue_release_buffers(pdd, &pqn->q->properties); pqm_clean_queue_resource(pqm, pqn); uninit_queue(pqn->q); From 52755373ea6197dac40b9804ce967611b5e989bf Mon Sep 17 00:00:00 2001 From: Jesse Zhang Date: Tue, 10 Sep 2024 13:17:30 +0800 Subject: [PATCH 315/655] drm/amdkfd: clean up code for interrupt v10 Variable hub_inst is unused. Fixes: e28604d8337e ("drm/amdkfd: Drop poison hanlding from gfx v10") Signed-off-by: Jesse Zhang Reviewed-by: Tim Huang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c index bb8cbfc39b90..37b69fe0ede3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c @@ -306,23 +306,8 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, client_id == SOC15_IH_CLIENTID_UTCL2) { struct kfd_vm_fault_info info = {0}; uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry); - uint32_t node_id = SOC15_NODEID_FROM_IH_ENTRY(ih_ring_entry); - uint32_t vmid_type = SOC15_VMID_TYPE_FROM_IH_ENTRY(ih_ring_entry); - int hub_inst = 0; struct kfd_hsa_memory_exception_data exception_data; - /* gfxhub */ - if (!vmid_type && dev->adev->gfx.funcs->ih_node_to_logical_xcc) { - hub_inst = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev->adev, - node_id); - if (hub_inst < 0) - hub_inst = 0; - } - - /* mmhub */ - if (vmid_type && client_id == SOC15_IH_CLIENTID_VMC) - hub_inst = node_id / 4; - info.vmid = vmid; info.mc_id = client_id; info.page_addr = ih_ring_entry[4] | From 0da531c82a0fcac65407d28ecdb2a1e19c833df5 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Wed, 7 Aug 2024 17:33:42 +0800 Subject: [PATCH 316/655] drm/amdgpu: ensure the connector is not null before using it This resolves the dereference null return value warning reported by Coverity. Signed-off-by: Tim Huang Reviewed-by: Jesse Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 092ec11258cd..046d4c4e0299 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -1474,7 +1474,7 @@ bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc, if ((!(mode->flags & DRM_MODE_FLAG_INTERLACE)) && ((amdgpu_encoder->underscan_type == UNDERSCAN_ON) || ((amdgpu_encoder->underscan_type == UNDERSCAN_AUTO) && - connector->display_info.is_hdmi && + connector && connector->display_info.is_hdmi && amdgpu_display_is_hdtv_mode(mode)))) { if (amdgpu_encoder->underscan_hborder != 0) amdgpu_crtc->h_border = amdgpu_encoder->underscan_hborder; From c389a0604cfbcdb1f8f53a76560eb31e0700e206 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Mon, 9 Sep 2024 18:51:42 +0800 Subject: [PATCH 317/655] drm/amdgpu: disable GPU RAS bad page feature for specific ASIC The feature is not applicable to specific app platform. v2: update the disablement condition and commit description v3: move the setting to amdgpu_ras_check_supported Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 61a2f386d9fb..2e4c867e1d42 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -3468,6 +3468,11 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) /* aca is disabled by default */ adev->aca.is_enabled = false; + + /* bad page feature is not applicable to specific app platform */ + if (adev->gmc.is_app_apu && + amdgpu_ip_version(adev, UMC_HWIP, 0) == IP_VERSION(12, 0, 0)) + amdgpu_bad_page_threshold = 0; } static void amdgpu_ras_counte_dw(struct work_struct *work) From 28b0ef922738b74335c20b8ed4bf8e259353a3a3 Mon Sep 17 00:00:00 2001 From: Bob Zhou Date: Fri, 6 Sep 2024 17:48:20 +0800 Subject: [PATCH 318/655] drm/amdgpu: Fix missing check pcie_p2p module param The module param pcie_p2p should be checked for kfd p2p feature, so add it. Fixes: 75f0efbc4b3b ("drm/amdgpu: Take IOMMU remapping into account for p2p checks") Signed-off-by: Bob Zhou Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index f4628412dac4..afb643e03d3c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -6189,7 +6189,7 @@ bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, p2p_addressable = !(adev->gmc.aper_base & address_mask || aper_limit & address_mask); } - return is_large_bar && p2p_access && p2p_addressable; + return pcie_p2p && is_large_bar && p2p_access && p2p_addressable; #else return false; #endif From da5b2ad1c2f18834cb1ce429e2e5a5cf5cbdf21b Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 17 Sep 2024 22:23:09 +0800 Subject: [PATCH 319/655] objtool: Handle frame pointer related instructions After commit a0f7085f6a63 ("LoongArch: Add RANDOMIZE_KSTACK_OFFSET support"), there are three new instructions "addi.d $fp, $sp, 32", "sub.d $sp, $sp, $t0" and "addi.d $sp, $fp, -32" for the secondary stack in do_syscall(), then there is a objtool warning "return with modified stack frame" and no handle_syscall() which is the previous frame of do_syscall() in the call trace when executing the command "echo l > /proc/sysrq-trigger". objdump shows something like this: 0000000000000000 : 0: 02ff8063 addi.d $sp, $sp, -32 4: 29c04076 st.d $fp, $sp, 16 8: 29c02077 st.d $s0, $sp, 8 c: 29c06061 st.d $ra, $sp, 24 10: 02c08076 addi.d $fp, $sp, 32 ... 74: 0011b063 sub.d $sp, $sp, $t0 ... a8: 4c000181 jirl $ra, $t0, 0 ... dc: 02ff82c3 addi.d $sp, $fp, -32 e0: 28c06061 ld.d $ra, $sp, 24 e4: 28c04076 ld.d $fp, $sp, 16 e8: 28c02077 ld.d $s0, $sp, 8 ec: 02c08063 addi.d $sp, $sp, 32 f0: 4c000020 jirl $zero, $ra, 0 The instruction "sub.d $sp, $sp, $t0" changes the stack bottom and the new stack size is a random value, in order to find the return address of do_syscall() which is stored in the original stack frame after executing "jirl $ra, $t0, 0", it should use fp which points to the original stack top. At the beginning, the thought is tended to decode the secondary stack instruction "sub.d $sp, $sp, $t0" and set it as a label, then check this label for the two frame pointer instructions to change the cfa base and cfa offset during the period of secondary stack in update_cfi_state(). This is valid for GCC but invalid for Clang due to there are different secondary stack instructions for ClangBuiltLinux on LoongArch, something like this: 0000000000000000 : ... 88: 00119064 sub.d $a0, $sp, $a0 8c: 00150083 or $sp, $a0, $zero ... Actually, it equals to a single instruction "sub.d $sp, $sp, $a0", but there is no proper condition to check it as a label like GCC, and so the beginning thought is not a good way. Essentially, there are two special frame pointer instructions which are "addi.d $fp, $sp, imm" and "addi.d $sp, $fp, imm", the first one points fp to the original stack top and the second one restores the original stack bottom from fp. Based on the above analysis, in order to avoid adding an arch-specific update_cfi_state(), we just add a member "frame_pointer" in the "struct symbol" as a label to avoid affecting the current normal case, then set it as true only if there is "addi.d $sp, $fp, imm". The last is to check this label for the two frame pointer instructions to change the cfa base and cfa offset in update_cfi_state(). Tested with the following two configs: (1) CONFIG_RANDOMIZE_KSTACK_OFFSET=y && CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT=n (2) CONFIG_RANDOMIZE_KSTACK_OFFSET=y && CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT=y By the way, there is no effect for x86 with this patch, tested on the x86 machine with Fedora 40 system. Cc: stable@vger.kernel.org # 6.9+ Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- tools/objtool/arch/loongarch/decode.c | 11 ++++++++++- tools/objtool/check.c | 23 ++++++++++++++++++++--- tools/objtool/include/objtool/elf.h | 1 + 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/tools/objtool/arch/loongarch/decode.c b/tools/objtool/arch/loongarch/decode.c index aee479d2191c..69b66994f2a1 100644 --- a/tools/objtool/arch/loongarch/decode.c +++ b/tools/objtool/arch/loongarch/decode.c @@ -122,7 +122,7 @@ static bool decode_insn_reg2i12_fomat(union loongarch_instruction inst, switch (inst.reg2i12_format.opcode) { case addid_op: if ((inst.reg2i12_format.rd == CFI_SP) || (inst.reg2i12_format.rj == CFI_SP)) { - /* addi.d sp,sp,si12 or addi.d fp,sp,si12 */ + /* addi.d sp,sp,si12 or addi.d fp,sp,si12 or addi.d sp,fp,si12 */ insn->immediate = sign_extend64(inst.reg2i12_format.immediate, 11); ADD_OP(op) { op->src.type = OP_SRC_ADD; @@ -132,6 +132,15 @@ static bool decode_insn_reg2i12_fomat(union loongarch_instruction inst, op->dest.reg = inst.reg2i12_format.rd; } } + if ((inst.reg2i12_format.rd == CFI_SP) && (inst.reg2i12_format.rj == CFI_FP)) { + /* addi.d sp,fp,si12 */ + struct symbol *func = find_func_containing(insn->sec, insn->offset); + + if (!func) + return false; + + func->frame_pointer = true; + } break; case ldd_op: if (inst.reg2i12_format.rj == CFI_SP) { diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 01237d167223..af9cfed7f4ec 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2993,10 +2993,27 @@ static int update_cfi_state(struct instruction *insn, break; } - if (op->dest.reg == CFI_SP && op->src.reg == CFI_BP) { + if (op->dest.reg == CFI_BP && op->src.reg == CFI_SP && + insn->sym->frame_pointer) { + /* addi.d fp,sp,imm on LoongArch */ + if (cfa->base == CFI_SP && cfa->offset == op->src.offset) { + cfa->base = CFI_BP; + cfa->offset = 0; + } + break; + } - /* lea disp(%rbp), %rsp */ - cfi->stack_size = -(op->src.offset + regs[CFI_BP].offset); + if (op->dest.reg == CFI_SP && op->src.reg == CFI_BP) { + /* addi.d sp,fp,imm on LoongArch */ + if (cfa->base == CFI_BP && cfa->offset == 0) { + if (insn->sym->frame_pointer) { + cfa->base = CFI_SP; + cfa->offset = -op->src.offset; + } + } else { + /* lea disp(%rbp), %rsp */ + cfi->stack_size = -(op->src.offset + regs[CFI_BP].offset); + } break; } diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index 2b8a69de4db8..d7e815c2fd15 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -68,6 +68,7 @@ struct symbol { u8 warned : 1; u8 embedded_insn : 1; u8 local_label : 1; + u8 frame_pointer : 1; struct list_head pv_target; struct reloc *relocs; }; From b8468bd92ae19939d4844899fa05147888732519 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 17 Sep 2024 22:23:09 +0800 Subject: [PATCH 320/655] LoongArch: Enable objtool for Clang For now, it can enable objtool for Clang, just remove !CC_IS_CLANG for HAVE_OBJTOOL in arch/loongarch/Kconfig. Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 0e3abf7b0bd3..8dff6ddaf9c5 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -147,7 +147,7 @@ config LOONGARCH select HAVE_LIVEPATCH select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI - select HAVE_OBJTOOL if AS_HAS_EXPLICIT_RELOCS && AS_HAS_THIN_ADD_SUB && !CC_IS_CLANG + select HAVE_OBJTOOL if AS_HAS_EXPLICIT_RELOCS && AS_HAS_THIN_ADD_SUB select HAVE_PCI select HAVE_PERF_EVENTS select HAVE_PERF_REGS From a7e0837724562ea8c1d869dd1a5cb1119ef651c3 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 17 Sep 2024 22:23:09 +0800 Subject: [PATCH 321/655] LoongArch: Set AS_HAS_THIN_ADD_SUB as y if AS_IS_LLVM When building kernel with "make CC=clang defconfig", LLVM Assembler is used due to LLVM_IAS=0 is not specified, then AS_HAS_THIN_ADD_SUB is not set, thus objtool can not be built after enable it for Clang. config AS_HAS_THIN_ADD_SUB is to check whether -mthin-add-sub option is available to know R_LARCH_{32,64}_PCREL are supported for GNU Assembler, there is no such an option for LLVM Assembler. The minimal version of Clang is 18 for building LoongArch kernel, and Clang >= 17 has already supported R_LARCH_{32,64}_PCREL, that is to say, there is no need to depend on AS_HAS_THIN_ADD_SUB for Clang, so just set AS_HAS_THIN_ADD_SUB as y if AS_IS_LLVM. Fixes: 120dd4118e58 ("LoongArch: Only allow OBJTOOL & ORC unwinder if toolchain supports -mthin-add-sub") Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 8dff6ddaf9c5..34531dc8a61f 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -266,7 +266,7 @@ config AS_HAS_FCSR_CLASS def_bool $(as-instr,movfcsr2gr \$t0$(comma)\$fcsr0) config AS_HAS_THIN_ADD_SUB - def_bool $(cc-option,-Wa$(comma)-mthin-add-sub) + def_bool $(cc-option,-Wa$(comma)-mthin-add-sub) || AS_IS_LLVM config AS_HAS_LSX_EXTENSION def_bool $(as-instr,vld \$vr0$(comma)\$a0$(comma)0) From 0eb0bd21e8382d10be8108952a0bb819915e1e2d Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 17 Sep 2024 22:23:10 +0800 Subject: [PATCH 322/655] LoongArch: Remove STACK_FRAME_NON_STANDARD(do_syscall) For now, we can remove STACK_FRAME_NON_STANDARD(do_syscall) because there is no objtool warning "do_syscall+0x11c: return with modified stack frame", then there is handle_syscall() which is the previous frame of do_syscall() in the call trace when executing the command "echo l > /proc/sysrq-trigger". Fixes: a0f7085f6a63 ("LoongArch: Add RANDOMIZE_KSTACK_OFFSET support") Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/syscall.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/loongarch/kernel/syscall.c b/arch/loongarch/kernel/syscall.c index ba5d0930a74f..168bd97540f8 100644 --- a/arch/loongarch/kernel/syscall.c +++ b/arch/loongarch/kernel/syscall.c @@ -79,7 +79,3 @@ void noinstr __no_stack_protector do_syscall(struct pt_regs *regs) syscall_exit_to_user_mode(regs); } - -#ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET -STACK_FRAME_NON_STANDARD(do_syscall); -#endif From 7ab9ebc580617831355843f19224f1e31bb8e983 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 12 Sep 2024 22:38:12 +0200 Subject: [PATCH 323/655] drm/xe/guc: Fix GUC_{SUBMIT,FIRMWARE}_VER helper macros MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Those macros rely on non-existing MAKE_VER_STRUCT macro, while the correct one that should be used is named MAKE_GUC_VER_STRUCT. Fixes: 4eb0aab6e443 ("drm/xe/guc: Bump minimum required GuC version to v70.29.2") Signed-off-by: Michal Wajdeczko Cc: Julia Filipchuk Cc: John Harrison Reviewed-by: Michał Winiarski Link: https://patchwork.freedesktop.org/patch/msgid/20240912203817.1880-2-michal.wajdeczko@intel.com (cherry picked from commit 02fdf821ed79f59c40d766a85947aa7cc25d4364) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_guc.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h index c3e6b51f7a09..42116b167c98 100644 --- a/drivers/gpu/drm/xe/xe_guc.h +++ b/drivers/gpu/drm/xe/xe_guc.h @@ -18,8 +18,10 @@ */ #define MAKE_GUC_VER(maj, min, pat) (((maj) << 16) | ((min) << 8) | (pat)) #define MAKE_GUC_VER_STRUCT(ver) MAKE_GUC_VER((ver).major, (ver).minor, (ver).patch) -#define GUC_SUBMIT_VER(guc) MAKE_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY]) -#define GUC_FIRMWARE_VER(guc) MAKE_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_RELEASE]) +#define GUC_SUBMIT_VER(guc) \ + MAKE_GUC_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY]) +#define GUC_FIRMWARE_VER(guc) \ + MAKE_GUC_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_RELEASE]) struct drm_printer; From ee06c09ded3c2f722be4e240ed06287e23596bda Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 16 Sep 2024 09:49:12 +0100 Subject: [PATCH 324/655] drm/xe/vram: fix ccs offset calculation Spec says SW is expected to round up to the nearest 128K, if not already aligned for the CC unit view of CCS. We are seeing the assert sometimes pop on BMG to tell us that there is a hole between GSM and CCS, as well as popping other asserts with having a vram size with strange alignment, which is likely caused by misaligned offset here. v2 (Shuicheng): - Do the round_up() on final SW address. BSpec: 68023 Fixes: b5c2ca0372dc ("drm/xe/xe2hpg: Determine flat ccs offset for vram") Signed-off-by: Matthew Auld Cc: Himal Prasad Ghimiray Cc: Akshata Jahagirdar Cc: Lucas De Marchi Cc: Shuicheng Lin Cc: Matt Roper Cc: stable@vger.kernel.org # v6.10+ Reviewed-by: Himal Prasad Ghimiray Tested-by: Shuicheng Lin Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240916084911.13119-2-matthew.auld@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 37173392741c425191b959acb3adf70c9a4610c0) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_vram.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c index 5bcd59190353..80ba2fc78837 100644 --- a/drivers/gpu/drm/xe/xe_vram.c +++ b/drivers/gpu/drm/xe/xe_vram.c @@ -182,6 +182,7 @@ static inline u64 get_flat_ccs_offset(struct xe_gt *gt, u64 tile_size) offset = offset_hi << 32; /* HW view bits 39:32 */ offset |= offset_lo << 6; /* HW view bits 31:6 */ offset *= num_enabled; /* convert to SW view */ + offset = round_up(offset, SZ_128K); /* SW must round up to nearest 128K */ /* We don't expect any holes */ xe_assert_msg(xe, offset == (xe_mmio_read64_2x32(gt, GSMBASE) - ccs_size), From 99b1f7493bfa757b03d41ee6d7f7d00f81fcba5d Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 11 Sep 2024 16:55:27 +0100 Subject: [PATCH 325/655] drm/xe/client: fix deadlock in show_meminfo() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a real deadlock as well as sleeping in atomic() bug in here, if the bo put happens to be the last ref, since bo destruction wants to grab the same spinlock and sleeping locks. Fix that by dropping the ref using xe_bo_put_deferred(), and moving the final commit outside of the lock. Dropping the lock around the put is tricky since the bo can go out of scope and delete itself from the list, making it difficult to navigate to the next list entry. Fixes: 0845233388f8 ("drm/xe: Implement fdinfo memory stats printing") Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2727 Signed-off-by: Matthew Auld Cc: Himal Prasad Ghimiray Cc: Tejas Upadhyay Cc: "Thomas Hellström" Cc: # v6.8+ Reviewed-by: Matthew Brost Reviewed-by: Tejas Upadhyay Link: https://patchwork.freedesktop.org/patch/msgid/20240911155527.178910-5-matthew.auld@intel.com (cherry picked from commit 0083b8e6f11d7662283a267d4ce7c966812ffd8a) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_drm_client.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index e64f4b645e2e..badfa045ead8 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -196,6 +196,7 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) struct xe_drm_client *client; struct drm_gem_object *obj; struct xe_bo *bo; + LLIST_HEAD(deferred); unsigned int id; u32 mem_type; @@ -215,11 +216,14 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) list_for_each_entry(bo, &client->bos_list, client_link) { if (!kref_get_unless_zero(&bo->ttm.base.refcount)) continue; + bo_meminfo(bo, stats); - xe_bo_put(bo); + xe_bo_put_deferred(bo, &deferred); } spin_unlock(&client->bos_lock); + xe_bo_put_commit(&deferred); + for (mem_type = XE_PL_SYSTEM; mem_type < TTM_NUM_MEM_TYPES; ++mem_type) { if (!xe_mem_type_to_name[mem_type]) continue; From 69bbe3adf36de47315498c9384f99a1ff9171694 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 11 Sep 2024 16:55:28 +0100 Subject: [PATCH 326/655] drm/xe/client: add missing bo locking in show_meminfo() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bo_meminfo() wants to inspect bo state like tt and the ttm resource, however this state can change at any point leading to stuff like NPD and UAF, if the bo lock is not held. Grab the bo lock when calling bo_meminfo(), ensuring we drop any spinlocks first. In the case of object_idr we now also need to hold a ref. v2 (MattB) - Also add xe_bo_assert_held() Fixes: 0845233388f8 ("drm/xe: Implement fdinfo memory stats printing") Signed-off-by: Matthew Auld Cc: Himal Prasad Ghimiray Cc: Tejas Upadhyay Cc: "Thomas Hellström" Cc: # v6.8+ Reviewed-by: Matthew Brost Reviewed-by: Tejas Upadhyay Link: https://patchwork.freedesktop.org/patch/msgid/20240911155527.178910-6-matthew.auld@intel.com (cherry picked from commit 4f63d712fa104c3ebefcb289d1e733e86d8698c7) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_drm_client.c | 39 +++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index badfa045ead8..95a05c5bc897 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -10,6 +10,7 @@ #include #include +#include "xe_assert.h" #include "xe_bo.h" #include "xe_bo_types.h" #include "xe_device_types.h" @@ -151,10 +152,13 @@ void xe_drm_client_add_bo(struct xe_drm_client *client, */ void xe_drm_client_remove_bo(struct xe_bo *bo) { + struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev); struct xe_drm_client *client = bo->client; + xe_assert(xe, !kref_read(&bo->ttm.base.refcount)); + spin_lock(&client->bos_lock); - list_del(&bo->client_link); + list_del_init(&bo->client_link); spin_unlock(&client->bos_lock); xe_drm_client_put(client); @@ -166,6 +170,8 @@ static void bo_meminfo(struct xe_bo *bo, u64 sz = bo->size; u32 mem_type; + xe_bo_assert_held(bo); + if (bo->placement.placement) mem_type = bo->placement.placement->mem_type; else @@ -207,7 +213,20 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) idr_for_each_entry(&file->object_idr, obj, id) { struct xe_bo *bo = gem_to_xe_bo(obj); - bo_meminfo(bo, stats); + if (dma_resv_trylock(bo->ttm.base.resv)) { + bo_meminfo(bo, stats); + xe_bo_unlock(bo); + } else { + xe_bo_get(bo); + spin_unlock(&file->table_lock); + + xe_bo_lock(bo, false); + bo_meminfo(bo, stats); + xe_bo_unlock(bo); + + xe_bo_put(bo); + spin_lock(&file->table_lock); + } } spin_unlock(&file->table_lock); @@ -217,7 +236,21 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) if (!kref_get_unless_zero(&bo->ttm.base.refcount)) continue; - bo_meminfo(bo, stats); + if (dma_resv_trylock(bo->ttm.base.resv)) { + bo_meminfo(bo, stats); + xe_bo_unlock(bo); + } else { + spin_unlock(&client->bos_lock); + + xe_bo_lock(bo, false); + bo_meminfo(bo, stats); + xe_bo_unlock(bo); + + spin_lock(&client->bos_lock); + /* The bo ref will prevent this bo from being removed from the list */ + xe_assert(xef->xe, !list_empty(&bo->client_link)); + } + xe_bo_put_deferred(bo, &deferred); } spin_unlock(&client->bos_lock); From 73d10c7788f6d2b7badf9973afbdea7ca433c15d Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 11 Sep 2024 16:55:29 +0100 Subject: [PATCH 327/655] drm/xe/client: use mem_type from the current resource MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rather extract the mem_type from the current resource. Checking the first potential placement doesn't really tell us where the bo is currently allocated, especially if there are multiple potential placements. Signed-off-by: Matthew Auld Cc: Himal Prasad Ghimiray Cc: Tejas Upadhyay Cc: "Thomas Hellström" Reviewed-by: Matthew Brost Reviewed-by: Tejas Upadhyay Link: https://patchwork.freedesktop.org/patch/msgid/20240911155527.178910-7-matthew.auld@intel.com (cherry picked from commit fbd73b7d2ae29ef0f604f376bcc22b886a49329e) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_drm_client.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 95a05c5bc897..c4add8b38bbd 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -168,15 +168,10 @@ static void bo_meminfo(struct xe_bo *bo, struct drm_memory_stats stats[TTM_NUM_MEM_TYPES]) { u64 sz = bo->size; - u32 mem_type; + u32 mem_type = bo->ttm.resource->mem_type; xe_bo_assert_held(bo); - if (bo->placement.placement) - mem_type = bo->placement.placement->mem_type; - else - mem_type = XE_PL_TT; - if (drm_gem_object_is_shared_for_memory_stats(&bo->ttm.base)) stats[mem_type].shared += sz; else From ddc73c465628ab3e60f7eb5b4063b644c18b6336 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 11 Sep 2024 16:55:30 +0100 Subject: [PATCH 328/655] drm/xe/bo: add some annotations in bo_put() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the put() triggers bo destroy then there is at least one potential sleeping lock. Also annotate bos_lock and ggtt lock. Signed-off-by: Matthew Auld Cc: Himal Prasad Ghimiray Cc: Tejas Upadhyay Cc: "Thomas Hellström" Reviewed-by: Matthew Brost Reviewed-by: Tejas Upadhyay Link: https://patchwork.freedesktop.org/patch/msgid/20240911155527.178910-8-matthew.auld@intel.com (cherry picked from commit 3b04c2cfd71c54117237c72f2a08ff0ae1f602e2) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_bo.c | 14 ++++++++++++++ drivers/gpu/drm/xe/xe_bo.h | 6 +----- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 06911e9a3bf5..f379df3a12bf 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -2320,6 +2320,20 @@ void xe_bo_put_commit(struct llist_head *deferred) drm_gem_object_free(&bo->ttm.base.refcount); } +void xe_bo_put(struct xe_bo *bo) +{ + might_sleep(); + if (bo) { +#ifdef CONFIG_PROC_FS + if (bo->client) + might_lock(&bo->client->bos_lock); +#endif + if (bo->ggtt_node && bo->ggtt_node->ggtt) + might_lock(&bo->ggtt_node->ggtt->lock); + drm_gem_object_put(&bo->ttm.base); + } +} + /** * xe_bo_dumb_create - Create a dumb bo as backing for a fb * @file_priv: ... diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index dbfb3209615d..6e4be52306df 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -126,11 +126,7 @@ static inline struct xe_bo *xe_bo_get(struct xe_bo *bo) return bo; } -static inline void xe_bo_put(struct xe_bo *bo) -{ - if (bo) - drm_gem_object_put(&bo->ttm.base); -} +void xe_bo_put(struct xe_bo *bo); static inline void __xe_bo_unset_bulk_move(struct xe_bo *bo) { From 9460f4bd5970f2e46fe190a0cb9814697bd7f21a Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 10 Sep 2024 18:18:20 -0700 Subject: [PATCH 329/655] drm/xe: Do not run GPU page fault handler on a closed VM Closing a VM removes page table memory thus we shouldn't touch page tables when a VM is closed. Do not run the GPU page fault handler once the VM is closed to avoid touching page tables. Signed-off-by: Matthew Brost Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240911011820.825127-1-matthew.brost@intel.com (cherry picked from commit f96dbf7c321d70834d46f3aedb75a671e839b51e) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt_pagefault.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 730eec07795e..00af059a8971 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -212,6 +212,12 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) * TODO: Change to read lock? Using write lock for simplicity. */ down_write(&vm->lock); + + if (xe_vm_is_closed(vm)) { + err = -ENOENT; + goto unlock_vm; + } + vma = lookup_vma(vm, pf->page_addr); if (!vma) { err = -EINVAL; From 6c10ba06bb1b48acce6d4d9c1e33beb9954f1788 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Thu, 12 Sep 2024 08:38:42 -0700 Subject: [PATCH 330/655] drm/xe/oa: Fix overflow in oa batch buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By default xe_bb_create_job() appends a MI_BATCH_BUFFER_END to batch buffer, this is not a problem if batch buffer is only used once but oa reuses the batch buffer for the same metric and at each call it appends a MI_BATCH_BUFFER_END, printing the warning below and then overflowing. [ 381.072016] ------------[ cut here ]------------ [ 381.072019] xe 0000:00:02.0: [drm] Assertion `bb->len * 4 + bb_prefetch(q->gt) <= size` failed! platform: LUNARLAKE subplatform: 1 graphics: Xe2_LPG / Xe2_HPG 20.04 step B0 media: Xe2_LPM / Xe2_HPM 20.00 step B0 tile: 0 VRAM 0 B GT: 0 type 1 So here checking if batch buffer already have MI_BATCH_BUFFER_END if not append it. v2: - simply fix, suggestion from Ashutosh Cc: Ashutosh Dixit Signed-off-by: José Roberto de Souza Reviewed-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240912153842.35813-1-jose.souza@intel.com (cherry picked from commit 9ba0e0f30ca42a98af3689460063edfb6315718a) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_bb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index a13e0b3a169e..ef777dbdf4ec 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -65,7 +65,8 @@ __xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb, u64 *addr) { u32 size = drm_suballoc_size(bb->bo); - bb->cs[bb->len++] = MI_BATCH_BUFFER_END; + if (bb->len == 0 || bb->cs[bb->len - 1] != MI_BATCH_BUFFER_END) + bb->cs[bb->len++] = MI_BATCH_BUFFER_END; xe_gt_assert(q->gt, bb->len * 4 + bb_prefetch(q->gt) <= size); From 6ff2c290147a65027fb04b154a52723a6efabced Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 16 Sep 2024 15:28:00 +0200 Subject: [PATCH 331/655] MAINTAINERS: make vDSO getrandom matches more generic Rather than specifying matches for every architecture, what we actually want is an extglob-like thing, such as: F: **/vdso/*getrandom* Unfortunately, this isn't possible, but get_maintainers.pl does support a "N:" directive that uses regexes, so just encode that extglob statement as an ordinary regex. Signed-off-by: Jason A. Donenfeld --- MAINTAINERS | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8a76c256229c..cba8bfedeadd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19217,10 +19217,7 @@ F: drivers/char/random.c F: include/linux/random.h F: include/uapi/linux/random.h F: drivers/virt/vmgenid.c -F: include/vdso/getrandom.h -F: lib/vdso/getrandom.c -F: arch/x86/entry/vdso/vgetrandom* -F: arch/x86/include/asm/vdso/getrandom* +N: ^.*/vdso/[^/]*getrandom[^/]+$ RAPIDIO SUBSYSTEM M: Matt Porter From c5391c0e04f1b6ede3623962192b08a4eb224491 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Wed, 18 Sep 2024 15:05:29 +0200 Subject: [PATCH 332/655] dm-crypt: Use up_read() together with key_put() only once in crypt_set_keyring_key() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The combination of the calls “up_read(&key->sem)” and “key_put(key)” was immediately used after a return code check for a set_key() call in this function implementation. Thus use such a function call pair only once instead directly before the check. This issue was transformed by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Mikulas Patocka --- drivers/md/dm-crypt.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index d5533b43054e..dae2fe3cb182 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -2621,16 +2621,13 @@ static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string down_read(&key->sem); ret = set_key(cc, key); + up_read(&key->sem); + key_put(key); if (ret < 0) { - up_read(&key->sem); - key_put(key); kfree_sensitive(new_key_string); return ret; } - up_read(&key->sem); - key_put(key); - /* clear the flag since following operations may invalidate previously valid key */ clear_bit(DM_CRYPT_KEY_VALID, &cc->flags); From 5d49054ef616095d160c1072ba458e16e2f825de Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Wed, 18 Sep 2024 15:34:45 +0200 Subject: [PATCH 333/655] dm-crypt: Use common error handling code in crypt_set_keyring_key() Add a jump target so that a bit of exception handling can be better reused at the end of this function implementation. Signed-off-by: Markus Elfring Signed-off-by: Mikulas Patocka --- drivers/md/dm-crypt.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index dae2fe3cb182..5228b03b6fe0 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -2614,32 +2614,31 @@ static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string key = request_key(type, key_desc + 1, NULL); if (IS_ERR(key)) { - kfree_sensitive(new_key_string); - return PTR_ERR(key); + ret = PTR_ERR(key); + goto free_new_key_string; } down_read(&key->sem); - ret = set_key(cc, key); up_read(&key->sem); key_put(key); - if (ret < 0) { - kfree_sensitive(new_key_string); - return ret; - } + if (ret < 0) + goto free_new_key_string; /* clear the flag since following operations may invalidate previously valid key */ clear_bit(DM_CRYPT_KEY_VALID, &cc->flags); ret = crypt_setkey(cc); + if (ret) + goto free_new_key_string; - if (!ret) { - set_bit(DM_CRYPT_KEY_VALID, &cc->flags); - kfree_sensitive(cc->key_string); - cc->key_string = new_key_string; - } else - kfree_sensitive(new_key_string); + set_bit(DM_CRYPT_KEY_VALID, &cc->flags); + kfree_sensitive(cc->key_string); + cc->key_string = new_key_string; + return 0; +free_new_key_string: + kfree_sensitive(new_key_string); return ret; } From 9bcd923952078e08cd4570e15f751a6c9ec7633f Mon Sep 17 00:00:00 2001 From: Shen Lichuan Date: Sat, 14 Sep 2024 14:38:08 +0800 Subject: [PATCH 334/655] dm vdo indexer: Convert comma to semicolon To ensure code clarity and prevent potential errors, it's advisable to employ the ';' as a statement separator, except when ',' are intentionally used for specific purposes. Signed-off-by: Shen Lichuan Signed-off-by: Mikulas Patocka --- drivers/md/dm-vdo/indexer/chapter-index.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-vdo/indexer/chapter-index.c b/drivers/md/dm-vdo/indexer/chapter-index.c index 7e32a25d3f2f..fb1db41c794b 100644 --- a/drivers/md/dm-vdo/indexer/chapter-index.c +++ b/drivers/md/dm-vdo/indexer/chapter-index.c @@ -177,7 +177,7 @@ int uds_pack_open_chapter_index_page(struct open_chapter_index *chapter_index, if (list_number < 0) return UDS_OVERFLOW; - next_list = first_list + list_number--, + next_list = first_list + list_number--; result = uds_start_delta_index_search(delta_index, next_list, 0, &entry); if (result != UDS_SUCCESS) From bfc00a7754c40544c7446d3b664049d6e00ee0bd Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 10 Sep 2024 09:33:30 -0400 Subject: [PATCH 335/655] drm/amdgpu/gfx9.4.3: drop extra wrapper Drop wrapper used in one place. gfx_v9_4_3_xcc_cp_enable() is used in one place. gfx_v9_4_3_xcc_cp_compute_enable() is used everywhere else. Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 408e5600bb61..b940d2ad57db 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -2299,12 +2299,6 @@ static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev) return 0; } -static void gfx_v9_4_3_xcc_cp_enable(struct amdgpu_device *adev, bool enable, - int xcc_id) -{ - gfx_v9_4_3_xcc_cp_compute_enable(adev, enable, xcc_id); -} - static void gfx_v9_4_3_xcc_fini(struct amdgpu_device *adev, int xcc_id) { if (amdgpu_gfx_disable_kcq(adev, xcc_id)) @@ -2336,7 +2330,7 @@ static void gfx_v9_4_3_xcc_fini(struct amdgpu_device *adev, int xcc_id) } gfx_v9_4_3_xcc_kcq_fini_register(adev, xcc_id); - gfx_v9_4_3_xcc_cp_enable(adev, false, xcc_id); + gfx_v9_4_3_xcc_cp_compute_enable(adev, false, xcc_id); } static int gfx_v9_4_3_hw_init(void *handle) From 902b4027216aeaabe1562e1db070550c06f3ec89 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 10 Sep 2024 10:19:43 -0400 Subject: [PATCH 336/655] drm/amdgpu: fix spelling in amd_shared.h Fix spelling in documentation. Reviewed-by: Kent Russell Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/amd_shared.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 745fd052840d..3f91926a50e9 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -85,7 +85,7 @@ enum amd_apu_flags { * @AMD_IP_BLOCK_TYPE_MES: Micro-Engine Scheduler * @AMD_IP_BLOCK_TYPE_JPEG: JPEG Engine * @AMD_IP_BLOCK_TYPE_VPE: Video Processing Engine -* @AMD_IP_BLOCK_TYPE_UMSCH_MM: User Mode Schduler for Multimedia +* @AMD_IP_BLOCK_TYPE_UMSCH_MM: User Mode Scheduler for Multimedia * @AMD_IP_BLOCK_TYPE_ISP: Image Signal Processor * @AMD_IP_BLOCK_TYPE_NUM: Total number of IP block types */ From 0110ac11952f06419d267f51a3989e989b17e67a Mon Sep 17 00:00:00 2001 From: Yan Zhen Date: Wed, 11 Sep 2024 12:27:38 +0800 Subject: [PATCH 337/655] drm/amdgpu: fix typo in the comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Correctly spelled comments make it easier for the reader to understand the code. Replace 'udpate' with 'update' in the comment & replace 'recieved' with 'received' in the comment & replace 'dsiable' with 'disable' in the comment & replace 'Initiailize' with 'Initialize' in the comment & replace 'disble' with 'disable' in the comment & replace 'Disbale' with 'Disable' in the comment & replace 'enogh' with 'enough' in the comment & replace 'availabe' with 'available' in the comment. Acked-by: Christian König Signed-off-by: Yan Zhen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +- drivers/gpu/drm/amd/amdgpu/imu_v11_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c | 2 +- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index 57bda66e85ef..2ca127173135 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -511,7 +511,7 @@ static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *h return -EINVAL; } - /* udpate aca bank to aca source error_cache first */ + /* update aca bank to aca source error_cache first */ ret = aca_banks_update(adev, smu_type, handler_aca_log_bank_error, qctx, NULL); if (ret) return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 189574d53ebd..e9e599ff3bd4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -2853,7 +2853,7 @@ static int psp_load_non_psp_fw(struct psp_context *psp) if (ret) return ret; - /* Start rlc autoload after psp recieved all the gfx firmware */ + /* Start rlc autoload after psp received all the gfx firmware */ if (psp->autoload_supported && ucode->ucode_id == (amdgpu_sriov_vf(adev) ? adev->virt.autoload_ucode_id : AMDGPU_UCODE_ID_RLC_G)) { ret = psp_rlc_autoload_start(psp); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 2e4c867e1d42..1a1395c5fff1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -882,7 +882,7 @@ int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev, if (ret) return ret; - /* gfx block ras dsiable cmd must send to ras-ta */ + /* gfx block ras disable cmd must send to ras-ta */ if (head->block == AMDGPU_RAS_BLOCK__GFX) con->features |= BIT(head->block); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index b8bc7fa8c375..74adb983ab03 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1970,7 +1970,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) DRM_INFO("amdgpu: %uM of GTT memory ready.\n", (unsigned int)(gtt_size / (1024 * 1024))); - /* Initiailize doorbell pool on PCI BAR */ + /* Initialize doorbell pool on PCI BAR */ r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_DOORBELL, adev->doorbell.size / PAGE_SIZE); if (r) { DRM_ERROR("Failed initializing doorbell heap.\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c index 6c1891889c4d..d4f72e47ae9e 100644 --- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c @@ -153,7 +153,7 @@ static void imu_v11_0_setup(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16, imu_reg_val); } - //disble imu Rtavfs, SmsRepair, DfllBTC, and ClkB + //disable imu Rtavfs, SmsRepair, DfllBTC, and ClkB imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10); imu_reg_val |= 0x10007; WREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10, imu_reg_val); diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index fa479dfa1ec1..739fce4fa8fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -365,7 +365,7 @@ static void nbio_v2_3_enable_aspm(struct amdgpu_device *adev, data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK; } else { - /* Disbale ASPM L1 */ + /* Disable ASPM L1 */ data &= ~PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK; /* Disable ASPM TxL0s */ data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index aa637541da58..e65194fe94af 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -710,7 +710,7 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) upper_32_bits(wptr_gpu_addr)); wptr_poll_cntl = RREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i]); if (ring->use_pollmem) { - /*wptr polling is not enogh fast, directly clean the wptr register */ + /*wptr polling is not enough fast, directly clean the wptr register */ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0); wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c index e4e30b9d481b..c04fdd2d5b38 100644 --- a/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c @@ -60,7 +60,7 @@ static void smuio_v9_0_get_clock_gating_state(struct amdgpu_device *adev, u64 *f { u32 data; - /* CGTT_ROM_CLK_CTRL0 is not availabe for APUs */ + /* CGTT_ROM_CLK_CTRL0 is not available for APUs */ if (adev->flags & AMD_IS_APU) return; From c400ec6990fb04a2ec9929b253dafa7e77c7f555 Mon Sep 17 00:00:00 2001 From: Andrew Kreimer Date: Tue, 10 Sep 2024 01:41:05 +0300 Subject: [PATCH 338/655] drm/amdgpu: Fix a typo Fix a typo in comments. Reported-by: Matthew Wilcox Signed-off-by: Andrew Kreimer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index aab8077e5098..f28f6b4ba765 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -58,7 +58,7 @@ #define EEPROM_I2C_MADDR_4 0x40000 /* - * The 2 macros bellow represent the actual size in bytes that + * The 2 macros below represent the actual size in bytes that * those entities occupy in the EEPROM memory. * RAS_TABLE_RECORD_SIZE is different than sizeof(eeprom_table_record) which * uses uint64 to store 6b fields such as retired_page. From c77a46bebe99e70600e636a3a1f285637c479f46 Mon Sep 17 00:00:00 2001 From: Yan Zhen Date: Thu, 12 Sep 2024 15:12:09 +0800 Subject: [PATCH 339/655] drm/amd/display: fix typo in the comment Correctly spelled comments make it easier for the reader to understand the code. Replace 'maxium' with 'maximum' in the comment & replace 'diffculty' with 'difficulty' in the comment & replace 'suppluy' with 'supply' in the comment & replace 'Congiuration' with 'Configuration' in the comment & replace 'eanbled' with 'enabled' in the comment. Signed-off-by: Yan Zhen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c | 2 +- drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c | 6 +++--- drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c b/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c index e47e9db062f4..681799468487 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c @@ -569,7 +569,7 @@ static void calculate_bandwidth( break; } data->lb_partitions[i] = bw_floor2(bw_div(data->lb_size_per_component[i], data->lb_line_pitch), bw_int_to_fixed(1)); - /*clamp the partitions to the maxium number supported by the lb*/ + /* clamp the partitions to the maximum number supported by the lb */ if ((surface_type[i] != bw_def_graphics || dceip->graphics_lb_nodownscaling_multi_line_prefetching == 1)) { data->lb_partitions_max[i] = bw_int_to_fixed(10); } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c index 547dfcc80fde..d851c081e376 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c @@ -8926,7 +8926,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc // The prefetch scheduling should only be calculated once as per AllowForPStateChangeOrStutterInVBlank requirement // If the AllowForPStateChangeOrStutterInVBlank requirement is not strict (i.e. only try those power saving feature - // if possible, then will try to program for the best power saving features in order of diffculty (dram, fclk, stutter) + // if possible, then will try to program for the best power saving features in order of difficulty (dram, fclk, stutter) s->iteration = 0; s->MaxTotalRDBandwidth = 0; s->AllPrefetchModeTested = false; @@ -9977,7 +9977,7 @@ void dml_core_get_row_heights( dml_print("DML_DLG: %s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes); #endif - // just suppluy with enough parameters to calculate meta and dte + // just supply with enough parameters to calculate meta and dte CalculateVMAndRowBytes( 0, // dml_bool_t ViewportStationary, 1, // dml_bool_t DCCEnable, @@ -10110,7 +10110,7 @@ dml_bool_t dml_mode_support( /// Note: In this function, it is assumed that DCFCLK, SOCCLK freq are the state values, and mode_program will just use the DML calculated DPPCLK and DISPCLK /// @param mode_lib mode_lib data struct that house all the input/output/bbox and calculation values. /// @param state_idx Power state idx chosen -/// @param display_cfg Display Congiuration +/// @param display_cfg Display Configuration /// @param call_standalone Calling mode_programming without calling mode support. Some of the "support" struct member will be pre-calculated before doing mode programming /// TODO: Add clk_cfg input, could be useful for standalone mode dml_bool_t dml_mode_programming( diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c index 42c52284a868..bded33575493 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c @@ -455,7 +455,7 @@ bool dcn30_mmhubbub_warmup( struct mcif_wb *mcif_wb; struct mcif_warmup_params warmup_params = {0}; unsigned int i, i_buf; - /*make sure there is no active DWB eanbled */ + /* make sure there is no active DWB enabled */ for (i = 0; i < num_dwb; i++) { dwb = dc->res_pool->dwbc[wb_info[i].dwb_pipe_inst]; if (dwb->dwb_is_efc_transition || dwb->dwb_is_drc) { From 2ed186df27f078eb75c52d09e04aa7b0f9920f57 Mon Sep 17 00:00:00 2001 From: Kent Russell Date: Tue, 10 Sep 2024 09:32:13 -0400 Subject: [PATCH 340/655] drm/amdgpu: Retry i2c transfer once if it fails on SMU13.0.6 During init, there can be some collisions on the i2c bus that result in the EEPROM read failing. This has been mitigated in the PMFW to a degree, but there is still a small chance that the bus will be busy. When the read fails during RAS init, that disables page retirement altogether, which is obviously not ideal. To try to avoid that situation, set the eeprom_read function to retry once if the first read fails, specifically for smu_v13_0_6. Signed-off-by: Kent Russell Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 9974c9f8135e..55ed6247eb61 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -2107,8 +2107,12 @@ static int smu_v13_0_6_i2c_xfer(struct i2c_adapter *i2c_adap, } mutex_lock(&adev->pm.mutex); r = smu_v13_0_6_request_i2c_xfer(smu, req); - if (r) - goto fail; + if (r) { + /* Retry once, in case of an i2c collision */ + r = smu_v13_0_6_request_i2c_xfer(smu, req); + if (r) + goto fail; + } for (c = i = 0; i < num_msgs; i++) { if (!(msg[i].flags & I2C_M_RD)) { From 3c75518cf27aa5a7e22e1f8f33339ded3779079b Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Wed, 11 Sep 2024 16:24:35 +0800 Subject: [PATCH 341/655] drm/amdgpu/mes12: switch SET_SHADER_DEBUGGER pkt to mes schq pipe The SET_SHADER_DEBUGGER packet must work with the added hardware queue, switch the packet submitting to mes schq pipe. Signed-off-by: Jack Xiao Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.11.x --- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index e499b2857a01..ef05a4116230 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -479,6 +479,11 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes, union MESAPI__MISC misc_pkt; int pipe; + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + memset(&misc_pkt, 0, sizeof(misc_pkt)); misc_pkt.header.type = MES_API_TYPE_SCHEDULER; @@ -513,6 +518,7 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes, misc_pkt.wait_reg_mem.reg_offset2 = input->wrm_reg.reg1; break; case MES_MISC_OP_SET_SHADER_DEBUGGER: + pipe = AMDGPU_MES_SCHED_PIPE; misc_pkt.opcode = MESAPI_MISC__SET_SHADER_DEBUGGER; misc_pkt.set_shader_debugger.process_context_addr = input->set_shader_debugger.process_context_addr; @@ -530,11 +536,6 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes, return -EINVAL; } - if (mes->adev->enable_uni_mes) - pipe = AMDGPU_MES_KIQ_PIPE; - else - pipe = AMDGPU_MES_SCHED_PIPE; - return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &misc_pkt, sizeof(misc_pkt), offsetof(union MESAPI__MISC, api_status)); From 03b5038c0ad069380fab7e251d2bf3f1540d20f4 Mon Sep 17 00:00:00 2001 From: David Belanger Date: Wed, 11 Sep 2024 11:16:50 -0400 Subject: [PATCH 342/655] drm/amdgpu: Fix selfring initialization sequence on soc24 Move enable_doorbell_selfring_aperture from common_hw_init to common_late_init in soc24, otherwise selfring aperture is initialized with an incorrect doorbell aperture base. Port changes from this commit from soc21 to soc24: commit 1c312e816c40 ("drm/amdgpu: Enable doorbell selfring after resize FB BAR") Signed-off-by: David Belanger Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.11.x --- drivers/gpu/drm/amd/amdgpu/soc24.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc24.c b/drivers/gpu/drm/amd/amdgpu/soc24.c index b0c3678cfb31..fd4c3d4f8387 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc24.c +++ b/drivers/gpu/drm/amd/amdgpu/soc24.c @@ -250,13 +250,6 @@ static void soc24_program_aspm(struct amdgpu_device *adev) adev->nbio.funcs->program_aspm(adev); } -static void soc24_enable_doorbell_aperture(struct amdgpu_device *adev, - bool enable) -{ - adev->nbio.funcs->enable_doorbell_aperture(adev, enable); - adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, enable); -} - const struct amdgpu_ip_block_version soc24_common_ip_block = { .type = AMD_IP_BLOCK_TYPE_COMMON, .major = 1, @@ -454,6 +447,11 @@ static int soc24_common_late_init(void *handle) if (amdgpu_sriov_vf(adev)) xgpu_nv_mailbox_get_irq(adev); + /* Enable selfring doorbell aperture late because doorbell BAR + * aperture will change if resize BAR successfully in gmc sw_init. + */ + adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, true); + return 0; } @@ -491,7 +489,7 @@ static int soc24_common_hw_init(void *handle) adev->df.funcs->hw_init(adev); /* enable the doorbell aperture */ - soc24_enable_doorbell_aperture(adev, true); + adev->nbio.funcs->enable_doorbell_aperture(adev, true); return 0; } @@ -500,8 +498,13 @@ static int soc24_common_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - /* disable the doorbell aperture */ - soc24_enable_doorbell_aperture(adev, false); + /* Disable the doorbell aperture and selfring doorbell aperture + * separately in hw_fini because soc21_enable_doorbell_aperture + * has been removed and there is no need to delay disabling + * selfring doorbell. + */ + adev->nbio.funcs->enable_doorbell_aperture(adev, false); + adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, false); if (amdgpu_sriov_vf(adev)) xgpu_nv_mailbox_put_irq(adev); From 797fb1533315571ff9e55e80154f48cd47f3dbe5 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 12 Sep 2024 13:08:12 -0400 Subject: [PATCH 343/655] drm/amdgpu/gfx9.4.3: set additional bits on MEC halt Need to set the pipe reset and cache invalidation bits on halt otherwise we can get stale state if the CP firmware changes (e.g., on module unload and reload). Tested-by: Amber Lin Reviewed-by: Amber Lin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index b940d2ad57db..611659404703 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -1701,7 +1701,15 @@ static void gfx_v9_4_3_xcc_cp_compute_enable(struct amdgpu_device *adev, WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MEC_CNTL, 0); } else { WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MEC_CNTL, - (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); + (CP_MEC_CNTL__MEC_INVALIDATE_ICACHE_MASK | + CP_MEC_CNTL__MEC_ME1_PIPE0_RESET_MASK | + CP_MEC_CNTL__MEC_ME1_PIPE1_RESET_MASK | + CP_MEC_CNTL__MEC_ME1_PIPE2_RESET_MASK | + CP_MEC_CNTL__MEC_ME1_PIPE3_RESET_MASK | + CP_MEC_CNTL__MEC_ME2_PIPE0_RESET_MASK | + CP_MEC_CNTL__MEC_ME2_PIPE1_RESET_MASK | + CP_MEC_CNTL__MEC_ME1_HALT_MASK | + CP_MEC_CNTL__MEC_ME2_HALT_MASK)); adev->gfx.kiq[xcc_id].ring.sched.ready = false; } udelay(50); From c1de938fb7e5edc4c71d33f73e9fc5c77feb02a0 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 12 Sep 2024 16:15:55 -0400 Subject: [PATCH 344/655] drm/amdgpu/gfx9.4.3: Explicitly halt MEC before init Need to make sure it's halted as we don't know what state the GPU may have been left in previously. Tested-by: Amber Lin Acked-by: Amber Lin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 611659404703..c100845409f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -2248,6 +2248,8 @@ static int gfx_v9_4_3_xcc_cp_resume(struct amdgpu_device *adev, int xcc_id) r = gfx_v9_4_3_xcc_cp_compute_load_microcode(adev, xcc_id); if (r) return r; + } else { + gfx_v9_4_3_xcc_cp_compute_enable(adev, false, xcc_id); } r = gfx_v9_4_3_xcc_kiq_resume(adev, xcc_id); From 7181faaa4703705939580abffaf9cb5d6b50dbb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 27 Aug 2024 16:12:11 +0200 Subject: [PATCH 345/655] drm/amdgpu: nuke the VM PD/PT shadow handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was only used as workaround for recovering the page tables after VRAM was lost and is no longer necessary after the function amdgpu_vm_bo_reset_state_machine() started to do the same. Compute never used shadows either, so the only proplematic case left is SVM and that is most likely not recoverable in any way when VRAM is lost. Signed-off-by: Christian König Acked-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 - drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 87 +-------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 67 +--------------- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 21 ----- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 17 ---- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 56 +------------ drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 19 +---- 7 files changed, 6 insertions(+), 265 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index dcd59040c449..9b1e0ede05a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1083,10 +1083,6 @@ struct amdgpu_device { struct amdgpu_virt virt; - /* link all shadow bo */ - struct list_head shadow_list; - struct mutex shadow_list_lock; - /* record hw reset is performed */ bool has_hw_reset; u8 reset_magic[AMDGPU_RESET_MAGIC_NUM]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index afb643e03d3c..c2394c8b4d6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4107,9 +4107,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, spin_lock_init(&adev->mm_stats.lock); spin_lock_init(&adev->wb.lock); - INIT_LIST_HEAD(&adev->shadow_list); - mutex_init(&adev->shadow_list_lock); - INIT_LIST_HEAD(&adev->reset_list); INIT_LIST_HEAD(&adev->ras_list); @@ -5029,80 +5026,6 @@ static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev) return 0; } -/** - * amdgpu_device_recover_vram - Recover some VRAM contents - * - * @adev: amdgpu_device pointer - * - * Restores the contents of VRAM buffers from the shadows in GTT. Used to - * restore things like GPUVM page tables after a GPU reset where - * the contents of VRAM might be lost. - * - * Returns: - * 0 on success, negative error code on failure. - */ -static int amdgpu_device_recover_vram(struct amdgpu_device *adev) -{ - struct dma_fence *fence = NULL, *next = NULL; - struct amdgpu_bo *shadow; - struct amdgpu_bo_vm *vmbo; - long r = 1, tmo; - - if (amdgpu_sriov_runtime(adev)) - tmo = msecs_to_jiffies(8000); - else - tmo = msecs_to_jiffies(100); - - dev_info(adev->dev, "recover vram bo from shadow start\n"); - mutex_lock(&adev->shadow_list_lock); - list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) { - /* If vm is compute context or adev is APU, shadow will be NULL */ - if (!vmbo->shadow) - continue; - shadow = vmbo->shadow; - - /* No need to recover an evicted BO */ - if (!shadow->tbo.resource || - shadow->tbo.resource->mem_type != TTM_PL_TT || - shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET || - shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM) - continue; - - r = amdgpu_bo_restore_shadow(shadow, &next); - if (r) - break; - - if (fence) { - tmo = dma_fence_wait_timeout(fence, false, tmo); - dma_fence_put(fence); - fence = next; - if (tmo == 0) { - r = -ETIMEDOUT; - break; - } else if (tmo < 0) { - r = tmo; - break; - } - } else { - fence = next; - } - } - mutex_unlock(&adev->shadow_list_lock); - - if (fence) - tmo = dma_fence_wait_timeout(fence, false, tmo); - dma_fence_put(fence); - - if (r < 0 || tmo <= 0) { - dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo); - return -EIO; - } - - dev_info(adev->dev, "recover vram bo from shadow done\n"); - return 0; -} - - /** * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf * @@ -5165,12 +5088,8 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, if (r) return r; - if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { + if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) amdgpu_inc_vram_lost(adev); - r = amdgpu_device_recover_vram(adev); - } - if (r) - return r; /* need to be called during full access so we can't do it later like * bare-metal does. @@ -5569,9 +5488,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, } } - if (!r) - r = amdgpu_device_recover_vram(tmp_adev); - else + if (r) tmp_adev->asic_reset_res = r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index e32161f6b67a..a987f671b1d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -77,24 +77,6 @@ static void amdgpu_bo_user_destroy(struct ttm_buffer_object *tbo) amdgpu_bo_destroy(tbo); } -static void amdgpu_bo_vm_destroy(struct ttm_buffer_object *tbo) -{ - struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); - struct amdgpu_bo *shadow_bo = ttm_to_amdgpu_bo(tbo), *bo; - struct amdgpu_bo_vm *vmbo; - - bo = shadow_bo->parent; - vmbo = to_amdgpu_bo_vm(bo); - /* in case amdgpu_device_recover_vram got NULL of bo->parent */ - if (!list_empty(&vmbo->shadow_list)) { - mutex_lock(&adev->shadow_list_lock); - list_del_init(&vmbo->shadow_list); - mutex_unlock(&adev->shadow_list_lock); - } - - amdgpu_bo_destroy(tbo); -} - /** * amdgpu_bo_is_amdgpu_bo - check if the buffer object is an &amdgpu_bo * @bo: buffer object to be checked @@ -108,8 +90,7 @@ static void amdgpu_bo_vm_destroy(struct ttm_buffer_object *tbo) bool amdgpu_bo_is_amdgpu_bo(struct ttm_buffer_object *bo) { if (bo->destroy == &amdgpu_bo_destroy || - bo->destroy == &amdgpu_bo_user_destroy || - bo->destroy == &amdgpu_bo_vm_destroy) + bo->destroy == &amdgpu_bo_user_destroy) return true; return false; @@ -722,52 +703,6 @@ int amdgpu_bo_create_vm(struct amdgpu_device *adev, return r; } -/** - * amdgpu_bo_add_to_shadow_list - add a BO to the shadow list - * - * @vmbo: BO that will be inserted into the shadow list - * - * Insert a BO to the shadow list. - */ -void amdgpu_bo_add_to_shadow_list(struct amdgpu_bo_vm *vmbo) -{ - struct amdgpu_device *adev = amdgpu_ttm_adev(vmbo->bo.tbo.bdev); - - mutex_lock(&adev->shadow_list_lock); - list_add_tail(&vmbo->shadow_list, &adev->shadow_list); - vmbo->shadow->parent = amdgpu_bo_ref(&vmbo->bo); - vmbo->shadow->tbo.destroy = &amdgpu_bo_vm_destroy; - mutex_unlock(&adev->shadow_list_lock); -} - -/** - * amdgpu_bo_restore_shadow - restore an &amdgpu_bo shadow - * - * @shadow: &amdgpu_bo shadow to be restored - * @fence: dma_fence associated with the operation - * - * Copies a buffer object's shadow content back to the object. - * This is used for recovering a buffer from its shadow in case of a gpu - * reset where vram context may be lost. - * - * Returns: - * 0 for success or a negative error code on failure. - */ -int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, struct dma_fence **fence) - -{ - struct amdgpu_device *adev = amdgpu_ttm_adev(shadow->tbo.bdev); - struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; - uint64_t shadow_addr, parent_addr; - - shadow_addr = amdgpu_bo_gpu_offset(shadow); - parent_addr = amdgpu_bo_gpu_offset(shadow->parent); - - return amdgpu_copy_buffer(ring, shadow_addr, parent_addr, - amdgpu_bo_size(shadow), NULL, fence, - true, false, 0); -} - /** * amdgpu_bo_kmap - map an &amdgpu_bo buffer object * @bo: &amdgpu_bo buffer object to be mapped diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index d7e27957013f..e42c34a3289d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -136,8 +136,6 @@ struct amdgpu_bo_user { struct amdgpu_bo_vm { struct amdgpu_bo bo; - struct amdgpu_bo *shadow; - struct list_head shadow_list; struct amdgpu_vm_bo_base entries[]; }; @@ -275,22 +273,6 @@ static inline bool amdgpu_bo_encrypted(struct amdgpu_bo *bo) return bo->flags & AMDGPU_GEM_CREATE_ENCRYPTED; } -/** - * amdgpu_bo_shadowed - check if the BO is shadowed - * - * @bo: BO to be tested. - * - * Returns: - * NULL if not shadowed or else return a BO pointer. - */ -static inline struct amdgpu_bo *amdgpu_bo_shadowed(struct amdgpu_bo *bo) -{ - if (bo->tbo.type == ttm_bo_type_kernel) - return to_amdgpu_bo_vm(bo)->shadow; - - return NULL; -} - bool amdgpu_bo_is_amdgpu_bo(struct ttm_buffer_object *bo); void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain); @@ -349,9 +331,6 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo); u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo); void amdgpu_bo_get_memory(struct amdgpu_bo *bo, struct amdgpu_mem_stats *stats); -void amdgpu_bo_add_to_shadow_list(struct amdgpu_bo_vm *vmbo); -int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, - struct dma_fence **fence); uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev, uint32_t domain); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 2452dfa6314f..985b89242b44 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -465,7 +465,6 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, { uint64_t new_vm_generation = amdgpu_vm_generation(adev, vm); struct amdgpu_vm_bo_base *bo_base; - struct amdgpu_bo *shadow; struct amdgpu_bo *bo; int r; @@ -486,16 +485,10 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, spin_unlock(&vm->status_lock); bo = bo_base->bo; - shadow = amdgpu_bo_shadowed(bo); r = validate(param, bo); if (r) return r; - if (shadow) { - r = validate(param, shadow); - if (r) - return r; - } if (bo->tbo.type != ttm_bo_type_kernel) { amdgpu_vm_bo_moved(bo_base); @@ -2149,10 +2142,6 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, { struct amdgpu_vm_bo_base *bo_base; - /* shadow bo doesn't have bo base, its validation needs its parent */ - if (bo->parent && (amdgpu_bo_shadowed(bo->parent) == bo)) - bo = bo->parent; - for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) { struct amdgpu_vm *vm = bo_base->vm; @@ -2482,7 +2471,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, root_bo = amdgpu_bo_ref(&root->bo); r = amdgpu_bo_reserve(root_bo, true); if (r) { - amdgpu_bo_unref(&root->shadow); amdgpu_bo_unref(&root_bo); goto error_free_delayed; } @@ -2575,11 +2563,6 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) vm->last_update = dma_fence_get_stub(); vm->is_compute_context = true; - /* Free the shadow bo for compute VM */ - amdgpu_bo_unref(&to_amdgpu_bo_vm(vm->root.bo)->shadow); - - goto unreserve_bo; - unreserve_bo: amdgpu_bo_unreserve(vm->root.bo); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index a076f43097e4..f78a0434a48f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -383,14 +383,6 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (r) return r; - if (vmbo->shadow) { - struct amdgpu_bo *shadow = vmbo->shadow; - - r = ttm_bo_validate(&shadow->tbo, &shadow->placement, &ctx); - if (r) - return r; - } - if (!drm_dev_enter(adev_to_drm(adev), &idx)) return -ENODEV; @@ -448,10 +440,7 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id) { struct amdgpu_bo_param bp; - struct amdgpu_bo *bo; - struct dma_resv *resv; unsigned int num_entries; - int r; memset(&bp, 0, sizeof(bp)); @@ -484,42 +473,7 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (vm->root.bo) bp.resv = vm->root.bo->tbo.base.resv; - r = amdgpu_bo_create_vm(adev, &bp, vmbo); - if (r) - return r; - - bo = &(*vmbo)->bo; - if (vm->is_compute_context || (adev->flags & AMD_IS_APU)) { - (*vmbo)->shadow = NULL; - return 0; - } - - if (!bp.resv) - WARN_ON(dma_resv_lock(bo->tbo.base.resv, - NULL)); - resv = bp.resv; - memset(&bp, 0, sizeof(bp)); - bp.size = amdgpu_vm_pt_size(adev, level); - bp.domain = AMDGPU_GEM_DOMAIN_GTT; - bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC; - bp.type = ttm_bo_type_kernel; - bp.resv = bo->tbo.base.resv; - bp.bo_ptr_size = sizeof(struct amdgpu_bo); - bp.xcp_id_plus1 = xcp_id + 1; - - r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow); - - if (!resv) - dma_resv_unlock(bo->tbo.base.resv); - - if (r) { - amdgpu_bo_unref(&bo); - return r; - } - - amdgpu_bo_add_to_shadow_list(*vmbo); - - return 0; + return amdgpu_bo_create_vm(adev, &bp, vmbo); } /** @@ -569,7 +523,6 @@ static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev, return 0; error_free_pt: - amdgpu_bo_unref(&pt->shadow); amdgpu_bo_unref(&pt_bo); return r; } @@ -581,17 +534,10 @@ static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev, */ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry) { - struct amdgpu_bo *shadow; - if (!entry->bo) return; entry->bo->vm_bo = NULL; - shadow = amdgpu_bo_shadowed(entry->bo); - if (shadow) { - ttm_bo_set_bulk_move(&shadow->tbo, NULL); - amdgpu_bo_unref(&shadow); - } ttm_bo_set_bulk_move(&entry->bo->tbo, NULL); spin_lock(&entry->vm->status_lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index 4772fba33285..46d9fb433ab2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -35,16 +35,7 @@ */ static int amdgpu_vm_sdma_map_table(struct amdgpu_bo_vm *table) { - int r; - - r = amdgpu_ttm_alloc_gart(&table->bo.tbo); - if (r) - return r; - - if (table->shadow) - r = amdgpu_ttm_alloc_gart(&table->shadow->tbo); - - return r; + return amdgpu_ttm_alloc_gart(&table->bo.tbo); } /* Allocate a new job for @count PTE updates */ @@ -265,17 +256,13 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, if (!p->pages_addr) { /* set page commands needed */ - if (vmbo->shadow) - amdgpu_vm_sdma_set_ptes(p, vmbo->shadow, pe, addr, - count, incr, flags); amdgpu_vm_sdma_set_ptes(p, bo, pe, addr, count, incr, flags); return 0; } /* copy commands needed */ - ndw -= p->adev->vm_manager.vm_pte_funcs->copy_pte_num_dw * - (vmbo->shadow ? 2 : 1); + ndw -= p->adev->vm_manager.vm_pte_funcs->copy_pte_num_dw; /* for padding */ ndw -= 7; @@ -290,8 +277,6 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, pte[i] |= flags; } - if (vmbo->shadow) - amdgpu_vm_sdma_copy_ptes(p, vmbo->shadow, pe, nptes); amdgpu_vm_sdma_copy_ptes(p, bo, pe, nptes); pe += nptes * 8; From 151b1813919d4ab932e69ca4032761ee0789b04c Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Thu, 18 Jul 2024 18:09:17 +0800 Subject: [PATCH 346/655] drm/amd/pm: Update SMUv13.0.6 PMFW headers Update PMFW interface headers for updated metrics table with gfx activity per xcd Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h index 0b3c2f54a343..822c6425d90e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h @@ -123,7 +123,7 @@ typedef enum { VOLTAGE_GUARDBAND_COUNT } GFX_GUARDBAND_e; -#define SMU_METRICS_TABLE_VERSION 0xC +#define SMU_METRICS_TABLE_VERSION 0xD typedef struct __attribute__((packed, aligned(4))) { uint32_t AccumulationCounter; @@ -227,6 +227,10 @@ typedef struct __attribute__((packed, aligned(4))) { // PCIE LINK Speed and width uint32_t PCIeLinkSpeed; uint32_t PCIeLinkWidth; + + // PER XCD ACTIVITY + uint32_t GfxBusy[8]; + uint64_t GfxBusyAcc[8]; } MetricsTableX_t; typedef struct __attribute__((packed, aligned(4))) { From 2ae6cd583c4c86c2b7e879b07effb8ffb10756bc Mon Sep 17 00:00:00 2001 From: Le Ma Date: Tue, 10 Sep 2024 17:53:42 +0800 Subject: [PATCH 347/655] drm/amdgpu: add psp funcs callback to check if aux fw is needed Query pmfw version to determine if aux sos fw needs to be loaded in psp v13.0. v2: refine callback to check if aux_fw loading is needed instead of getting pmfw version barely v3: return the comparison directly Signed-off-by: Le Ma Reviewed-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 4 ++++ drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 17 +++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 74a96516c913..e8abbbcb4326 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -138,6 +138,7 @@ struct psp_funcs { int (*vbflash_stat)(struct psp_context *psp); int (*fatal_error_recovery_quirk)(struct psp_context *psp); bool (*get_ras_capability)(struct psp_context *psp); + bool (*is_aux_sos_load_required)(struct psp_context *psp); }; struct ta_funcs { @@ -464,6 +465,9 @@ struct amdgpu_psp_funcs { ((psp)->funcs->fatal_error_recovery_quirk ? \ (psp)->funcs->fatal_error_recovery_quirk((psp)) : 0) +#define psp_is_aux_sos_load_required(psp) \ + ((psp)->funcs->is_aux_sos_load_required ? (psp)->funcs->is_aux_sos_load_required((psp)) : 0) + extern const struct amd_ip_funcs psp_ip_funcs; extern const struct amdgpu_ip_block_version psp_v3_1_ip_block; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 1251ee38a676..51e470e8d67d 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -81,6 +81,8 @@ MODULE_FIRMWARE("amdgpu/psp_14_0_4_ta.bin"); /* memory training timeout define */ #define MEM_TRAIN_SEND_MSG_TIMEOUT_US 3000000 +#define regMP1_PUB_SCRATCH0 0x3b10090 + static int psp_v13_0_init_microcode(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; @@ -807,6 +809,20 @@ static bool psp_v13_0_get_ras_capability(struct psp_context *psp) } } +static bool psp_v13_0_is_aux_sos_load_required(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + u32 pmfw_ver; + + if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6)) + return false; + + /* load 4e version of sos if pmfw version less than 85.115.0 */ + pmfw_ver = RREG32(regMP1_PUB_SCRATCH0 / 4); + + return (pmfw_ver < 0x557300); +} + static const struct psp_funcs psp_v13_0_funcs = { .init_microcode = psp_v13_0_init_microcode, .wait_for_bootloader = psp_v13_0_wait_for_bootloader_steady_state, @@ -830,6 +846,7 @@ static const struct psp_funcs psp_v13_0_funcs = { .vbflash_stat = psp_v13_0_vbflash_status, .fatal_error_recovery_quirk = psp_v13_0_fatal_error_recovery_quirk, .get_ras_capability = psp_v13_0_get_ras_capability, + .is_aux_sos_load_required = psp_v13_0_is_aux_sos_load_required, }; void psp_v13_0_set_psp_funcs(struct psp_context *psp) From 2778701b165eda674756537054d460fb4b0cf2e2 Mon Sep 17 00:00:00 2001 From: Le Ma Date: Tue, 10 Sep 2024 20:10:45 +0800 Subject: [PATCH 348/655] drm/amdgpu: load sos binary properly on the basis of pmfw version To be compatible with legacy IFWI, driver needs to carry legacy tOS and query pmfw version to load them accordingly. Add psp_firmware_header_v2_1 to handle the combined sos binary. Double the sos count limit for the case of aux sos fw packed. v2: pass the correct fw_bin_desc to parse_sos_bin_descriptor Signed-off-by: Le Ma Reviewed-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 29 ++++++++++++++++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | 11 ++++++++- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index e9e599ff3bd4..0b28b2cf1517 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -3425,9 +3425,11 @@ int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name) const struct psp_firmware_header_v1_2 *sos_hdr_v1_2; const struct psp_firmware_header_v1_3 *sos_hdr_v1_3; const struct psp_firmware_header_v2_0 *sos_hdr_v2_0; - int err = 0; + const struct psp_firmware_header_v2_1 *sos_hdr_v2_1; + int fw_index, fw_bin_count, start_index = 0; + const struct psp_fw_bin_desc *fw_bin; uint8_t *ucode_array_start_addr; - int fw_index = 0; + int err = 0; err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, "amdgpu/%s_sos.bin", chip_name); if (err) @@ -3478,15 +3480,30 @@ int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name) case 2: sos_hdr_v2_0 = (const struct psp_firmware_header_v2_0 *)adev->psp.sos_fw->data; - if (le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count) >= UCODE_MAX_PSP_PACKAGING) { + fw_bin_count = le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count); + + if (fw_bin_count >= UCODE_MAX_PSP_PACKAGING) { dev_err(adev->dev, "packed SOS count exceeds maximum limit\n"); err = -EINVAL; goto out; } - for (fw_index = 0; fw_index < le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count); fw_index++) { - err = parse_sos_bin_descriptor(psp, - &sos_hdr_v2_0->psp_fw_bin[fw_index], + if (sos_hdr_v2_0->header.header_version_minor == 1) { + sos_hdr_v2_1 = (const struct psp_firmware_header_v2_1 *)adev->psp.sos_fw->data; + + fw_bin = sos_hdr_v2_1->psp_fw_bin; + + if (psp_is_aux_sos_load_required(psp)) + start_index = le32_to_cpu(sos_hdr_v2_1->psp_aux_fw_bin_index); + else + fw_bin_count -= le32_to_cpu(sos_hdr_v2_1->psp_aux_fw_bin_index); + + } else { + fw_bin = sos_hdr_v2_0->psp_fw_bin; + } + + for (fw_index = start_index; fw_index < fw_bin_count; fw_index++) { + err = parse_sos_bin_descriptor(psp, fw_bin + fw_index, sos_hdr_v2_0); if (err) goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 5bc37acd3981..4e23419b92d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -136,6 +136,14 @@ struct psp_firmware_header_v2_0 { struct psp_fw_bin_desc psp_fw_bin[]; }; +/* version_major=2, version_minor=1 */ +struct psp_firmware_header_v2_1 { + struct common_firmware_header header; + uint32_t psp_fw_bin_count; + uint32_t psp_aux_fw_bin_index; + struct psp_fw_bin_desc psp_fw_bin[]; +}; + /* version_major=1, version_minor=0 */ struct ta_firmware_header_v1_0 { struct common_firmware_header header; @@ -426,6 +434,7 @@ union amdgpu_firmware_header { struct psp_firmware_header_v1_1 psp_v1_1; struct psp_firmware_header_v1_3 psp_v1_3; struct psp_firmware_header_v2_0 psp_v2_0; + struct psp_firmware_header_v2_0 psp_v2_1; struct ta_firmware_header_v1_0 ta; struct ta_firmware_header_v2_0 ta_v2_0; struct gfx_firmware_header_v1_0 gfx; @@ -447,7 +456,7 @@ union amdgpu_firmware_header { uint8_t raw[0x100]; }; -#define UCODE_MAX_PSP_PACKAGING ((sizeof(union amdgpu_firmware_header) - sizeof(struct common_firmware_header) - 4) / sizeof(struct psp_fw_bin_desc)) +#define UCODE_MAX_PSP_PACKAGING (((sizeof(union amdgpu_firmware_header) - sizeof(struct common_firmware_header) - 4) / sizeof(struct psp_fw_bin_desc)) * 2) /* * fw loading support From c03fca619fc687338a3b6511fdbed94096abdf79 Mon Sep 17 00:00:00 2001 From: Robin Chen Date: Fri, 23 Aug 2024 15:00:28 +0800 Subject: [PATCH 349/655] drm/amd/display: Round calculated vtotal [WHY] The calculated vtotal may has 1 line deviation. To get precisely vtotal number, round the vtotal result. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Anthony Koo Signed-off-by: Robin Chen Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/modules/freesync/freesync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index a40e6590215a..bbd259cea4f4 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -134,7 +134,7 @@ unsigned int mod_freesync_calc_v_total_from_refresh( v_total = div64_u64(div64_u64(((unsigned long long)( frame_duration_in_ns) * (stream->timing.pix_clk_100hz / 10)), - stream->timing.h_total), 1000000); + stream->timing.h_total) + 500000, 1000000); /* v_total cannot be less than nominal */ if (v_total < stream->timing.v_total) { From c2ed7002c0614c5eab6c8f62a7a76be5df5805cf Mon Sep 17 00:00:00 2001 From: Samson Tam Date: Fri, 23 Aug 2024 16:57:33 -0400 Subject: [PATCH 350/655] drm/amd/display: Use SDR white level to calculate matrix coefficients [WHY] Certain profiles have higher HDR multiplier than SDR white level max which is not currently supported. [HOW] Use SDR white level when calculating matrix coefficients for HDR RGB MPO path instead of HDR multiplier. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Jun Lei Signed-off-by: Samson Tam Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 12 ++++++++++++ drivers/gpu/drm/amd/display/dc/dc.h | 3 +++ .../gpu/drm/amd/display/dc/dc_spl_translate.c | 9 +-------- drivers/gpu/drm/amd/display/dc/spl/dc_spl.c | 17 +++++++++++------ .../gpu/drm/amd/display/dc/spl/dc_spl_types.h | 2 +- 5 files changed, 28 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index ae788154896c..243928b0a39f 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2596,6 +2596,12 @@ static enum surface_update_type det_surface_update(const struct dc *dc, elevate_update_type(&overall_type, UPDATE_TYPE_MED); } + if (u->sdr_white_level_nits) + if (u->sdr_white_level_nits != u->surface->sdr_white_level_nits) { + update_flags->bits.sdr_white_level_nits = 1; + elevate_update_type(&overall_type, UPDATE_TYPE_FULL); + } + if (u->cm2_params) { if ((u->cm2_params->component_settings.shaper_3dlut_setting != u->surface->mcm_shaper_3dlut_setting) @@ -2876,6 +2882,10 @@ static void copy_surface_update_to_plane( surface->hdr_mult = srf_update->hdr_mult; + if (srf_update->sdr_white_level_nits) + surface->sdr_white_level_nits = + srf_update->sdr_white_level_nits; + if (srf_update->blend_tf) memcpy(&surface->blend_tf, srf_update->blend_tf, sizeof(surface->blend_tf)); @@ -4679,6 +4689,8 @@ static bool full_update_required(struct dc *dc, srf_updates[i].scaling_info || (srf_updates[i].hdr_mult.value && srf_updates[i].hdr_mult.value != srf_updates->surface->hdr_mult.value) || + (srf_updates[i].sdr_white_level_nits && + srf_updates[i].sdr_white_level_nits != srf_updates->surface->sdr_white_level_nits) || srf_updates[i].in_transfer_func || srf_updates[i].func_shaper || srf_updates[i].lut3d_func || diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 4c94dd38be4b..dcf8a90e961d 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1269,6 +1269,7 @@ union surface_update_flags { uint32_t tmz_changed:1; uint32_t mcm_transfer_function_enable_change:1; /* disable or enable MCM transfer func */ uint32_t full_update:1; + uint32_t sdr_white_level_nits:1; } bits; uint32_t raw; @@ -1351,6 +1352,7 @@ struct dc_plane_state { bool adaptive_sharpness_en; int sharpness_level; enum linear_light_scaling linear_light_scaling; + unsigned int sdr_white_level_nits; }; struct dc_plane_info { @@ -1508,6 +1510,7 @@ struct dc_surface_update { */ struct dc_cm2_parameters *cm2_params; const struct dc_csc_transform *cursor_csc_color_matrix; + unsigned int sdr_white_level_nits; }; /* diff --git a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c index cd6de93eb91c..f711fc2e3e65 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c +++ b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c @@ -191,14 +191,7 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl */ spl_in->is_fullscreen = dm_helpers_is_fullscreen(pipe_ctx->stream->ctx, pipe_ctx->stream); spl_in->is_hdr_on = dm_helpers_is_hdr_on(pipe_ctx->stream->ctx, pipe_ctx->stream); - spl_in->hdr_multx100 = 0; - if (spl_in->is_hdr_on) { - spl_in->hdr_multx100 = (uint32_t)dc_fixpt_floor(dc_fixpt_mul(plane_state->hdr_mult, - dc_fixpt_from_int(100))); - /* Disable sharpness for HDR Mult > 6.0 */ - if (spl_in->hdr_multx100 > 600) - spl_in->adaptive_sharpness.enable = false; - } + spl_in->sdr_white_level_nits = plane_state->sdr_white_level_nits; } /// @brief Translate SPL output parameters to pipe context diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c index 15f7eda903e6..a59aa6b59687 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c @@ -1155,14 +1155,19 @@ static void spl_set_dscl_prog_data(struct spl_in *spl_in, struct spl_scratch *sp } /* Calculate C0-C3 coefficients based on HDR_mult */ -static void spl_calculate_c0_c3_hdr(struct dscl_prog_data *dscl_prog_data, uint32_t hdr_multx100) +static void spl_calculate_c0_c3_hdr(struct dscl_prog_data *dscl_prog_data, uint32_t sdr_white_level_nits) { struct spl_fixed31_32 hdr_mult, c0_mult, c1_mult, c2_mult; struct spl_fixed31_32 c0_calc, c1_calc, c2_calc; struct spl_custom_float_format fmt; + uint32_t hdr_multx100_int; - SPL_ASSERT(hdr_multx100); - hdr_mult = spl_fixpt_from_fraction((long long)hdr_multx100, 100LL); + if ((sdr_white_level_nits >= 80) && (sdr_white_level_nits <= 480)) + hdr_multx100_int = sdr_white_level_nits * 100 / 80; + else + hdr_multx100_int = 100; /* default for 80 nits otherwise */ + + hdr_mult = spl_fixpt_from_fraction((long long)hdr_multx100_int, 100LL); c0_mult = spl_fixpt_from_fraction(2126LL, 10000LL); c1_mult = spl_fixpt_from_fraction(7152LL, 10000LL); c2_mult = spl_fixpt_from_fraction(722LL, 10000LL); @@ -1191,7 +1196,7 @@ static void spl_calculate_c0_c3_hdr(struct dscl_prog_data *dscl_prog_data, uint3 static void spl_set_easf_data(struct spl_scratch *spl_scratch, struct spl_out *spl_out, bool enable_easf_v, bool enable_easf_h, enum linear_light_scaling lls_pref, enum spl_pixel_format format, enum system_setup setup, - uint32_t hdr_multx100) + uint32_t sdr_white_level_nits) { struct dscl_prog_data *dscl_prog_data = spl_out->dscl_prog_data; if (enable_easf_v) { @@ -1499,7 +1504,7 @@ static void spl_set_easf_data(struct spl_scratch *spl_scratch, struct spl_out *s dscl_prog_data->easf_ltonl_en = 1; // Linear input if ((setup == HDR_L) && (spl_is_rgb8(format))) { /* Calculate C0-C3 coefficients based on HDR multiplier */ - spl_calculate_c0_c3_hdr(dscl_prog_data, hdr_multx100); + spl_calculate_c0_c3_hdr(dscl_prog_data, sdr_white_level_nits); } else { // HDR_L ( DWM ) and SDR_L dscl_prog_data->easf_matrix_c0 = 0x4EF7; // fp1.5.10, C0 coefficient (LN_rec709: 0.2126 * (2^14)/125 = 27.86590720) @@ -1750,7 +1755,7 @@ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out) // Set EASF spl_set_easf_data(&spl_scratch, spl_out, enable_easf_v, enable_easf_h, spl_in->lls_pref, - spl_in->basic_in.format, setup, spl_in->hdr_multx100); + spl_in->basic_in.format, setup, spl_in->sdr_white_level_nits); // Set iSHARP vratio = spl_fixpt_ceil(spl_scratch.scl_data.ratios.vert); diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h index 85b19ebe2c57..74f2a8c42f4f 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h @@ -518,7 +518,7 @@ struct spl_in { bool is_hdr_on; int h_active; int v_active; - int hdr_multx100; + int sdr_white_level_nits; }; // end of SPL inputs From f588da30a20cf184f150420e4098b694908a4207 Mon Sep 17 00:00:00 2001 From: Ryan Seto Date: Mon, 19 Aug 2024 17:06:56 -0400 Subject: [PATCH 351/655] drm/amd/display: Implement new DPCD register handling [WHY] There are some monitor timings that seem to be supported without DSC but actually require DSC to be displayed. A VESA SCR introduced a new max uncompressed pixel rate cap register that we can use to handle these edge cases. [HOW] SST: Read caps from link and invalidate timings that exceed the max limit but do not support DSC. Then check for options override when determining BPP. MST: Read caps from virtual DPCD peer device or daisy chained SST monitor and set validation set BPPs to max if pixel rate exceeds uncompressed limit. Validation set optimization continues as normal. Reviewed-by: Wenjing Liu Signed-off-by: Ryan Seto Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_dp_types.h | 12 ++++++++++++ drivers/gpu/drm/amd/display/dc/dc_dsc.h | 1 + drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c | 10 +++++----- .../gpu/drm/amd/display/dc/link/link_validation.c | 7 +++++++ .../display/dc/link/protocols/link_dp_capability.c | 5 +++++ 5 files changed, 30 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h index 519c3df78ee5..41bd95e9177a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h @@ -969,6 +969,14 @@ union dp_sink_video_fallback_formats { uint8_t raw; }; +union dpcd_max_uncompressed_pixel_rate_cap { + struct { + uint16_t max_uncompressed_pixel_rate_cap :15; + uint16_t valid :1; + } bits; + uint8_t raw[2]; +}; + union dp_fec_capability1 { struct { uint8_t AGGREGATED_ERROR_COUNTERS_CAPABLE :1; @@ -1170,6 +1178,7 @@ struct dpcd_caps { struct dc_lttpr_caps lttpr_caps; struct adaptive_sync_caps adaptive_sync_caps; struct dpcd_usb4_dp_tunneling_info usb4_dp_tun_info; + union dpcd_max_uncompressed_pixel_rate_cap max_uncompressed_pixel_rate_cap; union dp_128b_132b_supported_link_rates dp_128b_132b_supported_link_rates; union dp_main_line_channel_coding_cap channel_coding_cap; @@ -1340,6 +1349,9 @@ struct dp_trace { #ifndef DP_CABLE_ATTRIBUTES_UPDATED_BY_DPTX #define DP_CABLE_ATTRIBUTES_UPDATED_BY_DPTX 0x110 #endif +#ifndef DPCD_MAX_UNCOMPRESSED_PIXEL_RATE_CAP +#define DPCD_MAX_UNCOMPRESSED_PIXEL_RATE_CAP 0x221c +#endif #ifndef DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE #define DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE 0x50 #endif diff --git a/drivers/gpu/drm/amd/display/dc/dc_dsc.h b/drivers/gpu/drm/amd/display/dc/dc_dsc.h index fe3078b8789e..2a5120ecf48b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dsc.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dsc.h @@ -59,6 +59,7 @@ struct dc_dsc_config_options { uint32_t max_target_bpp_limit_override_x16; uint32_t slice_height_granularity; uint32_t dsc_force_odm_hslice_override; + bool force_dsc_when_not_needed; }; bool dc_dsc_parse_dsc_dpcd(const struct dc *dc, diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c index a1727e5bf024..79c426425911 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c @@ -668,6 +668,7 @@ static bool decide_dsc_bandwidth_range( */ static bool decide_dsc_target_bpp_x16( const struct dc_dsc_policy *policy, + const struct dc_dsc_config_options *options, const struct dsc_enc_caps *dsc_common_caps, const int target_bandwidth_kbps, const struct dc_crtc_timing *timing, @@ -682,7 +683,7 @@ static bool decide_dsc_target_bpp_x16( if (decide_dsc_bandwidth_range(policy->min_target_bpp * 16, policy->max_target_bpp * 16, num_slices_h, dsc_common_caps, timing, link_encoding, &range)) { if (target_bandwidth_kbps >= range.stream_kbps) { - if (policy->enable_dsc_when_not_needed) + if (policy->enable_dsc_when_not_needed || options->force_dsc_when_not_needed) /* enable max bpp even dsc is not needed */ *target_bpp_x16 = range.max_target_bpp_x16; } else if (target_bandwidth_kbps >= range.max_kbps) { @@ -1080,6 +1081,7 @@ static bool setup_dsc_config( if (target_bandwidth_kbps > 0) { is_dsc_possible = decide_dsc_target_bpp_x16( &policy, + options, &dsc_common_caps, target_bandwidth_kbps, timing, @@ -1235,10 +1237,7 @@ void dc_dsc_get_policy_for_timing(const struct dc_crtc_timing *timing, policy->max_target_bpp = max_target_bpp_limit_override_x16 / 16; /* enable DSC when not needed, default false */ - if (dsc_policy_enable_dsc_when_not_needed) - policy->enable_dsc_when_not_needed = dsc_policy_enable_dsc_when_not_needed; - else - policy->enable_dsc_when_not_needed = false; + policy->enable_dsc_when_not_needed = dsc_policy_enable_dsc_when_not_needed; } void dc_dsc_policy_set_max_target_bpp_limit(uint32_t limit) @@ -1267,4 +1266,5 @@ void dc_dsc_get_default_config_option(const struct dc *dc, struct dc_dsc_config_ options->dsc_force_odm_hslice_override = dc->debug.force_odm_combine; options->max_target_bpp_limit_override_x16 = 0; options->slice_height_granularity = 1; + options->force_dsc_when_not_needed = false; } diff --git a/drivers/gpu/drm/amd/display/dc/link/link_validation.c b/drivers/gpu/drm/amd/display/dc/link/link_validation.c index 1aed55b0ab6a..60f15a9ba7a5 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_validation.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_validation.c @@ -287,6 +287,13 @@ static bool dp_validate_mode_timing( req_bw = dc_bandwidth_in_kbps_from_timing(timing, dc_link_get_highest_encoding_format(link)); max_bw = dp_link_bandwidth_kbps(link, link_setting); + bool is_max_uncompressed_pixel_rate_exceeded = link->dpcd_caps.max_uncompressed_pixel_rate_cap.bits.valid && + timing->pix_clk_100hz > link->dpcd_caps.max_uncompressed_pixel_rate_cap.bits.max_uncompressed_pixel_rate_cap * 10000; + + if (is_max_uncompressed_pixel_rate_exceeded && !timing->flags.DSC) { + return false; + } + if (req_bw <= max_bw) { /* remember the biggest mode here, during * initial link training (to get diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index 34a618a7278b..d78c8ec4de79 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -1942,6 +1942,11 @@ static bool retrieve_link_cap(struct dc_link *link) DC_LOG_DP2("\tFEC aggregated error counters are supported"); } + core_link_read_dpcd(link, + DPCD_MAX_UNCOMPRESSED_PIXEL_RATE_CAP, + link->dpcd_caps.max_uncompressed_pixel_rate_cap.raw, + sizeof(link->dpcd_caps.max_uncompressed_pixel_rate_cap.raw)); + retrieve_cable_id(link); dpcd_write_cable_id_to_dprx(link); From d18a56b247f4f3f7dbdd3adeeebd05c23f1e3d3e Mon Sep 17 00:00:00 2001 From: Daniel Sa Date: Fri, 23 Aug 2024 11:29:23 -0400 Subject: [PATCH 352/655] drm/amd/display: Emulate Display Hotplug Hang [WHY] Driver reports 0 display when the virtual display is still present, and causes P-state hang in FW. [HOW] When enumerating through streams, check for active planes and use that to indicate number of displays. Reviewed-by: Dillon Varone Signed-off-by: Daniel Sa Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c index f770828df149..0e243f4344d0 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c @@ -59,6 +59,7 @@ int clk_mgr_helper_get_active_display_cnt( display_count = 0; for (i = 0; i < context->stream_count; i++) { const struct dc_stream_state *stream = context->streams[i]; + const struct dc_stream_status *stream_status = &context->stream_status[i]; /* Don't count SubVP phantom pipes as part of active * display count @@ -66,13 +67,7 @@ int clk_mgr_helper_get_active_display_cnt( if (dc_state_get_stream_subvp_type(context, stream) == SUBVP_PHANTOM) continue; - /* - * Only notify active stream or virtual stream. - * Need to notify virtual stream to work around - * headless case. HPD does not fire when system is in - * S0i2. - */ - if (!stream->dpms_off || stream->signal == SIGNAL_TYPE_VIRTUAL) + if (!stream->dpms_off || (stream_status && stream_status->plane_count)) display_count++; } From 5a3d3e11349c2e298c0b6b4d37c8241f44d37e3d Mon Sep 17 00:00:00 2001 From: Roman Li Date: Wed, 21 Aug 2024 10:53:15 -0400 Subject: [PATCH 353/655] drm/amd/display: Add dmub hpd sense callback [WHY] HPD sense notification has been implemented in DMUB, which can occur during low power states and need to be notified from firmware to driver. [HOW] Define callback and register new HPD sense notification. Reviewed-by: Nicholas Kazlauskas Signed-off-by: Roman Li Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 20 +++++++++++++++++++ .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 2 +- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 0cff66735cfe..4ab329e00714 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -807,6 +807,20 @@ static void dmub_hpd_callback(struct amdgpu_device *adev, } } +/** + * dmub_hpd_sense_callback - DMUB HPD sense processing callback. + * @adev: amdgpu_device pointer + * @notify: dmub notification structure + * + * HPD sense changes can occur during low power states and need to be + * notified from firmware to driver. + */ +static void dmub_hpd_sense_callback(struct amdgpu_device *adev, + struct dmub_notification *notify) +{ + DRM_DEBUG_DRIVER("DMUB HPD SENSE callback.\n"); +} + /** * register_dmub_notify_callback - Sets callback for DMUB notify * @adev: amdgpu_device pointer @@ -3808,6 +3822,12 @@ static int register_hpd_handlers(struct amdgpu_device *adev) DRM_ERROR("amdgpu: fail to register dmub hpd callback"); return -EINVAL; } + + if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_SENSE_NOTIFY, + dmub_hpd_sense_callback, true)) { + DRM_ERROR("amdgpu: fail to register dmub hpd sense callback"); + return -EINVAL; + } } list_for_each_entry(connector, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 2d7755e2b6c3..15d4690c74d6 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -50,7 +50,7 @@ #define AMDGPU_DM_MAX_NUM_EDP 2 -#define AMDGPU_DMUB_NOTIFICATION_MAX 6 +#define AMDGPU_DMUB_NOTIFICATION_MAX 7 #define HDMI_AMD_VENDOR_SPECIFIC_DATA_BLOCK_IEEE_REGISTRATION_ID 0x00001A #define AMD_VSDB_VERSION_3_FEATURECAP_REPLAYMODE 0x40 From f57b77d667dc6bd2b114d08d04b03869539209f6 Mon Sep 17 00:00:00 2001 From: Yihan Zhu Date: Mon, 26 Aug 2024 14:44:04 -0400 Subject: [PATCH 354/655] drm/amd/display: Enable DML2 override_det_buffer_size_kbytes [WHY] Corrupted screen will be observed when 4k144 DP/HDMI display and 4k144 eDP are connected, changing eDP refresh rate from 60Hz to 144Hz. [HOW] override_det_buffer_size_kbytes should be true for DCN35/DCN351. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Roman Li Reviewed-by: Nicholas Kazlauskas Signed-off-by: Yihan Zhu Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c | 1 + drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index 46ad684fe192..893a9d9ee870 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -2155,6 +2155,7 @@ static bool dcn35_resource_construct( dc->dml2_options.max_segments_per_hubp = 24; dc->dml2_options.det_segment_size = DCN3_2_DET_SEG_SIZE;/*todo*/ + dc->dml2_options.override_det_buffer_size_kbytes = true; if (dc->config.sdpif_request_limit_words_per_umc == 0) dc->config.sdpif_request_limit_words_per_umc = 16;/*todo*/ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c index 4c5e722baa3a..514c6d56925d 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c @@ -2133,6 +2133,7 @@ static bool dcn351_resource_construct( dc->dml2_options.max_segments_per_hubp = 24; dc->dml2_options.det_segment_size = DCN3_2_DET_SEG_SIZE;/*todo*/ + dc->dml2_options.override_det_buffer_size_kbytes = true; if (dc->config.sdpif_request_limit_words_per_umc == 0) dc->config.sdpif_request_limit_words_per_umc = 16;/*todo*/ From 0765b2afc1118a6ab5fee624e206c782d70db28a Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Mon, 26 Aug 2024 17:08:33 -0400 Subject: [PATCH 355/655] drm/amd/display: Block timing sync for different output formats in pmo [WHY & HOW] If the output format is different for HDMI TMDS signals, they are not synchronizable. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Alvin Lee Signed-off-by: Dillon Varone Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c index d63558ee3135..1cf9015e854a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c @@ -940,9 +940,11 @@ static void build_synchronized_timing_groups( /* find synchronizable timing groups */ for (j = i + 1; j < display_config->display_config.num_streams; j++) { if (memcmp(master_timing, - &display_config->display_config.stream_descriptors[j].timing, - sizeof(struct dml2_timing_cfg)) == 0 && - display_config->display_config.stream_descriptors[i].output.output_encoder == display_config->display_config.stream_descriptors[j].output.output_encoder) { + &display_config->display_config.stream_descriptors[j].timing, + sizeof(struct dml2_timing_cfg)) == 0 && + display_config->display_config.stream_descriptors[i].output.output_encoder == display_config->display_config.stream_descriptors[j].output.output_encoder && + (display_config->display_config.stream_descriptors[i].output.output_encoder != dml2_hdmi || //hdmi requires formats match + display_config->display_config.stream_descriptors[i].output.output_format == display_config->display_config.stream_descriptors[j].output.output_format)) { set_bit_in_bitfield(&pmo->scratch.pmo_dcn4.synchronized_timing_group_masks[timing_group_idx], j); set_bit_in_bitfield(&stream_mapped_mask, j); } From 09cb922c4e14e6531979bff4e6bb3babcd9cb188 Mon Sep 17 00:00:00 2001 From: Samson Tam Date: Tue, 27 Aug 2024 11:53:10 -0400 Subject: [PATCH 356/655] drm/amd/display: Add debug options to change sharpen policies [WHY] Add options to change sharpen policy based on surface format and scaling ratios. [HOW] Add sharpen_policy to change policy based on surface format and scale_to_sharpness_policy based on scaling ratios. Reviewed-by: Jun Lei Signed-off-by: Samson Tam Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 + .../gpu/drm/amd/display/dc/dc_spl_translate.c | 5 ++ drivers/gpu/drm/amd/display/dc/spl/dc_spl.c | 34 ++++++-- .../display/dc/spl/dc_spl_isharp_filters.c | 85 +++++++++++++++++-- .../display/dc/spl/dc_spl_isharp_filters.h | 9 +- .../gpu/drm/amd/display/dc/spl/dc_spl_types.h | 12 +++ 6 files changed, 128 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index dcf8a90e961d..7c4812dd1a71 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1056,6 +1056,8 @@ struct dc_debug_options { unsigned int force_lls; bool notify_dpia_hr_bw; bool enable_ips_visual_confirm; + unsigned int sharpen_policy; + unsigned int scale_to_sharpness_policy; }; diff --git a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c index f711fc2e3e65..603552dbd771 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c +++ b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c @@ -186,6 +186,11 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl spl_in->h_active = pipe_ctx->plane_res.scl_data.h_active; spl_in->v_active = pipe_ctx->plane_res.scl_data.v_active; + + spl_in->debug.sharpen_policy = (enum sharpen_policy)pipe_ctx->stream->ctx->dc->debug.sharpen_policy; + spl_in->debug.scale_to_sharpness_policy = + (enum scale_to_sharpness_policy)pipe_ctx->stream->ctx->dc->debug.scale_to_sharpness_policy; + /* Check if it is stream is in fullscreen and if its HDR. * Use this to determine sharpness levels */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c index a59aa6b59687..f7a654b3a092 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c @@ -813,6 +813,14 @@ static bool enable_easf(struct spl_in *spl_in, struct spl_scratch *spl_scratch) return skip_easf; } +/* Check if video is in fullscreen mode */ +static bool spl_is_video_fullscreen(struct spl_in *spl_in) +{ + if (spl_is_yuv420(spl_in->basic_in.format) && spl_in->is_fullscreen) + return true; + return false; +} + static bool spl_get_isharp_en(struct spl_in *spl_in, struct spl_scratch *spl_scratch) { @@ -820,6 +828,7 @@ static bool spl_get_isharp_en(struct spl_in *spl_in, int vratio = 0; int hratio = 0; struct spl_taps taps = spl_scratch->scl_data.taps; + bool fullscreen = spl_is_video_fullscreen(spl_in); /* Return if adaptive sharpness is disabled */ if (spl_in->adaptive_sharpness.enable == false) @@ -835,9 +844,15 @@ static bool spl_get_isharp_en(struct spl_in *spl_in, // Scaling is up to 1:1 (no scaling) or upscaling /* - * Apply sharpness to all RGB surfaces and to - * NV12/P010 surfaces + * Apply sharpness to RGB and YUV (NV12/P010) + * surfaces based on policy setting */ + if (!spl_is_yuv420(spl_in->basic_in.format) && + (spl_in->debug.sharpen_policy == SHARPEN_YUV)) + return enable_isharp; + else if ((spl_is_yuv420(spl_in->basic_in.format) && !fullscreen) && + (spl_in->debug.sharpen_policy == SHARPEN_RGB_FULLSCREEN_YUV)) + return enable_isharp; /* * Apply sharpness if supports horizontal taps 4,6 AND @@ -1562,7 +1577,7 @@ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data, struct adaptive_sharpness adp_sharpness, bool enable_isharp, enum linear_light_scaling lls_pref, enum spl_pixel_format format, const struct spl_scaler_data *data, struct spl_fixed31_32 ratio, - enum system_setup setup) + enum system_setup setup, enum scale_to_sharpness_policy scale_to_sharpness_policy) { /* Turn off sharpener if not required */ if (!enable_isharp) { @@ -1570,6 +1585,11 @@ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data, return; } + spl_build_isharp_1dlut_from_reference_curve(ratio, setup, adp_sharpness, + scale_to_sharpness_policy); + dscl_prog_data->isharp_delta = spl_get_pregen_filter_isharp_1D_lut(setup); + dscl_prog_data->sharpness_level = adp_sharpness.sharpness_level; + dscl_prog_data->isharp_en = 1; // ISHARP_EN // Set ISHARP_NOISEDET_MODE if htaps = 6-tap if (data->taps.h_taps == 6) { @@ -1667,11 +1687,6 @@ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data, dscl_prog_data->isharp_lba.base_seg[5] = 0; // ISHARP LBA PWL for Seg 5. BASE value in U0.6 format } - - spl_build_isharp_1dlut_from_reference_curve(ratio, setup, adp_sharpness); - dscl_prog_data->isharp_delta = spl_get_pregen_filter_isharp_1D_lut(setup); - dscl_prog_data->sharpness_level = adp_sharpness.sharpness_level; - // Program the nldelta soft clip values if (lls_pref == LLS_PREF_YES) { dscl_prog_data->isharp_nldelta_sclip.enable_p = 0; /* ISHARP_NLDELTA_SCLIP_EN_P */ @@ -1766,7 +1781,8 @@ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out) isharp_scale_ratio = spl_scratch.scl_data.recip_ratios.horz; spl_set_isharp_data(spl_out->dscl_prog_data, spl_in->adaptive_sharpness, enable_isharp, - spl_in->lls_pref, spl_in->basic_in.format, data, isharp_scale_ratio, setup); + spl_in->lls_pref, spl_in->basic_in.format, data, isharp_scale_ratio, setup, + spl_in->debug.scale_to_sharpness_policy); return res; } diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c index 33712f50d303..e0572252c640 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c @@ -500,6 +500,15 @@ struct isharp_1D_lut_pregen filter_isharp_1D_lut_pregen[NUM_SHARPNESS_SETUPS] = }, }; +struct scale_ratio_to_sharpness_level_adj sharpness_level_adj[NUM_SHARPNESS_ADJ_LEVELS] = { + {1125, 1000, 0}, + {11, 10, 1}, + {1075, 1000, 2}, + {105, 100, 3}, + {1025, 1000, 4}, + {1, 1, 5}, +}; + const uint32_t *spl_get_filter_isharp_1D_lut_0(void) { return filter_isharp_1D_lut_0; @@ -541,19 +550,72 @@ uint16_t *spl_get_filter_isharp_bs_3tap_64p(void) return filter_isharp_bs_3tap_64p_s1_12; } -static unsigned int spl_calculate_sharpness_level(int discrete_sharpness_level, enum system_setup setup, - struct spl_sharpness_range sharpness_range) +static unsigned int spl_calculate_sharpness_level_adj(struct spl_fixed31_32 ratio) +{ + int j; + struct spl_fixed31_32 ratio_level; + struct scale_ratio_to_sharpness_level_adj *lookup_ptr; + unsigned int sharpness_level_down_adj; + + /* + * Adjust sharpness level based on current scaling ratio + * + * We have 5 discrete scaling ratios which we will use to adjust the + * sharpness level down by 1 as we pass each ratio. The ratios + * are + * + * 1.125 upscale and higher - no adj + * 1.100 - under 1.125 - adj level down 1 + * 1.075 - under 1.100 - adj level down 2 + * 1.050 - under 1.075 - adj level down 3 + * 1.025 - under 1.050 - adj level down 4 + * 1.000 - under 1.025 - adj level down 5 + * + */ + j = 0; + sharpness_level_down_adj = 0; + lookup_ptr = sharpness_level_adj; + while (j < NUM_SHARPNESS_ADJ_LEVELS) { + ratio_level = spl_fixpt_from_fraction(lookup_ptr->ratio_numer, + lookup_ptr->ratio_denom); + if (ratio.value >= ratio_level.value) { + sharpness_level_down_adj = lookup_ptr->level_down_adj; + break; + } + lookup_ptr++; + j++; + } + return sharpness_level_down_adj; +} + +static unsigned int spl_calculate_sharpness_level(struct spl_fixed31_32 ratio, + int discrete_sharpness_level, enum system_setup setup, + struct spl_sharpness_range sharpness_range, + enum scale_to_sharpness_policy scale_to_sharpness_policy) { unsigned int sharpness_level = 0; + unsigned int sharpness_level_down_adj = 0; int min_sharpness, max_sharpness, mid_sharpness; + /* + * Adjust sharpness level if policy requires we adjust it based on + * scale ratio. Based on scale ratio, we may adjust the sharpness + * level down by a certain number of steps. We will not select + * a sharpness value of 0 so the lowest sharpness level will be + * 0 or 1 depending on what the min_sharpness is + * + * If the policy is no required, this code maybe removed at a later + * date + */ switch (setup) { case HDR_L: min_sharpness = sharpness_range.hdr_rgb_min; max_sharpness = sharpness_range.hdr_rgb_max; mid_sharpness = sharpness_range.hdr_rgb_mid; + if (scale_to_sharpness_policy == SCALE_TO_SHARPNESS_ADJ_ALL) + sharpness_level_down_adj = spl_calculate_sharpness_level_adj(ratio); break; case HDR_NL: /* currently no use case, use Non-linear SDR values for now */ @@ -561,15 +623,26 @@ static unsigned int spl_calculate_sharpness_level(int discrete_sharpness_level, min_sharpness = sharpness_range.sdr_yuv_min; max_sharpness = sharpness_range.sdr_yuv_max; mid_sharpness = sharpness_range.sdr_yuv_mid; + if (scale_to_sharpness_policy >= SCALE_TO_SHARPNESS_ADJ_YUV) + sharpness_level_down_adj = spl_calculate_sharpness_level_adj(ratio); break; case SDR_L: default: min_sharpness = sharpness_range.sdr_rgb_min; max_sharpness = sharpness_range.sdr_rgb_max; mid_sharpness = sharpness_range.sdr_rgb_mid; + if (scale_to_sharpness_policy == SCALE_TO_SHARPNESS_ADJ_ALL) + sharpness_level_down_adj = spl_calculate_sharpness_level_adj(ratio); break; } + if ((min_sharpness == 0) && (sharpness_level_down_adj >= discrete_sharpness_level)) + discrete_sharpness_level = 1; + else if (sharpness_level_down_adj >= discrete_sharpness_level) + discrete_sharpness_level = 0; + else + discrete_sharpness_level -= sharpness_level_down_adj; + int lower_half_step_size = (mid_sharpness - min_sharpness) / 5; int upper_half_step_size = (max_sharpness - mid_sharpness) / 5; @@ -584,7 +657,7 @@ static unsigned int spl_calculate_sharpness_level(int discrete_sharpness_level, } void spl_build_isharp_1dlut_from_reference_curve(struct spl_fixed31_32 ratio, enum system_setup setup, - struct adaptive_sharpness sharpness) + struct adaptive_sharpness sharpness, enum scale_to_sharpness_policy scale_to_sharpness_policy) { uint8_t *byte_ptr_1dlut_src, *byte_ptr_1dlut_dst; struct spl_fixed31_32 sharp_base, sharp_calc, sharp_level; @@ -594,8 +667,9 @@ void spl_build_isharp_1dlut_from_reference_curve(struct spl_fixed31_32 ratio, en uint32_t filter_pregen_store[ISHARP_LUT_TABLE_SIZE]; /* Custom sharpnessX1000 value */ - unsigned int sharpnessX1000 = spl_calculate_sharpness_level(sharpness.sharpness_level, - setup, sharpness.sharpness_range); + unsigned int sharpnessX1000 = spl_calculate_sharpness_level(ratio, + sharpness.sharpness_level, setup, + sharpness.sharpness_range, scale_to_sharpness_policy); sharp_level = spl_fixpt_from_fraction(sharpnessX1000, 1000); /* @@ -606,7 +680,6 @@ void spl_build_isharp_1dlut_from_reference_curve(struct spl_fixed31_32 ratio, en (filter_isharp_1D_lut_pregen[setup].sharpness_denom == 1000)) return; - /* * Calculate LUT_128_gained with this equation: * diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h index fe0b12571f2c..afcc66206ca2 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h @@ -20,11 +20,11 @@ uint16_t *spl_get_filter_isharp_bs_3tap_64p(void); const uint16_t *spl_get_filter_isharp_wide_6tap_64p(void); uint16_t *spl_dscl_get_blur_scale_coeffs_64p(int taps); -struct scale_ratio_to_sharpness_level_lookup { +#define NUM_SHARPNESS_ADJ_LEVELS 6 +struct scale_ratio_to_sharpness_level_adj { unsigned int ratio_numer; unsigned int ratio_denom; - unsigned int sharpness_numer; - unsigned int sharpness_denom; + unsigned int level_down_adj; /* adjust sharpness level down */ }; struct isharp_1D_lut_pregen { @@ -45,6 +45,7 @@ void spl_init_blur_scale_coeffs(void); void spl_set_blur_scale_data(struct dscl_prog_data *dscl_prog_data, const struct spl_scaler_data *data); -void spl_build_isharp_1dlut_from_reference_curve(struct spl_fixed31_32 ratio, enum system_setup setup, struct adaptive_sharpness sharpness); +void spl_build_isharp_1dlut_from_reference_curve(struct spl_fixed31_32 ratio, enum system_setup setup, + struct adaptive_sharpness sharpness, enum scale_to_sharpness_policy scale_to_sharpness_policy); uint32_t *spl_get_pregen_filter_isharp_1D_lut(enum system_setup setup); #endif /* __DC_SPL_ISHARP_FILTERS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h index 74f2a8c42f4f..425d4a282c7a 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h @@ -487,6 +487,16 @@ enum linear_light_scaling { // convert it in translation logic LLS_PREF_YES, LLS_PREF_NO }; +enum sharpen_policy { + SHARPEN_ALWAYS = 0, + SHARPEN_YUV = 1, + SHARPEN_RGB_FULLSCREEN_YUV = 2 +}; +enum scale_to_sharpness_policy { + NO_SCALE_TO_SHARPNESS_ADJ = 0, + SCALE_TO_SHARPNESS_ADJ_YUV = 1, + SCALE_TO_SHARPNESS_ADJ_ALL = 2 +}; struct spl_funcs { void (*spl_calc_lb_num_partitions) (bool alpha_en, @@ -499,6 +509,8 @@ struct spl_funcs { struct spl_debug { int visual_confirm_base_offset; int visual_confirm_dpp_offset; + enum sharpen_policy sharpen_policy; + enum scale_to_sharpness_policy scale_to_sharpness_policy; }; struct spl_in { From 401c90c4d64f2227fc2f4c02d2ad23296bf5ca6f Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Tue, 27 Aug 2024 14:13:10 -0400 Subject: [PATCH 357/655] drm/amd/display: Block dynamic IPS2 on DCN35 for incompatible FW versions [WHY] Hangs with Z8 can occur if running an older unfixed PMFW version. [HOW] Fallback to RCG only for dynamic IPS2 states if it's not newer than 93.12. Limit to DCN35. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Charlene Liu Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 97164b5585a8..b46a3afe48ca 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -1222,6 +1222,12 @@ void dcn35_clk_mgr_construct( ctx->dc->debug.disable_dpp_power_gate = false; ctx->dc->debug.disable_hubp_power_gate = false; ctx->dc->debug.disable_dsc_power_gate = false; + + /* Disable dynamic IPS2 in older PMFW (93.12) for Z8 interop. */ + if (ctx->dc->config.disable_ips == DMUB_IPS_ENABLE && + ctx->dce_version == DCN_VERSION_3_5 && + ((clk_mgr->base.smu_ver & 0x00FFFFFF) <= 0x005d0c00)) + ctx->dc->config.disable_ips = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; } else { /*let's reset the config control flag*/ ctx->dc->config.disable_ips = DMUB_IPS_DISABLE_ALL; /*pmfw not support it, disable it all*/ From 3766a840e093d30e1a2522f650d8a6ac892a8719 Mon Sep 17 00:00:00 2001 From: Martin Tsai Date: Mon, 22 Jul 2024 14:12:25 +0800 Subject: [PATCH 358/655] drm/amd/display: Clean up dsc blocks in accelerated mode [WHY] DSC on eDP could be enabled during VBIOS post. The enabled DSC may not be disabled when enter to OS, once the system was in second screen only mode before entering to S4. In this case, OS will not send setTimings to reset eDP path again. The enabled DSC HW will make a new stream without DSC cannot output normally if it reused this pipe with enabled DSC. [HOW] In accelerated mode, to clean up DSC blocks if eDP is on link but not active when we are not in fast boot and seamless boot. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Charlene Liu Signed-off-by: Martin Tsai Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/hwss/dce110/dce110_hwseq.c | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index d52ce58c6a98..aa7479b31898 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -57,6 +57,7 @@ #include "panel_cntl.h" #include "dc_state_priv.h" #include "dpcd_defs.h" +#include "dsc.h" /* include DCE11 register header files */ #include "dce/dce_11_0_d.h" #include "dce/dce_11_0_sh_mask.h" @@ -1823,6 +1824,48 @@ static void get_edp_links_with_sink( } } +static void clean_up_dsc_blocks(struct dc *dc) +{ + struct display_stream_compressor *dsc = NULL; + struct timing_generator *tg = NULL; + struct stream_encoder *se = NULL; + struct dccg *dccg = dc->res_pool->dccg; + struct pg_cntl *pg_cntl = dc->res_pool->pg_cntl; + int i; + + if (dc->ctx->dce_version != DCN_VERSION_3_5 && + dc->ctx->dce_version != DCN_VERSION_3_51) + return; + + for (i = 0; i < dc->res_pool->res_cap->num_dsc; i++) { + struct dcn_dsc_state s = {0}; + + dsc = dc->res_pool->dscs[i]; + dsc->funcs->dsc_read_state(dsc, &s); + if (s.dsc_fw_en) { + /* disable DSC in OPTC */ + if (i < dc->res_pool->timing_generator_count) { + tg = dc->res_pool->timing_generators[i]; + tg->funcs->set_dsc_config(tg, OPTC_DSC_DISABLED, 0, 0); + } + /* disable DSC in stream encoder */ + if (i < dc->res_pool->stream_enc_count) { + se = dc->res_pool->stream_enc[i]; + se->funcs->dp_set_dsc_config(se, OPTC_DSC_DISABLED, 0, 0); + se->funcs->dp_set_dsc_pps_info_packet(se, false, NULL, true); + } + /* disable DSC block */ + if (dccg->funcs->set_ref_dscclk) + dccg->funcs->set_ref_dscclk(dccg, dsc->inst); + dsc->funcs->dsc_disable(dsc); + + /* power down DSC */ + if (pg_cntl != NULL) + pg_cntl->funcs->dsc_pg_control(pg_cntl, dsc->inst, false); + } + } +} + /* * When ASIC goes from VBIOS/VGA mode to driver/accelerated mode we need: * 1. Power down all DC HW blocks @@ -1927,6 +1970,13 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) clk_mgr_exit_optimized_pwr_state(dc, dc->clk_mgr); power_down_all_hw_blocks(dc); + + /* DSC could be enabled on eDP during VBIOS post. + * To clean up dsc blocks if eDP is in link but not active. + */ + if (edp_link_with_sink && (edp_stream_num == 0)) + clean_up_dsc_blocks(dc); + disable_vga_and_power_gate_all_controllers(dc); if (edp_link_with_sink && !keep_edp_vdd_on) dc->hwss.edp_power_control(edp_link_with_sink, false); From ae5100805f98641ea4112241e350485c97936bbe Mon Sep 17 00:00:00 2001 From: Sung Joon Kim Date: Tue, 27 Aug 2024 14:49:44 -0400 Subject: [PATCH 359/655] drm/amd/display: Disable SYMCLK32_LE root clock gating [WHY & HOW] On display on sequence, enabling SYMCLK32_LE root clock gating causes issue in link training so disabling it is needed. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Nicholas Kazlauskas Signed-off-by: Sung Joon Kim Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c index 514c6d56925d..da9101b83e8c 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c @@ -736,7 +736,7 @@ static const struct dc_debug_options debug_defaults_drv = { .hdmichar = true, .dpstream = true, .symclk32_se = true, - .symclk32_le = true, + .symclk32_le = false, .symclk_fe = true, .physymclk = false, .dpiasymclk = true, From cf4cebcec619d963fa7496018f03cb0ff00dc257 Mon Sep 17 00:00:00 2001 From: Peichen Huang Date: Thu, 22 Aug 2024 14:50:07 +0800 Subject: [PATCH 360/655] drm/amd/display: Restructure dpia link training [WHY] We intend to consolidate dp tunneling and conventional dp link training. [HOW] 1. Use the same link training entry for both dp and dpia 2. Move SET_CONFIG of non-transparent mode to dmub side 3. Add set_tps_notification dmub_cmd to notify tps request for non-transparent dpia link training 4. Check dpcd request result and abort link training early if dpia aux tunneling fails 5. Add option to avoid affect old product 6. Separately handle wait_time_microsec for dpia Reviewed-by: Cruise Hung Reviewed-by: George Shen Reviewed-by: Meenakshikumar Somasundaram Signed-off-by: Peichen Huang Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 21 +++++ drivers/gpu/drm/amd/display/dc/dc.h | 6 +- .../amd/display/dc/link/hwss/link_hwss_dpia.c | 31 ++++++- .../dc/link/protocols/link_dp_training.c | 80 ++++++++++++++++--- .../dc/link/protocols/link_dp_training.h | 16 +++- .../link/protocols/link_dp_training_8b_10b.c | 21 ++--- .../dc/link/protocols/link_dp_training_dpia.c | 64 ++++++++------- .../dc/link/protocols/link_dp_training_dpia.h | 19 +++++ drivers/gpu/drm/amd/display/dmub/dmub_srv.h | 1 + .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 25 +++++- .../gpu/drm/amd/display/dmub/src/dmub_dcn35.c | 1 + 11 files changed, 233 insertions(+), 52 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 243928b0a39f..eb7c7681bdd9 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -5755,6 +5755,27 @@ enum dc_status dc_process_dmub_set_mst_slots(const struct dc *dc, return DC_OK; } +/** + * dc_process_dmub_dpia_set_tps_notification - Submits tps notification + * + * @dc: [in] dc structure + * @link_index: [in] link index + * @ts: [in] request tps + * + * Submits set_tps_notification command to dmub via inbox message + */ +void dc_process_dmub_dpia_set_tps_notification(const struct dc *dc, uint32_t link_index, uint8_t tps) +{ + union dmub_rb_cmd cmd = {0}; + + cmd.set_tps_notification.header.type = DMUB_CMD__DPIA; + cmd.set_tps_notification.header.sub_type = DMUB_CMD__DPIA_SET_TPS_NOTIFICATION; + cmd.set_tps_notification.tps_notification.instance = dc->links[link_index]->ddc_hw_inst; + cmd.set_tps_notification.tps_notification.tps = tps; + + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); +} + /** * dc_process_dmub_dpia_hpd_int_enable - Submits DPIA DPD interruption * diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 7c4812dd1a71..133cac4d9fc4 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -462,6 +462,7 @@ struct dc_config { bool support_edp0_on_dp1; unsigned int enable_fpo_flicker_detection; bool disable_hbr_audio_dp2; + bool consolidated_dpia_dp_lt; }; enum visual_confirm { @@ -762,7 +763,8 @@ union dpia_debug_options { uint32_t disable_mst_dsc_work_around:1; /* bit 3 */ uint32_t enable_force_tbt3_work_around:1; /* bit 4 */ uint32_t disable_usb4_pm_support:1; /* bit 5 */ - uint32_t reserved:26; + uint32_t enable_consolidated_dpia_dp_lt:1; /* bit 6 */ + uint32_t reserved:25; } bits; uint32_t raw; }; @@ -2525,6 +2527,8 @@ enum dc_status dc_process_dmub_set_mst_slots(const struct dc *dc, uint8_t mst_alloc_slots, uint8_t *mst_slots_in_use); +void dc_process_dmub_dpia_set_tps_notification(const struct dc *dc, uint32_t link_index, uint8_t tps); + void dc_process_dmub_dpia_hpd_int_enable(const struct dc *dc, uint32_t hpd_int_enable); diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.c b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.c index 46fb3649bc86..6499807af72a 100644 --- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.c +++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.c @@ -50,8 +50,31 @@ static void update_dpia_stream_allocation_table(struct dc_link *link, DC_LOG_MST("dpia : status[%d]: alloc_slots[%d]: used_slots[%d]\n", status, mst_alloc_slots, prev_mst_slots_in_use); - ASSERT(link_enc); - link_enc->funcs->update_mst_stream_allocation_table(link_enc, table); + if (link_enc) + link_enc->funcs->update_mst_stream_allocation_table(link_enc, table); +} + +static void set_dio_dpia_link_test_pattern(struct dc_link *link, + const struct link_resource *link_res, + struct encoder_set_dp_phy_pattern_param *tp_params) +{ + if (tp_params->dp_phy_pattern != DP_TEST_PATTERN_VIDEO_MODE) + return; + + struct link_encoder *link_enc = link_enc_cfg_get_link_enc(link); + + if (!link_enc) + return; + + link_enc->funcs->dp_set_phy_pattern(link_enc, tp_params); + link->dc->link_srv->dp_trace_source_sequence(link, DPCD_SOURCE_SEQ_AFTER_SET_SOURCE_PATTERN); +} + +static void set_dio_dpia_lane_settings(struct dc_link *link, + const struct link_resource *link_res, + const struct dc_link_settings *link_settings, + const struct dc_lane_settings lane_settings[LANE_COUNT_DP_MAX]) +{ } static const struct link_hwss dpia_link_hwss = { @@ -65,8 +88,8 @@ static const struct link_hwss dpia_link_hwss = { .ext = { .set_throttled_vcp_size = set_dio_throttled_vcp_size, .enable_dp_link_output = enable_dio_dp_link_output, - .set_dp_link_test_pattern = set_dio_dp_link_test_pattern, - .set_dp_lane_settings = set_dio_dp_lane_settings, + .set_dp_link_test_pattern = set_dio_dpia_link_test_pattern, + .set_dp_lane_settings = set_dio_dpia_lane_settings, .update_stream_allocation_table = update_dpia_stream_allocation_table, }, }; diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c index 988999c44475..27b881f947e8 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c @@ -515,6 +515,41 @@ bool dp_is_interlane_aligned(union lane_align_status_updated align_status) return align_status.bits.INTERLANE_ALIGN_DONE == 1; } +bool dp_check_interlane_aligned(union lane_align_status_updated align_status, + struct dc_link *link, + uint8_t retries) +{ + /* Take into consideration corner case for DP 1.4a LL Compliance CTS as USB4 + * has to share encoders unlike DP and USBC + */ + return (dp_is_interlane_aligned(align_status) || + (link->skip_fallback_on_link_loss && retries)); +} + +uint32_t dp_get_eq_aux_rd_interval( + const struct dc_link *link, + const struct link_training_settings *lt_settings, + uint32_t offset, + uint8_t retries) +{ + if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) { + if (offset == 0 && retries == 1 && lt_settings->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT) + return max(lt_settings->eq_pattern_time, (uint32_t) DPIA_CLK_SYNC_DELAY); + else + return dpia_get_eq_aux_rd_interval(link, lt_settings, offset); + } else if (is_repeater(lt_settings, offset)) + return dp_translate_training_aux_read_interval( + link->dpcd_caps.lttpr_caps.aux_rd_interval[offset - 1]); + else + return lt_settings->eq_pattern_time; +} + +bool dp_check_dpcd_reqeust_status(const struct dc_link *link, + enum dc_status status) +{ + return (status != DC_OK && link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA); +} + enum link_training_result dp_check_link_loss_status( struct dc_link *link, const struct link_training_settings *link_training_setting) @@ -973,13 +1008,17 @@ void repeater_training_done(struct dc_link *link, uint32_t offset) dpcd_pattern.v1_4.TRAINING_PATTERN_SET); } -static void dpcd_exit_training_mode(struct dc_link *link, enum dp_link_encoding encoding) +static enum link_training_result dpcd_exit_training_mode(struct dc_link *link, enum dp_link_encoding encoding) { + enum dc_status status; uint8_t sink_status = 0; uint8_t i; /* clear training pattern set */ - dpcd_set_training_pattern(link, DP_TRAINING_PATTERN_VIDEOIDLE); + status = dpcd_set_training_pattern(link, DP_TRAINING_PATTERN_VIDEOIDLE); + + if (dp_check_dpcd_reqeust_status(link, status)) + return LINK_TRAINING_ABORT; if (encoding == DP_128b_132b_ENCODING) { /* poll for intra-hop disable */ @@ -990,6 +1029,8 @@ static void dpcd_exit_training_mode(struct dc_link *link, enum dp_link_encoding fsleep(1000); } } + + return LINK_TRAINING_SUCCESS; } enum dc_status dpcd_configure_channel_coding(struct dc_link *link, @@ -1013,17 +1054,18 @@ enum dc_status dpcd_configure_channel_coding(struct dc_link *link, return status; } -void dpcd_set_training_pattern( +enum dc_status dpcd_set_training_pattern( struct dc_link *link, enum dc_dp_training_pattern training_pattern) { + enum dc_status status; union dpcd_training_pattern dpcd_pattern = {0}; dpcd_pattern.v1_4.TRAINING_PATTERN_SET = dp_training_pattern_to_dpcd_training_pattern( link, training_pattern); - core_link_write_dpcd( + status = core_link_write_dpcd( link, DP_TRAINING_PATTERN_SET, &dpcd_pattern.raw, @@ -1033,6 +1075,8 @@ void dpcd_set_training_pattern( __func__, DP_TRAINING_PATTERN_SET, dpcd_pattern.v1_4.TRAINING_PATTERN_SET); + + return status; } enum dc_status dpcd_set_link_settings( @@ -1185,6 +1229,13 @@ void dpcd_set_lt_pattern_and_lane_settings( dpcd_lt_buffer[DP_TRAINING_PATTERN_SET - DP_TRAINING_PATTERN_SET] = dpcd_pattern.raw; + if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) + dpia_set_tps_notification( + link, + lt_settings, + dpcd_pattern.v1_4.TRAINING_PATTERN_SET, + offset); + if (is_repeater(lt_settings, offset)) { DC_LOG_HW_LINK_TRAINING("%s\n LTTPR Repeater ID: %d\n 0x%X pattern = %x\n", __func__, @@ -1455,7 +1506,8 @@ static enum link_training_result dp_transition_to_video_idle( */ if (link->connector_signal != SIGNAL_TYPE_EDP && status == LINK_TRAINING_SUCCESS) { msleep(5); - status = dp_check_link_loss_status(link, lt_settings); + if (!link->skip_fallback_on_link_loss) + status = dp_check_link_loss_status(link, lt_settings); } return status; } @@ -1521,7 +1573,9 @@ enum link_training_result dp_perform_link_training( ASSERT(0); /* exit training mode */ - dpcd_exit_training_mode(link, encoding); + if ((dpcd_exit_training_mode(link, encoding) != LINK_TRAINING_SUCCESS || status == LINK_TRAINING_ABORT) && + link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) + dpia_training_abort(link, <_settings, 0); /* switch to video idle */ if ((status == LINK_TRAINING_SUCCESS) || !skip_video_pattern) @@ -1599,8 +1653,7 @@ bool perform_link_training_with_retries( dp_perform_link_training_skip_aux(link, &pipe_ctx->link_res, &cur_link_settings); return true; } else { - /** @todo Consolidate USB4 DP and DPx.x training. */ - if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) { + if (!link->dc->config.consolidated_dpia_dp_lt && link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) { status = dpia_perform_link_training( link, &pipe_ctx->link_res, @@ -1629,8 +1682,17 @@ bool perform_link_training_with_retries( dp_trace_lt_total_count_increment(link, false); dp_trace_lt_result_update(link, status, false); dp_trace_set_lt_end_timestamp(link, false); - if (status == LINK_TRAINING_SUCCESS && !is_link_bw_low) + if (status == LINK_TRAINING_SUCCESS && !is_link_bw_low) { + // Update verified link settings to current one + // Because DPIA LT might fallback to lower link setting. + if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && + stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) { + link->verified_link_cap.link_rate = link->cur_link_settings.link_rate; + link->verified_link_cap.lane_count = link->cur_link_settings.lane_count; + dm_helpers_dp_mst_update_branch_bandwidth(link->ctx, link); + } return true; + } } fail_count++; diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h index 851bd17317a0..0b18aa35c33c 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h @@ -55,7 +55,7 @@ void dp_set_hw_test_pattern( uint8_t *custom_pattern, uint32_t custom_pattern_size); -void dpcd_set_training_pattern( +enum dc_status dpcd_set_training_pattern( struct dc_link *link, enum dc_dp_training_pattern training_pattern); @@ -182,4 +182,18 @@ uint32_t dp_translate_training_aux_read_interval( uint8_t dp_get_nibble_at_index(const uint8_t *buf, uint32_t index); + +bool dp_check_interlane_aligned(union lane_align_status_updated align_status, + struct dc_link *link, + uint8_t retries); + +uint32_t dp_get_eq_aux_rd_interval( + const struct dc_link *link, + const struct link_training_settings *lt_settings, + uint32_t offset, + uint8_t retries); + +bool dp_check_dpcd_reqeust_status(const struct dc_link *link, + enum dc_status status); + #endif /* __DC_LINK_DP_TRAINING_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c index 2b4c15b0b407..3bdce32a85e3 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c @@ -157,6 +157,7 @@ enum link_training_result perform_8b_10b_clock_recovery_sequence( struct link_training_settings *lt_settings, uint32_t offset) { + enum dc_status status; uint32_t retries_cr; uint32_t retry_count; uint32_t wait_time_microsec; @@ -216,7 +217,7 @@ enum link_training_result perform_8b_10b_clock_recovery_sequence( /* 4. Read lane status and requested drive * settings as set by the sink */ - dp_get_lane_status_and_lane_adjust( + status = dp_get_lane_status_and_lane_adjust( link, lt_settings, dpcd_lane_status, @@ -224,6 +225,9 @@ enum link_training_result perform_8b_10b_clock_recovery_sequence( dpcd_lane_adjust, offset); + if (dp_check_dpcd_reqeust_status(link, status)) + return LINK_TRAINING_ABORT; + /* 5. check CR done*/ if (dp_is_cr_done(lane_count, dpcd_lane_status)) { DC_LOG_HW_LINK_TRAINING("%s: Clock recovery OK\n", __func__); @@ -273,6 +277,7 @@ enum link_training_result perform_8b_10b_channel_equalization_sequence( struct link_training_settings *lt_settings, uint32_t offset) { + enum dc_status status; enum dc_dp_training_pattern tr_pattern; uint32_t retries_ch_eq; uint32_t wait_time_microsec; @@ -308,12 +313,7 @@ enum link_training_result perform_8b_10b_channel_equalization_sequence( dpcd_set_lane_settings(link, lt_settings, offset); /* 3. wait for receiver to lock-on*/ - wait_time_microsec = lt_settings->eq_pattern_time; - - if (is_repeater(lt_settings, offset)) - wait_time_microsec = - dp_translate_training_aux_read_interval( - link->dpcd_caps.lttpr_caps.aux_rd_interval[offset - 1]); + wait_time_microsec = dp_get_eq_aux_rd_interval(link, lt_settings, offset, retries_ch_eq); dp_wait_for_training_aux_rd_interval( link, @@ -322,7 +322,7 @@ enum link_training_result perform_8b_10b_channel_equalization_sequence( /* 4. Read lane status and requested * drive settings as set by the sink*/ - dp_get_lane_status_and_lane_adjust( + status = dp_get_lane_status_and_lane_adjust( link, lt_settings, dpcd_lane_status, @@ -330,6 +330,9 @@ enum link_training_result perform_8b_10b_channel_equalization_sequence( dpcd_lane_adjust, offset); + if (dp_check_dpcd_reqeust_status(link, status)) + return LINK_TRAINING_ABORT; + /* 5. check CR done*/ if (!dp_is_cr_done(lane_count, dpcd_lane_status)) return dpcd_lane_status[0].bits.CR_DONE_0 ? @@ -339,7 +342,7 @@ enum link_training_result perform_8b_10b_channel_equalization_sequence( /* 6. check CHEQ done*/ if (dp_is_ch_eq_done(lane_count, dpcd_lane_status) && dp_is_symbol_locked(lane_count, dpcd_lane_status) && - dp_is_interlane_aligned(dpcd_lane_status_updated)) + dp_check_interlane_aligned(dpcd_lane_status_updated, link, retries_ch_eq)) return LINK_TRAINING_SUCCESS; /* 7. update VS/PE/PC2 in lt_settings*/ diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c index cd1975c03f38..39e4b7dc9588 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c @@ -43,9 +43,6 @@ #define DC_LOGGER \ link->ctx->logger -/* The approximate time (us) it takes to transmit 9 USB4 DP clock sync packets. */ -#define DPIA_CLK_SYNC_DELAY 16000 - /* Extend interval between training status checks for manual testing. */ #define DPIA_DEBUG_EXTENDED_AUX_RD_INTERVAL_US 60000000 @@ -566,28 +563,6 @@ static enum link_training_result dpia_training_cr_phase( return result; } -/* Return status read interval during equalization phase. */ -static uint32_t dpia_get_eq_aux_rd_interval( - const struct dc_link *link, - const struct link_training_settings *lt_settings, - uint32_t hop) -{ - uint32_t wait_time_microsec; - - if (hop == DPRX) - wait_time_microsec = lt_settings->eq_pattern_time; - else - wait_time_microsec = - dp_translate_training_aux_read_interval( - link->dpcd_caps.lttpr_caps.aux_rd_interval[hop - 1]); - - /* Check debug option for extending aux read interval. */ - if (link->dc->debug.dpia_debug.bits.extend_aux_rd_interval) - wait_time_microsec = DPIA_DEBUG_EXTENDED_AUX_RD_INTERVAL_US; - - return wait_time_microsec; -} - /* Execute equalization phase of link training for specified hop in display * path in non-transparent mode: * - driver issues both DPCD and SET_CONFIG transactions. @@ -936,6 +911,22 @@ static enum link_training_result dpia_training_end( return result; } +/* Return status read interval during equalization phase. */ +uint32_t dpia_get_eq_aux_rd_interval( + const struct dc_link *link, + const struct link_training_settings *lt_settings, + uint32_t hop) +{ + /* Check debug option for extending aux read interval. */ + if (link->dc->debug.dpia_debug.bits.extend_aux_rd_interval) + return DPIA_DEBUG_EXTENDED_AUX_RD_INTERVAL_US; + else if (hop == DPRX) + return lt_settings->eq_pattern_time; + else + return dp_translate_training_aux_read_interval( + link->dpcd_caps.lttpr_caps.aux_rd_interval[hop - 1]); +} + /* When aborting training of specified hop in display path, clean up by: * - Attempting to clear DPCD TRAINING_PATTERN_SET, LINK_BW_SET and LANE_COUNT_SET. * - Sending SET_CONFIG(SET_LINK) with lane count and link rate set to 0. @@ -943,7 +934,7 @@ static enum link_training_result dpia_training_end( * @param link DPIA link being trained. * @param hop Hop in display path. DPRX = 0. */ -static void dpia_training_abort( +void dpia_training_abort( struct dc_link *link, struct link_training_settings *lt_settings, uint32_t hop) @@ -968,7 +959,26 @@ static void dpia_training_abort( core_link_write_dpcd(link, dpcd_tps_offset, &data, 1); core_link_write_dpcd(link, DP_LINK_BW_SET, &data, 1); core_link_write_dpcd(link, DP_LANE_COUNT_SET, &data, 1); - core_link_send_set_config(link, DPIA_SET_CFG_SET_LINK, data); + + if (!link->dc->config.consolidated_dpia_dp_lt) + core_link_send_set_config(link, DPIA_SET_CFG_SET_LINK, data); +} + +void dpia_set_tps_notification( + struct dc_link *link, + const struct link_training_settings *lt_settings, + uint8_t pattern, + uint32_t hop) +{ + uint8_t repeater_cnt = 0; /* Number of hops/repeaters in display path. */ + + if (lt_settings->lttpr_mode != LTTPR_MODE_NON_TRANSPARENT || pattern == DPCD_TRAINING_PATTERN_VIDEOIDLE) + return; + + repeater_cnt = dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt); + + if (hop != repeater_cnt) + dc_process_dmub_dpia_set_tps_notification(link->ctx->dc, link->link_index, pattern); } enum link_training_result dpia_perform_link_training( diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.h index b39fb9faf1c2..9f4eceb494c2 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.h @@ -28,6 +28,9 @@ #define __DC_LINK_DP_TRAINING_DPIA_H__ #include "link_dp_training.h" +/* The approximate time (us) it takes to transmit 9 USB4 DP clock sync packets. */ +#define DPIA_CLK_SYNC_DELAY 16000 + /* Train DP tunneling link for USB4 DPIA display endpoint. * DPIA equivalent of dc_link_dp_perfrorm_link_training. * Aborts link training upon detection of sink unplug. @@ -38,4 +41,20 @@ enum link_training_result dpia_perform_link_training( const struct dc_link_settings *link_setting, bool skip_video_pattern); +void dpia_training_abort( + struct dc_link *link, + struct link_training_settings *lt_settings, + uint32_t hop); + +uint32_t dpia_get_eq_aux_rd_interval( + const struct dc_link *link, + const struct link_training_settings *lt_settings, + uint32_t hop); + +void dpia_set_tps_notification( + struct dc_link *link, + const struct link_training_settings *lt_settings, + uint8_t pattern, + uint32_t offset); + #endif /* __DC_LINK_DP_TRAINING_DPIA_H__ */ diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h index cd70453aeae0..fe5b6f7a3eb1 100644 --- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h @@ -300,6 +300,7 @@ struct dmub_srv_hw_params { enum dmub_ips_disable_type disable_ips; bool disallow_phy_access; bool disable_sldo_opt; + bool enable_non_transparent_setconfig; }; /** diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index e20c220aa8b4..ebcf68bfae2b 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -682,7 +682,7 @@ union dmub_fw_boot_options { uint32_t gpint_scratch8: 1; /* 1 if GPINT is in scratch8*/ uint32_t usb4_cm_version: 1; /**< 1 CM support */ uint32_t dpia_hpd_int_enable_supported: 1; /* 1 if dpia hpd int enable supported */ - uint32_t reserved0: 1; + uint32_t enable_non_transparent_setconfig: 1; /* 1 if dpia use conventional dp lt flow*/ uint32_t disable_clk_ds: 1; /* 1 if disallow dispclk_ds and dppclk_ds*/ uint32_t disable_timeout_recovery : 1; /* 1 if timeout recovery should be disabled */ uint32_t ips_pg_disable: 1; /* 1 to disable ONO domains power gating*/ @@ -1308,6 +1308,7 @@ enum dmub_cmd_dpia_type { DMUB_CMD__DPIA_DIG1_DPIA_CONTROL = 0, DMUB_CMD__DPIA_SET_CONFIG_ACCESS = 1, DMUB_CMD__DPIA_MST_ALLOC_SLOTS = 2, + DMUB_CMD__DPIA_SET_TPS_NOTIFICATION = 3, }; /* DMUB_OUT_CMD__DPIA_NOTIFICATION command types. */ @@ -2138,6 +2139,24 @@ struct dmub_rb_cmd_set_mst_alloc_slots { struct dmub_cmd_mst_alloc_slots_control_data mst_slots_control; /* mst slots control */ }; +/** + * Data passed from driver to FW in a DMUB_CMD__SET_TPS_NOTIFICATION command. + */ +struct dmub_cmd_tps_notification_data { + uint8_t instance; /* DPIA instance */ + uint8_t tps; /* requested training pattern */ + uint8_t reserved1; + uint8_t reserved2; +}; + +/** + * DMUB command structure for SET_TPS_NOTIFICATION command. + */ +struct dmub_rb_cmd_set_tps_notification { + struct dmub_cmd_header header; /* header */ + struct dmub_cmd_tps_notification_data tps_notification; /* set tps_notification data */ +}; + /** * DMUB command structure for DPIA HPD int enable control. */ @@ -5304,6 +5323,10 @@ union dmub_rb_cmd { * Definition of a DMUB_CMD__DPIA_MST_ALLOC_SLOTS command. */ struct dmub_rb_cmd_set_mst_alloc_slots set_mst_alloc_slots; + /** + * Definition of a DMUB_CMD__DPIA_SET_TPS_NOTIFICATION command. + */ + struct dmub_rb_cmd_set_tps_notification set_tps_notification; /** * Definition of a DMUB_CMD__EDID_CEA command. */ diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c index 746696b6f09a..2ccad79053c5 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c @@ -425,6 +425,7 @@ void dmub_dcn35_enable_dmub_boot_options(struct dmub_srv *dmub, const struct dmu boot_options.bits.ips_disable = params->disable_ips; boot_options.bits.ips_sequential_ono = params->ips_sequential_ono; boot_options.bits.disable_sldo_opt = params->disable_sldo_opt; + boot_options.bits.enable_non_transparent_setconfig = params->enable_non_transparent_setconfig; REG_WRITE(DMCUB_SCRATCH14, boot_options.all); } From e79563bf5fb17d1a199c7c0f7d5a7a98c077302a Mon Sep 17 00:00:00 2001 From: Relja Vojvodic Date: Wed, 28 Aug 2024 11:42:26 -0400 Subject: [PATCH 361/655] drm/amd/display: Add fullscreen only sharpening policy [WHAT & HOW] Disable sharpening if not in fullscreen if this policy is selected Reviewed-by: Samson Tam Signed-off-by: Relja Vojvodic Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/spl/dc_spl.c | 3 +++ drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c index f7a654b3a092..014e8a296f0c 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c @@ -853,6 +853,9 @@ static bool spl_get_isharp_en(struct spl_in *spl_in, else if ((spl_is_yuv420(spl_in->basic_in.format) && !fullscreen) && (spl_in->debug.sharpen_policy == SHARPEN_RGB_FULLSCREEN_YUV)) return enable_isharp; + else if (!spl_in->is_fullscreen && + spl_in->debug.sharpen_policy == SHARPEN_FULLSCREEN_ALL) + return enable_isharp; /* * Apply sharpness if supports horizontal taps 4,6 AND diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h index 425d4a282c7a..2a74ff5fdfdb 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h @@ -490,7 +490,8 @@ enum linear_light_scaling { // convert it in translation logic enum sharpen_policy { SHARPEN_ALWAYS = 0, SHARPEN_YUV = 1, - SHARPEN_RGB_FULLSCREEN_YUV = 2 + SHARPEN_RGB_FULLSCREEN_YUV = 2, + SHARPEN_FULLSCREEN_ALL = 3 }; enum scale_to_sharpness_policy { NO_SCALE_TO_SHARPNESS_ADJ = 0, From 07bfa9cdbf3cd2daadfaaba0601f126f45951ffa Mon Sep 17 00:00:00 2001 From: Leo Ma Date: Mon, 19 Aug 2024 13:25:27 -0400 Subject: [PATCH 362/655] drm/amd/display: Add HDMI DSC native YCbCr422 support [WHY && HOW] For some HDMI OVT timing, YCbCr422 encoding fails at the DSC bandwidth check. The root cause is our DSC policy for timing doesn't account for HDMI YCbCr422 native support. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Chris Park Signed-off-by: Leo Ma Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 4 ++-- drivers/gpu/drm/amd/display/dc/dc_dsc.h | 3 ++- drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c | 5 +++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index c0c61c03984c..83a31b97e96b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -1147,7 +1147,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, params[count].num_slices_v = aconnector->dsc_settings.dsc_num_slices_v; params[count].bpp_overwrite = aconnector->dsc_settings.dsc_bits_per_pixel; params[count].compression_possible = stream->sink->dsc_caps.dsc_dec_caps.is_dsc_supported; - dc_dsc_get_policy_for_timing(params[count].timing, 0, &dsc_policy); + dc_dsc_get_policy_for_timing(params[count].timing, 0, &dsc_policy, dc_link_get_highest_encoding_format(stream->link)); if (!dc_dsc_compute_bandwidth_range( stream->sink->ctx->dc->res_pool->dscs[0], stream->sink->ctx->dc->debug.dsc_min_slice_height_override, @@ -1681,7 +1681,7 @@ static bool is_dsc_common_config_possible(struct dc_stream_state *stream, { struct dc_dsc_policy dsc_policy = {0}; - dc_dsc_get_policy_for_timing(&stream->timing, 0, &dsc_policy); + dc_dsc_get_policy_for_timing(&stream->timing, 0, &dsc_policy, dc_link_get_highest_encoding_format(stream->link)); dc_dsc_compute_bandwidth_range(stream->sink->ctx->dc->res_pool->dscs[0], stream->sink->ctx->dc->debug.dsc_min_slice_height_override, dsc_policy.min_target_bpp * 16, diff --git a/drivers/gpu/drm/amd/display/dc/dc_dsc.h b/drivers/gpu/drm/amd/display/dc/dc_dsc.h index 2a5120ecf48b..9014c2409817 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dsc.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dsc.h @@ -101,7 +101,8 @@ uint32_t dc_dsc_stream_bandwidth_overhead_in_kbps( */ void dc_dsc_get_policy_for_timing(const struct dc_crtc_timing *timing, uint32_t max_target_bpp_limit_override_x16, - struct dc_dsc_policy *policy); + struct dc_dsc_policy *policy, + const enum dc_link_encoding_format link_encoding); void dc_dsc_policy_set_max_target_bpp_limit(uint32_t limit); diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c index 79c426425911..ebd5df1a36e8 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c @@ -883,7 +883,7 @@ static bool setup_dsc_config( memset(dsc_cfg, 0, sizeof(struct dc_dsc_config)); - dc_dsc_get_policy_for_timing(timing, options->max_target_bpp_limit_override_x16, &policy); + dc_dsc_get_policy_for_timing(timing, options->max_target_bpp_limit_override_x16, &policy, link_encoding); pic_width = timing->h_addressable + timing->h_border_left + timing->h_border_right; pic_height = timing->v_addressable + timing->v_border_top + timing->v_border_bottom; @@ -1173,7 +1173,8 @@ uint32_t dc_dsc_stream_bandwidth_overhead_in_kbps( void dc_dsc_get_policy_for_timing(const struct dc_crtc_timing *timing, uint32_t max_target_bpp_limit_override_x16, - struct dc_dsc_policy *policy) + struct dc_dsc_policy *policy, + const enum dc_link_encoding_format link_encoding) { uint32_t bpc = 0; From ce83ae29f93772d604b4ea73459fb17822d6a6b0 Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Tue, 3 Sep 2024 08:45:48 -0400 Subject: [PATCH 363/655] drm/amd/display: 3.2.300 - Add HDMI DSC native YCbCr422 support - Add fullscreen only sharpening policy - Restructure dpia link training - Disable SYMCLK32_LE root clock gating - Clean up dsc blocks in accelerated mode - Block dynamic IPS2 on DCN35 for incompatible FW versions - Add debug options to change sharpen policies - Block timing sync for different output formats in pmo - Enable DML2 override_det_buffer_size_kbytes - Add dmub hpd sense callback - Emulate Display Hotplug Hang - Implement new DPCD register handling - Use SDR white level to calculate matrix coefficients - Round calculated vtotal Reviewed-by: Alex Hung Signed-off-by: Aric Cyr Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 133cac4d9fc4..e659f4fed19f 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -55,7 +55,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.299" +#define DC_VER "3.2.300" #define MAX_SURFACES 3 #define MAX_PLANES 6 From ff599ef6970ee000fa5bc38d02fa5ff5f3fc7575 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Thu, 29 Aug 2024 17:30:26 -0600 Subject: [PATCH 364/655] drm/amd/display: Check null pointer before dereferencing se [WHAT & HOW] se is null checked previously in the same function, indicating it might be null; therefore, it must be checked when used again. This fixes 1 FORWARD_NULL issue reported by Coverity. Acked-by: Alex Hung Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index eb7c7681bdd9..67812fbbb006 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1767,7 +1767,7 @@ bool dc_validate_boot_timing(const struct dc *dc, if (crtc_timing->pix_clk_100hz != pix_clk_100hz) return false; - if (!se->funcs->dp_get_pixel_format) + if (!se || !se->funcs->dp_get_pixel_format) return false; if (!se->funcs->dp_get_pixel_format( From b6499840cafca25175f43ebd601913bf31d06f16 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Thu, 29 Aug 2024 16:35:51 -0600 Subject: [PATCH 365/655] drm/amd/display: Remove always-false branches [WHAT & HOW] req128_c is always set to false and its branch is never taken. Similarly, MacroTileSizeBytes is set to either 256 or 65535 and it is never 4096 and it's branch is not taken. Therefore, their branches are removed. This fixes 3 DEADCODE issues reported by Coverity. Acked-by: Alex Hung Reviewed-by: Alvin Lee Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c | 3 --- .../amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c | 3 --- .../drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c | 9 --------- 3 files changed, 15 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c index e7019c95ba79..4fce64a030b6 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c @@ -313,9 +313,6 @@ static void handle_det_buf_split(struct display_mode_lib *mode_lib, if (swath_height_c > 0) log2_swath_height_c = dml_log2(swath_height_c); - - if (req128_c && log2_swath_height_c > 0) - log2_swath_height_c -= 1; } rq_param->dlg.rq_l.swath_height = 1 << log2_swath_height_l; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c index ae5251041728..3fa9a5da02f6 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c @@ -313,9 +313,6 @@ static void handle_det_buf_split(struct display_mode_lib *mode_lib, if (swath_height_c > 0) log2_swath_height_c = dml_log2(swath_height_c); - - if (req128_c && log2_swath_height_c > 0) - log2_swath_height_c -= 1; } rq_param->dlg.rq_l.swath_height = 1 << log2_swath_height_l; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c index 0b132ce1d2cd..2b275e680379 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c @@ -1924,15 +1924,6 @@ static unsigned int CalculateVMAndRowBytes( *PixelPTEReqWidth = 32768.0 / BytePerPixel; *PTERequestSize = 64; FractionOfPTEReturnDrop = 0; - } else if (MacroTileSizeBytes == 4096) { - PixelPTEReqHeightPTEs = 1; - *PixelPTEReqHeight = MacroTileHeight; - *PixelPTEReqWidth = 8 * *MacroTileWidth; - *PTERequestSize = 64; - if (ScanDirection != dm_vert) - FractionOfPTEReturnDrop = 0; - else - FractionOfPTEReturnDrop = 7.0 / 8; } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) { PixelPTEReqHeightPTEs = 16; *PixelPTEReqHeight = 16 * BlockHeight256Bytes; From f510dd5c210bf8cc22e4be48cbbda3cb754219f5 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Tue, 3 Sep 2024 10:10:44 -0400 Subject: [PATCH 366/655] drm/amd/display: Fix underflow when setting underscan on DCN401 [WHY & HOW] When underscan is set through xrandr, it causes the stream destination rect to change in a way it becomes complicated to handle the calculations for subvp. Since this is a corner case, disable subvp when underscan is set. Fix the existing check that is supposed to catch this corner case by adding a check based on the parameters in the stream Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Dillon Varone Reviewed-by: Rodrigo Siqueira Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index b0d9aed0f265..8697eac1e1f7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -858,7 +858,9 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm plane->immediate_flip = plane_state->flip_immediate; - plane->composition.rect_out_height_spans_vactive = plane_state->dst_rect.height >= stream->timing.v_addressable; + plane->composition.rect_out_height_spans_vactive = + plane_state->dst_rect.height >= stream->timing.v_addressable && + stream->dst.height >= stream->timing.v_addressable; } //TODO : Could be possibly moved to a common helper layer. From 4bdc5b504af7de1f649004cfdd37445d36db6703 Mon Sep 17 00:00:00 2001 From: Zhikai Zhai Date: Tue, 27 Aug 2024 14:06:01 +0800 Subject: [PATCH 367/655] drm/amd/display: Skip to enable dsc if it has been off [WHY] It makes DSC enable when we commit the stream which need keep power off, and then it will skip to disable DSC if pipe reset at this situation as power has been off. It may cause the DSC unexpected enable on the pipe with the next new stream which doesn't support DSC. [HOW] Check the DSC used on current pipe status when update stream. Skip to enable if it has been off. The operation enable DSC should happen when set power on. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Wenjing Liu Signed-off-by: Zhikai Zhai Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c | 14 ++++++++++++++ .../drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 13 +++++++++++++ 2 files changed, 27 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index a36e11606f90..2e8c9f738259 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -1032,6 +1032,20 @@ void dcn32_update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) struct dsc_config dsc_cfg; struct dsc_optc_config dsc_optc_cfg = {0}; enum optc_dsc_mode optc_dsc_mode; + struct dcn_dsc_state dsc_state = {0}; + + if (!dsc) { + DC_LOG_DSC("DSC is NULL for tg instance %d:", pipe_ctx->stream_res.tg->inst); + return; + } + + if (dsc->funcs->dsc_read_state) { + dsc->funcs->dsc_read_state(dsc, &dsc_state); + if (!dsc_state.dsc_fw_en) { + DC_LOG_DSC("DSC has been disabled for tg instance %d:", pipe_ctx->stream_res.tg->inst); + return; + } + } /* Enable DSC hw block */ dsc_cfg.pic_width = (stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index 479fd3e89e5a..bd309dbdf7b2 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -334,7 +334,20 @@ static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) struct dsc_config dsc_cfg; struct dsc_optc_config dsc_optc_cfg = {0}; enum optc_dsc_mode optc_dsc_mode; + struct dcn_dsc_state dsc_state = {0}; + if (!dsc) { + DC_LOG_DSC("DSC is NULL for tg instance %d:", pipe_ctx->stream_res.tg->inst); + return; + } + + if (dsc->funcs->dsc_read_state) { + dsc->funcs->dsc_read_state(dsc, &dsc_state); + if (!dsc_state.dsc_fw_en) { + DC_LOG_DSC("DSC has been disabled for tg instance %d:", pipe_ctx->stream_res.tg->inst); + return; + } + } /* Enable DSC hw block */ dsc_cfg.pic_width = (stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt; dsc_cfg.pic_height = stream->timing.v_addressable + stream->timing.v_border_top + stream->timing.v_border_bottom; From b74571a83fd3e50f804f090aae60c864d458187c Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Wed, 4 Sep 2024 15:58:25 -0400 Subject: [PATCH 368/655] drm/amd/display: Use full update for swizzle mode change [WHY & HOW] 1) We did linear/non linear transition properly long ago 2) We used that path to handle SystemDisplayEnable 3) We fixed a SystemDisplayEnable inability to fallback to passive by impacting the transition flow generically 4) AFMF later relied on the generic transition behavior Separating the two flows to make (3) non-generic is the best immediate coarse of action. DC can discern SSAMPO3 very easily from SDE. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Chris Park Signed-off-by: Charlene Liu Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 6 +++--- drivers/gpu/drm/amd/display/dc/dc.h | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 67812fbbb006..a1652130e4be 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2376,7 +2376,7 @@ static bool is_surface_in_context( return false; } -static enum surface_update_type get_plane_info_update_type(const struct dc_surface_update *u) +static enum surface_update_type get_plane_info_update_type(const struct dc *dc, const struct dc_surface_update *u) { union surface_update_flags *update_flags = &u->surface->update_flags; enum surface_update_type update_type = UPDATE_TYPE_FAST; @@ -2455,7 +2455,7 @@ static enum surface_update_type get_plane_info_update_type(const struct dc_surfa /* todo: below are HW dependent, we should add a hook to * DCE/N resource and validated there. */ - if (u->plane_info->tiling_info.gfx9.swizzle != DC_SW_LINEAR) { + if (!dc->debug.skip_full_updated_if_possible) { /* swizzled mode requires RQ to be setup properly, * thus need to run DML to calculate RQ settings */ @@ -2547,7 +2547,7 @@ static enum surface_update_type det_surface_update(const struct dc *dc, update_flags->raw = 0; // Reset all flags - type = get_plane_info_update_type(u); + type = get_plane_info_update_type(dc, u); elevate_update_type(&overall_type, type); type = get_scaling_info_update_type(dc, u); diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index e659f4fed19f..78ebe636389e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1060,6 +1060,7 @@ struct dc_debug_options { bool enable_ips_visual_confirm; unsigned int sharpen_policy; unsigned int scale_to_sharpness_policy; + bool skip_full_updated_if_possible; }; From 327e62f47eb57ae5ff63de82b0815557104e439a Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 13 Sep 2024 13:00:39 -0500 Subject: [PATCH 369/655] drm/amd/display: Validate backlight caps are sane Currently amdgpu takes backlight caps provided by the ACPI tables on systems as is. If the firmware sets maximums that are too low this means that users don't get a good experience. To avoid having to maintain a quirk list of such systems, do a sanity check on the values. Check that the spread is at least half of the values that amdgpu would use if no ACPI table was found and if not use the amdgpu defaults. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3020 Reviewed-by: Harry Wentland Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 4ab329e00714..08bcb47d7074 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4469,6 +4469,7 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) #define AMDGPU_DM_DEFAULT_MIN_BACKLIGHT 12 #define AMDGPU_DM_DEFAULT_MAX_BACKLIGHT 255 +#define AMDGPU_DM_MIN_SPREAD ((AMDGPU_DM_DEFAULT_MAX_BACKLIGHT - AMDGPU_DM_DEFAULT_MIN_BACKLIGHT) / 2) #define AUX_BL_DEFAULT_TRANSITION_TIME_MS 50 static void amdgpu_dm_update_backlight_caps(struct amdgpu_display_manager *dm, @@ -4483,6 +4484,21 @@ static void amdgpu_dm_update_backlight_caps(struct amdgpu_display_manager *dm, return; amdgpu_acpi_get_backlight_caps(&caps); + + /* validate the firmware value is sane */ + if (caps.caps_valid) { + int spread = caps.max_input_signal - caps.min_input_signal; + + if (caps.max_input_signal > AMDGPU_DM_DEFAULT_MAX_BACKLIGHT || + caps.min_input_signal < AMDGPU_DM_DEFAULT_MIN_BACKLIGHT || + spread > AMDGPU_DM_DEFAULT_MAX_BACKLIGHT || + spread < AMDGPU_DM_MIN_SPREAD) { + DRM_DEBUG_KMS("DM: Invalid backlight caps: min=%d, max=%d\n", + caps.min_input_signal, caps.max_input_signal); + caps.caps_valid = false; + } + } + if (caps.caps_valid) { dm->backlight_caps[bl_idx].caps_valid = true; if (caps.aux_support) From 199888aa25b3a3315360224bda9134a9b58c9306 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Thu, 5 Sep 2024 14:22:30 -0400 Subject: [PATCH 370/655] drm/amd/display: Update IPS default mode for DCN35/DCN351 [WHY] RCG state of IPX in idle is more stable for DCN351 and some variants of DCN35 than IPS2. [HOW] Rework dm_get_default_ips_mode() to specify default per ASIC and update DCN35/DCN351 defaults accordingly. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Sun peng Li Signed-off-by: Roman Li Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 50 ++++++++++++------- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 08bcb47d7074..94c8d9966ec3 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1771,25 +1771,41 @@ static struct dml2_soc_bb *dm_dmub_get_vbios_bounding_box(struct amdgpu_device * static enum dmub_ips_disable_type dm_get_default_ips_mode( struct amdgpu_device *adev) { - /* - * On DCN35 systems with Z8 enabled, it's possible for IPS2 + Z8 to - * cause a hard hang. A fix exists for newer PMFW. - * - * As a workaround, for non-fixed PMFW, force IPS1+RCG as the deepest - * IPS state in all cases, except for s0ix and all displays off (DPMS), - * where IPS2 is allowed. - * - * When checking pmfw version, use the major and minor only. - */ - if (amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(3, 5, 0) && - (adev->pm.fw_version & 0x00FFFF00) < 0x005D6300) - return DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; + enum dmub_ips_disable_type ret = DMUB_IPS_ENABLE; - if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 5, 0)) - return DMUB_IPS_ENABLE; + switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) { + case IP_VERSION(3, 5, 0): + /* + * On DCN35 systems with Z8 enabled, it's possible for IPS2 + Z8 to + * cause a hard hang. A fix exists for newer PMFW. + * + * As a workaround, for non-fixed PMFW, force IPS1+RCG as the deepest + * IPS state in all cases, except for s0ix and all displays off (DPMS), + * where IPS2 is allowed. + * + * When checking pmfw version, use the major and minor only. + */ + if ((adev->pm.fw_version & 0x00FFFF00) < 0x005D6300) + ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; + else if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(11, 5, 0)) + /* + * Other ASICs with DCN35 that have residency issues with + * IPS2 in idle. + * We want them to use IPS2 only in display off cases. + */ + ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; + break; + case IP_VERSION(3, 5, 1): + ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; + break; + default: + /* ASICs older than DCN35 do not have IPSs */ + if (amdgpu_ip_version(adev, DCE_HWIP, 0) < IP_VERSION(3, 5, 0)) + ret = DMUB_IPS_DISABLE_ALL; + break; + } - /* ASICs older than DCN35 do not have IPSs */ - return DMUB_IPS_DISABLE_ALL; + return ret; } static int amdgpu_dm_init(struct amdgpu_device *adev) From fa8a4d3659d0c1ad73d5f59b2e0a6d408de5b317 Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Thu, 5 Sep 2024 17:28:12 -0400 Subject: [PATCH 371/655] drm/amd/display: Clear cached watermark after resume [WHY] Driver could skip program watermarks when resume from S0i3/S4. [HOW] Clear the cached one first to make sure new value gets applied. Reviewed-by: Alvin Lee Reviewed-by: Roman Li Signed-off-by: Charlene Liu Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c index 6293173ba2b9..5eb3da8d5206 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c @@ -545,6 +545,7 @@ static void hubbub35_init(struct hubbub *hubbub) DCHUBBUB_ARB_MAX_REQ_OUTSTAND, 256, DCHUBBUB_ARB_MIN_REQ_OUTSTAND, 256); + memset(&hubbub2->watermarks.a.cstate_pstate, 0, sizeof(hubbub2->watermarks.a.cstate_pstate)); } /*static void hubbub35_set_request_limit(struct hubbub *hubbub, From 06c9aeb57fe894e6e442cd66870cd3e863bbf08c Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Sun, 8 Sep 2024 21:40:21 -0400 Subject: [PATCH 372/655] drm/amd/display: 3.2.301 - Clear cached watermark after resume - Update IPS default mode for DCN35/DCN351 - Use full update for swizzle mode change - Skip to enable dsc if it has been off - Fix underflow when setting underscan on DCN401 - Remove always-false branches - Check null pointer before dereferencing se Acked-by: Alex Hung Signed-off-by: Aric Cyr Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 78ebe636389e..3992ad73165b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -55,7 +55,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.300" +#define DC_VER "3.2.301" #define MAX_SURFACES 3 #define MAX_PLANES 6 From ef126c06a98bde1a41303970eb0fc0ac33c3cc02 Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Mon, 22 Jul 2024 19:45:11 +0800 Subject: [PATCH 373/655] drm/amdgpu: Fix get each xcp macro Fix get each xcp macro to loop over each partition correctly Fixes: 4bdca2057933 ("drm/amdgpu: Add utility functions for xcp") Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h index 90138bc5f03d..32775260556f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h @@ -180,6 +180,6 @@ amdgpu_get_next_xcp(struct amdgpu_xcp_mgr *xcp_mgr, int *from) #define for_each_xcp(xcp_mgr, xcp, i) \ for (i = 0, xcp = amdgpu_get_next_xcp(xcp_mgr, &i); xcp; \ - xcp = amdgpu_get_next_xcp(xcp_mgr, &i)) + ++i, xcp = amdgpu_get_next_xcp(xcp_mgr, &i)) #endif From 42ac749d5b8bf78b347ac8a52eb15cc397b157a0 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Wed, 11 Sep 2024 13:49:51 +0530 Subject: [PATCH 374/655] drm/amdgpu: Fix XCP instance mask calculation Fix instance mask calculation for VCN IP. There are cases where VCN instance could be shared across partitions. Fix here so that other blocks don't need to check for any shared instances based on partition mode. Signed-off-by: Lijo Lazar Reviewed-by: Asad Kamal Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c | 32 +++++++++------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c index 26e2188101e7..5e8833e4fed2 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c @@ -94,8 +94,6 @@ static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev, case AMDGPU_RING_TYPE_VCN_ENC: case AMDGPU_RING_TYPE_VCN_JPEG: ip_blk = AMDGPU_XCP_VCN; - if (aqua_vanjaram_xcp_vcn_shared(adev)) - inst_mask = 1 << (inst_idx * 2); break; default: DRM_ERROR("Not support ring type %d!", ring->funcs->type); @@ -105,6 +103,8 @@ static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev, for (xcp_id = 0; xcp_id < adev->xcp_mgr->num_xcps; xcp_id++) { if (adev->xcp_mgr->xcp[xcp_id].ip[ip_blk].inst_mask & inst_mask) { ring->xcp_id = xcp_id; + dev_dbg(adev->dev, "ring:%s xcp_id :%u", ring->name, + ring->xcp_id); if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) adev->gfx.enforce_isolation[xcp_id].xcp_id = xcp_id; break; @@ -394,38 +394,31 @@ static int __aqua_vanjaram_get_xcp_ip_info(struct amdgpu_xcp_mgr *xcp_mgr, int x struct amdgpu_xcp_ip *ip) { struct amdgpu_device *adev = xcp_mgr->adev; + int num_sdma, num_vcn, num_shared_vcn, num_xcp; int num_xcc_xcp, num_sdma_xcp, num_vcn_xcp; - int num_sdma, num_vcn; num_sdma = adev->sdma.num_instances; num_vcn = adev->vcn.num_vcn_inst; + num_shared_vcn = 1; + + num_xcc_xcp = adev->gfx.num_xcc_per_xcp; + num_xcp = NUM_XCC(adev->gfx.xcc_mask) / num_xcc_xcp; switch (xcp_mgr->mode) { case AMDGPU_SPX_PARTITION_MODE: - num_sdma_xcp = num_sdma; - num_vcn_xcp = num_vcn; - break; case AMDGPU_DPX_PARTITION_MODE: - num_sdma_xcp = num_sdma / 2; - num_vcn_xcp = num_vcn / 2; - break; case AMDGPU_TPX_PARTITION_MODE: - num_sdma_xcp = num_sdma / 3; - num_vcn_xcp = num_vcn / 3; - break; case AMDGPU_QPX_PARTITION_MODE: - num_sdma_xcp = num_sdma / 4; - num_vcn_xcp = num_vcn / 4; - break; case AMDGPU_CPX_PARTITION_MODE: - num_sdma_xcp = 2; - num_vcn_xcp = num_vcn ? 1 : 0; + num_sdma_xcp = DIV_ROUND_UP(num_sdma, num_xcp); + num_vcn_xcp = DIV_ROUND_UP(num_vcn, num_xcp); break; default: return -EINVAL; } - num_xcc_xcp = adev->gfx.num_xcc_per_xcp; + if (num_vcn && num_xcp > num_vcn) + num_shared_vcn = num_xcp / num_vcn; switch (ip_id) { case AMDGPU_XCP_GFXHUB: @@ -441,7 +434,8 @@ static int __aqua_vanjaram_get_xcp_ip_info(struct amdgpu_xcp_mgr *xcp_mgr, int x ip->ip_funcs = &sdma_v4_4_2_xcp_funcs; break; case AMDGPU_XCP_VCN: - ip->inst_mask = XCP_INST_MASK(num_vcn_xcp, xcp_id); + ip->inst_mask = + XCP_INST_MASK(num_vcn_xcp, xcp_id / num_shared_vcn); /* TODO : Assign IP funcs */ break; default: From 54b86443fd4437c051aefd3f462cfff4defd420c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 5 Jun 2024 16:26:22 +0200 Subject: [PATCH 375/655] drm/amdgpu: explicitely set the AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of having that in the amdgpu_bo_pin() function applied for all pinned BOs. Signed-off-by: Christian König Acked-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c | 1 + drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 2 ++ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 1 + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c | 1 + 9 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 046d4c4e0299..b119d27271c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -233,6 +233,7 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc, } if (!adev->enable_virtual_display) { + new_abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; r = amdgpu_bo_pin(new_abo, amdgpu_display_supported_domains(adev, new_abo->flags)); if (unlikely(r != 0)) { @@ -1759,6 +1760,7 @@ int amdgpu_display_resume_helper(struct amdgpu_device *adev) r = amdgpu_bo_reserve(aobj, true); if (r == 0) { + aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM); if (r != 0) dev_err(adev->dev, "Failed to pin cursor BO (%d)\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index a987f671b1d5..d62df3b5a014 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -942,7 +942,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, */ int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain) { - bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; return amdgpu_bo_pin_restricted(bo, domain, 0, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index e5f508d34ed8..d4c2afafbb73 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -338,6 +338,7 @@ static int amdgpu_vkms_prepare_fb(struct drm_plane *plane, else domain = AMDGPU_GEM_DOMAIN_VRAM; + rbo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; r = amdgpu_bo_pin(rbo, domain); if (unlikely(r != 0)) { if (r != -ERESTARTSYS) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 742adbc460c9..70c1399f738d 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -1881,6 +1881,7 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc, return r; if (!atomic) { + abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM); if (unlikely(r != 0)) { amdgpu_bo_unreserve(abo); @@ -2401,6 +2402,7 @@ static int dce_v10_0_crtc_cursor_set2(struct drm_crtc *crtc, return ret; } + aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM); amdgpu_bo_unreserve(aobj); if (ret) { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 8d46ebadfa46..f154c24499c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -1931,6 +1931,7 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc, return r; if (!atomic) { + abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM); if (unlikely(r != 0)) { amdgpu_bo_unreserve(abo); @@ -2485,6 +2486,7 @@ static int dce_v11_0_crtc_cursor_set2(struct drm_crtc *crtc, return ret; } + aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM); amdgpu_bo_unreserve(aobj); if (ret) { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index f08dc6a3886f..a7fcb135827f 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -1861,6 +1861,7 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, return r; if (!atomic) { + abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM); if (unlikely(r != 0)) { amdgpu_bo_unreserve(abo); @@ -2321,6 +2322,7 @@ static int dce_v6_0_crtc_cursor_set2(struct drm_crtc *crtc, return ret; } + aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM); amdgpu_bo_unreserve(aobj); if (ret) { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index a6a3adf2ae13..77ac3f114d24 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -1828,6 +1828,7 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc, return r; if (!atomic) { + abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM); if (unlikely(r != 0)) { amdgpu_bo_unreserve(abo); @@ -2320,6 +2321,7 @@ static int dce_v8_0_crtc_cursor_set2(struct drm_crtc *crtc, return ret; } + aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM); amdgpu_bo_unreserve(aobj); if (ret) { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index 25f63b2e7a8e..495e3cd70426 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -961,6 +961,7 @@ static int amdgpu_dm_plane_helper_prepare_fb(struct drm_plane *plane, else domain = AMDGPU_GEM_DOMAIN_VRAM; + rbo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; r = amdgpu_bo_pin(rbo, domain); if (unlikely(r != 0)) { if (r != -ERESTARTSYS) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c index 08c494a7a21b..0d5fefb0f591 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c @@ -114,6 +114,7 @@ static int amdgpu_dm_wb_prepare_job(struct drm_writeback_connector *wb_connector domain = amdgpu_display_supported_domains(adev, rbo->flags); + rbo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; r = amdgpu_bo_pin(rbo, domain); if (unlikely(r != 0)) { if (r != -ERESTARTSYS) From f2be7b39e43893ab5361115de2b95e7c5c86f190 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 5 Jun 2024 16:34:49 +0200 Subject: [PATCH 376/655] drm/amdgpu: remove amdgpu_pin_restricted() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We haven't used the functionality to pin BOs in a certain range at all while the driver existed. Just nuke it. Signed-off-by: Christian König Acked-by: Lijo Lazar Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 56 ++----------------- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 2 - 3 files changed, 6 insertions(+), 54 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 4afef5b46c7d..ce5ca304dba9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1499,7 +1499,7 @@ static int amdgpu_amdkfd_gpuvm_pin_bo(struct amdgpu_bo *bo, u32 domain) } } - ret = amdgpu_bo_pin_restricted(bo, domain, 0, 0); + ret = amdgpu_bo_pin(bo, domain); if (ret) pr_err("Error in Pinning BO to domain: %d\n", domain); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index d62df3b5a014..5bff4249b357 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -809,29 +809,22 @@ void amdgpu_bo_unref(struct amdgpu_bo **bo) } /** - * amdgpu_bo_pin_restricted - pin an &amdgpu_bo buffer object + * amdgpu_bo_pin - pin an &amdgpu_bo buffer object * @bo: &amdgpu_bo buffer object to be pinned * @domain: domain to be pinned to - * @min_offset: the start of requested address range - * @max_offset: the end of requested address range * - * Pins the buffer object according to requested domain and address range. If - * the memory is unbound gart memory, binds the pages into gart table. Adjusts - * pin_count and pin_size accordingly. + * Pins the buffer object according to requested domain. If the memory is + * unbound gart memory, binds the pages into gart table. Adjusts pin_count and + * pin_size accordingly. * * Pinning means to lock pages in memory along with keeping them at a fixed * offset. It is required when a buffer can not be moved, for example, when * a display buffer is being scanned out. * - * Compared with amdgpu_bo_pin(), this function gives more flexibility on - * where to pin a buffer if there are specific restrictions on where a buffer - * must be located. - * * Returns: * 0 for success or a negative error code on failure. */ -int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, - u64 min_offset, u64 max_offset) +int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct ttm_operation_ctx ctx = { false, false }; @@ -840,9 +833,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) return -EPERM; - if (WARN_ON_ONCE(min_offset > max_offset)) - return -EINVAL; - /* Check domain to be pinned to against preferred domains */ if (bo->preferred_domains & domain) domain = bo->preferred_domains & domain; @@ -868,14 +858,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, return -EINVAL; ttm_bo_pin(&bo->tbo); - - if (max_offset != 0) { - u64 domain_start = amdgpu_ttm_domain_start(adev, - mem_type); - WARN_ON_ONCE(max_offset < - (amdgpu_bo_gpu_offset(bo) - domain_start)); - } - return 0; } @@ -892,17 +874,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; amdgpu_bo_placement_from_domain(bo, domain); for (i = 0; i < bo->placement.num_placement; i++) { - unsigned int fpfn, lpfn; - - fpfn = min_offset >> PAGE_SHIFT; - lpfn = max_offset >> PAGE_SHIFT; - - if (fpfn > bo->placements[i].fpfn) - bo->placements[i].fpfn = fpfn; - if (!bo->placements[i].lpfn || - (lpfn && lpfn < bo->placements[i].lpfn)) - bo->placements[i].lpfn = lpfn; - if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && bo->placements[i].mem_type == TTM_PL_VRAM) bo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS; @@ -928,23 +899,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, return r; } -/** - * amdgpu_bo_pin - pin an &amdgpu_bo buffer object - * @bo: &amdgpu_bo buffer object to be pinned - * @domain: domain to be pinned to - * - * A simple wrapper to amdgpu_bo_pin_restricted(). - * Provides a simpler API for buffers that do not have any strict restrictions - * on where a buffer must be located. - * - * Returns: - * 0 for success or a negative error code on failure. - */ -int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain) -{ - return amdgpu_bo_pin_restricted(bo, domain, 0, 0); -} - /** * amdgpu_bo_unpin - unpin an &amdgpu_bo buffer object * @bo: &amdgpu_bo buffer object to be unpinned diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index e42c34a3289d..717e47b46167 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -304,8 +304,6 @@ void amdgpu_bo_kunmap(struct amdgpu_bo *bo); struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo); void amdgpu_bo_unref(struct amdgpu_bo **bo); int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain); -int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, - u64 min_offset, u64 max_offset); void amdgpu_bo_unpin(struct amdgpu_bo *bo); int amdgpu_bo_init(struct amdgpu_device *adev); void amdgpu_bo_fini(struct amdgpu_device *adev); From 375b035f689735fd7a87ff31ccac3a42717252bf Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 13 Sep 2024 16:22:01 -0400 Subject: [PATCH 377/655] drm/amdgpu/bios: split vbios fetching between APU and dGPU We need some different logic for dGPUs and the APU path can be simplified because there are some methods which are never used on APUs. This also fixes a regression on some older APUs causing the driver to fetch the unpatched ROM image rather than the patched image. Fixes: 9c081c11c621 ("drm/amdgpu: Reorder to read EFI exported ROM first") Reviewed-by: George Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c | 47 +++++++++++++++++++++++- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c index 42e64bce661e..e8f62d718167 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c @@ -414,7 +414,36 @@ static inline bool amdgpu_acpi_vfct_bios(struct amdgpu_device *adev) } #endif -bool amdgpu_get_bios(struct amdgpu_device *adev) +static bool amdgpu_get_bios_apu(struct amdgpu_device *adev) +{ + if (amdgpu_acpi_vfct_bios(adev)) { + dev_info(adev->dev, "Fetched VBIOS from VFCT\n"); + goto success; + } + + if (igp_read_bios_from_vram(adev)) { + dev_info(adev->dev, "Fetched VBIOS from VRAM BAR\n"); + goto success; + } + + if (amdgpu_read_bios(adev)) { + dev_info(adev->dev, "Fetched VBIOS from ROM BAR\n"); + goto success; + } + + if (amdgpu_read_platform_bios(adev)) { + dev_info(adev->dev, "Fetched VBIOS from platform\n"); + goto success; + } + + dev_err(adev->dev, "Unable to locate a BIOS ROM\n"); + return false; + +success: + return true; +} + +static bool amdgpu_get_bios_dgpu(struct amdgpu_device *adev) { if (amdgpu_atrm_get_bios(adev)) { dev_info(adev->dev, "Fetched VBIOS from ATRM\n"); @@ -455,10 +484,24 @@ bool amdgpu_get_bios(struct amdgpu_device *adev) return false; success: - adev->is_atom_fw = adev->asic_type >= CHIP_VEGA10; return true; } +bool amdgpu_get_bios(struct amdgpu_device *adev) +{ + bool found; + + if (adev->flags & AMD_IS_APU) + found = amdgpu_get_bios_apu(adev); + else + found = amdgpu_get_bios_dgpu(adev); + + if (found) + adev->is_atom_fw = adev->asic_type >= CHIP_VEGA10; + + return found; +} + /* helper function for soc15 and onwards to read bios from rom */ bool amdgpu_soc15_read_bios_from_rom(struct amdgpu_device *adev, u8 *bios, u32 length_bytes) From e7d4e1438533abe448813bdc45691f9c230aa307 Mon Sep 17 00:00:00 2001 From: Tobias Jakobi Date: Mon, 16 Sep 2024 14:54:05 +0200 Subject: [PATCH 378/655] drm/amd/display: handle nulled pipe context in DCE110's set_drr() As set_drr() is called from IRQ context, it can happen that the pipe context has been nulled by dc_state_destruct(). Apply the same protection here that is already present for dcn35_set_drr() and dcn10_set_drr(). I.e. fetch the tg pointer first (to avoid a race with dc_state_destruct()), and then check the local copy before using it. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3142 Fixes: 06ad7e164256 ("drm/amd/display: Destroy DC context while keeping DML and DML2") Acked-by: Alex Deucher Signed-off-by: Tobias Jakobi Signed-off-by: Alex Deucher --- .../amd/display/dc/hwss/dce110/dce110_hwseq.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index aa7479b31898..4fbed0298adf 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -2096,13 +2096,20 @@ static void set_drr(struct pipe_ctx **pipe_ctx, * as well. */ for (i = 0; i < num_pipes; i++) { - pipe_ctx[i]->stream_res.tg->funcs->set_drr( - pipe_ctx[i]->stream_res.tg, ¶ms); + /* dc_state_destruct() might null the stream resources, so fetch tg + * here first to avoid a race condition. The lifetime of the pointee + * itself (the timing_generator object) is not a problem here. + */ + struct timing_generator *tg = pipe_ctx[i]->stream_res.tg; - if (adjust.v_total_max != 0 && adjust.v_total_min != 0) - pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control( - pipe_ctx[i]->stream_res.tg, - event_triggers, num_frames); + if ((tg != NULL) && tg->funcs) { + if (tg->funcs->set_drr) + tg->funcs->set_drr(tg, ¶ms); + if (adjust.v_total_max != 0 && adjust.v_total_min != 0) + if (tg->funcs->set_static_screen_control) + tg->funcs->set_static_screen_control( + tg, event_triggers, num_frames); + } } } From 042658d17a54c9dc8c028986dfbde49f4aa01871 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 17 Sep 2024 08:53:21 -0400 Subject: [PATCH 379/655] drm/amdgpu: clean up vbios fetching code After splitting the logic between APU and dGPU, clean up some of the APU and dGPU specific logic that no longer applied. Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c index e8f62d718167..46bf623919d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c @@ -284,10 +284,6 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev) acpi_status status; bool found = false; - /* ATRM is for the discrete card only */ - if (adev->flags & AMD_IS_APU) - return false; - /* ATRM is for on-platform devices only */ if (dev_is_removable(&adev->pdev->dev)) return false; @@ -343,11 +339,8 @@ static inline bool amdgpu_atrm_get_bios(struct amdgpu_device *adev) static bool amdgpu_read_disabled_bios(struct amdgpu_device *adev) { - if (adev->flags & AMD_IS_APU) - return igp_read_bios_from_vram(adev); - else - return (!adev->asic_funcs || !adev->asic_funcs->read_disabled_bios) ? - false : amdgpu_asic_read_disabled_bios(adev); + return (!adev->asic_funcs || !adev->asic_funcs->read_disabled_bios) ? + false : amdgpu_asic_read_disabled_bios(adev); } #ifdef CONFIG_ACPI @@ -455,11 +448,6 @@ static bool amdgpu_get_bios_dgpu(struct amdgpu_device *adev) goto success; } - if (igp_read_bios_from_vram(adev)) { - dev_info(adev->dev, "Fetched VBIOS from VRAM BAR\n"); - goto success; - } - if (amdgpu_read_platform_bios(adev)) { dev_info(adev->dev, "Fetched VBIOS from platform\n"); goto success; From 7b6df1d73290961ff0a00fd0022f28dd19e37181 Mon Sep 17 00:00:00 2001 From: Frank Min Date: Wed, 4 Sep 2024 10:50:33 +0800 Subject: [PATCH 380/655] drm/amdgpu: update golden regs for gfx12 update golden regs for gfx12 Signed-off-by: Frank Min Reviewed-by: Likun Gao Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.11.x --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index d1357c01eb39..47b47d21f464 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -202,12 +202,16 @@ static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_12[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ) }; -static const struct soc15_reg_golden golden_settings_gc_12_0[] = { +static const struct soc15_reg_golden golden_settings_gc_12_0_rev0[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_MEM_CONFIG, 0x0000000f, 0x0000000f), SOC15_REG_GOLDEN_VALUE(GC, 0, regCB_HW_CONTROL_1, 0x03000000, 0x03000000), SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL5, 0x00000070, 0x00000020) }; +static const struct soc15_reg_golden golden_settings_gc_12_0[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_MEM_CONFIG, 0x00008000, 0x00008000), +}; + #define DEFAULT_SH_MEM_CONFIG \ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ @@ -3495,10 +3499,14 @@ static void gfx_v12_0_init_golden_registers(struct amdgpu_device *adev) switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(12, 0, 0): case IP_VERSION(12, 0, 1): + soc15_program_register_sequence(adev, + golden_settings_gc_12_0, + (const u32)ARRAY_SIZE(golden_settings_gc_12_0)); + if (adev->rev_id == 0) soc15_program_register_sequence(adev, - golden_settings_gc_12_0, - (const u32)ARRAY_SIZE(golden_settings_gc_12_0)); + golden_settings_gc_12_0_rev0, + (const u32)ARRAY_SIZE(golden_settings_gc_12_0_rev0)); break; default: break; From d5a29e6a61028887bb8480e7c4af3547d6f3862d Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Tue, 17 Sep 2024 18:39:07 +0530 Subject: [PATCH 381/655] drm/amd/display: Fix kdoc entry for 'tps' in 'dc_process_dmub_dpia_set_tps_notification' Correct the parameter descriptor for the function `dc_process_dmub_dpia_set_tps_notification` to match the actual parameters used. Fixes the below with gcc W=1: drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc.c:5768: warning: Function parameter or struct member 'tps' not described in 'dc_process_dmub_dpia_set_tps_notification' drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc.c:5768: warning: Excess function parameter 'ts' description in 'dc_process_dmub_dpia_set_tps_notification' Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index a1652130e4be..5c39390ecbd5 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -5760,7 +5760,7 @@ enum dc_status dc_process_dmub_set_mst_slots(const struct dc *dc, * * @dc: [in] dc structure * @link_index: [in] link index - * @ts: [in] request tps + * @tps: [in] request tps * * Submits set_tps_notification command to dmub via inbox message */ From 87d749a6aab73d8069d0345afaa98297816cb220 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Sun, 15 Sep 2024 14:28:37 -0500 Subject: [PATCH 382/655] drm/amd/display: Allow backlight to go below `AMDGPU_DM_DEFAULT_MIN_BACKLIGHT` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The issue with panel power savings compatibility below `AMDGPU_DM_DEFAULT_MIN_BACKLIGHT` happens at `AMDGPU_DM_DEFAULT_MIN_BACKLIGHT` as well. That issue will be fixed separately, so don't prevent the backlight brightness from going that low. Cc: Harry Wentland Cc: Thomas Weißschuh Link: https://lore.kernel.org/amd-gfx/be04226a-a9e3-4a45-a83b-6d263c6557d8@t-8ch.de/T/#m400dee4e2fc61fe9470334d20a7c8c89c9aef44f Reviewed-by: Harry Wentland Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 94c8d9966ec3..6e79028c5d78 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4506,7 +4506,7 @@ static void amdgpu_dm_update_backlight_caps(struct amdgpu_display_manager *dm, int spread = caps.max_input_signal - caps.min_input_signal; if (caps.max_input_signal > AMDGPU_DM_DEFAULT_MAX_BACKLIGHT || - caps.min_input_signal < AMDGPU_DM_DEFAULT_MIN_BACKLIGHT || + caps.min_input_signal < 0 || spread > AMDGPU_DM_DEFAULT_MAX_BACKLIGHT || spread < AMDGPU_DM_MIN_SPREAD) { DRM_DEBUG_KMS("DM: Invalid backlight caps: min=%d, max=%d\n", From 6dcba0975d39b30be65dd038fed69e1aa111c73a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 11 Jul 2024 14:39:43 +0200 Subject: [PATCH 383/655] drm/amdgpu: use GEM references instead of TTMs v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of a TTM reference grab a GEM reference whenever necessary. v2: fix typo in amdgpu_bo_unref pointed out by Vitaly, initialize the GEM funcs for kernel allocations as well. Signed-off-by: Christian König Reviewed-by: Daniel Vetter (v1) Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 13 +++++-------- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 8 +++----- 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 0e617dff8765..1a5df8b94661 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -43,8 +43,6 @@ #include "amdgpu_hmm.h" #include "amdgpu_xgmi.h" -static const struct drm_gem_object_funcs amdgpu_gem_object_funcs; - static vm_fault_t amdgpu_gem_fault(struct vm_fault *vmf) { struct ttm_buffer_object *bo = vmf->vma->vm_private_data; @@ -87,11 +85,11 @@ static const struct vm_operations_struct amdgpu_gem_vm_ops = { static void amdgpu_gem_object_free(struct drm_gem_object *gobj) { - struct amdgpu_bo *robj = gem_to_amdgpu_bo(gobj); + struct amdgpu_bo *aobj = gem_to_amdgpu_bo(gobj); - if (robj) { - amdgpu_hmm_unregister(robj); - amdgpu_bo_unref(&robj); + if (aobj) { + amdgpu_hmm_unregister(aobj); + ttm_bo_put(&aobj->tbo); } } @@ -126,7 +124,6 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, bo = &ubo->bo; *obj = &bo->tbo.base; - (*obj)->funcs = &amdgpu_gem_object_funcs; return 0; } @@ -295,7 +292,7 @@ static int amdgpu_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_str return drm_gem_ttm_mmap(obj, vma); } -static const struct drm_gem_object_funcs amdgpu_gem_object_funcs = { +const struct drm_gem_object_funcs amdgpu_gem_object_funcs = { .free = amdgpu_gem_object_free, .open = amdgpu_gem_object_open, .close = amdgpu_gem_object_close, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h index f30264782ba2..3a8f57900a3a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h @@ -33,6 +33,8 @@ #define AMDGPU_GEM_DOMAIN_MAX 0x3 #define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, tbo.base) +extern const struct drm_gem_object_funcs amdgpu_gem_object_funcs; + unsigned long amdgpu_gem_timeout(uint64_t timeout_ns); /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 5bff4249b357..44819cdba7fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -564,6 +564,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev, if (bo == NULL) return -ENOMEM; drm_gem_private_object_init(adev_to_drm(adev), &bo->tbo.base, size); + bo->tbo.base.funcs = &amdgpu_gem_object_funcs; bo->vm_bo = NULL; bo->preferred_domains = bp->preferred_domain ? bp->preferred_domain : bp->domain; @@ -786,7 +787,7 @@ struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo) if (bo == NULL) return NULL; - ttm_bo_get(&bo->tbo); + drm_gem_object_get(&bo->tbo.base); return bo; } @@ -798,13 +799,10 @@ struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo) */ void amdgpu_bo_unref(struct amdgpu_bo **bo) { - struct ttm_buffer_object *tbo; - if ((*bo) == NULL) return; - tbo = &((*bo)->tbo); - ttm_bo_put(tbo); + drm_gem_object_put(&(*bo)->tbo.base); *bo = NULL; } From 856265caa94a3c78feaa23ec1acd799fe1989201 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 16 Sep 2024 10:52:24 -0400 Subject: [PATCH 384/655] drm/amdgpu/mes11: reduce timeout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The firmware timeout is 2s. Reduce the driver timeout to 2.1 seconds to avoid back pressure on queue submissions. Link: https://gitlab.freedesktop.org/drm/amd/-/issues/3627 Fixes: f7c161a4c250 ("drm/amdgpu: increase mes submission timeout") Acked-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index ee91ff9e52a2..231a3d490ea8 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -161,7 +161,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, int api_status_off) { union MESAPI__QUERY_MES_STATUS mes_status_pkt; - signed long timeout = 3000000; /* 3000 ms */ + signed long timeout = 2100000; /* 2100 ms */ struct amdgpu_device *adev = mes->adev; struct amdgpu_ring *ring = &mes->ring[0]; struct MES_API_STATUS *api_status; From 84f76408abe989809de19d02e476b044fd985adc Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 18 Sep 2024 09:37:31 -0400 Subject: [PATCH 385/655] drm/amdgpu/mes12: reduce timeout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The firmware timeout is 2s. Reduce the driver timeout to 2.1 seconds to avoid back pressure on queue submissions. Fixes: 94b51a3d01ed ("drm/amdgpu/mes12: increase mes submission timeout") Acked-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.11.x --- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index ef05a4116230..186f77813397 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -146,7 +146,7 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, int api_status_off) { union MESAPI__QUERY_MES_STATUS mes_status_pkt; - signed long timeout = 3000000; /* 3000 ms */ + signed long timeout = 2100000; /* 2100 ms */ struct amdgpu_device *adev = mes->adev; struct amdgpu_ring *ring = &mes->ring[pipe]; spinlock_t *ring_lock = &mes->ring_lock[pipe]; From 423c9baae4c7fe73d5812e28610418fd2e8050bd Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 13 Sep 2024 15:32:16 -0700 Subject: [PATCH 386/655] cxl: Fix comment regarding cxl_query_cmd() return data The code indicates that the min of n_commands and total commands is returned. The comment incorrectly says it's the max(). Correct comment to min(). Reviewed-by: Alison Schofield Link: https://patch.msgid.link/20240913223216.3234173-1-dave.jiang@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/mbox.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 915a93ced78f..946f8e44455f 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -546,7 +546,7 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd, return put_user(ARRAY_SIZE(cxl_mem_commands), &q->n_commands); /* - * otherwise, return max(n_commands, total commands) cxl_command_info + * otherwise, return min(n_commands, total commands) cxl_command_info * structures. */ cxl_for_each_cmd(cmd) { From 289ebd9afeb94862d96c89217068943f1937df5b Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 19 Sep 2024 09:22:57 +0900 Subject: [PATCH 387/655] ksmbd: fix warning: comparison of distinct pointer types lacks a cast MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit smb2pdu.c: In function ‘smb2_open’: ./include/linux/minmax.h:20:28: warning: comparison of distinct pointer types lacks a cast 20 | (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1))) | ^~ ./include/linux/minmax.h:26:4: note: in expansion of macro ‘__typecheck’ 26 | (__typecheck(x, y) && __no_side_effects(x, y)) | ^~~~~~~~~~~ ./include/linux/minmax.h:36:24: note: in expansion of macro ‘__safe_cmp’ 36 | __builtin_choose_expr(__safe_cmp(x, y), \ | ^~~~~~~~~~ ./include/linux/minmax.h:45:19: note: in expansion of macro ‘__careful_cmp’ 45 | #define min(x, y) __careful_cmp(x, y, <) | ^~~~~~~~~~~~~ /home/linkinjeon/git/smbd_work/ksmbd/smb2pdu.c:3713:27: note: in expansion of macro ‘min’ 3713 | fp->durable_timeout = min(dh_info.timeout, Fixes: c8efcc786146 ("ksmbd: add support for durable handles v1/v2") Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 5 +++-- fs/smb/server/vfs_cache.h | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index e6bdc1b20727..28cd66fa8e05 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -3531,8 +3531,9 @@ int smb2_open(struct ksmbd_work *work) memcpy(fp->create_guid, dh_info.CreateGuid, SMB2_CREATE_GUID_SIZE); if (dh_info.timeout) - fp->durable_timeout = min(dh_info.timeout, - DURABLE_HANDLE_MAX_TIMEOUT); + fp->durable_timeout = + min_t(unsigned int, dh_info.timeout, + DURABLE_HANDLE_MAX_TIMEOUT); else fp->durable_timeout = 60; } diff --git a/fs/smb/server/vfs_cache.h b/fs/smb/server/vfs_cache.h index b0f6d0f94cb8..5bbb179736c2 100644 --- a/fs/smb/server/vfs_cache.h +++ b/fs/smb/server/vfs_cache.h @@ -100,8 +100,8 @@ struct ksmbd_file { struct list_head blocked_works; struct list_head lock_list; - int durable_timeout; - int durable_scavenger_timeout; + unsigned int durable_timeout; + unsigned int durable_scavenger_timeout; /* if ls is happening on directory, below is valid*/ struct ksmbd_readdir_data readdir_data; From 8bb04fb2b7e346ffcf00164b91129ab151ceaa46 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Tue, 20 Aug 2024 21:15:20 +0200 Subject: [PATCH 388/655] ksmbd: Replace one-element arrays with flexible-array members Replace the deprecated one-element arrays with flexible-array members in the structs copychunk_ioctl_req and smb2_ea_info_req. There are no binary differences after this conversion. Link: https://github.com/KSPP/linux/issues/79 Signed-off-by: Thorsten Blum Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 4 ++-- fs/smb/server/smb2pdu.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 28cd66fa8e05..7121266daa02 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -4587,7 +4587,7 @@ static int smb2_get_ea(struct ksmbd_work *work, struct ksmbd_file *fp, path = &fp->filp->f_path; /* single EA entry is requested with given user.* name */ if (req->InputBufferLength) { - if (le32_to_cpu(req->InputBufferLength) < + if (le32_to_cpu(req->InputBufferLength) <= sizeof(struct smb2_ea_info_req)) return -EINVAL; @@ -8091,7 +8091,7 @@ int smb2_ioctl(struct ksmbd_work *work) goto out; } - if (in_buf_len < sizeof(struct copychunk_ioctl_req)) { + if (in_buf_len <= sizeof(struct copychunk_ioctl_req)) { ret = -EINVAL; goto out; } diff --git a/fs/smb/server/smb2pdu.h b/fs/smb/server/smb2pdu.h index 3be7d5ae65a8..73aff20e22d0 100644 --- a/fs/smb/server/smb2pdu.h +++ b/fs/smb/server/smb2pdu.h @@ -194,7 +194,7 @@ struct copychunk_ioctl_req { __le64 ResumeKey[3]; __le32 ChunkCount; __le32 Reserved; - __u8 Chunks[1]; /* array of srv_copychunk */ + __u8 Chunks[]; /* array of srv_copychunk */ } __packed; struct srv_copychunk { @@ -370,7 +370,7 @@ struct smb2_file_attr_tag_info { struct smb2_ea_info_req { __le32 NextEntryOffset; __u8 EaNameLength; - char name[1]; + char name[]; } __packed; /* level 15 Query */ struct smb2_ea_info { From 4f2c346e621624315e2a1405e98616a0c5ac146f Mon Sep 17 00:00:00 2001 From: Stuart Hayes Date: Wed, 18 Sep 2024 23:31:43 -0500 Subject: [PATCH 389/655] driver core: fix async device shutdown hang Modify device_shutdown() so that supplier devices do not wait for consumer devices to be shut down first when the devlink is sync state only, since the consumer is not dependent on the supplier in this case. Without this change, a circular dependency could hang the system. Fixes: 8064952c6504 ("driver core: shut down devices asynchronously") Signed-off-by: Stuart Hayes Tested-by: Laurence Oberman Tested-by: Nathan Chancellor Link: https://lore.kernel.org/r/20240919043143.1194950-1-stuart.w.hayes@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index b69b82da8837..76513e360496 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -4898,8 +4898,16 @@ void device_shutdown(void) idx = device_links_read_lock(); list_for_each_entry_rcu(link, &dev->links.suppliers, c_node, - device_links_read_lock_held()) + device_links_read_lock_held()) { + /* + * sync_state_only suppliers don't need to wait, + * aren't reordered on devices_kset, so making them + * wait could result in a hang + */ + if (device_link_flag_is_sync_state_only(link->flags)) + continue; link->supplier->p->shutdown_after = cookie; + } device_links_read_unlock(idx); put_device(dev); From 39c3aad43f6f9bcddd660f5874dcd760e8c04a94 Mon Sep 17 00:00:00 2001 From: Ahmed Ehab Date: Sun, 22 Sep 2024 00:00:36 +0300 Subject: [PATCH 390/655] bcachefs: Hold read lock in bch2_snapshot_tree_oldest_subvol() Syzbot reports a problem that a warning is triggered due to suspicious use of rcu_dereference_check(). That is triggered by a call of bch2_snapshot_tree_oldest_subvol(). The cause of the warning is that inside bch2_snapshot_tree_oldest_subvol(), snapshot_t() is called which calls rcu_dereference() that requires a read lock to be held. Also, the call of bch2_snapshot_tree_next() eventually calls snapshot_t(). To fix this, call rcu_read_lock() before calling snapshot_t(). Then, release the lock after the termination of the while loop. Reported-by: Signed-off-by: Ahmed Ehab Signed-off-by: Kent Overstreet --- fs/bcachefs/snapshot.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 8b18a9b483a4..dff83ebbd912 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -469,6 +469,7 @@ static u32 bch2_snapshot_tree_oldest_subvol(struct bch_fs *c, u32 snapshot_root) u32 id = snapshot_root; u32 subvol = 0, s; + rcu_read_lock(); while (id) { s = snapshot_t(c, id)->subvol; @@ -477,6 +478,7 @@ static u32 bch2_snapshot_tree_oldest_subvol(struct bch_fs *c, u32 snapshot_root) id = bch2_snapshot_tree_next(c, id); } + rcu_read_unlock(); return subvol; } From 0e4ed48292c55eeb0afab22f8930b556f17eaad2 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 29 Aug 2024 15:58:53 +0200 Subject: [PATCH 391/655] mailbox: ARM_MHU_V3 should depend on ARM64 The ARM MHUv3 controller is only present on ARM64 SoCs. Hence add a dependency on ARM64, to prevent asking the user about this driver when configuring a kernel for a different architecture than ARM64. Fixes: ca1a8680b134b5e6 ("mailbox: arm_mhuv3: Add driver") Signed-off-by: Geert Uytterhoeven Acked-by: Sudeep Holla Signed-off-by: Jassi Brar --- drivers/mailbox/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mailbox/Kconfig b/drivers/mailbox/Kconfig index 4eed97295927..cbd9206cd7de 100644 --- a/drivers/mailbox/Kconfig +++ b/drivers/mailbox/Kconfig @@ -25,6 +25,7 @@ config ARM_MHU_V2 config ARM_MHU_V3 tristate "ARM MHUv3 Mailbox" + depends on ARM64 || COMPILE_TEST depends on HAS_IOMEM || COMPILE_TEST depends on OF help From 39d7d6177f0cc25a567a4b3d2b2323489d4615f7 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Mon, 29 Jul 2024 15:47:09 +0800 Subject: [PATCH 392/655] mailbox: imx: use device name in interrupt name There are several MUs for different usage, SCMI MU, ELE MU, RemotePROC MU. Using "imx_mu_chan" in interrupt name would be hard to identify which MU triggers interrupt, so use device name to make it easy to know which MU triggers which interrupt. Signed-off-by: Peng Fan Signed-off-by: Jassi Brar --- drivers/mailbox/imx-mailbox.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/mailbox/imx-mailbox.c b/drivers/mailbox/imx-mailbox.c index d17efb1dd0cb..f815dab3be50 100644 --- a/drivers/mailbox/imx-mailbox.c +++ b/drivers/mailbox/imx-mailbox.c @@ -30,7 +30,7 @@ #define IMX_MU_SCU_CHANS 6 /* TX0/RX0 */ #define IMX_MU_S4_CHANS 2 -#define IMX_MU_CHAN_NAME_SIZE 20 +#define IMX_MU_CHAN_NAME_SIZE 32 #define IMX_MU_V2_PAR_OFF 0x4 #define IMX_MU_V2_TR_MASK GENMASK(7, 0) @@ -782,7 +782,7 @@ static int imx_mu_init_generic(struct imx_mu_priv *priv) cp->chan = &priv->mbox_chans[i]; priv->mbox_chans[i].con_priv = cp; snprintf(cp->irq_desc, sizeof(cp->irq_desc), - "imx_mu_chan[%i-%i]", cp->type, cp->idx); + "%s[%i-%i]", dev_name(priv->dev), cp->type, cp->idx); } priv->mbox.num_chans = IMX_MU_CHANS; @@ -819,7 +819,7 @@ static int imx_mu_init_specific(struct imx_mu_priv *priv) cp->chan = &priv->mbox_chans[i]; priv->mbox_chans[i].con_priv = cp; snprintf(cp->irq_desc, sizeof(cp->irq_desc), - "imx_mu_chan[%i-%i]", cp->type, cp->idx); + "%s[%i-%i]", dev_name(priv->dev), cp->type, cp->idx); } priv->mbox.num_chans = num_chans; From e92d87c9c5d769e4cb1dd7c90faa38dddd7e52e3 Mon Sep 17 00:00:00 2001 From: Liao Chen Date: Wed, 14 Aug 2024 02:51:47 +0000 Subject: [PATCH 393/655] mailbox: rockchip: fix a typo in module autoloading MODULE_DEVICE_TABLE(of, rockchip_mbox_of_match) could let the module properly autoloaded based on the alias from of_device_id table. It should be 'rockchip_mbox_of_match' instead of 'rockchp_mbox_of_match', just fix it. Fixes: f70ed3b5dc8b ("mailbox: rockchip: Add Rockchip mailbox driver") Signed-off-by: Liao Chen Reviewed-by: Heiko Stuebner Signed-off-by: Jassi Brar --- drivers/mailbox/rockchip-mailbox.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mailbox/rockchip-mailbox.c b/drivers/mailbox/rockchip-mailbox.c index 8ffad059e898..4d966cb2ed03 100644 --- a/drivers/mailbox/rockchip-mailbox.c +++ b/drivers/mailbox/rockchip-mailbox.c @@ -159,7 +159,7 @@ static const struct of_device_id rockchip_mbox_of_match[] = { { .compatible = "rockchip,rk3368-mailbox", .data = &rk3368_drv_data}, { }, }; -MODULE_DEVICE_TABLE(of, rockchp_mbox_of_match); +MODULE_DEVICE_TABLE(of, rockchip_mbox_of_match); static int rockchip_mbox_probe(struct platform_device *pdev) { From 0d97651b7577148242571b8692aae4a8b9ee0979 Mon Sep 17 00:00:00 2001 From: Huan Yang Date: Thu, 22 Aug 2024 09:59:55 +0800 Subject: [PATCH 394/655] mailbox: sprd: Use devm_clk_get_enabled() helpers The devm_clk_get_enabled() helpers: - call devm_clk_get() - call clk_prepare_enable() and register what is needed in order to call clk_disable_unprepare() when needed, as a managed resource. This simplifies the code and avoids the calls to clk_disable_unprepare(). Due to clk only used in probe, not in suspend\resume, this pointer can remove from sprd_mbox_priv to save a little memory. Signed-off-by: Huan Yang Reviewed-by: Christophe JAILLET Reviewed-by: Baolin Wang Signed-off-by: Jassi Brar --- drivers/mailbox/sprd-mailbox.c | 25 ++++--------------------- 1 file changed, 4 insertions(+), 21 deletions(-) diff --git a/drivers/mailbox/sprd-mailbox.c b/drivers/mailbox/sprd-mailbox.c index 9ae57de77d4d..ee8539dfcef5 100644 --- a/drivers/mailbox/sprd-mailbox.c +++ b/drivers/mailbox/sprd-mailbox.c @@ -62,7 +62,6 @@ struct sprd_mbox_priv { void __iomem *outbox_base; /* Base register address for supplementary outbox */ void __iomem *supp_base; - struct clk *clk; u32 outbox_fifo_depth; struct mutex lock; @@ -291,19 +290,13 @@ static const struct mbox_chan_ops sprd_mbox_ops = { .shutdown = sprd_mbox_shutdown, }; -static void sprd_mbox_disable(void *data) -{ - struct sprd_mbox_priv *priv = data; - - clk_disable_unprepare(priv->clk); -} - static int sprd_mbox_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct sprd_mbox_priv *priv; int ret, inbox_irq, outbox_irq, supp_irq; unsigned long id, supp; + struct clk *clk; priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); if (!priv) @@ -331,20 +324,10 @@ static int sprd_mbox_probe(struct platform_device *pdev) if (IS_ERR(priv->outbox_base)) return PTR_ERR(priv->outbox_base); - priv->clk = devm_clk_get(dev, "enable"); - if (IS_ERR(priv->clk)) { + clk = devm_clk_get_enabled(dev, "enable"); + if (IS_ERR(clk)) { dev_err(dev, "failed to get mailbox clock\n"); - return PTR_ERR(priv->clk); - } - - ret = clk_prepare_enable(priv->clk); - if (ret) - return ret; - - ret = devm_add_action_or_reset(dev, sprd_mbox_disable, priv); - if (ret) { - dev_err(dev, "failed to add mailbox disable action\n"); - return ret; + return PTR_ERR(clk); } inbox_irq = platform_get_irq_byname(pdev, "inbox"); From dc09f007caed3b2f6a3b6bd7e13777557ae22bfd Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 21 Aug 2024 23:40:44 +0200 Subject: [PATCH 395/655] mailbox: bcm2835: Fix timeout during suspend mode During noirq suspend phase the Raspberry Pi power driver suffer of firmware property timeouts. The reason is that the IRQ of the underlying BCM2835 mailbox is disabled and rpi_firmware_property_list() will always run into a timeout [1]. Since the VideoCore side isn't consider as a wakeup source, set the IRQF_NO_SUSPEND flag for the mailbox IRQ in order to keep it enabled during suspend-resume cycle. [1] PM: late suspend of devices complete after 1.754 msecs WARNING: CPU: 0 PID: 438 at drivers/firmware/raspberrypi.c:128 rpi_firmware_property_list+0x204/0x22c Firmware transaction 0x00028001 timeout Modules linked in: CPU: 0 PID: 438 Comm: bash Tainted: G C 6.9.3-dirty #17 Hardware name: BCM2835 Call trace: unwind_backtrace from show_stack+0x18/0x1c show_stack from dump_stack_lvl+0x34/0x44 dump_stack_lvl from __warn+0x88/0xec __warn from warn_slowpath_fmt+0x7c/0xb0 warn_slowpath_fmt from rpi_firmware_property_list+0x204/0x22c rpi_firmware_property_list from rpi_firmware_property+0x68/0x8c rpi_firmware_property from rpi_firmware_set_power+0x54/0xc0 rpi_firmware_set_power from _genpd_power_off+0xe4/0x148 _genpd_power_off from genpd_sync_power_off+0x7c/0x11c genpd_sync_power_off from genpd_finish_suspend+0xcc/0xe0 genpd_finish_suspend from dpm_run_callback+0x78/0xd0 dpm_run_callback from device_suspend_noirq+0xc0/0x238 device_suspend_noirq from dpm_suspend_noirq+0xb0/0x168 dpm_suspend_noirq from suspend_devices_and_enter+0x1b8/0x5ac suspend_devices_and_enter from pm_suspend+0x254/0x2e4 pm_suspend from state_store+0xa8/0xd4 state_store from kernfs_fop_write_iter+0x154/0x1a0 kernfs_fop_write_iter from vfs_write+0x12c/0x184 vfs_write from ksys_write+0x78/0xc0 ksys_write from ret_fast_syscall+0x0/0x54 Exception stack(0xcc93dfa8 to 0xcc93dff0) [...] PM: noirq suspend of devices complete after 3095.584 msecs Link: https://github.com/raspberrypi/firmware/issues/1894 Fixes: 0bae6af6d704 ("mailbox: Enable BCM2835 mailbox support") Signed-off-by: Stefan Wahren Reviewed-by: Florian Fainelli Signed-off-by: Jassi Brar --- drivers/mailbox/bcm2835-mailbox.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mailbox/bcm2835-mailbox.c b/drivers/mailbox/bcm2835-mailbox.c index fbfd0202047c..ea12fb8d2401 100644 --- a/drivers/mailbox/bcm2835-mailbox.c +++ b/drivers/mailbox/bcm2835-mailbox.c @@ -145,7 +145,8 @@ static int bcm2835_mbox_probe(struct platform_device *pdev) spin_lock_init(&mbox->lock); ret = devm_request_irq(dev, irq_of_parse_and_map(dev->of_node, 0), - bcm2835_mbox_irq, 0, dev_name(dev), mbox); + bcm2835_mbox_irq, IRQF_NO_SUSPEND, dev_name(dev), + mbox); if (ret) { dev_err(dev, "Failed to register a mailbox IRQ handler: %d\n", ret); From 263dbd3cc88da7ea7413494eea66418b4f1b2e6d Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 31 Jul 2024 14:16:08 -0600 Subject: [PATCH 396/655] mailbox: Use of_property_match_string() instead of open-coding Use of_property_match_string() instead of open-coding the search. With this, of_get_property() can be removed as there is no need to check for "mbox-names" presence first. This is part of a larger effort to remove callers of of_get_property() and similar functions. of_get_property() leaks the DT property data pointer which is a problem for dynamically allocated nodes which may be freed. Signed-off-by: Rob Herring (Arm) Signed-off-by: Jassi Brar --- drivers/mailbox/mailbox.c | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c index ebff3baf3045..d3d26a2c9895 100644 --- a/drivers/mailbox/mailbox.c +++ b/drivers/mailbox/mailbox.c @@ -450,30 +450,20 @@ struct mbox_chan *mbox_request_channel_byname(struct mbox_client *cl, const char *name) { struct device_node *np = cl->dev->of_node; - struct property *prop; - const char *mbox_name; - int index = 0; + int index; if (!np) { dev_err(cl->dev, "%s() currently only supports DT\n", __func__); return ERR_PTR(-EINVAL); } - if (!of_get_property(np, "mbox-names", NULL)) { - dev_err(cl->dev, - "%s() requires an \"mbox-names\" property\n", __func__); + index = of_property_match_string(np, "mbox-names", name); + if (index < 0) { + dev_err(cl->dev, "%s() could not locate channel named \"%s\"\n", + __func__, name); return ERR_PTR(-EINVAL); } - - of_property_for_each_string(np, "mbox-names", prop, mbox_name) { - if (!strncmp(name, mbox_name, strlen(name))) - return mbox_request_channel(cl, index); - index++; - } - - dev_err(cl->dev, "%s() could not locate channel named \"%s\"\n", - __func__, name); - return ERR_PTR(-EINVAL); + return mbox_request_channel(cl, index); } EXPORT_SYMBOL_GPL(mbox_request_channel_byname); From c13c196d5e5c96356386a8099e14690e393a5148 Mon Sep 17 00:00:00 2001 From: Fei Shao Date: Wed, 11 Sep 2024 22:33:55 +0800 Subject: [PATCH 397/655] dt-bindings: mailbox: mtk,adsp-mbox: Add compatible for MT8188 Add compatible string for ADSP mailbox on MT8188 SoC, which is compatible with the one used on MT8186. Acked-by: Rob Herring (Arm) Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Fei Shao Signed-off-by: Jassi Brar --- .../devicetree/bindings/mailbox/mtk,adsp-mbox.yaml | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/mailbox/mtk,adsp-mbox.yaml b/Documentation/devicetree/bindings/mailbox/mtk,adsp-mbox.yaml index 72c1d9e82c89..8a1369df4ecb 100644 --- a/Documentation/devicetree/bindings/mailbox/mtk,adsp-mbox.yaml +++ b/Documentation/devicetree/bindings/mailbox/mtk,adsp-mbox.yaml @@ -17,9 +17,15 @@ description: | properties: compatible: - enum: - - mediatek,mt8195-adsp-mbox - - mediatek,mt8186-adsp-mbox + oneOf: + - enum: + - mediatek,mt8186-adsp-mbox + - mediatek,mt8195-adsp-mbox + - items: + - enum: + - mediatek,mt8188-adsp-mbox + - const: mediatek,mt8186-adsp-mbox + "#mbox-cells": const: 0 From 4116ab5e8a48db72ca366ac042641dea8cf92f67 Mon Sep 17 00:00:00 2001 From: Nikunj Kela Date: Thu, 5 Sep 2024 11:47:36 -0700 Subject: [PATCH 398/655] dt-bindings: mailbox: qcom-ipcc: document the support for SA8255p Add a compatible for the ipcc on SA8255p platforms. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Nikunj Kela Signed-off-by: Jassi Brar --- Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml b/Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml index 05e4e1d51713..bc108b8db9f4 100644 --- a/Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml +++ b/Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml @@ -25,6 +25,7 @@ properties: items: - enum: - qcom,qdu1000-ipcc + - qcom,sa8255p-ipcc - qcom,sa8775p-ipcc - qcom,sc7280-ipcc - qcom,sc8280xp-ipcc From 5232544ea368b54b517dc504308c9e62bc6e87eb Mon Sep 17 00:00:00 2001 From: Jingyi Wang Date: Wed, 11 Sep 2024 15:25:15 +0800 Subject: [PATCH 399/655] dt-bindings: mailbox: qcom-ipcc: Document QCS8300 IPCC Document the Inter-Processor Communication Controller on the Qualcomm QCS8300 Platform, which will be used to route interrupts across various subsystems found on the SoC. Signed-off-by: Jingyi Wang Reviewed-by: Krzysztof Kozlowski Signed-off-by: Jassi Brar --- Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml b/Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml index bc108b8db9f4..2d66770ed361 100644 --- a/Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml +++ b/Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml @@ -24,6 +24,7 @@ properties: compatible: items: - enum: + - qcom,qcs8300-ipcc - qcom,qdu1000-ipcc - qcom,sa8255p-ipcc - qcom,sa8775p-ipcc From e91be3ed30d79ccd3e87e3970a26dea844c04919 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 3 Sep 2024 17:11:50 -0700 Subject: [PATCH 400/655] cxl: Preserve the CDAT access_coordinate for an endpoint Keep the access_coordinate from the CDAT tables for region perf calculations. The region perf calculation requires all participating endpoints to have arrived in order to determine if there are limitations of bandwidth data due to shared uplink. Reviewed-by: Jonathan Cameron Reviewed-by: Ira Weiny Acked-by: Dan Williams Link: https://patch.msgid.link/20240904001316.1688225-2-dave.jiang@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/cdat.c | 10 ++++++---- drivers/cxl/cxlmem.h | 2 ++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index e2e1ccda88fa..7bbf266a1612 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -14,7 +14,7 @@ struct dsmas_entry { struct range dpa_range; u8 handle; struct access_coordinate coord[ACCESS_COORDINATE_MAX]; - + struct access_coordinate cdat_coord[ACCESS_COORDINATE_MAX]; int entries; int qos_class; }; @@ -162,7 +162,7 @@ static int cdat_dslbis_handler(union acpi_subtable_headers *header, void *arg, val = cdat_normalize(le16_to_cpu(le_val), le64_to_cpu(le_base), dslbis->data_type); - cxl_access_coordinate_set(dent->coord, dslbis->data_type, val); + cxl_access_coordinate_set(dent->cdat_coord, dslbis->data_type, val); return 0; } @@ -219,7 +219,7 @@ static int cxl_port_perf_data_calculate(struct cxl_port *port, xa_for_each(dsmas_xa, index, dent) { int qos_class; - cxl_coordinates_combine(dent->coord, dent->coord, ep_c); + cxl_coordinates_combine(dent->coord, dent->cdat_coord, ep_c); dent->entries = 1; rc = cxl_root->ops->qos_class(cxl_root, &dent->coord[ACCESS_COORDINATE_CPU], @@ -240,8 +240,10 @@ static int cxl_port_perf_data_calculate(struct cxl_port *port, static void update_perf_entry(struct device *dev, struct dsmas_entry *dent, struct cxl_dpa_perf *dpa_perf) { - for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) + for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) { dpa_perf->coord[i] = dent->coord[i]; + dpa_perf->cdat_coord[i] = dent->cdat_coord[i]; + } dpa_perf->dpa_range = dent->dpa_range; dpa_perf->qos_class = dent->qos_class; dev_dbg(dev, diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index c7c423ae16ab..2a25d1957ddb 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -398,11 +398,13 @@ enum cxl_devtype { * struct cxl_dpa_perf - DPA performance property entry * @dpa_range: range for DPA address * @coord: QoS performance data (i.e. latency, bandwidth) + * @cdat_coord: raw QoS performance data from CDAT * @qos_class: QoS Class cookies */ struct cxl_dpa_perf { struct range dpa_range; struct access_coordinate coord[ACCESS_COORDINATE_MAX]; + struct access_coordinate cdat_coord[ACCESS_COORDINATE_MAX]; int qos_class; }; From a5ab0de0ebaa65e0a75ec0761a2745c66a9d17dc Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 3 Sep 2024 17:11:51 -0700 Subject: [PATCH 401/655] cxl: Calculate region bandwidth of targets with shared upstream link The current bandwidth calculation aggregates all the targets. This simple method does not take into account where multiple targets sharing under a switch or a root port where the aggregated bandwidth can be greater than the upstream link of the switch. To accurately account for the shared upstream uplink cases, a new update function is introduced by walking from the leaves to the root of the hierarchy and clamp the bandwidth in the process as needed. This process is done when all the targets for a region are present but before the final values are send to the HMAT handling code cached access_coordinate targets. The original perf calculation path was kept to calculate the latency performance data that does not require the shared link consideration. The shared upstream link calculation is done as a second pass when all the endpoints have arrived. Testing is done via qemu with CXL hierarchy. run_qemu[1] is modified to support several CXL hierarchy layouts. The following layouts are tested: HB: Host Bridge RP: Root Port SW: Switch EP: End Point 2 HB 2 RP 2 EP: resulting bandwidth: 624 1 HB 2 RP 2 EP: resulting bandwidth: 624 2 HB 2 RP 2 SW 4 EP: resulting bandwidth: 624 Current testing, perf number from SRAT/HMAT is hacked into the kernel code. However with new QEMU support of Generic Target Port that's incoming, the perf data injection is no longer needed. [1]: https://github.com/pmem/run_qemu Suggested-by: Jonathan Cameron Link: https://lore.kernel.org/linux-cxl/20240501152503.00002e60@Huawei.com/ Reviewed-by: Jonathan Cameron Reviewed-by: Alison Schofield Acked-by: Dan Williams Link: https://patch.msgid.link/20240904001316.1688225-3-dave.jiang@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/cdat.c | 501 +++++++++++++++++++++++++++++++++++++- drivers/cxl/core/core.h | 4 +- drivers/cxl/core/pci.c | 23 ++ drivers/cxl/core/port.c | 20 ++ drivers/cxl/core/region.c | 2 + drivers/cxl/cxl.h | 1 + 6 files changed, 539 insertions(+), 12 deletions(-) diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index 7bbf266a1612..ef1621d40f05 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -547,19 +547,37 @@ void cxl_coordinates_combine(struct access_coordinate *out, MODULE_IMPORT_NS(CXL); -void cxl_region_perf_data_calculate(struct cxl_region *cxlr, - struct cxl_endpoint_decoder *cxled) +static void cxl_bandwidth_add(struct access_coordinate *coord, + struct access_coordinate *c1, + struct access_coordinate *c2) +{ + for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) { + coord[i].read_bandwidth = c1[i].read_bandwidth + + c2[i].read_bandwidth; + coord[i].write_bandwidth = c1[i].write_bandwidth + + c2[i].write_bandwidth; + } +} + +static bool dpa_perf_contains(struct cxl_dpa_perf *perf, + struct resource *dpa_res) +{ + struct range dpa = { + .start = dpa_res->start, + .end = dpa_res->end, + }; + + return range_contains(&perf->dpa_range, &dpa); +} + +static struct cxl_dpa_perf *cxled_get_dpa_perf(struct cxl_endpoint_decoder *cxled, + enum cxl_decoder_mode mode) { struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); - struct cxl_dev_state *cxlds = cxlmd->cxlds; - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); - struct range dpa = { - .start = cxled->dpa_res->start, - .end = cxled->dpa_res->end, - }; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); struct cxl_dpa_perf *perf; - switch (cxlr->mode) { + switch (mode) { case CXL_DECODER_RAM: perf = &mds->ram_perf; break; @@ -567,12 +585,473 @@ void cxl_region_perf_data_calculate(struct cxl_region *cxlr, perf = &mds->pmem_perf; break; default: - return; + return ERR_PTR(-EINVAL); } + if (!dpa_perf_contains(perf, cxled->dpa_res)) + return ERR_PTR(-EINVAL); + + return perf; +} + +/* + * Transient context for containing the current calculation of bandwidth when + * doing walking the port hierarchy to deal with shared upstream link. + */ +struct cxl_perf_ctx { + struct access_coordinate coord[ACCESS_COORDINATE_MAX]; + struct cxl_port *port; +}; + +/** + * cxl_endpoint_gather_bandwidth - collect all the endpoint bandwidth in an xarray + * @cxlr: CXL region for the bandwidth calculation + * @cxled: endpoint decoder to start on + * @usp_xa: (output) the xarray that collects all the bandwidth coordinates + * indexed by the upstream device with data of 'struct cxl_perf_ctx'. + * @gp_is_root: (output) bool of whether the grandparent is cxl root. + * + * Return: 0 for success or -errno + * + * Collects aggregated endpoint bandwidth and store the bandwidth in + * an xarray indexed by the upstream device of the switch or the RP + * device. Each endpoint consists the minimum of the bandwidth from DSLBIS + * from the endpoint CDAT, the endpoint upstream link bandwidth, and the + * bandwidth from the SSLBIS of the switch CDAT for the switch upstream port to + * the downstream port that's associated with the endpoint. If the + * device is directly connected to a RP, then no SSLBIS is involved. + */ +static int cxl_endpoint_gather_bandwidth(struct cxl_region *cxlr, + struct cxl_endpoint_decoder *cxled, + struct xarray *usp_xa, + bool *gp_is_root) +{ + struct cxl_port *endpoint = to_cxl_port(cxled->cxld.dev.parent); + struct cxl_port *parent_port = to_cxl_port(endpoint->dev.parent); + struct cxl_port *gp_port = to_cxl_port(parent_port->dev.parent); + struct access_coordinate pci_coord[ACCESS_COORDINATE_MAX]; + struct access_coordinate sw_coord[ACCESS_COORDINATE_MAX]; + struct access_coordinate ep_coord[ACCESS_COORDINATE_MAX]; + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + struct cxl_perf_ctx *perf_ctx; + struct cxl_dpa_perf *perf; + unsigned long index; + void *ptr; + int rc; + + if (cxlds->rcd) + return -ENODEV; + + perf = cxled_get_dpa_perf(cxled, cxlr->mode); + if (IS_ERR(perf)) + return PTR_ERR(perf); + + gp_port = to_cxl_port(parent_port->dev.parent); + *gp_is_root = is_cxl_root(gp_port); + + /* + * If the grandparent is cxl root, then index is the root port, + * otherwise it's the parent switch upstream device. + */ + if (*gp_is_root) + index = (unsigned long)endpoint->parent_dport->dport_dev; + else + index = (unsigned long)parent_port->uport_dev; + + perf_ctx = xa_load(usp_xa, index); + if (!perf_ctx) { + struct cxl_perf_ctx *c __free(kfree) = + kzalloc(sizeof(*perf_ctx), GFP_KERNEL); + + if (!c) + return -ENOMEM; + ptr = xa_store(usp_xa, index, c, GFP_KERNEL); + if (xa_is_err(ptr)) + return xa_err(ptr); + perf_ctx = no_free_ptr(c); + perf_ctx->port = parent_port; + } + + /* Direct upstream link from EP bandwidth */ + rc = cxl_pci_get_bandwidth(pdev, pci_coord); + if (rc < 0) + return rc; + + /* + * Min of upstream link bandwidth and Endpoint CDAT bandwidth from + * DSLBIS. + */ + cxl_coordinates_combine(ep_coord, pci_coord, perf->cdat_coord); + + /* + * If grandparent port is root, then there's no switch involved and + * the endpoint is connected to a root port. + */ + if (!*gp_is_root) { + /* + * Retrieve the switch SSLBIS for switch downstream port + * associated with the endpoint bandwidth. + */ + rc = cxl_port_get_switch_dport_bandwidth(endpoint, sw_coord); + if (rc) + return rc; + + /* + * Min of the earlier coordinates with the switch SSLBIS + * bandwidth + */ + cxl_coordinates_combine(ep_coord, ep_coord, sw_coord); + } + + /* + * Aggregate the computed bandwidth with the current aggregated bandwidth + * of the endpoints with the same switch upstream device or RP. + */ + cxl_bandwidth_add(perf_ctx->coord, perf_ctx->coord, ep_coord); + + return 0; +} + +static void free_perf_xa(struct xarray *xa) +{ + struct cxl_perf_ctx *ctx; + unsigned long index; + + if (!xa) + return; + + xa_for_each(xa, index, ctx) + kfree(ctx); + xa_destroy(xa); + kfree(xa); +} +DEFINE_FREE(free_perf_xa, struct xarray *, if (_T) free_perf_xa(_T)) + +/** + * cxl_switch_gather_bandwidth - collect all the bandwidth at switch level in an xarray + * @cxlr: The region being operated on + * @input_xa: xarray indexed by upstream device of a switch with data of 'struct + * cxl_perf_ctx' + * @gp_is_root: (output) bool of whether the grandparent is cxl root. + * + * Return: a xarray of resulting cxl_perf_ctx per parent switch or root port + * or ERR_PTR(-errno) + * + * Iterate through the xarray. Take the minimum of the downstream calculated + * bandwidth, the upstream link bandwidth, and the SSLBIS of the upstream + * switch if exists. Sum the resulting bandwidth under the switch upstream + * device or a RP device. The function can be iterated over multiple switches + * if the switches are present. + */ +static struct xarray *cxl_switch_gather_bandwidth(struct cxl_region *cxlr, + struct xarray *input_xa, + bool *gp_is_root) +{ + struct xarray *res_xa __free(free_perf_xa) = + kzalloc(sizeof(*res_xa), GFP_KERNEL); + struct access_coordinate coords[ACCESS_COORDINATE_MAX]; + struct cxl_perf_ctx *ctx, *us_ctx; + unsigned long index, us_index; + int dev_count = 0; + int gp_count = 0; + void *ptr; + int rc; + + if (!res_xa) + return ERR_PTR(-ENOMEM); + xa_init(res_xa); + + xa_for_each(input_xa, index, ctx) { + struct device *dev = (struct device *)index; + struct cxl_port *port = ctx->port; + struct cxl_port *parent_port = to_cxl_port(port->dev.parent); + struct cxl_port *gp_port = to_cxl_port(parent_port->dev.parent); + struct cxl_dport *dport = port->parent_dport; + bool is_root = false; + + dev_count++; + if (is_cxl_root(gp_port)) { + is_root = true; + gp_count++; + } + + /* + * If the grandparent is cxl root, then index is the root port, + * otherwise it's the parent switch upstream device. + */ + if (is_root) + us_index = (unsigned long)port->parent_dport->dport_dev; + else + us_index = (unsigned long)parent_port->uport_dev; + + us_ctx = xa_load(res_xa, us_index); + if (!us_ctx) { + struct cxl_perf_ctx *n __free(kfree) = + kzalloc(sizeof(*n), GFP_KERNEL); + + if (!n) + return ERR_PTR(-ENOMEM); + + ptr = xa_store(res_xa, us_index, n, GFP_KERNEL); + if (xa_is_err(ptr)) + return ERR_PTR(xa_err(ptr)); + us_ctx = no_free_ptr(n); + us_ctx->port = parent_port; + } + + /* + * If the device isn't an upstream PCIe port, there's something + * wrong with the topology. + */ + if (!dev_is_pci(dev)) + return ERR_PTR(-EINVAL); + + /* Retrieve the upstream link bandwidth */ + rc = cxl_pci_get_bandwidth(to_pci_dev(dev), coords); + if (rc) + return ERR_PTR(-ENXIO); + + /* + * Take the min of downstream bandwidth and the upstream link + * bandwidth. + */ + cxl_coordinates_combine(coords, coords, ctx->coord); + + /* + * Take the min of the calculated bandwdith and the upstream + * switch SSLBIS bandwidth if there's a parent switch + */ + if (!is_root) + cxl_coordinates_combine(coords, coords, dport->coord); + + /* + * Aggregate the calculated bandwidth common to an upstream + * switch. + */ + cxl_bandwidth_add(us_ctx->coord, us_ctx->coord, coords); + } + + /* Asymmetric topology detected. */ + if (gp_count) { + if (gp_count != dev_count) { + dev_dbg(&cxlr->dev, + "Asymmetric hierarchy detected, bandwidth not updated\n"); + return ERR_PTR(-EOPNOTSUPP); + } + *gp_is_root = true; + } + + return no_free_ptr(res_xa); +} + +/** + * cxl_rp_gather_bandwidth - handle the root port level bandwidth collection + * @xa: the xarray that holds the cxl_perf_ctx that has the bandwidth calculated + * below each root port device. + * + * Return: xarray that holds cxl_perf_ctx per host bridge or ERR_PTR(-errno) + */ +static struct xarray *cxl_rp_gather_bandwidth(struct xarray *xa) +{ + struct xarray *hb_xa __free(free_perf_xa) = + kzalloc(sizeof(*hb_xa), GFP_KERNEL); + struct cxl_perf_ctx *ctx; + unsigned long index; + + if (!hb_xa) + return ERR_PTR(-ENOMEM); + xa_init(hb_xa); + + xa_for_each(xa, index, ctx) { + struct cxl_port *port = ctx->port; + unsigned long hb_index = (unsigned long)port->uport_dev; + struct cxl_perf_ctx *hb_ctx; + void *ptr; + + hb_ctx = xa_load(hb_xa, hb_index); + if (!hb_ctx) { + struct cxl_perf_ctx *n __free(kfree) = + kzalloc(sizeof(*n), GFP_KERNEL); + + if (!n) + return ERR_PTR(-ENOMEM); + ptr = xa_store(hb_xa, hb_index, n, GFP_KERNEL); + if (xa_is_err(ptr)) + return ERR_PTR(xa_err(ptr)); + hb_ctx = no_free_ptr(n); + hb_ctx->port = port; + } + + cxl_bandwidth_add(hb_ctx->coord, hb_ctx->coord, ctx->coord); + } + + return no_free_ptr(hb_xa); +} + +/** + * cxl_hb_gather_bandwidth - handle the host bridge level bandwidth collection + * @xa: the xarray that holds the cxl_perf_ctx that has the bandwidth calculated + * below each host bridge. + * + * Return: xarray that holds cxl_perf_ctx per ACPI0017 device or ERR_PTR(-errno) + */ +static struct xarray *cxl_hb_gather_bandwidth(struct xarray *xa) +{ + struct xarray *mw_xa __free(free_perf_xa) = + kzalloc(sizeof(*mw_xa), GFP_KERNEL); + struct cxl_perf_ctx *ctx; + unsigned long index; + + if (!mw_xa) + return ERR_PTR(-ENOMEM); + xa_init(mw_xa); + + xa_for_each(xa, index, ctx) { + struct cxl_port *port = ctx->port; + struct cxl_port *parent_port; + struct cxl_perf_ctx *mw_ctx; + struct cxl_dport *dport; + unsigned long mw_index; + void *ptr; + + parent_port = to_cxl_port(port->dev.parent); + mw_index = (unsigned long)parent_port->uport_dev; + + mw_ctx = xa_load(mw_xa, mw_index); + if (!mw_ctx) { + struct cxl_perf_ctx *n __free(kfree) = + kzalloc(sizeof(*n), GFP_KERNEL); + + if (!n) + return ERR_PTR(-ENOMEM); + ptr = xa_store(mw_xa, mw_index, n, GFP_KERNEL); + if (xa_is_err(ptr)) + return ERR_PTR(xa_err(ptr)); + mw_ctx = no_free_ptr(n); + } + + dport = port->parent_dport; + cxl_coordinates_combine(ctx->coord, ctx->coord, dport->coord); + cxl_bandwidth_add(mw_ctx->coord, mw_ctx->coord, ctx->coord); + } + + return no_free_ptr(mw_xa); +} + +/** + * cxl_region_update_bandwidth - Update the bandwidth access coordinates of a region + * @cxlr: The region being operated on + * @input_xa: xarray holds cxl_perf_ctx wht calculated bandwidth per ACPI0017 instance + */ +static void cxl_region_update_bandwidth(struct cxl_region *cxlr, + struct xarray *input_xa) +{ + struct access_coordinate coord[ACCESS_COORDINATE_MAX]; + struct cxl_perf_ctx *ctx; + unsigned long index; + + memset(coord, 0, sizeof(coord)); + xa_for_each(input_xa, index, ctx) + cxl_bandwidth_add(coord, coord, ctx->coord); + + for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) { + cxlr->coord[i].read_bandwidth = coord[i].read_bandwidth; + cxlr->coord[i].write_bandwidth = coord[i].write_bandwidth; + } +} + +/** + * cxl_region_shared_upstream_bandwidth_update - Recalculate the bandwidth for + * the region + * @cxlr: the cxl region to recalculate + * + * The function walks the topology from bottom up and calculates the bandwidth. It + * starts at the endpoints, processes at the switches if any, processes at the rootport + * level, at the host bridge level, and finally aggregates at the region. + */ +void cxl_region_shared_upstream_bandwidth_update(struct cxl_region *cxlr) +{ + struct xarray *working_xa; + int root_count = 0; + bool is_root; + int rc; + lockdep_assert_held(&cxl_dpa_rwsem); - if (!range_contains(&perf->dpa_range, &dpa)) + struct xarray *usp_xa __free(free_perf_xa) = + kzalloc(sizeof(*usp_xa), GFP_KERNEL); + + if (!usp_xa) + return; + + xa_init(usp_xa); + + /* Collect bandwidth data from all the endpoints. */ + for (int i = 0; i < cxlr->params.nr_targets; i++) { + struct cxl_endpoint_decoder *cxled = cxlr->params.targets[i]; + + is_root = false; + rc = cxl_endpoint_gather_bandwidth(cxlr, cxled, usp_xa, &is_root); + if (rc) + return; + root_count += is_root; + } + + /* Detect asymmetric hierarchy with some direct attached endpoints. */ + if (root_count && root_count != cxlr->params.nr_targets) { + dev_dbg(&cxlr->dev, + "Asymmetric hierarchy detected, bandwidth not updated\n"); + return; + } + + /* + * Walk up one or more switches to deal with the bandwidth of the + * switches if they exist. Endpoints directly attached to RPs skip + * over this part. + */ + if (!root_count) { + do { + working_xa = cxl_switch_gather_bandwidth(cxlr, usp_xa, + &is_root); + if (IS_ERR(working_xa)) + return; + free_perf_xa(usp_xa); + usp_xa = working_xa; + } while (!is_root); + } + + /* Handle the bandwidth at the root port of the hierarchy */ + working_xa = cxl_rp_gather_bandwidth(usp_xa); + if (IS_ERR(working_xa)) + return; + free_perf_xa(usp_xa); + usp_xa = working_xa; + + /* Handle the bandwidth at the host bridge of the hierarchy */ + working_xa = cxl_hb_gather_bandwidth(usp_xa); + if (IS_ERR(working_xa)) + return; + free_perf_xa(usp_xa); + usp_xa = working_xa; + + /* + * Aggregate all the bandwidth collected per CFMWS (ACPI0017) and + * update the region bandwidth with the final calculated values. + */ + cxl_region_update_bandwidth(cxlr, usp_xa); +} + +void cxl_region_perf_data_calculate(struct cxl_region *cxlr, + struct cxl_endpoint_decoder *cxled) +{ + struct cxl_dpa_perf *perf; + + lockdep_assert_held(&cxl_dpa_rwsem); + + perf = cxled_get_dpa_perf(cxled, cxlr->mode); + if (IS_ERR(perf)) return; for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) { diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 72a506c9dbd0..0c62b4069ba0 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -103,9 +103,11 @@ enum cxl_poison_trace_type { }; long cxl_pci_get_latency(struct pci_dev *pdev); - +int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c); int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr, enum access_coordinate_class access); bool cxl_need_node_perf_attrs_update(int nid); +int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port, + struct access_coordinate *c); #endif /* __CXL_CORE_H__ */ diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 9d396ef0aeed..5b46bc46aaa9 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -1031,3 +1031,26 @@ bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port) __cxl_endpoint_decoder_reset_detected); } EXPORT_SYMBOL_NS_GPL(cxl_endpoint_decoder_reset_detected, CXL); + +int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c) +{ + int speed, bw; + u16 lnksta; + u32 width; + + speed = pcie_link_speed_mbps(pdev); + if (speed < 0) + return speed; + speed /= BITS_PER_BYTE; + + pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnksta); + width = FIELD_GET(PCI_EXP_LNKSTA_NLW, lnksta); + bw = speed * width; + + for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) { + c[i].read_bandwidth = bw; + c[i].write_bandwidth = bw; + } + + return 0; +} diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index a5e6f3d23cfb..e666ec6a9085 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -2237,6 +2237,26 @@ int cxl_endpoint_get_perf_coordinates(struct cxl_port *port, } EXPORT_SYMBOL_NS_GPL(cxl_endpoint_get_perf_coordinates, CXL); +int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port, + struct access_coordinate *c) +{ + struct cxl_dport *dport = port->parent_dport; + + /* Check this port is connected to a switch DSP and not an RP */ + if (parent_port_is_cxl_root(to_cxl_port(port->dev.parent))) + return -ENODEV; + + if (!coordinates_valid(dport->coord)) + return -EINVAL; + + for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) { + c[i].read_bandwidth = dport->coord[i].read_bandwidth; + c[i].write_bandwidth = dport->coord[i].write_bandwidth; + } + + return 0; +} + /* for user tooling to ensure port disable work has completed */ static ssize_t flush_store(const struct bus_type *bus, const char *buf, size_t count) { diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 7bb79f3f318c..e701e4b04032 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -1983,6 +1983,7 @@ static int cxl_region_attach(struct cxl_region *cxlr, * then the region is already committed. */ p->state = CXL_CONFIG_COMMIT; + cxl_region_shared_upstream_bandwidth_update(cxlr); return 0; } @@ -2004,6 +2005,7 @@ static int cxl_region_attach(struct cxl_region *cxlr, if (rc) return rc; p->state = CXL_CONFIG_ACTIVE; + cxl_region_shared_upstream_bandwidth_update(cxlr); } cxled->cxld.interleave_ways = p->interleave_ways; diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 0fc96f8bf15c..0d8b810a51f0 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -891,6 +891,7 @@ int cxl_endpoint_get_perf_coordinates(struct cxl_port *port, struct access_coordinate *coord); void cxl_region_perf_data_calculate(struct cxl_region *cxlr, struct cxl_endpoint_decoder *cxled); +void cxl_region_shared_upstream_bandwidth_update(struct cxl_region *cxlr); void cxl_memdev_update_perf(struct cxl_memdev *cxlmd); From 2c70677dabb5e326467160e28915b804b925b53b Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 3 Sep 2024 17:11:52 -0700 Subject: [PATCH 402/655] cxl: Add documentation to explain the shared link bandwidth calculation Create a kernel documentation to describe how the CXL shared upstream link bandwidth is calculated. Suggested-by: Dan Williams Reviewed-by: Alison Schofield Acked-by: Dan Williams Link: https://patch.msgid.link/20240904001316.1688225-4-dave.jiang@intel.com Signed-off-by: Dave Jiang --- .../driver-api/cxl/access-coordinates.rst | 91 +++++++++++++++++++ Documentation/driver-api/cxl/index.rst | 1 + 2 files changed, 92 insertions(+) create mode 100644 Documentation/driver-api/cxl/access-coordinates.rst diff --git a/Documentation/driver-api/cxl/access-coordinates.rst b/Documentation/driver-api/cxl/access-coordinates.rst new file mode 100644 index 000000000000..b07950ea30c9 --- /dev/null +++ b/Documentation/driver-api/cxl/access-coordinates.rst @@ -0,0 +1,91 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: + +================================== +CXL Access Coordinates Computation +================================== + +Shared Upstream Link Calculation +================================ +For certain CXL region construction with endpoints behind CXL switches (SW) or +Root Ports (RP), there is the possibility of the total bandwidth for all +the endpoints behind a switch being more than the switch upstream link. +A similar situation can occur within the host, upstream of the root ports. +The CXL driver performs an additional pass after all the targets have +arrived for a region in order to recalculate the bandwidths with possible +upstream link being a limiting factor in mind. + +The algorithm assumes the configuration is a symmetric topology as that +maximizes performance. When asymmetric topology is detected, the calculation +is aborted. An asymmetric topology is detected during topology walk where the +number of RPs detected as a grandparent is not equal to the number of devices +iterated in the same iteration loop. The assumption is made that subtle +asymmetry in properties does not happen and all paths to EPs are equal. + +There can be multiple switches under an RP. There can be multiple RPs under +a CXL Host Bridge (HB). There can be multiple HBs under a CXL Fixed Memory +Window Structure (CFMWS). + +An example hierarchy: + +> CFMWS 0 +> | +> _________|_________ +> | | +> ACPI0017-0 ACPI0017-1 +> GP0/HB0/ACPI0016-0 GP1/HB1/ACPI0016-1 +> | | | | +> RP0 RP1 RP2 RP3 +> | | | | +> SW 0 SW 1 SW 2 SW 3 +> | | | | | | | | +> EP0 EP1 EP2 EP3 EP4 EP5 EP6 EP7 + +Computation for the example hierarchy: + +Min (GP0 to CPU BW, + Min(SW 0 Upstream Link to RP0 BW, + Min(SW0SSLBIS for SW0DSP0 (EP0), EP0 DSLBIS, EP0 Upstream Link) + + Min(SW0SSLBIS for SW0DSP1 (EP1), EP1 DSLBIS, EP1 Upstream link)) + + Min(SW 1 Upstream Link to RP1 BW, + Min(SW1SSLBIS for SW1DSP0 (EP2), EP2 DSLBIS, EP2 Upstream Link) + + Min(SW1SSLBIS for SW1DSP1 (EP3), EP3 DSLBIS, EP3 Upstream link))) + +Min (GP1 to CPU BW, + Min(SW 2 Upstream Link to RP2 BW, + Min(SW2SSLBIS for SW2DSP0 (EP4), EP4 DSLBIS, EP4 Upstream Link) + + Min(SW2SSLBIS for SW2DSP1 (EP5), EP5 DSLBIS, EP5 Upstream link)) + + Min(SW 3 Upstream Link to RP3 BW, + Min(SW3SSLBIS for SW3DSP0 (EP6), EP6 DSLBIS, EP6 Upstream Link) + + Min(SW3SSLBIS for SW3DSP1 (EP7), EP7 DSLBIS, EP7 Upstream link)))) + +The calculation starts at cxl_region_shared_upstream_perf_update(). A xarray +is created to collect all the endpoint bandwidths via the +cxl_endpoint_gather_bandwidth() function. The min() of bandwidth from the +endpoint CDAT and the upstream link bandwidth is calculated. If the endpoint +has a CXL switch as a parent, then min() of calculated bandwidth and the +bandwidth from the SSLBIS for the switch downstream port that is associated +with the endpoint is calculated. The final bandwidth is stored in a +'struct cxl_perf_ctx' in the xarray indexed by a device pointer. If the +endpoint is direct attached to a root port (RP), the device pointer would be an +RP device. If the endpoint is behind a switch, the device pointer would be the +upstream device of the parent switch. + +At the next stage, the code walks through one or more switches if they exist +in the topology. For endpoints directly attached to RPs, this step is skipped. +If there is another switch upstream, the code takes the min() of the current +gathered bandwidth and the upstream link bandwidth. If there's a switch +upstream, then the SSLBIS of the upstream switch. + +Once the topology walk reaches the RP, whether it's direct attached endpoints +or walking through the switch(es), cxl_rp_gather_bandwidth() is called. At +this point all the bandwidths are aggregated per each host bridge, which is +also the index for the resulting xarray. + +The next step is to take the min() of the per host bridge bandwidth and the +bandwidth from the Generic Port (GP). The bandwidths for the GP is retrieved +via ACPI tables SRAT/HMAT. The min bandwidth are aggregated under the same +ACPI0017 device to form a new xarray. + +Finally, the cxl_region_update_bandwidth() is called and the aggregated +bandwidth from all the members of the last xarray is updated for the +access coordinates residing in the cxl region (cxlr) context. diff --git a/Documentation/driver-api/cxl/index.rst b/Documentation/driver-api/cxl/index.rst index 12b82725d322..965ba90e8fb7 100644 --- a/Documentation/driver-api/cxl/index.rst +++ b/Documentation/driver-api/cxl/index.rst @@ -8,6 +8,7 @@ Compute Express Link :maxdepth: 1 memory-devices + access-coordinates maturity-map From 878716d40cdd4d7923f4e910fe4f6841ae7686f1 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Sun, 22 Sep 2024 09:52:12 +0300 Subject: [PATCH 403/655] net: cirrus: use u8 for addr to calm down sparse ep93xx_eth.c:805:40: sparse: sparse: incorrect type in argument 2 (different address spaces) ep93xx_eth.c:805:40: sparse: expected unsigned char const [usertype] *addr ep93xx_eth.c:805:40: sparse: got void [noderef] __iomem * Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202409212354.9CiUem7B-lkp@intel.com/ Fixes: 858555bb5598 ("net: cirrus: add DT support for Cirrus EP93xx") Signed-off-by: Nikita Shubin Acked-by: Alexander Sverdlin Signed-off-by: Arnd Bergmann --- drivers/net/ethernet/cirrus/ep93xx_eth.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cirrus/ep93xx_eth.c b/drivers/net/ethernet/cirrus/ep93xx_eth.c index 2523d9c9d1b8..c2007cd86416 100644 --- a/drivers/net/ethernet/cirrus/ep93xx_eth.c +++ b/drivers/net/ethernet/cirrus/ep93xx_eth.c @@ -771,6 +771,7 @@ static int ep93xx_eth_probe(struct platform_device *pdev) struct resource *mem; void __iomem *base_addr; struct device_node *np; + u8 addr[ETH_ALEN]; u32 phy_id; int irq; int err; @@ -802,7 +803,8 @@ static int ep93xx_eth_probe(struct platform_device *pdev) goto err_out; } - eth_hw_addr_set(dev, base_addr + 0x50); + memcpy_fromio(addr, base_addr + 0x50, ETH_ALEN); + eth_hw_addr_set(dev, addr); dev->ethtool_ops = &ep93xx_ethtool_ops; dev->netdev_ops = &ep93xx_netdev_ops; dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM; From 268225a1de1a021bac4884e7d61fe047345cc9be Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 23 Sep 2024 19:00:21 +0900 Subject: [PATCH 404/655] tomoyo: preparation step for building as a loadable LSM module In order to allow Makefile to generate tomoyo.ko as output, rename tomoyo.c to hooks.h and cut out LSM hook registration part that will be built into vmlinux from hooks.h to init.c . Also, update comments and relocate some variables. No behavior changes. Signed-off-by: Tetsuo Handa --- security/tomoyo/Makefile | 2 +- security/tomoyo/gc.c | 3 + security/tomoyo/{tomoyo.c => hooks.h} | 110 +------------------------- security/tomoyo/init.c | 104 ++++++++++++++++++++++++ security/tomoyo/securityfs_if.c | 6 +- security/tomoyo/util.c | 3 - 6 files changed, 112 insertions(+), 116 deletions(-) rename security/tomoyo/{tomoyo.c => hooks.h} (79%) create mode 100644 security/tomoyo/init.c diff --git a/security/tomoyo/Makefile b/security/tomoyo/Makefile index 55c67b9846a9..e573a5cad732 100644 --- a/security/tomoyo/Makefile +++ b/security/tomoyo/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -obj-y = audit.o common.o condition.o domain.o environ.o file.o gc.o group.o load_policy.o memory.o mount.o network.o realpath.o securityfs_if.o tomoyo.o util.o +obj-y = audit.o common.o condition.o domain.o environ.o file.o gc.o group.o init.o load_policy.o memory.o mount.o network.o realpath.o securityfs_if.o util.o targets += builtin-policy.h diff --git a/security/tomoyo/gc.c b/security/tomoyo/gc.c index 026e29ea3796..6eccca150839 100644 --- a/security/tomoyo/gc.c +++ b/security/tomoyo/gc.c @@ -9,6 +9,9 @@ #include #include +/* Lock for GC. */ +DEFINE_SRCU(tomoyo_ss); + /** * tomoyo_memory_free - Free memory for elements. * diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/hooks.h similarity index 79% rename from security/tomoyo/tomoyo.c rename to security/tomoyo/hooks.h index 04a92c3d65d4..58929bb71477 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/hooks.h @@ -1,12 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 /* - * security/tomoyo/tomoyo.c + * security/tomoyo/hooks.h * * Copyright (C) 2005-2011 NTT DATA CORPORATION */ -#include -#include #include "common.h" /** @@ -18,10 +16,6 @@ struct tomoyo_domain_info *tomoyo_domain(void) { struct tomoyo_task *s = tomoyo_task(current); - if (s->old_domain_info && !current->in_execve) { - atomic_dec(&s->old_domain_info->users); - s->old_domain_info = NULL; - } return s->domain_info; } @@ -62,26 +56,6 @@ static void tomoyo_bprm_committed_creds(const struct linux_binprm *bprm) s->old_domain_info = NULL; } -#ifndef CONFIG_SECURITY_TOMOYO_OMIT_USERSPACE_LOADER -/** - * tomoyo_bprm_creds_for_exec - Target for security_bprm_creds_for_exec(). - * - * @bprm: Pointer to "struct linux_binprm". - * - * Returns 0. - */ -static int tomoyo_bprm_creds_for_exec(struct linux_binprm *bprm) -{ - /* - * Load policy if /sbin/tomoyo-init exists and /sbin/init is requested - * for the first time. - */ - if (!tomoyo_policy_loaded) - tomoyo_load_policy(bprm->filename); - return 0; -} -#endif - /** * tomoyo_bprm_check_security - Target for security_bprm_check(). * @@ -501,10 +475,6 @@ static int tomoyo_socket_sendmsg(struct socket *sock, struct msghdr *msg, return tomoyo_socket_sendmsg_permission(sock, msg, size); } -struct lsm_blob_sizes tomoyo_blob_sizes __ro_after_init = { - .lbs_task = sizeof(struct tomoyo_task), -}; - /** * tomoyo_task_alloc - Target for security_task_alloc(). * @@ -543,81 +513,3 @@ static void tomoyo_task_free(struct task_struct *task) s->old_domain_info = NULL; } } - -static const struct lsm_id tomoyo_lsmid = { - .name = "tomoyo", - .id = LSM_ID_TOMOYO, -}; - -/* - * tomoyo_security_ops is a "struct security_operations" which is used for - * registering TOMOYO. - */ -static struct security_hook_list tomoyo_hooks[] __ro_after_init = { - LSM_HOOK_INIT(cred_prepare, tomoyo_cred_prepare), - LSM_HOOK_INIT(bprm_committed_creds, tomoyo_bprm_committed_creds), - LSM_HOOK_INIT(task_alloc, tomoyo_task_alloc), - LSM_HOOK_INIT(task_free, tomoyo_task_free), -#ifndef CONFIG_SECURITY_TOMOYO_OMIT_USERSPACE_LOADER - LSM_HOOK_INIT(bprm_creds_for_exec, tomoyo_bprm_creds_for_exec), -#endif - LSM_HOOK_INIT(bprm_check_security, tomoyo_bprm_check_security), - LSM_HOOK_INIT(file_fcntl, tomoyo_file_fcntl), - LSM_HOOK_INIT(file_open, tomoyo_file_open), - LSM_HOOK_INIT(file_truncate, tomoyo_file_truncate), - LSM_HOOK_INIT(path_truncate, tomoyo_path_truncate), - LSM_HOOK_INIT(path_unlink, tomoyo_path_unlink), - LSM_HOOK_INIT(path_mkdir, tomoyo_path_mkdir), - LSM_HOOK_INIT(path_rmdir, tomoyo_path_rmdir), - LSM_HOOK_INIT(path_symlink, tomoyo_path_symlink), - LSM_HOOK_INIT(path_mknod, tomoyo_path_mknod), - LSM_HOOK_INIT(path_link, tomoyo_path_link), - LSM_HOOK_INIT(path_rename, tomoyo_path_rename), - LSM_HOOK_INIT(inode_getattr, tomoyo_inode_getattr), - LSM_HOOK_INIT(file_ioctl, tomoyo_file_ioctl), - LSM_HOOK_INIT(file_ioctl_compat, tomoyo_file_ioctl), - LSM_HOOK_INIT(path_chmod, tomoyo_path_chmod), - LSM_HOOK_INIT(path_chown, tomoyo_path_chown), - LSM_HOOK_INIT(path_chroot, tomoyo_path_chroot), - LSM_HOOK_INIT(sb_mount, tomoyo_sb_mount), - LSM_HOOK_INIT(sb_umount, tomoyo_sb_umount), - LSM_HOOK_INIT(sb_pivotroot, tomoyo_sb_pivotroot), - LSM_HOOK_INIT(socket_bind, tomoyo_socket_bind), - LSM_HOOK_INIT(socket_connect, tomoyo_socket_connect), - LSM_HOOK_INIT(socket_listen, tomoyo_socket_listen), - LSM_HOOK_INIT(socket_sendmsg, tomoyo_socket_sendmsg), -}; - -/* Lock for GC. */ -DEFINE_SRCU(tomoyo_ss); - -int tomoyo_enabled __ro_after_init = 1; - -/** - * tomoyo_init - Register TOMOYO Linux as a LSM module. - * - * Returns 0. - */ -static int __init tomoyo_init(void) -{ - struct tomoyo_task *s = tomoyo_task(current); - - /* register ourselves with the security framework */ - security_add_hooks(tomoyo_hooks, ARRAY_SIZE(tomoyo_hooks), - &tomoyo_lsmid); - pr_info("TOMOYO Linux initialized\n"); - s->domain_info = &tomoyo_kernel_domain; - atomic_inc(&tomoyo_kernel_domain.users); - s->old_domain_info = NULL; - tomoyo_mm_init(); - - return 0; -} - -DEFINE_LSM(tomoyo) = { - .name = "tomoyo", - .enabled = &tomoyo_enabled, - .flags = LSM_FLAG_LEGACY_MAJOR, - .blobs = &tomoyo_blob_sizes, - .init = tomoyo_init, -}; diff --git a/security/tomoyo/init.c b/security/tomoyo/init.c new file mode 100644 index 000000000000..2b38dbc7a559 --- /dev/null +++ b/security/tomoyo/init.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * security/tomoyo/init.c + * + * Copyright (C) 2005-2011 NTT DATA CORPORATION + */ + +#include +#include +#include "common.h" + +#include "hooks.h" + +#ifndef CONFIG_SECURITY_TOMOYO_OMIT_USERSPACE_LOADER +static int tomoyo_bprm_creds_for_exec(struct linux_binprm *bprm) +{ + /* + * Load policy if /sbin/tomoyo-init exists and /sbin/init is requested + * for the first time. + */ + if (!tomoyo_policy_loaded) + tomoyo_load_policy(bprm->filename); + return 0; +} +#endif + +struct lsm_blob_sizes tomoyo_blob_sizes __ro_after_init = { + .lbs_task = sizeof(struct tomoyo_task), +}; + +static const struct lsm_id tomoyo_lsmid = { + .name = "tomoyo", + .id = LSM_ID_TOMOYO, +}; + +/* tomoyo_hooks is used for registering TOMOYO. */ +static struct security_hook_list tomoyo_hooks[] __ro_after_init = { + LSM_HOOK_INIT(cred_prepare, tomoyo_cred_prepare), + LSM_HOOK_INIT(bprm_committed_creds, tomoyo_bprm_committed_creds), + LSM_HOOK_INIT(task_alloc, tomoyo_task_alloc), + LSM_HOOK_INIT(task_free, tomoyo_task_free), +#ifndef CONFIG_SECURITY_TOMOYO_OMIT_USERSPACE_LOADER + LSM_HOOK_INIT(bprm_creds_for_exec, tomoyo_bprm_creds_for_exec), +#endif + LSM_HOOK_INIT(bprm_check_security, tomoyo_bprm_check_security), + LSM_HOOK_INIT(file_fcntl, tomoyo_file_fcntl), + LSM_HOOK_INIT(file_open, tomoyo_file_open), + LSM_HOOK_INIT(file_truncate, tomoyo_file_truncate), + LSM_HOOK_INIT(path_truncate, tomoyo_path_truncate), + LSM_HOOK_INIT(path_unlink, tomoyo_path_unlink), + LSM_HOOK_INIT(path_mkdir, tomoyo_path_mkdir), + LSM_HOOK_INIT(path_rmdir, tomoyo_path_rmdir), + LSM_HOOK_INIT(path_symlink, tomoyo_path_symlink), + LSM_HOOK_INIT(path_mknod, tomoyo_path_mknod), + LSM_HOOK_INIT(path_link, tomoyo_path_link), + LSM_HOOK_INIT(path_rename, tomoyo_path_rename), + LSM_HOOK_INIT(inode_getattr, tomoyo_inode_getattr), + LSM_HOOK_INIT(file_ioctl, tomoyo_file_ioctl), + LSM_HOOK_INIT(file_ioctl_compat, tomoyo_file_ioctl), + LSM_HOOK_INIT(path_chmod, tomoyo_path_chmod), + LSM_HOOK_INIT(path_chown, tomoyo_path_chown), + LSM_HOOK_INIT(path_chroot, tomoyo_path_chroot), + LSM_HOOK_INIT(sb_mount, tomoyo_sb_mount), + LSM_HOOK_INIT(sb_umount, tomoyo_sb_umount), + LSM_HOOK_INIT(sb_pivotroot, tomoyo_sb_pivotroot), + LSM_HOOK_INIT(socket_bind, tomoyo_socket_bind), + LSM_HOOK_INIT(socket_connect, tomoyo_socket_connect), + LSM_HOOK_INIT(socket_listen, tomoyo_socket_listen), + LSM_HOOK_INIT(socket_sendmsg, tomoyo_socket_sendmsg), +}; + +int tomoyo_enabled __ro_after_init = 1; + +/* Has /sbin/init started? */ +bool tomoyo_policy_loaded; + +/** + * tomoyo_init - Register TOMOYO Linux as a LSM module. + * + * Returns 0. + */ +static int __init tomoyo_init(void) +{ + struct tomoyo_task *s = tomoyo_task(current); + + /* register ourselves with the security framework */ + security_add_hooks(tomoyo_hooks, ARRAY_SIZE(tomoyo_hooks), + &tomoyo_lsmid); + pr_info("TOMOYO Linux initialized\n"); + s->domain_info = &tomoyo_kernel_domain; + atomic_inc(&tomoyo_kernel_domain.users); + s->old_domain_info = NULL; + tomoyo_mm_init(); + + return 0; +} + +DEFINE_LSM(tomoyo) = { + .name = "tomoyo", + .enabled = &tomoyo_enabled, + .flags = LSM_FLAG_LEGACY_MAJOR, + .blobs = &tomoyo_blob_sizes, + .init = tomoyo_init, +}; diff --git a/security/tomoyo/securityfs_if.c b/security/tomoyo/securityfs_if.c index a2705798476f..7e69747b2f77 100644 --- a/security/tomoyo/securityfs_if.c +++ b/security/tomoyo/securityfs_if.c @@ -229,11 +229,11 @@ static void __init tomoyo_create_entry(const char *name, const umode_t mode, } /** - * tomoyo_initerface_init - Initialize /sys/kernel/security/tomoyo/ interface. + * tomoyo_interface_init - Initialize /sys/kernel/security/tomoyo/ interface. * * Returns 0. */ -static int __init tomoyo_initerface_init(void) +static int __init tomoyo_interface_init(void) { struct tomoyo_domain_info *domain; struct dentry *tomoyo_dir; @@ -270,4 +270,4 @@ static int __init tomoyo_initerface_init(void) return 0; } -fs_initcall(tomoyo_initerface_init); +fs_initcall(tomoyo_interface_init); diff --git a/security/tomoyo/util.c b/security/tomoyo/util.c index 6799b1122c9d..b851ff377382 100644 --- a/security/tomoyo/util.c +++ b/security/tomoyo/util.c @@ -13,9 +13,6 @@ /* Lock for protecting policy. */ DEFINE_MUTEX(tomoyo_policy_lock); -/* Has /sbin/init started? */ -bool tomoyo_policy_loaded; - /* * Mapping table from "enum tomoyo_mac_index" to * "enum tomoyo_mac_category_index". From 66cac80698cd1e31ae9bc5c271e83209903d4861 Mon Sep 17 00:00:00 2001 From: Matthew Sakai Date: Tue, 17 Sep 2024 23:24:37 -0400 Subject: [PATCH 405/655] dm vdo: handle unaligned discards correctly Reset the data_vio properly for each discard block, and delay acknowledgement and cleanup until all discard blocks are complete. Signed-off-by: Matthew Sakai Signed-off-by: Mikulas Patocka --- drivers/md/dm-vdo/data-vio.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/md/dm-vdo/data-vio.c b/drivers/md/dm-vdo/data-vio.c index ab3ea8337809..0d502f6a86ad 100644 --- a/drivers/md/dm-vdo/data-vio.c +++ b/drivers/md/dm-vdo/data-vio.c @@ -501,6 +501,7 @@ static void launch_data_vio(struct data_vio *data_vio, logical_block_number_t lb memset(&data_vio->record_name, 0, sizeof(data_vio->record_name)); memset(&data_vio->duplicate, 0, sizeof(data_vio->duplicate)); + vdo_reset_completion(&data_vio->decrement_completion); vdo_reset_completion(completion); completion->error_handler = handle_data_vio_error; set_data_vio_logical_callback(data_vio, attempt_logical_block_lock); @@ -1273,12 +1274,14 @@ static void clean_hash_lock(struct vdo_completion *completion) static void finish_cleanup(struct data_vio *data_vio) { struct vdo_completion *completion = &data_vio->vio.completion; + u32 discard_size = min_t(u32, data_vio->remaining_discard, + VDO_BLOCK_SIZE - data_vio->offset); VDO_ASSERT_LOG_ONLY(data_vio->allocation.lock == NULL, "complete data_vio has no allocation lock"); VDO_ASSERT_LOG_ONLY(data_vio->hash_lock == NULL, "complete data_vio has no hash lock"); - if ((data_vio->remaining_discard <= VDO_BLOCK_SIZE) || + if ((data_vio->remaining_discard <= discard_size) || (completion->result != VDO_SUCCESS)) { struct data_vio_pool *pool = completion->vdo->data_vio_pool; @@ -1287,12 +1290,12 @@ static void finish_cleanup(struct data_vio *data_vio) return; } - data_vio->remaining_discard -= min_t(u32, data_vio->remaining_discard, - VDO_BLOCK_SIZE - data_vio->offset); + data_vio->remaining_discard -= discard_size; data_vio->is_partial = (data_vio->remaining_discard < VDO_BLOCK_SIZE); data_vio->read = data_vio->is_partial; data_vio->offset = 0; completion->requeue = true; + data_vio->first_reference_operation_complete = false; launch_data_vio(data_vio, data_vio->logical.lbn + 1); } @@ -1965,7 +1968,8 @@ static void allocate_block(struct vdo_completion *completion) .state = VDO_MAPPING_STATE_UNCOMPRESSED, }; - if (data_vio->fua) { + if (data_vio->fua || + data_vio->remaining_discard > (u32) (VDO_BLOCK_SIZE - data_vio->offset)) { prepare_for_dedupe(data_vio); return; } @@ -2042,7 +2046,6 @@ void continue_data_vio_with_block_map_slot(struct vdo_completion *completion) return; } - /* * We don't need to write any data, so skip allocation and just update the block map and * reference counts (via the journal). @@ -2051,7 +2054,7 @@ void continue_data_vio_with_block_map_slot(struct vdo_completion *completion) if (data_vio->is_zero) data_vio->new_mapped.state = VDO_MAPPING_STATE_UNCOMPRESSED; - if (data_vio->remaining_discard > VDO_BLOCK_SIZE) { + if (data_vio->remaining_discard > (u32) (VDO_BLOCK_SIZE - data_vio->offset)) { /* This is not the final block of a discard so we can't acknowledge it yet. */ update_metadata_for_data_vio_write(data_vio, NULL); return; From 8e391ae0607fc820e9ece87d1415e6a1ff274e69 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 19 Sep 2024 14:40:04 +0200 Subject: [PATCH 406/655] s390/vdso: Get rid of permutation constants The three byte masks for VECTOR PERMUTE are not needed, since the instruction VECTOR SHIFT LEFT DOUBLE BY BYTE can be used to implement the required "rotate left" easily. Signed-off-by: Heiko Carstens Reviewed-by: Jens Remus Signed-off-by: Vasily Gorbik --- arch/s390/kernel/vdso64/vgetrandom-chacha.S | 25 +++++++++------------ 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/arch/s390/kernel/vdso64/vgetrandom-chacha.S b/arch/s390/kernel/vdso64/vgetrandom-chacha.S index d802b0a96f41..475711e0c6a3 100644 --- a/arch/s390/kernel/vdso64/vgetrandom-chacha.S +++ b/arch/s390/kernel/vdso64/vgetrandom-chacha.S @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include #include #include #include @@ -12,9 +13,6 @@ #define COPY1 %v5 #define COPY2 %v6 #define COPY3 %v7 -#define PERM4 %v16 -#define PERM8 %v17 -#define PERM12 %v18 #define BEPERM %v19 #define TMP0 %v20 #define TMP1 %v21 @@ -23,12 +21,9 @@ .section .rodata - .balign 128 + .balign 32 .Lconstants: .long 0x61707865,0x3320646e,0x79622d32,0x6b206574 # endian-neutral - .long 0x04050607,0x08090a0b,0x0c0d0e0f,0x00010203 # rotl 4 bytes - .long 0x08090a0b,0x0c0d0e0f,0x00010203,0x04050607 # rotl 8 bytes - .long 0x0c0d0e0f,0x00010203,0x04050607,0x08090a0b # rotl 12 bytes .long 0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c # byte swap .text @@ -48,8 +43,8 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack) /* COPY0 = "expand 32-byte k" */ VL COPY0,0,,%r1 - /* PERM4-PERM12,BEPERM = byte selectors for VPERM */ - VLM PERM4,BEPERM,16,%r1 + /* BEPERM = byte selectors for VPERM */ + ALTERNATIVE __stringify(VL BEPERM,16,,%r1), "brcl 0,0", ALT_FACILITY(148) /* COPY1,COPY2 = key */ VLM COPY1,COPY2,0,%r3 @@ -89,11 +84,11 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack) VERLLF STATE1,STATE1,7 /* STATE1[0,1,2,3] = STATE1[1,2,3,0] */ - VPERM STATE1,STATE1,STATE1,PERM4 + VSLDB STATE1,STATE1,STATE1,4 /* STATE2[0,1,2,3] = STATE2[2,3,0,1] */ - VPERM STATE2,STATE2,STATE2,PERM8 + VSLDB STATE2,STATE2,STATE2,8 /* STATE3[0,1,2,3] = STATE3[3,0,1,2] */ - VPERM STATE3,STATE3,STATE3,PERM12 + VSLDB STATE3,STATE3,STATE3,12 /* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 16) */ VAF STATE0,STATE0,STATE1 @@ -116,11 +111,11 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack) VERLLF STATE1,STATE1,7 /* STATE1[0,1,2,3] = STATE1[3,0,1,2] */ - VPERM STATE1,STATE1,STATE1,PERM12 + VSLDB STATE1,STATE1,STATE1,12 /* STATE2[0,1,2,3] = STATE2[2,3,0,1] */ - VPERM STATE2,STATE2,STATE2,PERM8 + VSLDB STATE2,STATE2,STATE2,8 /* STATE3[0,1,2,3] = STATE3[1,2,3,0] */ - VPERM STATE3,STATE3,STATE3,PERM4 + VSLDB STATE3,STATE3,STATE3,4 brctg %r0,.Ldoubleround /* OUTPUT0 = STATE0 + STATE0 */ From ff35a3f0ca5c88145c6da6630f3420071dfa296c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 19 Sep 2024 14:40:05 +0200 Subject: [PATCH 407/655] s390/vdso: Fix comment within __arch_chacha20_blocks_nostack() Fix comment within __arch_chacha20_blocks_nostack() so the comment reflects what the code is doing. Signed-off-by: Heiko Carstens Reviewed-by: Jens Remus Signed-off-by: Vasily Gorbik --- arch/s390/kernel/vdso64/vgetrandom-chacha.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/s390/kernel/vdso64/vgetrandom-chacha.S b/arch/s390/kernel/vdso64/vgetrandom-chacha.S index 475711e0c6a3..3f7e30886d1b 100644 --- a/arch/s390/kernel/vdso64/vgetrandom-chacha.S +++ b/arch/s390/kernel/vdso64/vgetrandom-chacha.S @@ -118,13 +118,13 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack) VSLDB STATE3,STATE3,STATE3,4 brctg %r0,.Ldoubleround - /* OUTPUT0 = STATE0 + STATE0 */ + /* OUTPUT0 = STATE0 + COPY0 */ VAF STATE0,STATE0,COPY0 - /* OUTPUT1 = STATE1 + STATE1 */ + /* OUTPUT1 = STATE1 + COPY1 */ VAF STATE1,STATE1,COPY1 - /* OUTPUT2 = STATE2 + STATE2 */ + /* OUTPUT2 = STATE2 + COPY2 */ VAF STATE2,STATE2,COPY2 - /* OUTPUT2 = STATE3 + STATE3 */ + /* OUTPUT3 = STATE3 + COPY3 */ VAF STATE3,STATE3,COPY3 /* From 5cccfc8be6d256e91d155313edef20c1a89064b2 Mon Sep 17 00:00:00 2001 From: Jens Remus Date: Thu, 19 Sep 2024 14:40:06 +0200 Subject: [PATCH 408/655] s390/vdso: Add CFI annotations to __arch_chacha20_blocks_nostack() This allows proper unwinding, for instance when using a debugger such as GDB. Signed-off-by: Jens Remus Signed-off-by: Heiko Carstens Acked-by: Vasily Gorbik Signed-off-by: Vasily Gorbik --- arch/s390/kernel/vdso64/vgetrandom-chacha.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/s390/kernel/vdso64/vgetrandom-chacha.S b/arch/s390/kernel/vdso64/vgetrandom-chacha.S index 3f7e30886d1b..894954bf3b41 100644 --- a/arch/s390/kernel/vdso64/vgetrandom-chacha.S +++ b/arch/s390/kernel/vdso64/vgetrandom-chacha.S @@ -3,6 +3,7 @@ #include #include #include +#include #include #define STATE0 %v0 @@ -38,6 +39,7 @@ * size_t nblocks) */ SYM_FUNC_START(__arch_chacha20_blocks_nostack) + CFI_STARTPROC larl %r1,.Lconstants /* COPY0 = "expand 32-byte k" */ @@ -177,4 +179,5 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack) VPERM TMP3,STATE3,STATE3,BEPERM VSTM TMP0,TMP3,0,%r2 j .Lstoredone + CFI_ENDPROC SYM_FUNC_END(__arch_chacha20_blocks_nostack) From d361390d9f2a332b52458cc69f1fe8e76d6c2943 Mon Sep 17 00:00:00 2001 From: Jens Remus Date: Thu, 19 Sep 2024 14:40:07 +0200 Subject: [PATCH 409/655] s390/vdso: Use macros for annotation of asm functions Use the macros SYM_FUNC_START() and SYM_FUNC_END() to annotate the functions in assembly. Signed-off-by: Jens Remus Signed-off-by: Heiko Carstens Acked-by: Vasily Gorbik Signed-off-by: Vasily Gorbik --- arch/s390/kernel/vdso64/vdso_user_wrapper.S | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/arch/s390/kernel/vdso64/vdso_user_wrapper.S b/arch/s390/kernel/vdso64/vdso_user_wrapper.S index e26e68675c08..aa06c85bcbd3 100644 --- a/arch/s390/kernel/vdso64/vdso_user_wrapper.S +++ b/arch/s390/kernel/vdso64/vdso_user_wrapper.S @@ -13,10 +13,7 @@ * for details. */ .macro vdso_func func - .globl __kernel_\func - .type __kernel_\func,@function - __ALIGN -__kernel_\func: +SYM_FUNC_START(__kernel_\func) CFI_STARTPROC aghi %r15,-STACK_FRAME_VDSO_OVERHEAD CFI_DEF_CFA_OFFSET (STACK_FRAME_USER_OVERHEAD + STACK_FRAME_VDSO_OVERHEAD) @@ -32,7 +29,7 @@ __kernel_\func: CFI_RESTORE 15 br %r14 CFI_ENDPROC - .size __kernel_\func,.-__kernel_\func +SYM_FUNC_END(__kernel_\func) .endm vdso_func gettimeofday @@ -41,16 +38,13 @@ vdso_func clock_gettime vdso_func getcpu .macro vdso_syscall func,syscall - .globl __kernel_\func - .type __kernel_\func,@function - __ALIGN -__kernel_\func: +SYM_FUNC_START(__kernel_\func) CFI_STARTPROC svc \syscall /* Make sure we notice when a syscall returns, which shouldn't happen */ .word 0 CFI_ENDPROC - .size __kernel_\func,.-__kernel_\func +SYM_FUNC_END(__kernel_\func) .endm vdso_syscall restart_syscall,__NR_restart_syscall From e08ec26928554c36e34e089f663dc9114d77b68c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 19 Sep 2024 14:40:08 +0200 Subject: [PATCH 410/655] tools: Add additional SYM_*() stubs to linkage.h Similar to commit f8d92fc527ff ("selftests: vDSO: fix include order in build of test_vdso_chacha") add SYM_DATA_START, SYM_DATA_START_LOCAL, and SYM_DATA_END stubs to tools/include/linux/linkage.h so that the proper macros can be used within the kernel's vdso code as well as in the vdso_test_chacha selftest. Signed-off-by: Heiko Carstens Reviewed-by: Jens Remus Signed-off-by: Vasily Gorbik --- tools/include/linux/linkage.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/include/linux/linkage.h b/tools/include/linux/linkage.h index a48ff086899c..a89620c550ed 100644 --- a/tools/include/linux/linkage.h +++ b/tools/include/linux/linkage.h @@ -2,7 +2,9 @@ #define _TOOLS_INCLUDE_LINUX_LINKAGE_H #define SYM_FUNC_START(x) .globl x; x: - #define SYM_FUNC_END(x) +#define SYM_DATA_START(x) .globl x; x: +#define SYM_DATA_START_LOCAL(x) x: +#define SYM_DATA_END(x) #endif /* _TOOLS_INCLUDE_LINUX_LINKAGE_H */ From c902b578eebfe0739e8ec491b60f2f37dfeb09c0 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 19 Sep 2024 14:40:09 +0200 Subject: [PATCH 411/655] s390/vdso: Use SYM_DATA_START_LOCAL()/SYM_DATA_END() for data objects Use SYM_DATA_START_LOCAL()/SYM_DATA_END() in vgetrandom-chacha.S so that the constants end up in an object with correct size: readelf -Ws vgetrandom-chacha.o Num: Value Size Type Bind Vis Ndx Name ... 5: 0000000000000000 32 OBJECT LOCAL DEFAULT 5 chacha20_constants Signed-off-by: Heiko Carstens Reviewed-by: Jens Remus Signed-off-by: Vasily Gorbik --- arch/s390/kernel/vdso64/vgetrandom-chacha.S | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/vdso64/vgetrandom-chacha.S b/arch/s390/kernel/vdso64/vgetrandom-chacha.S index 894954bf3b41..4c52ba78e060 100644 --- a/arch/s390/kernel/vdso64/vgetrandom-chacha.S +++ b/arch/s390/kernel/vdso64/vgetrandom-chacha.S @@ -23,9 +23,10 @@ .section .rodata .balign 32 -.Lconstants: +SYM_DATA_START_LOCAL(chacha20_constants) .long 0x61707865,0x3320646e,0x79622d32,0x6b206574 # endian-neutral .long 0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c # byte swap +SYM_DATA_END(chacha20_constants) .text /* @@ -40,7 +41,7 @@ */ SYM_FUNC_START(__arch_chacha20_blocks_nostack) CFI_STARTPROC - larl %r1,.Lconstants + larl %r1,chacha20_constants /* COPY0 = "expand 32-byte k" */ VL COPY0,0,,%r1 From d714abee5fb64c4817dce477bd7f2bd1bb4fe814 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 19 Sep 2024 14:40:10 +0200 Subject: [PATCH 412/655] s390/vdso: Use one large alternative instead of an alternative branch Replace the alternative branch with a larger alternative that contains both paths. That way the two paths are closer together and it is easier to change both paths if the need should arise. Signed-off-by: Heiko Carstens Reviewed-by: Jens Remus Signed-off-by: Vasily Gorbik --- arch/s390/kernel/vdso64/vgetrandom-chacha.S | 35 ++++++++++----------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/arch/s390/kernel/vdso64/vgetrandom-chacha.S b/arch/s390/kernel/vdso64/vgetrandom-chacha.S index 4c52ba78e060..09c034c2f853 100644 --- a/arch/s390/kernel/vdso64/vgetrandom-chacha.S +++ b/arch/s390/kernel/vdso64/vgetrandom-chacha.S @@ -130,16 +130,22 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack) /* OUTPUT3 = STATE3 + COPY3 */ VAF STATE3,STATE3,COPY3 - /* - * 32 bit wise little endian store to OUTPUT. If the vector - * enhancement facility 2 is not installed use the slow path. - */ - ALTERNATIVE "brc 0xf,.Lstoreslow", "nop", ALT_FACILITY(148) - VSTBRF STATE0,0,,%r2 - VSTBRF STATE1,16,,%r2 - VSTBRF STATE2,32,,%r2 - VSTBRF STATE3,48,,%r2 -.Lstoredone: + ALTERNATIVE \ + __stringify( \ + /* Convert STATE to little endian and store to OUTPUT */\ + VPERM TMP0,STATE0,STATE0,BEPERM; \ + VPERM TMP1,STATE1,STATE1,BEPERM; \ + VPERM TMP2,STATE2,STATE2,BEPERM; \ + VPERM TMP3,STATE3,STATE3,BEPERM; \ + VSTM TMP0,TMP3,0,%r2), \ + __stringify( \ + /* 32 bit wise little endian store to OUTPUT */ \ + VSTBRF STATE0,0,,%r2; \ + VSTBRF STATE1,16,,%r2; \ + VSTBRF STATE2,32,,%r2; \ + VSTBRF STATE3,48,,%r2; \ + brcl 0,0), \ + ALT_FACILITY(148) /* ++COPY3.COUNTER */ /* alsih %r3,1 */ @@ -171,14 +177,5 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack) VZERO TMP3 br %r14 - -.Lstoreslow: - /* Convert STATE to little endian format and store to OUTPUT */ - VPERM TMP0,STATE0,STATE0,BEPERM - VPERM TMP1,STATE1,STATE1,BEPERM - VPERM TMP2,STATE2,STATE2,BEPERM - VPERM TMP3,STATE3,STATE3,BEPERM - VSTM TMP0,TMP3,0,%r2 - j .Lstoredone CFI_ENDPROC SYM_FUNC_END(__arch_chacha20_blocks_nostack) From 2d8721364ce83956d0a184a64052928589ef15df Mon Sep 17 00:00:00 2001 From: "Jason J. Herne" Date: Mon, 16 Sep 2024 08:01:23 -0400 Subject: [PATCH 413/655] s390/vfio-ap: Driver feature advertisement Advertise features of the driver for the benefit of automated tooling like Libvirt and mdevctl. Signed-off-by: Jason J. Herne Reviewed-by: Anthony Krowiak Reviewed-by: Boris Fiuczynski Link: https://lore.kernel.org/r/20240916120123.11484-1-jjherne@linux.ibm.com Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- Documentation/arch/s390/vfio-ap.rst | 30 +++++++++++++++++++++++++++++ drivers/s390/crypto/vfio_ap_drv.c | 13 +++++++++++++ 2 files changed, 43 insertions(+) diff --git a/Documentation/arch/s390/vfio-ap.rst b/Documentation/arch/s390/vfio-ap.rst index ea744cbc8687..eba1991fbdba 100644 --- a/Documentation/arch/s390/vfio-ap.rst +++ b/Documentation/arch/s390/vfio-ap.rst @@ -999,6 +999,36 @@ the vfio_ap mediated device to which it is assigned as long as each new APQN resulting from plugging it in references a queue device bound to the vfio_ap device driver. +Driver Features +=============== +The vfio_ap driver exposes a sysfs file containing supported features. +This exists so third party tools (like Libvirt and mdevctl) can query the +availability of specific features. + +The features list can be found here: /sys/bus/matrix/devices/matrix/features + +Entries are space delimited. Each entry consists of a combination of +alphanumeric and underscore characters. + +Example: +cat /sys/bus/matrix/devices/matrix/features +guest_matrix dyn ap_config + +the following features are advertised: + +---------------+---------------------------------------------------------------+ +| Flag | Description | ++==============+===============================================================+ +| guest_matrix | guest_matrix attribute exists. It reports the matrix of | +| | adapters and domains that are or will be passed through to a | +| | guest when the mdev is attached to it. | ++--------------+---------------------------------------------------------------+ +| dyn | Indicates hot plug/unplug of AP adapters, domains and control | +| | domains for a guest to which the mdev is attached. | ++------------+-----------------------------------------------------------------+ +| ap_config | ap_config interface for one-shot modifications to mdev config | ++--------------+---------------------------------------------------------------+ + Limitations =========== Live guest migration is not supported for guests using AP devices without diff --git a/drivers/s390/crypto/vfio_ap_drv.c b/drivers/s390/crypto/vfio_ap_drv.c index 4aeb3e1213c7..67a807e2e75b 100644 --- a/drivers/s390/crypto/vfio_ap_drv.c +++ b/drivers/s390/crypto/vfio_ap_drv.c @@ -26,6 +26,18 @@ MODULE_LICENSE("GPL v2"); struct ap_matrix_dev *matrix_dev; debug_info_t *vfio_ap_dbf_info; +static ssize_t features_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "guest_matrix hotplug ap_config\n"); +} +static DEVICE_ATTR_RO(features); + +static struct attribute *matrix_dev_attrs[] = { + &dev_attr_features.attr, + NULL, +}; +ATTRIBUTE_GROUPS(matrix_dev); + /* Only type 10 adapters (CEX4 and later) are supported * by the AP matrix device driver */ @@ -68,6 +80,7 @@ static struct device_driver matrix_driver = { .name = "vfio_ap", .bus = &matrix_bus, .suppress_bind_attrs = true, + .dev_groups = matrix_dev_groups, }; static int vfio_ap_matrix_dev_create(void) From 6d12d7ace99ec74cb1f479bb851b5ed65b3bc105 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 22 Sep 2024 02:10:30 -0400 Subject: [PATCH 414/655] bcachefs: Ensure BCH_FS_accounting_replay_done is always set if it doesn't get set we'll never be able to flush the btree write buffer; this only happens in fake rw mode, but prevents us from shutting down. Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index be1e7ca4362f..d1699aecc9fb 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -862,6 +862,9 @@ int bch2_fs_recovery(struct bch_fs *c) clear_bit(BCH_FS_fsck_running, &c->flags); + /* in case we don't run journal replay, i.e. norecovery mode */ + set_bit(BCH_FS_accounting_replay_done, &c->flags); + /* fsync if we fixed errors */ if (test_bit(BCH_FS_errors_fixed, &c->flags) && bch2_write_ref_tryget(c, BCH_WRITE_REF_fsync)) { From 7eb4a319db65566005989121563ead344ca79140 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 23 Sep 2024 16:39:49 -0400 Subject: [PATCH 415/655] bcachefs: Fix infinite loop in propagate_key_to_snapshot_leaves() As we iterate we need to mark that we no longer need iterators - otherwise we'll infinite loop via the "too many iters" check when there's many snapshots. Signed-off-by: Kent Overstreet --- fs/bcachefs/snapshot.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index dff83ebbd912..1809442b00ee 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -1784,6 +1784,7 @@ static int bch2_propagate_key_to_snapshot_leaf(struct btree_trans *trans, new->k.p.snapshot = leaf_id; ret = bch2_trans_update(trans, &iter, new, 0); out: + bch2_set_btree_iter_dontneed(&iter); bch2_trans_iter_exit(trans, &iter); return ret; } From e8dd556c74325f29597665c2c557a8ea699a0686 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 24 Sep 2024 15:31:51 +0800 Subject: [PATCH 416/655] LoongArch: Enable generic CPU vulnerabilites support Currently, many architectures support generic CPU vulnerabilites, such as x86, arm64 and riscv: commit 61dc0f555b5c ("x86/cpu: Implement CPU vulnerabilites sysfs functions") commit 61ae1321f06c ("arm64: enable generic CPU vulnerabilites support") commit 0e3f3649d44b ("riscv: Enable generic CPU vulnerabilites support") All LoongArch CPUs (since Loongson-3A5000) implement a special mechanism in the processor core to prevent "Meltdown" and "Spectre" attacks, so it can enable generic CPU vulnerabilites support for LoongArch too. Without this patch, there are no user interfaces of vulnerabilities to check on LoongArch. The output of those files reflects the state of the CPUs in the system, the output value "Not affected" means "CPU is not affected by the vulnerability". Before: # cat /sys/devices/system/cpu/vulnerabilities/spec_rstack_overflow cat: /sys/devices/system/cpu/vulnerabilities/spec_rstack_overflow: No such file or directory # cat /sys/devices/system/cpu/vulnerabilities/spec_store_bypass cat: /sys/devices/system/cpu/vulnerabilities/spec_store_bypass: No such file or directory # cat /sys/devices/system/cpu/vulnerabilities/meltdown cat: /sys/devices/system/cpu/vulnerabilities/meltdown: No such file or directory After: # cat /sys/devices/system/cpu/vulnerabilities/spec_rstack_overflow Not affected # cat /sys/devices/system/cpu/vulnerabilities/spec_store_bypass Not affected # cat /sys/devices/system/cpu/vulnerabilities/meltdown Not affected Link: https://www.loongson.cn/EN/news/show?id=633 Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 34531dc8a61f..b9d3c98efe18 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -82,6 +82,7 @@ config LOONGARCH select GENERIC_CMOS_UPDATE select GENERIC_CPU_AUTOPROBE select GENERIC_CPU_DEVICES + select GENERIC_CPU_VULNERABILITIES select GENERIC_ENTRY select GENERIC_GETTIMEOFDAY select GENERIC_IOREMAP if !ARCH_IOREMAP From d0bb0b600081bc7c246b97b2901abbb6c357ff7b Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Tue, 24 Sep 2024 15:32:06 +0800 Subject: [PATCH 417/655] LoongArch: Enable ACPI BGRT handling Add ACPI BGRT support on LoongArch so it can display image provied by acpi table at boot stage and switch to graphical UI smoothly. Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kernel/acpi.c | 4 ++++ drivers/acpi/Kconfig | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index 929a497c987e..f1a74b80f22c 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -212,6 +213,9 @@ void __init acpi_boot_table_init(void) /* Do not enable ACPI SPCR console by default */ acpi_parse_spcr(earlycon_acpi_spcr_enable, false); + if (IS_ENABLED(CONFIG_ACPI_BGRT)) + acpi_table_parse(ACPI_SIG_BGRT, acpi_parse_bgrt); + return; fdt_earlycon: diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index e3a7c2aedd5f..d67f63d93b2a 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -451,7 +451,7 @@ config ACPI_HED config ACPI_BGRT bool "Boottime Graphics Resource Table support" - depends on EFI && (X86 || ARM64) + depends on EFI && (X86 || ARM64 || LOONGARCH) help This driver adds support for exposing the ACPI Boottime Graphics Resource Table, which allows the operating system to obtain From 34e3c4500cdc06094b37a41b622598098308ba8f Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Tue, 24 Sep 2024 15:32:20 +0800 Subject: [PATCH 418/655] LoongArch: Rework CPU feature probe from CPUCFG/IOCSR Probe ISA level, TLB, IOCSR information from CPUCFG to improve kernel resilience to different core implementations. BTW, IOCSR register definition appears to be a platform-specific spec instead of an architecture spec, even for the Loongson CPUs there is no guarantee that IOCSR will always present. Thus it's dangerous to perform IOCSR probing without checking CPU type and instruction availability. Signed-off-by: Jiaxun Yang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/cpu-features.h | 2 + arch/loongarch/include/asm/cpu.h | 30 ++--- arch/loongarch/include/asm/loongarch.h | 1 + arch/loongarch/include/uapi/asm/hwcap.h | 1 + arch/loongarch/kernel/cpu-probe.c | 134 +++++++++++++--------- arch/loongarch/kernel/proc.c | 10 +- 6 files changed, 107 insertions(+), 71 deletions(-) diff --git a/arch/loongarch/include/asm/cpu-features.h b/arch/loongarch/include/asm/cpu-features.h index 16a716f88a5c..fc83bb32f9f0 100644 --- a/arch/loongarch/include/asm/cpu-features.h +++ b/arch/loongarch/include/asm/cpu-features.h @@ -51,6 +51,7 @@ #define cpu_has_lbt_mips cpu_opt(LOONGARCH_CPU_LBT_MIPS) #define cpu_has_lbt (cpu_has_lbt_x86|cpu_has_lbt_arm|cpu_has_lbt_mips) #define cpu_has_csr cpu_opt(LOONGARCH_CPU_CSR) +#define cpu_has_iocsr cpu_opt(LOONGARCH_CPU_IOCSR) #define cpu_has_tlb cpu_opt(LOONGARCH_CPU_TLB) #define cpu_has_watch cpu_opt(LOONGARCH_CPU_WATCH) #define cpu_has_vint cpu_opt(LOONGARCH_CPU_VINT) @@ -65,6 +66,7 @@ #define cpu_has_guestid cpu_opt(LOONGARCH_CPU_GUESTID) #define cpu_has_hypervisor cpu_opt(LOONGARCH_CPU_HYPERVISOR) #define cpu_has_ptw cpu_opt(LOONGARCH_CPU_PTW) +#define cpu_has_lspw cpu_opt(LOONGARCH_CPU_LSPW) #define cpu_has_avecint cpu_opt(LOONGARCH_CPU_AVECINT) #endif /* __ASM_CPU_FEATURES_H */ diff --git a/arch/loongarch/include/asm/cpu.h b/arch/loongarch/include/asm/cpu.h index 843f9c4ec980..98cf4d7b4b0a 100644 --- a/arch/loongarch/include/asm/cpu.h +++ b/arch/loongarch/include/asm/cpu.h @@ -87,19 +87,21 @@ enum cpu_type_enum { #define CPU_FEATURE_LBT_MIPS 12 /* CPU has MIPS Binary Translation */ #define CPU_FEATURE_TLB 13 /* CPU has TLB */ #define CPU_FEATURE_CSR 14 /* CPU has CSR */ -#define CPU_FEATURE_WATCH 15 /* CPU has watchpoint registers */ -#define CPU_FEATURE_VINT 16 /* CPU has vectored interrupts */ -#define CPU_FEATURE_CSRIPI 17 /* CPU has CSR-IPI */ -#define CPU_FEATURE_EXTIOI 18 /* CPU has EXT-IOI */ -#define CPU_FEATURE_PREFETCH 19 /* CPU has prefetch instructions */ -#define CPU_FEATURE_PMP 20 /* CPU has perfermance counter */ -#define CPU_FEATURE_SCALEFREQ 21 /* CPU supports cpufreq scaling */ -#define CPU_FEATURE_FLATMODE 22 /* CPU has flat mode */ -#define CPU_FEATURE_EIODECODE 23 /* CPU has EXTIOI interrupt pin decode mode */ -#define CPU_FEATURE_GUESTID 24 /* CPU has GuestID feature */ -#define CPU_FEATURE_HYPERVISOR 25 /* CPU has hypervisor (running in VM) */ -#define CPU_FEATURE_PTW 26 /* CPU has hardware page table walker */ -#define CPU_FEATURE_AVECINT 27 /* CPU has avec interrupt */ +#define CPU_FEATURE_IOCSR 15 /* CPU has IOCSR */ +#define CPU_FEATURE_WATCH 16 /* CPU has watchpoint registers */ +#define CPU_FEATURE_VINT 17 /* CPU has vectored interrupts */ +#define CPU_FEATURE_CSRIPI 18 /* CPU has CSR-IPI */ +#define CPU_FEATURE_EXTIOI 19 /* CPU has EXT-IOI */ +#define CPU_FEATURE_PREFETCH 20 /* CPU has prefetch instructions */ +#define CPU_FEATURE_PMP 21 /* CPU has perfermance counter */ +#define CPU_FEATURE_SCALEFREQ 22 /* CPU supports cpufreq scaling */ +#define CPU_FEATURE_FLATMODE 23 /* CPU has flat mode */ +#define CPU_FEATURE_EIODECODE 24 /* CPU has EXTIOI interrupt pin decode mode */ +#define CPU_FEATURE_GUESTID 25 /* CPU has GuestID feature */ +#define CPU_FEATURE_HYPERVISOR 26 /* CPU has hypervisor (running in VM) */ +#define CPU_FEATURE_PTW 27 /* CPU has hardware page table walker */ +#define CPU_FEATURE_LSPW 28 /* CPU has LSPW (lddir/ldpte instructions) */ +#define CPU_FEATURE_AVECINT 29 /* CPU has AVEC interrupt */ #define LOONGARCH_CPU_CPUCFG BIT_ULL(CPU_FEATURE_CPUCFG) #define LOONGARCH_CPU_LAM BIT_ULL(CPU_FEATURE_LAM) @@ -115,6 +117,7 @@ enum cpu_type_enum { #define LOONGARCH_CPU_LBT_ARM BIT_ULL(CPU_FEATURE_LBT_ARM) #define LOONGARCH_CPU_LBT_MIPS BIT_ULL(CPU_FEATURE_LBT_MIPS) #define LOONGARCH_CPU_TLB BIT_ULL(CPU_FEATURE_TLB) +#define LOONGARCH_CPU_IOCSR BIT_ULL(CPU_FEATURE_IOCSR) #define LOONGARCH_CPU_CSR BIT_ULL(CPU_FEATURE_CSR) #define LOONGARCH_CPU_WATCH BIT_ULL(CPU_FEATURE_WATCH) #define LOONGARCH_CPU_VINT BIT_ULL(CPU_FEATURE_VINT) @@ -128,6 +131,7 @@ enum cpu_type_enum { #define LOONGARCH_CPU_GUESTID BIT_ULL(CPU_FEATURE_GUESTID) #define LOONGARCH_CPU_HYPERVISOR BIT_ULL(CPU_FEATURE_HYPERVISOR) #define LOONGARCH_CPU_PTW BIT_ULL(CPU_FEATURE_PTW) +#define LOONGARCH_CPU_LSPW BIT_ULL(CPU_FEATURE_LSPW) #define LOONGARCH_CPU_AVECINT BIT_ULL(CPU_FEATURE_AVECINT) #endif /* _ASM_CPU_H */ diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index 631d249b3ef2..4084cb64ba06 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -62,6 +62,7 @@ #define LOONGARCH_CPUCFG1 0x1 #define CPUCFG1_ISGR32 BIT(0) #define CPUCFG1_ISGR64 BIT(1) +#define CPUCFG1_ISA GENMASK(1, 0) #define CPUCFG1_PAGING BIT(2) #define CPUCFG1_IOCSR BIT(3) #define CPUCFG1_PABITS GENMASK(11, 4) diff --git a/arch/loongarch/include/uapi/asm/hwcap.h b/arch/loongarch/include/uapi/asm/hwcap.h index 6955a7cb2c65..2b34e56cfa9e 100644 --- a/arch/loongarch/include/uapi/asm/hwcap.h +++ b/arch/loongarch/include/uapi/asm/hwcap.h @@ -17,5 +17,6 @@ #define HWCAP_LOONGARCH_LBT_ARM (1 << 11) #define HWCAP_LOONGARCH_LBT_MIPS (1 << 12) #define HWCAP_LOONGARCH_PTW (1 << 13) +#define HWCAP_LOONGARCH_LSPW (1 << 14) #endif /* _UAPI_ASM_HWCAP_H */ diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c index 14f0449f5452..cbce099037b2 100644 --- a/arch/loongarch/kernel/cpu-probe.c +++ b/arch/loongarch/kernel/cpu-probe.c @@ -91,12 +91,30 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c) unsigned int config; unsigned long asid_mask; - c->options = LOONGARCH_CPU_CPUCFG | LOONGARCH_CPU_CSR | - LOONGARCH_CPU_TLB | LOONGARCH_CPU_VINT | LOONGARCH_CPU_WATCH; + c->options = LOONGARCH_CPU_CPUCFG | LOONGARCH_CPU_CSR | LOONGARCH_CPU_VINT; elf_hwcap = HWCAP_LOONGARCH_CPUCFG; config = read_cpucfg(LOONGARCH_CPUCFG1); + + switch (config & CPUCFG1_ISA) { + case 0: + set_isa(c, LOONGARCH_CPU_ISA_LA32R); + break; + case 1: + set_isa(c, LOONGARCH_CPU_ISA_LA32S); + break; + case 2: + set_isa(c, LOONGARCH_CPU_ISA_LA64); + break; + default: + pr_warn("Warning: unknown ISA level\n"); + } + + if (config & CPUCFG1_PAGING) + c->options |= LOONGARCH_CPU_TLB; + if (config & CPUCFG1_IOCSR) + c->options |= LOONGARCH_CPU_IOCSR; if (config & CPUCFG1_UAL) { c->options |= LOONGARCH_CPU_UAL; elf_hwcap |= HWCAP_LOONGARCH_UAL; @@ -139,6 +157,10 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c) c->options |= LOONGARCH_CPU_PTW; elf_hwcap |= HWCAP_LOONGARCH_PTW; } + if (config & CPUCFG2_LSPW) { + c->options |= LOONGARCH_CPU_LSPW; + elf_hwcap |= HWCAP_LOONGARCH_LSPW; + } if (config & CPUCFG2_LVZP) { c->options |= LOONGARCH_CPU_LVZ; elf_hwcap |= HWCAP_LOONGARCH_LVZ; @@ -162,22 +184,6 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c) if (config & CPUCFG6_PMP) c->options |= LOONGARCH_CPU_PMP; - config = iocsr_read32(LOONGARCH_IOCSR_FEATURES); - if (config & IOCSRF_CSRIPI) - c->options |= LOONGARCH_CPU_CSRIPI; - if (config & IOCSRF_EXTIOI) - c->options |= LOONGARCH_CPU_EXTIOI; - if (config & IOCSRF_FREQSCALE) - c->options |= LOONGARCH_CPU_SCALEFREQ; - if (config & IOCSRF_FLATMODE) - c->options |= LOONGARCH_CPU_FLATMODE; - if (config & IOCSRF_EIODECODE) - c->options |= LOONGARCH_CPU_EIODECODE; - if (config & IOCSRF_AVEC) - c->options |= LOONGARCH_CPU_AVECINT; - if (config & IOCSRF_VM) - c->options |= LOONGARCH_CPU_HYPERVISOR; - config = csr_read32(LOONGARCH_CSR_ASID); config = (config & CSR_ASID_BIT) >> CSR_ASID_BIT_SHIFT; asid_mask = GENMASK(config - 1, 0); @@ -210,6 +216,9 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c) default: pr_warn("Warning: unknown TLB type\n"); } + + if (get_num_brps() + get_num_wrps()) + c->options |= LOONGARCH_CPU_WATCH; } #define MAX_NAME_LEN 32 @@ -220,8 +229,45 @@ static char cpu_full_name[MAX_NAME_LEN] = " - "; static inline void cpu_probe_loongson(struct cpuinfo_loongarch *c, unsigned int cpu) { + uint32_t config; uint64_t *vendor = (void *)(&cpu_full_name[VENDOR_OFFSET]); uint64_t *cpuname = (void *)(&cpu_full_name[CPUNAME_OFFSET]); + const char *core_name = "Unknown"; + + switch (BIT(fls(c->isa_level) - 1)) { + case LOONGARCH_CPU_ISA_LA32R: + case LOONGARCH_CPU_ISA_LA32S: + c->cputype = CPU_LOONGSON32; + __cpu_family[cpu] = "Loongson-32bit"; + break; + case LOONGARCH_CPU_ISA_LA64: + c->cputype = CPU_LOONGSON64; + __cpu_family[cpu] = "Loongson-64bit"; + break; + } + + switch (c->processor_id & PRID_SERIES_MASK) { + case PRID_SERIES_LA132: + core_name = "LA132"; + break; + case PRID_SERIES_LA264: + core_name = "LA264"; + break; + case PRID_SERIES_LA364: + core_name = "LA364"; + break; + case PRID_SERIES_LA464: + core_name = "LA464"; + break; + case PRID_SERIES_LA664: + core_name = "LA664"; + break; + } + + pr_info("%s Processor probed (%s Core)\n", __cpu_family[cpu], core_name); + + if (!cpu_has_iocsr) + return; if (!__cpu_full_name[cpu]) __cpu_full_name[cpu] = cpu_full_name; @@ -229,43 +275,21 @@ static inline void cpu_probe_loongson(struct cpuinfo_loongarch *c, unsigned int *vendor = iocsr_read64(LOONGARCH_IOCSR_VENDOR); *cpuname = iocsr_read64(LOONGARCH_IOCSR_CPUNAME); - switch (c->processor_id & PRID_SERIES_MASK) { - case PRID_SERIES_LA132: - c->cputype = CPU_LOONGSON32; - set_isa(c, LOONGARCH_CPU_ISA_LA32S); - __cpu_family[cpu] = "Loongson-32bit"; - pr_info("32-bit Loongson Processor probed (LA132 Core)\n"); - break; - case PRID_SERIES_LA264: - c->cputype = CPU_LOONGSON64; - set_isa(c, LOONGARCH_CPU_ISA_LA64); - __cpu_family[cpu] = "Loongson-64bit"; - pr_info("64-bit Loongson Processor probed (LA264 Core)\n"); - break; - case PRID_SERIES_LA364: - c->cputype = CPU_LOONGSON64; - set_isa(c, LOONGARCH_CPU_ISA_LA64); - __cpu_family[cpu] = "Loongson-64bit"; - pr_info("64-bit Loongson Processor probed (LA364 Core)\n"); - break; - case PRID_SERIES_LA464: - c->cputype = CPU_LOONGSON64; - set_isa(c, LOONGARCH_CPU_ISA_LA64); - __cpu_family[cpu] = "Loongson-64bit"; - pr_info("64-bit Loongson Processor probed (LA464 Core)\n"); - break; - case PRID_SERIES_LA664: - c->cputype = CPU_LOONGSON64; - set_isa(c, LOONGARCH_CPU_ISA_LA64); - __cpu_family[cpu] = "Loongson-64bit"; - pr_info("64-bit Loongson Processor probed (LA664 Core)\n"); - break; - default: /* Default to 64 bit */ - c->cputype = CPU_LOONGSON64; - set_isa(c, LOONGARCH_CPU_ISA_LA64); - __cpu_family[cpu] = "Loongson-64bit"; - pr_info("64-bit Loongson Processor probed (Unknown Core)\n"); - } + config = iocsr_read32(LOONGARCH_IOCSR_FEATURES); + if (config & IOCSRF_CSRIPI) + c->options |= LOONGARCH_CPU_CSRIPI; + if (config & IOCSRF_EXTIOI) + c->options |= LOONGARCH_CPU_EXTIOI; + if (config & IOCSRF_FREQSCALE) + c->options |= LOONGARCH_CPU_SCALEFREQ; + if (config & IOCSRF_FLATMODE) + c->options |= LOONGARCH_CPU_FLATMODE; + if (config & IOCSRF_EIODECODE) + c->options |= LOONGARCH_CPU_EIODECODE; + if (config & IOCSRF_AVEC) + c->options |= LOONGARCH_CPU_AVECINT; + if (config & IOCSRF_VM) + c->options |= LOONGARCH_CPU_HYPERVISOR; } #ifdef CONFIG_64BIT diff --git a/arch/loongarch/kernel/proc.c b/arch/loongarch/kernel/proc.c index 0d33cbc47e51..6ce46d92f1f1 100644 --- a/arch/loongarch/kernel/proc.c +++ b/arch/loongarch/kernel/proc.c @@ -31,6 +31,7 @@ int proc_cpuinfo_notifier_call_chain(unsigned long val, void *v) static int show_cpuinfo(struct seq_file *m, void *v) { unsigned long n = (unsigned long) v - 1; + unsigned int isa = cpu_data[n].isa_level; unsigned int version = cpu_data[n].processor_id & 0xff; unsigned int fp_version = cpu_data[n].fpu_vers; struct proc_cpuinfo_notifier_args proc_cpuinfo_notifier_args; @@ -64,9 +65,11 @@ static int show_cpuinfo(struct seq_file *m, void *v) cpu_pabits + 1, cpu_vabits + 1); seq_printf(m, "ISA\t\t\t:"); - if (cpu_has_loongarch32) - seq_printf(m, " loongarch32"); - if (cpu_has_loongarch64) + if (isa & LOONGARCH_CPU_ISA_LA32R) + seq_printf(m, " loongarch32r"); + if (isa & LOONGARCH_CPU_ISA_LA32S) + seq_printf(m, " loongarch32s"); + if (isa & LOONGARCH_CPU_ISA_LA64) seq_printf(m, " loongarch64"); seq_printf(m, "\n"); @@ -81,6 +84,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) if (cpu_has_complex) seq_printf(m, " complex"); if (cpu_has_crypto) seq_printf(m, " crypto"); if (cpu_has_ptw) seq_printf(m, " ptw"); + if (cpu_has_lspw) seq_printf(m, " lspw"); if (cpu_has_lvz) seq_printf(m, " lvz"); if (cpu_has_lbt_x86) seq_printf(m, " lbt_x86"); if (cpu_has_lbt_arm) seq_printf(m, " lbt_arm"); From e86935f705fa732d8c7c3ecf0c50ea461ffab76f Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 24 Sep 2024 15:32:20 +0800 Subject: [PATCH 419/655] LoongArch: Add ARCH_HAS_SET_MEMORY support Add set_memory_ro/rw/x/nx architecture hooks to change the page attribution. Use own set_memory.h rather than generic set_memory.h (i.e. include/asm-generic/set_memory.h), because we want to add other function prototypes here. Note: We can only set attributes for KVRANGE/XKVRANGE kernel addresses. Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 1 + arch/loongarch/include/asm/set_memory.h | 17 +++ arch/loongarch/mm/Makefile | 3 +- arch/loongarch/mm/pageattr.c | 158 ++++++++++++++++++++++++ 4 files changed, 178 insertions(+), 1 deletion(-) create mode 100644 arch/loongarch/include/asm/set_memory.h create mode 100644 arch/loongarch/mm/pageattr.c diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index b9d3c98efe18..46fa58bde685 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -25,6 +25,7 @@ config LOONGARCH select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_PTE_DEVMAP select ARCH_HAS_PTE_SPECIAL + select ARCH_HAS_SET_MEMORY select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_INLINE_READ_LOCK if !PREEMPTION select ARCH_INLINE_READ_LOCK_BH if !PREEMPTION diff --git a/arch/loongarch/include/asm/set_memory.h b/arch/loongarch/include/asm/set_memory.h new file mode 100644 index 000000000000..64c7f942e8ec --- /dev/null +++ b/arch/loongarch/include/asm/set_memory.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2024 Loongson Technology Corporation Limited + */ + +#ifndef _ASM_LOONGARCH_SET_MEMORY_H +#define _ASM_LOONGARCH_SET_MEMORY_H + +/* + * Functions to change memory attributes. + */ +int set_memory_x(unsigned long addr, int numpages); +int set_memory_nx(unsigned long addr, int numpages); +int set_memory_ro(unsigned long addr, int numpages); +int set_memory_rw(unsigned long addr, int numpages); + +#endif /* _ASM_LOONGARCH_SET_MEMORY_H */ diff --git a/arch/loongarch/mm/Makefile b/arch/loongarch/mm/Makefile index e4d1e581dbae..278be2c8fc36 100644 --- a/arch/loongarch/mm/Makefile +++ b/arch/loongarch/mm/Makefile @@ -4,7 +4,8 @@ # obj-y += init.o cache.o tlb.o tlbex.o extable.o \ - fault.o ioremap.o maccess.o mmap.o pgtable.o page.o + fault.o ioremap.o maccess.o mmap.o pgtable.o \ + page.o pageattr.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_KASAN) += kasan_init.o diff --git a/arch/loongarch/mm/pageattr.c b/arch/loongarch/mm/pageattr.c new file mode 100644 index 000000000000..e3db1c38b5fe --- /dev/null +++ b/arch/loongarch/mm/pageattr.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2024 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include + +struct pageattr_masks { + pgprot_t set_mask; + pgprot_t clear_mask; +}; + +static unsigned long set_pageattr_masks(unsigned long val, struct mm_walk *walk) +{ + unsigned long new_val = val; + struct pageattr_masks *masks = walk->private; + + new_val &= ~(pgprot_val(masks->clear_mask)); + new_val |= (pgprot_val(masks->set_mask)); + + return new_val; +} + +static int pageattr_pgd_entry(pgd_t *pgd, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + pgd_t val = pgdp_get(pgd); + + if (pgd_leaf(val)) { + val = __pgd(set_pageattr_masks(pgd_val(val), walk)); + set_pgd(pgd, val); + } + + return 0; +} + +static int pageattr_p4d_entry(p4d_t *p4d, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + p4d_t val = p4dp_get(p4d); + + if (p4d_leaf(val)) { + val = __p4d(set_pageattr_masks(p4d_val(val), walk)); + set_p4d(p4d, val); + } + + return 0; +} + +static int pageattr_pud_entry(pud_t *pud, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + pud_t val = pudp_get(pud); + + if (pud_leaf(val)) { + val = __pud(set_pageattr_masks(pud_val(val), walk)); + set_pud(pud, val); + } + + return 0; +} + +static int pageattr_pmd_entry(pmd_t *pmd, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + pmd_t val = pmdp_get(pmd); + + if (pmd_leaf(val)) { + val = __pmd(set_pageattr_masks(pmd_val(val), walk)); + set_pmd(pmd, val); + } + + return 0; +} + +static int pageattr_pte_entry(pte_t *pte, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + pte_t val = ptep_get(pte); + + val = __pte(set_pageattr_masks(pte_val(val), walk)); + set_pte(pte, val); + + return 0; +} + +static int pageattr_pte_hole(unsigned long addr, unsigned long next, + int depth, struct mm_walk *walk) +{ + return 0; +} + +static const struct mm_walk_ops pageattr_ops = { + .pgd_entry = pageattr_pgd_entry, + .p4d_entry = pageattr_p4d_entry, + .pud_entry = pageattr_pud_entry, + .pmd_entry = pageattr_pmd_entry, + .pte_entry = pageattr_pte_entry, + .pte_hole = pageattr_pte_hole, + .walk_lock = PGWALK_RDLOCK, +}; + +static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask, pgprot_t clear_mask) +{ + int ret; + unsigned long start = addr; + unsigned long end = start + PAGE_SIZE * numpages; + struct pageattr_masks masks = { + .set_mask = set_mask, + .clear_mask = clear_mask + }; + + if (!numpages) + return 0; + + mmap_write_lock(&init_mm); + ret = walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL, &masks); + mmap_write_unlock(&init_mm); + + flush_tlb_kernel_range(start, end); + + return ret; +} + +int set_memory_x(unsigned long addr, int numpages) +{ + if (addr < vm_map_base) + return 0; + + return __set_memory(addr, numpages, __pgprot(0), __pgprot(_PAGE_NO_EXEC)); +} + +int set_memory_nx(unsigned long addr, int numpages) +{ + if (addr < vm_map_base) + return 0; + + return __set_memory(addr, numpages, __pgprot(_PAGE_NO_EXEC), __pgprot(0)); +} + +int set_memory_ro(unsigned long addr, int numpages) +{ + if (addr < vm_map_base) + return 0; + + return __set_memory(addr, numpages, __pgprot(0), __pgprot(_PAGE_WRITE | _PAGE_DIRTY)); +} + +int set_memory_rw(unsigned long addr, int numpages) +{ + if (addr < vm_map_base) + return 0; + + return __set_memory(addr, numpages, __pgprot(_PAGE_WRITE | _PAGE_DIRTY), __pgprot(0)); +} From f04de6d8f252ec6434b846895474cc205527b8b8 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 24 Sep 2024 15:32:20 +0800 Subject: [PATCH 420/655] LoongArch: Add ARCH_HAS_SET_DIRECT_MAP support Add set_direct_map_*() functions for setting the direct map alias for the page to its default permissions and to an invalid state that cannot be cached in a TLB. (See d253ca0c3 ("x86/mm/cpa: Add set_direct_map_*() functions")) Add a similar implementation for LoongArch. This fixes the KFENCE warnings during hibernation: ================================================================== BUG: KFENCE: invalid read in swsusp_save+0x368/0x4d8 Invalid read at 0x00000000f7b89a3c: swsusp_save+0x368/0x4d8 hibernation_snapshot+0x3f0/0x4e0 hibernate+0x20c/0x440 state_store+0x128/0x140 kernfs_fop_write_iter+0x160/0x260 vfs_write+0x2c0/0x520 ksys_write+0x74/0x160 do_syscall+0xb0/0x160 CPU: 0 UID: 0 PID: 812 Comm: bash Tainted: G B 6.11.0-rc1+ #1566 Tainted: [B]=BAD_PAGE Hardware name: Loongson-LS3A5000-7A1000-1w-CRB, BIOS vUDK2018-LoongArch-V2.0.0 10/21/2022 ================================================================== Note: We can only set permissions for KVRANGE/XKVRANGE kernel addresses. Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 1 + arch/loongarch/include/asm/set_memory.h | 4 ++ arch/loongarch/mm/pageattr.c | 60 +++++++++++++++++++++++++ 3 files changed, 65 insertions(+) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 46fa58bde685..4b02b0492c3b 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -26,6 +26,7 @@ config LOONGARCH select ARCH_HAS_PTE_DEVMAP select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SET_MEMORY + select ARCH_HAS_SET_DIRECT_MAP select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_INLINE_READ_LOCK if !PREEMPTION select ARCH_INLINE_READ_LOCK_BH if !PREEMPTION diff --git a/arch/loongarch/include/asm/set_memory.h b/arch/loongarch/include/asm/set_memory.h index 64c7f942e8ec..d70505b6676c 100644 --- a/arch/loongarch/include/asm/set_memory.h +++ b/arch/loongarch/include/asm/set_memory.h @@ -14,4 +14,8 @@ int set_memory_nx(unsigned long addr, int numpages); int set_memory_ro(unsigned long addr, int numpages); int set_memory_rw(unsigned long addr, int numpages); +bool kernel_page_present(struct page *page); +int set_direct_map_default_noflush(struct page *page); +int set_direct_map_invalid_noflush(struct page *page); + #endif /* _ASM_LOONGARCH_SET_MEMORY_H */ diff --git a/arch/loongarch/mm/pageattr.c b/arch/loongarch/mm/pageattr.c index e3db1c38b5fe..ffd8d76021d4 100644 --- a/arch/loongarch/mm/pageattr.c +++ b/arch/loongarch/mm/pageattr.c @@ -156,3 +156,63 @@ int set_memory_rw(unsigned long addr, int numpages) return __set_memory(addr, numpages, __pgprot(_PAGE_WRITE | _PAGE_DIRTY), __pgprot(0)); } + +bool kernel_page_present(struct page *page) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + unsigned long addr = (unsigned long)page_address(page); + + if (addr < vm_map_base) + return true; + + pgd = pgd_offset_k(addr); + if (pgd_none(pgdp_get(pgd))) + return false; + if (pgd_leaf(pgdp_get(pgd))) + return true; + + p4d = p4d_offset(pgd, addr); + if (p4d_none(p4dp_get(p4d))) + return false; + if (p4d_leaf(p4dp_get(p4d))) + return true; + + pud = pud_offset(p4d, addr); + if (pud_none(pudp_get(pud))) + return false; + if (pud_leaf(pudp_get(pud))) + return true; + + pmd = pmd_offset(pud, addr); + if (pmd_none(pmdp_get(pmd))) + return false; + if (pmd_leaf(pmdp_get(pmd))) + return true; + + pte = pte_offset_kernel(pmd, addr); + return pte_present(ptep_get(pte)); +} + +int set_direct_map_default_noflush(struct page *page) +{ + unsigned long addr = (unsigned long)page_address(page); + + if (addr < vm_map_base) + return 0; + + return __set_memory(addr, 1, PAGE_KERNEL, __pgprot(0)); +} + +int set_direct_map_invalid_noflush(struct page *page) +{ + unsigned long addr = (unsigned long)page_address(page); + + if (addr < vm_map_base) + return 0; + + return __set_memory(addr, 1, __pgprot(0), __pgprot(_PAGE_PRESENT | _PAGE_VALID)); +} From f93f67d06b1023313ef1662eac490e29c025c030 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 24 Sep 2024 15:32:20 +0800 Subject: [PATCH 421/655] LoongArch: Improve hardware page table walker LoongArch has similar problems explained in commit 7f0b1bf04511348995d6 ("arm64: Fix barriers used for page table modifications"), when hardware page table walker (PTW) enabled, speculative accesses may cause spurious page fault in kernel space. Theoretically, in order to completely avoid spurious page fault we need a "dbar + ibar" pair between the page table modifications and the subsequent memory accesses using the corresponding virtual address. But "ibar" is too heavy for performace, so we only use a "dbar 0b11000" in set_pte(). And let spurious_fault() filter the rest rare spurious page faults which should be avoided by "ibar". Besides, we replace the llsc loop with amo in set_pte() which has better performace, and refactor mmu_context.h to 1) avoid any load/store/branch instructions between the writing of CSR.ASID & CSR.PGDL, 2) ensure flush tlb operation is after updating ASID. Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/atomic.h | 2 ++ arch/loongarch/include/asm/mmu_context.h | 35 +++++++++++++++----- arch/loongarch/include/asm/pgtable.h | 30 +++++++---------- arch/loongarch/mm/fault.c | 41 ++++++++++++++++++++++++ 4 files changed, 82 insertions(+), 26 deletions(-) diff --git a/arch/loongarch/include/asm/atomic.h b/arch/loongarch/include/asm/atomic.h index 99af8b3160a8..c86f0ab922ec 100644 --- a/arch/loongarch/include/asm/atomic.h +++ b/arch/loongarch/include/asm/atomic.h @@ -15,6 +15,7 @@ #define __LL "ll.w " #define __SC "sc.w " #define __AMADD "amadd.w " +#define __AMOR "amor.w " #define __AMAND_DB "amand_db.w " #define __AMOR_DB "amor_db.w " #define __AMXOR_DB "amxor_db.w " @@ -22,6 +23,7 @@ #define __LL "ll.d " #define __SC "sc.d " #define __AMADD "amadd.d " +#define __AMOR "amor.d " #define __AMAND_DB "amand_db.d " #define __AMOR_DB "amor_db.d " #define __AMXOR_DB "amxor_db.d " diff --git a/arch/loongarch/include/asm/mmu_context.h b/arch/loongarch/include/asm/mmu_context.h index 9f97c3453b9c..304363bd3935 100644 --- a/arch/loongarch/include/asm/mmu_context.h +++ b/arch/loongarch/include/asm/mmu_context.h @@ -49,12 +49,12 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) /* Normal, classic get_new_mmu_context */ static inline void -get_new_mmu_context(struct mm_struct *mm, unsigned long cpu) +get_new_mmu_context(struct mm_struct *mm, unsigned long cpu, bool *need_flush) { u64 asid = asid_cache(cpu); if (!((++asid) & cpu_asid_mask(&cpu_data[cpu]))) - local_flush_tlb_user(); /* start new asid cycle */ + *need_flush = true; /* start new asid cycle */ cpu_context(cpu, mm) = asid_cache(cpu) = asid; } @@ -74,21 +74,34 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm) return 0; } +static inline void atomic_update_pgd_asid(unsigned long asid, unsigned long pgdl) +{ + __asm__ __volatile__( + "csrwr %[pgdl_val], %[pgdl_reg] \n\t" + "csrwr %[asid_val], %[asid_reg] \n\t" + : [asid_val] "+r" (asid), [pgdl_val] "+r" (pgdl) + : [asid_reg] "i" (LOONGARCH_CSR_ASID), [pgdl_reg] "i" (LOONGARCH_CSR_PGDL) + : "memory" + ); +} + static inline void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { + bool need_flush = false; unsigned int cpu = smp_processor_id(); /* Check if our ASID is of an older version and thus invalid */ if (!asid_valid(next, cpu)) - get_new_mmu_context(next, cpu); - - write_csr_asid(cpu_asid(cpu, next)); + get_new_mmu_context(next, cpu, &need_flush); if (next != &init_mm) - csr_write64((unsigned long)next->pgd, LOONGARCH_CSR_PGDL); + atomic_update_pgd_asid(cpu_asid(cpu, next), (unsigned long)next->pgd); else - csr_write64((unsigned long)invalid_pg_dir, LOONGARCH_CSR_PGDL); + atomic_update_pgd_asid(cpu_asid(cpu, next), (unsigned long)invalid_pg_dir); + + if (need_flush) + local_flush_tlb_user(); /* Flush tlb after update ASID */ /* * Mark current->active_mm as not "active" anymore. @@ -135,9 +148,15 @@ drop_mmu_context(struct mm_struct *mm, unsigned int cpu) asid = read_csr_asid() & cpu_asid_mask(¤t_cpu_data); if (asid == cpu_asid(cpu, mm)) { + bool need_flush = false; + if (!current->mm || (current->mm == mm)) { - get_new_mmu_context(mm, cpu); + get_new_mmu_context(mm, cpu, &need_flush); + write_csr_asid(cpu_asid(cpu, mm)); + if (need_flush) + local_flush_tlb_user(); /* Flush tlb after update ASID */ + goto out; } } diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index 85431f20a14d..9965f52ef65b 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -331,29 +331,23 @@ static inline void set_pte(pte_t *ptep, pte_t pteval) * Make sure the buddy is global too (if it's !none, * it better already be global) */ + if (pte_none(ptep_get(buddy))) { #ifdef CONFIG_SMP - /* - * For SMP, multiple CPUs can race, so we need to do - * this atomically. - */ - unsigned long page_global = _PAGE_GLOBAL; - unsigned long tmp; + /* + * For SMP, multiple CPUs can race, so we need + * to do this atomically. + */ + __asm__ __volatile__( + __AMOR "$zero, %[global], %[buddy] \n" + : [buddy] "+ZB" (buddy->pte) + : [global] "r" (_PAGE_GLOBAL) + : "memory"); - __asm__ __volatile__ ( - "1:" __LL "%[tmp], %[buddy] \n" - " bnez %[tmp], 2f \n" - " or %[tmp], %[tmp], %[global] \n" - __SC "%[tmp], %[buddy] \n" - " beqz %[tmp], 1b \n" - " nop \n" - "2: \n" - __WEAK_LLSC_MB - : [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp) - : [global] "r" (page_global)); + DBAR(0b11000); /* o_wrw = 0b11000 */ #else /* !CONFIG_SMP */ - if (pte_none(ptep_get(buddy))) WRITE_ONCE(*buddy, __pte(pte_val(ptep_get(buddy)) | _PAGE_GLOBAL)); #endif /* CONFIG_SMP */ + } } } diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c index 97b40defde06..deefd9617d00 100644 --- a/arch/loongarch/mm/fault.c +++ b/arch/loongarch/mm/fault.c @@ -31,11 +31,52 @@ int show_unhandled_signals = 1; +static int __kprobes spurious_fault(unsigned long write, unsigned long address) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + if (!(address & __UA_LIMIT)) + return 0; + + pgd = pgd_offset_k(address); + if (!pgd_present(pgdp_get(pgd))) + return 0; + + p4d = p4d_offset(pgd, address); + if (!p4d_present(p4dp_get(p4d))) + return 0; + + pud = pud_offset(p4d, address); + if (!pud_present(pudp_get(pud))) + return 0; + + pmd = pmd_offset(pud, address); + if (!pmd_present(pmdp_get(pmd))) + return 0; + + if (pmd_leaf(*pmd)) { + return write ? pmd_write(pmdp_get(pmd)) : 1; + } else { + pte = pte_offset_kernel(pmd, address); + if (!pte_present(ptep_get(pte))) + return 0; + + return write ? pte_write(ptep_get(pte)) : 1; + } +} + static void __kprobes no_context(struct pt_regs *regs, unsigned long write, unsigned long address) { const int field = sizeof(unsigned long) * 2; + if (spurious_fault(write, address)) + return; + /* Are we prepared to handle this kernel fault? */ if (fixup_exception(regs)) return; From d4f31acf1302088a5b16d1e4de890729acfa9638 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Tue, 24 Sep 2024 15:32:20 +0800 Subject: [PATCH 422/655] LoongArch: Simplify _percpu_read() and _percpu_write() Now _percpu_read() and _percpu_write() macros call __percpu_read() and __percpu_write() static inline functions that result in a single assembly instruction. However, percpu infrastructure expects its leaf definitions to encode the size of their percpu variable, so the patch merges all the asm clauses from the static inline function into the corresponding leaf macros. The secondary effect of this change is to avoid explicit __percpu annotations for function arguments. Currently, __percpu macro is defined in include/linux/compiler_types.h, but with proposed patch [1], __percpu definition will need macros from include/asm-generic/percpu.h, creating forward dependency loop. The proposed solution is the same as x86 architecture uses. [1] https://lore.kernel.org/lkml/20240812115945.484051-4-ubizjak@gmail.com/ Tested-by: Xi Ruoyao Signed-off-by: Uros Bizjak Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/percpu.h | 124 ++++++++-------------------- 1 file changed, 35 insertions(+), 89 deletions(-) diff --git a/arch/loongarch/include/asm/percpu.h b/arch/loongarch/include/asm/percpu.h index 8f290e5546cf..87be9b14e9da 100644 --- a/arch/loongarch/include/asm/percpu.h +++ b/arch/loongarch/include/asm/percpu.h @@ -68,75 +68,6 @@ PERCPU_OP(and, and, &) PERCPU_OP(or, or, |) #undef PERCPU_OP -static __always_inline unsigned long __percpu_read(void __percpu *ptr, int size) -{ - unsigned long ret; - - switch (size) { - case 1: - __asm__ __volatile__ ("ldx.b %[ret], $r21, %[ptr] \n" - : [ret] "=&r"(ret) - : [ptr] "r"(ptr) - : "memory"); - break; - case 2: - __asm__ __volatile__ ("ldx.h %[ret], $r21, %[ptr] \n" - : [ret] "=&r"(ret) - : [ptr] "r"(ptr) - : "memory"); - break; - case 4: - __asm__ __volatile__ ("ldx.w %[ret], $r21, %[ptr] \n" - : [ret] "=&r"(ret) - : [ptr] "r"(ptr) - : "memory"); - break; - case 8: - __asm__ __volatile__ ("ldx.d %[ret], $r21, %[ptr] \n" - : [ret] "=&r"(ret) - : [ptr] "r"(ptr) - : "memory"); - break; - default: - ret = 0; - BUILD_BUG(); - } - - return ret; -} - -static __always_inline void __percpu_write(void __percpu *ptr, unsigned long val, int size) -{ - switch (size) { - case 1: - __asm__ __volatile__("stx.b %[val], $r21, %[ptr] \n" - : - : [val] "r" (val), [ptr] "r" (ptr) - : "memory"); - break; - case 2: - __asm__ __volatile__("stx.h %[val], $r21, %[ptr] \n" - : - : [val] "r" (val), [ptr] "r" (ptr) - : "memory"); - break; - case 4: - __asm__ __volatile__("stx.w %[val], $r21, %[ptr] \n" - : - : [val] "r" (val), [ptr] "r" (ptr) - : "memory"); - break; - case 8: - __asm__ __volatile__("stx.d %[val], $r21, %[ptr] \n" - : - : [val] "r" (val), [ptr] "r" (ptr) - : "memory"); - break; - default: - BUILD_BUG(); - } -} - static __always_inline unsigned long __percpu_xchg(void *ptr, unsigned long val, int size) { switch (size) { @@ -157,6 +88,33 @@ static __always_inline unsigned long __percpu_xchg(void *ptr, unsigned long val, return 0; } +#define __pcpu_op_1(op) op ".b " +#define __pcpu_op_2(op) op ".h " +#define __pcpu_op_4(op) op ".w " +#define __pcpu_op_8(op) op ".d " + +#define _percpu_read(size, _pcp) \ +({ \ + typeof(_pcp) __pcp_ret; \ + \ + __asm__ __volatile__( \ + __pcpu_op_##size("ldx") "%[ret], $r21, %[ptr] \n" \ + : [ret] "=&r"(__pcp_ret) \ + : [ptr] "r"(&(_pcp)) \ + : "memory"); \ + \ + __pcp_ret; \ +}) + +#define _percpu_write(size, _pcp, _val) \ +do { \ + __asm__ __volatile__( \ + __pcpu_op_##size("stx") "%[val], $r21, %[ptr] \n" \ + : \ + : [val] "r"(_val), [ptr] "r"(&(_pcp)) \ + : "memory"); \ +} while (0) + /* this_cpu_cmpxchg */ #define _protect_cmpxchg_local(pcp, o, n) \ ({ \ @@ -167,18 +125,6 @@ static __always_inline unsigned long __percpu_xchg(void *ptr, unsigned long val, __ret; \ }) -#define _percpu_read(pcp) \ -({ \ - typeof(pcp) __retval; \ - __retval = (typeof(pcp))__percpu_read(&(pcp), sizeof(pcp)); \ - __retval; \ -}) - -#define _percpu_write(pcp, val) \ -do { \ - __percpu_write(&(pcp), (unsigned long)(val), sizeof(pcp)); \ -} while (0) \ - #define _pcp_protect(operation, pcp, val) \ ({ \ typeof(pcp) __retval; \ @@ -215,15 +161,15 @@ do { \ #define this_cpu_or_4(pcp, val) _percpu_or(pcp, val) #define this_cpu_or_8(pcp, val) _percpu_or(pcp, val) -#define this_cpu_read_1(pcp) _percpu_read(pcp) -#define this_cpu_read_2(pcp) _percpu_read(pcp) -#define this_cpu_read_4(pcp) _percpu_read(pcp) -#define this_cpu_read_8(pcp) _percpu_read(pcp) +#define this_cpu_read_1(pcp) _percpu_read(1, pcp) +#define this_cpu_read_2(pcp) _percpu_read(2, pcp) +#define this_cpu_read_4(pcp) _percpu_read(4, pcp) +#define this_cpu_read_8(pcp) _percpu_read(8, pcp) -#define this_cpu_write_1(pcp, val) _percpu_write(pcp, val) -#define this_cpu_write_2(pcp, val) _percpu_write(pcp, val) -#define this_cpu_write_4(pcp, val) _percpu_write(pcp, val) -#define this_cpu_write_8(pcp, val) _percpu_write(pcp, val) +#define this_cpu_write_1(pcp, val) _percpu_write(1, pcp, val) +#define this_cpu_write_2(pcp, val) _percpu_write(2, pcp, val) +#define this_cpu_write_4(pcp, val) _percpu_write(4, pcp, val) +#define this_cpu_write_8(pcp, val) _percpu_write(8, pcp, val) #define this_cpu_xchg_1(pcp, val) _percpu_xchg(pcp, val) #define this_cpu_xchg_2(pcp, val) _percpu_xchg(pcp, val) From 5016c3a31a6d74eaf2fdfdec673eae8fcf90379e Mon Sep 17 00:00:00 2001 From: Wentao Guan Date: Tue, 24 Sep 2024 15:32:20 +0800 Subject: [PATCH 423/655] LoongArch: Fix memleak in pci_acpi_scan_root() Add kfree(root_ops) in this case to avoid memleak of root_ops, leaks when pci_find_bus() != 0. Signed-off-by: Yuli Wang Signed-off-by: Wentao Guan Signed-off-by: Huacai Chen --- arch/loongarch/pci/acpi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/loongarch/pci/acpi.c b/arch/loongarch/pci/acpi.c index 365f7de771cb..1da4dc46df43 100644 --- a/arch/loongarch/pci/acpi.c +++ b/arch/loongarch/pci/acpi.c @@ -225,6 +225,7 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) if (bus) { memcpy(bus->sysdata, info->cfg, sizeof(struct pci_config_window)); kfree(info); + kfree(root_ops); } else { struct pci_bus *child; From 64c35d6c0ff95e9507f5fb3cce4936c7c62f3d3a Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Tue, 24 Sep 2024 15:32:20 +0800 Subject: [PATCH 424/655] LoongArch: Remove posix_types.h include from sigcontext.h Nothing in sigcontext.h seems to require anything from linux/posix_types.h. This include seems a MIPS relic originated from an error in Linux 2.6.11-rc2 (in 2005). The unneeded include was found debugging some vDSO self test build failure (it's not the root cause though). Link: https://lore.kernel.org/linux-mips/20240828030413.143930-2-xry111@xry111.site/ Link: https://lore.kernel.org/loongarch/0b540679ec8cfccec75aeb3463810924f6ff71e6.camel@xry111.site/ Signed-off-by: Xi Ruoyao Signed-off-by: Huacai Chen --- arch/loongarch/include/uapi/asm/sigcontext.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/loongarch/include/uapi/asm/sigcontext.h b/arch/loongarch/include/uapi/asm/sigcontext.h index 6c22f616b8f1..5cd121275bac 100644 --- a/arch/loongarch/include/uapi/asm/sigcontext.h +++ b/arch/loongarch/include/uapi/asm/sigcontext.h @@ -9,7 +9,6 @@ #define _UAPI_ASM_SIGCONTEXT_H #include -#include /* FP context was used */ #define SC_USED_FP (1 << 0) From f339bd3b51dac675fbbc08b861d2371ae3df0c0b Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 24 Sep 2024 15:32:20 +0800 Subject: [PATCH 425/655] Docs/LoongArch: Add advanced extended IRQ model description Introduce the advanced extended interrupt controllers (AVECINTC). This feature will allow each core to have 256 independent interrupt vectors and MSI interrupts can be independently routed to any vector on any CPU. The whole topology of irqchips in LoongArch machines looks like this if AVECINTC is supported: +-----+ +-----------------------+ +-------+ | IPI | --> | CPUINTC | <-- | Timer | +-----+ +-----------------------+ +-------+ ^ ^ ^ | | | +---------+ +----------+ +---------+ +-------+ | EIOINTC | | AVECINTC | | LIOINTC | <-- | UARTs | +---------+ +----------+ +---------+ +-------+ ^ ^ | | +---------+ +---------+ | PCH-PIC | | PCH-MSI | +---------+ +---------+ ^ ^ ^ | | | +---------+ +---------+ +---------+ | Devices | | PCH-LPC | | Devices | +---------+ +---------+ +---------+ ^ | +---------+ | Devices | +---------+ Signed-off-by: Huacai Chen Signed-off-by: Tianyang Zhang --- .../arch/loongarch/irq-chip-model.rst | 32 +++++++++++++++++++ .../zh_CN/arch/loongarch/irq-chip-model.rst | 32 +++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/Documentation/arch/loongarch/irq-chip-model.rst b/Documentation/arch/loongarch/irq-chip-model.rst index 7988f4192363..6dd48256e39f 100644 --- a/Documentation/arch/loongarch/irq-chip-model.rst +++ b/Documentation/arch/loongarch/irq-chip-model.rst @@ -85,6 +85,38 @@ to CPUINTC directly:: | Devices | +---------+ +Advanced Extended IRQ model +=========================== + +In this model, IPI (Inter-Processor Interrupt) and CPU Local Timer interrupt go +to CPUINTC directly, CPU UARTS interrupts go to LIOINTC, PCH-MSI interrupts go +to AVECINTC, and then go to CPUINTC directly, while all other devices interrupts +go to PCH-PIC/PCH-LPC and gathered by EIOINTC, and then go to CPUINTC directly:: + + +-----+ +-----------------------+ +-------+ + | IPI | --> | CPUINTC | <-- | Timer | + +-----+ +-----------------------+ +-------+ + ^ ^ ^ + | | | + +---------+ +----------+ +---------+ +-------+ + | EIOINTC | | AVECINTC | | LIOINTC | <-- | UARTs | + +---------+ +----------+ +---------+ +-------+ + ^ ^ + | | + +---------+ +---------+ + | PCH-PIC | | PCH-MSI | + +---------+ +---------+ + ^ ^ ^ + | | | + +---------+ +---------+ +---------+ + | Devices | | PCH-LPC | | Devices | + +---------+ +---------+ +---------+ + ^ + | + +---------+ + | Devices | + +---------+ + ACPI-related definitions ======================== diff --git a/Documentation/translations/zh_CN/arch/loongarch/irq-chip-model.rst b/Documentation/translations/zh_CN/arch/loongarch/irq-chip-model.rst index f1e9ab18206c..472761938682 100644 --- a/Documentation/translations/zh_CN/arch/loongarch/irq-chip-model.rst +++ b/Documentation/translations/zh_CN/arch/loongarch/irq-chip-model.rst @@ -87,6 +87,38 @@ PCH-LPC/PCH-MSI,然后被EIOINTC统一收集,再直接到达CPUINTC:: | Devices | +---------+ +高级扩展IRQ模型 +=============== + +在这种模型里面,IPI(Inter-Processor Interrupt)和CPU本地时钟中断直接发送到CPUINTC, +CPU串口(UARTs)中断发送到LIOINTC,PCH-MSI中断发送到AVECINTC,而后通过AVECINTC直接 +送达CPUINTC,而其他所有设备的中断则分别发送到所连接的PCH-PIC/PCH-LPC,然后由EIOINTC +统一收集,再直接到达CPUINTC:: + + +-----+ +-----------------------+ +-------+ + | IPI | --> | CPUINTC | <-- | Timer | + +-----+ +-----------------------+ +-------+ + ^ ^ ^ + | | | + +---------+ +----------+ +---------+ +-------+ + | EIOINTC | | AVECINTC | | LIOINTC | <-- | UARTs | + +---------+ +----------+ +---------+ +-------+ + ^ ^ + | | + +---------+ +---------+ + | PCH-PIC | | PCH-MSI | + +---------+ +---------+ + ^ ^ ^ + | | | + +---------+ +---------+ +---------+ + | Devices | | PCH-LPC | | Devices | + +---------+ +---------+ +---------+ + ^ + | + +---------+ + | Devices | + +---------+ + ACPI相关的定义 ============== From 03a9cfc1314bf75cc7a83995f3a029a7ebf49c05 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 23 Sep 2024 18:06:46 +0900 Subject: [PATCH 426/655] ata: libata-scsi: Fix ata_msense_control_spgt2() ata_msense_control_spgt2() can be called even for devices that do not support CDL when the user requests the ALL_SUB_MPAGES mode sense page, but for such device, this will cause a NULL pointer dereference as dev->cdl is NULL. Similarly, we should not return any data if ata_msense_control_spgt2() is called when the user requested the CDL_T2A_SUB_MPAGE or CDL_T2B_SUB_MPAGE pages for a device that does not support CDL. Avoid this potential NULL pointer dereference by checking if the device support CDL on entry to ata_msense_control_spgt2() and return 0 if it does not support CDL. Reported-by: syzbot+37757dc11ee77ef850bb@syzkaller.appspotmail.com Tested-by: syzbot+37757dc11ee77ef850bb@syzkaller.appspotmail.com Fixes: 602bcf212637 ("ata: libata: Improve CDL resource management") Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke --- drivers/ata/libata-scsi.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 061fe63497bf..97c84b0ec472 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -2245,10 +2245,15 @@ static inline u16 ata_xlat_cdl_limit(u8 *buf) static unsigned int ata_msense_control_spgt2(struct ata_device *dev, u8 *buf, u8 spg) { - u8 *b, *cdl = dev->cdl->desc_log_buf, *desc; + u8 *b, *cdl, *desc; u32 policy; int i; + if (!(dev->flags & ATA_DFLAG_CDL) || !dev->cdl) + return 0; + + cdl = dev->cdl->desc_log_buf; + /* * Fill the subpage. The first four bytes of the T2A/T2B mode pages * are a header. The PAGE LENGTH field is the size of the page From 0e9a2990a93f27daa643b6fa73cfa47b128947a7 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 23 Sep 2024 18:14:36 +0900 Subject: [PATCH 427/655] ata: libata-scsi: Fix ata_msense_control() CDL page reporting When the user requests the ALL_SUB_MPAGES mode sense page, ata_msense_control() adds the CDL_T2A_SUB_MPAGE twice instead of adding the CDL_T2A_SUB_MPAGE and CDL_T2B_SUB_MPAGE pages information. Correct the second call to ata_msense_control_spgt2() to report the CDL_T2B_SUB_MPAGE page. Fixes: 673b2fe6ff1d ("scsi: ata: libata-scsi: Add support for CDL pages mode sense") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke --- drivers/ata/libata-scsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 97c84b0ec472..ea7d365fb7a9 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -2350,7 +2350,7 @@ static unsigned int ata_msense_control(struct ata_device *dev, u8 *buf, case ALL_SUB_MPAGES: n = ata_msense_control_spg0(dev, buf, changeable); n += ata_msense_control_spgt2(dev, buf + n, CDL_T2A_SUB_MPAGE); - n += ata_msense_control_spgt2(dev, buf + n, CDL_T2A_SUB_MPAGE); + n += ata_msense_control_spgt2(dev, buf + n, CDL_T2B_SUB_MPAGE); n += ata_msense_control_ata_feature(dev, buf + n); return n; default: From bff1709b3980bd7f80be6786f64cc9a9ee9e56da Mon Sep 17 00:00:00 2001 From: Jiawei Ye Date: Tue, 24 Sep 2024 06:58:05 +0000 Subject: [PATCH 428/655] mac802154: Fix potential RCU dereference issue in mac802154_scan_worker In the `mac802154_scan_worker` function, the `scan_req->type` field was accessed after the RCU read-side critical section was unlocked. According to RCU usage rules, this is illegal and can lead to unpredictable behavior, such as accessing memory that has been updated or causing use-after-free issues. This possible bug was identified using a static analysis tool developed by myself, specifically designed to detect RCU-related issues. To address this, the `scan_req->type` value is now stored in a local variable `scan_req_type` while still within the RCU read-side critical section. The `scan_req_type` is then used after the RCU lock is released, ensuring that the type value is safely accessed without violating RCU rules. Fixes: e2c3e6f53a7a ("mac802154: Handle active scanning") Cc: stable@vger.kernel.org Signed-off-by: Jiawei Ye Acked-by: Miquel Raynal Reviewed-by: Przemek Kitszel Link: https://lore.kernel.org/tencent_3B2F4F2B4DA30FAE2F51A9634A16B3AD4908@qq.com Signed-off-by: Stefan Schmidt --- net/mac802154/scan.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/mac802154/scan.c b/net/mac802154/scan.c index 1c0eeaa76560..a6dab3cc3ad8 100644 --- a/net/mac802154/scan.c +++ b/net/mac802154/scan.c @@ -176,6 +176,7 @@ void mac802154_scan_worker(struct work_struct *work) struct ieee802154_local *local = container_of(work, struct ieee802154_local, scan_work.work); struct cfg802154_scan_request *scan_req; + enum nl802154_scan_types scan_req_type; struct ieee802154_sub_if_data *sdata; unsigned int scan_duration = 0; struct wpan_phy *wpan_phy; @@ -209,6 +210,7 @@ void mac802154_scan_worker(struct work_struct *work) } wpan_phy = scan_req->wpan_phy; + scan_req_type = scan_req->type; scan_req_duration = scan_req->duration; /* Look for the next valid chan */ @@ -246,7 +248,7 @@ void mac802154_scan_worker(struct work_struct *work) goto end_scan; } - if (scan_req->type == NL802154_SCAN_ACTIVE) { + if (scan_req_type == NL802154_SCAN_ACTIVE) { ret = mac802154_transmit_beacon_req(local, sdata); if (ret) dev_err(&sdata->dev->dev, From 9805f39d423a30a7189158905ec3d71774fe98a1 Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Thu, 19 Sep 2024 17:13:59 +0800 Subject: [PATCH 429/655] LoongArch: vDSO: Tune chacha implementation As Christophe pointed out, tuning the chacha implementation by scheduling the instructions like what GCC does can improve the performance. The tuning does not introduce too much complexity (basically it's just reordering some instructions). And the tuning does not hurt readibility too much: actually the tuned code looks even more similar to a textbook-style implementation based on 128-bit vectors. So overall it's a good deal to me. Tested with vdso_test_getchacha and benched with vdso_test_getrandom. On a LA664 the speedup is 5%, and I expect a larger speedup on LA[2-4]64 with a lower issue rate. Suggested-by: Christophe Leroy Link: https://lore.kernel.org/all/77655d9e-fc05-4300-8f0d-7b2ad840d091@csgroup.eu/ Signed-off-by: Xi Ruoyao Reviewed-by: Huacai Chen Signed-off-by: Jason A. Donenfeld --- arch/loongarch/vdso/vgetrandom-chacha.S | 92 +++++++++++++++---------- 1 file changed, 55 insertions(+), 37 deletions(-) diff --git a/arch/loongarch/vdso/vgetrandom-chacha.S b/arch/loongarch/vdso/vgetrandom-chacha.S index 7e86a50f6e85..c2733e6c3a8d 100644 --- a/arch/loongarch/vdso/vgetrandom-chacha.S +++ b/arch/loongarch/vdso/vgetrandom-chacha.S @@ -9,23 +9,11 @@ .text -/* Salsa20 quarter-round */ -.macro QR a b c d - add.w \a, \a, \b - xor \d, \d, \a - rotri.w \d, \d, 16 - - add.w \c, \c, \d - xor \b, \b, \c - rotri.w \b, \b, 20 - - add.w \a, \a, \b - xor \d, \d, \a - rotri.w \d, \d, 24 - - add.w \c, \c, \d - xor \b, \b, \c - rotri.w \b, \b, 25 +.macro OP_4REG op d0 d1 d2 d3 s0 s1 s2 s3 + \op \d0, \d0, \s0 + \op \d1, \d1, \s1 + \op \d2, \d2, \s2 + \op \d3, \d3, \s3 .endm /* @@ -74,6 +62,23 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack) /* Reuse i as copy3 */ #define copy3 i +/* Packs to be used with OP_4REG */ +#define line0 state0, state1, state2, state3 +#define line1 state4, state5, state6, state7 +#define line2 state8, state9, state10, state11 +#define line3 state12, state13, state14, state15 + +#define line1_perm state5, state6, state7, state4 +#define line2_perm state10, state11, state8, state9 +#define line3_perm state15, state12, state13, state14 + +#define copy copy0, copy1, copy2, copy3 + +#define _16 16, 16, 16, 16 +#define _20 20, 20, 20, 20 +#define _24 24, 24, 24, 24 +#define _25 25, 25, 25, 25 + /* * The ABI requires s0-s9 saved, and sp aligned to 16-byte. * This does not violate the stack-less requirement: no sensitive data @@ -126,16 +131,38 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack) li.w i, 10 .Lpermute: /* odd round */ - QR state0, state4, state8, state12 - QR state1, state5, state9, state13 - QR state2, state6, state10, state14 - QR state3, state7, state11, state15 + OP_4REG add.w line0, line1 + OP_4REG xor line3, line0 + OP_4REG rotri.w line3, _16 + + OP_4REG add.w line2, line3 + OP_4REG xor line1, line2 + OP_4REG rotri.w line1, _20 + + OP_4REG add.w line0, line1 + OP_4REG xor line3, line0 + OP_4REG rotri.w line3, _24 + + OP_4REG add.w line2, line3 + OP_4REG xor line1, line2 + OP_4REG rotri.w line1, _25 /* even round */ - QR state0, state5, state10, state15 - QR state1, state6, state11, state12 - QR state2, state7, state8, state13 - QR state3, state4, state9, state14 + OP_4REG add.w line0, line1_perm + OP_4REG xor line3_perm, line0 + OP_4REG rotri.w line3_perm, _16 + + OP_4REG add.w line2_perm, line3_perm + OP_4REG xor line1_perm, line2_perm + OP_4REG rotri.w line1_perm, _20 + + OP_4REG add.w line0, line1_perm + OP_4REG xor line3_perm, line0 + OP_4REG rotri.w line3_perm, _24 + + OP_4REG add.w line2_perm, line3_perm + OP_4REG xor line1_perm, line2_perm + OP_4REG rotri.w line1_perm, _25 addi.w i, i, -1 bnez i, .Lpermute @@ -147,10 +174,7 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack) li.w copy3, 0x6b206574 /* output[0,1,2,3] = copy[0,1,2,3] + state[0,1,2,3] */ - add.w state0, state0, copy0 - add.w state1, state1, copy1 - add.w state2, state2, copy2 - add.w state3, state3, copy3 + OP_4REG add.w line0, copy st.w state0, output, 0 st.w state1, output, 4 st.w state2, output, 8 @@ -165,10 +189,7 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack) ld.w state3, key, 12 /* output[4,5,6,7] = state[0,1,2,3] + state[4,5,6,7] */ - add.w state4, state4, state0 - add.w state5, state5, state1 - add.w state6, state6, state2 - add.w state7, state7, state3 + OP_4REG add.w line1, line0 st.w state4, output, 16 st.w state5, output, 20 st.w state6, output, 24 @@ -181,10 +202,7 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack) ld.w state3, key, 28 /* output[8,9,10,11] = state[0,1,2,3] + state[8,9,10,11] */ - add.w state8, state8, state0 - add.w state9, state9, state1 - add.w state10, state10, state2 - add.w state11, state11, state3 + OP_4REG add.w line2, line0 st.w state8, output, 32 st.w state9, output, 36 st.w state10, output, 40 From 8b985bbfabbe46c8b9200d7d299030232c8ebd05 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 23 Sep 2024 19:55:50 +0900 Subject: [PATCH 430/655] tomoyo: allow building as a loadable LSM module One of concerns for enabling TOMOYO in prebuilt kernels is that distributor wants to avoid bloating kernel packages. Although boot-time kernel command line options allows selecting built-in LSMs to enable, file size increase of vmlinux and memory footprint increase of vmlinux caused by builtin-but- not-enabled LSMs remains. If it becomes possible to make LSMs dynamically appendable after boot using loadable kernel modules, these problems will go away. Another of concerns for enabling TOMOYO in prebuilt kernels is that who can provide support when distributor cannot provide support. Due to "those who compiled kernel code is expected to provide support for that kernel code" spell, TOMOYO is failing to get enabled in Fedora distribution [1]. The point of loadable kernel module is to share the workload. If it becomes possible to make LSMs dynamically appendable after boot using loadable kernel modules, as with people can use device drivers not supported by distributors but provided by third party device vendors, we can break this spell and can lower the barrier for using TOMOYO. This patch is intended for demonstrating that there is nothing difficult for supporting TOMOYO-like loadable LSM modules. For now we need to live with a mixture of built-in part and loadable part because fully loadable LSM modules are not supported since Linux 2.6.24 [2] and number of LSMs which can reserve static call slots is determined at compile time in Linux 6.12. Major changes in this patch are described below. There are no behavior changes as long as TOMOYO is built into vmlinux. Add CONFIG_SECURITY_TOMOYO_LKM as "bool" instead of changing CONFIG_SECURITY_TOMOYO from "bool" to "tristate", for something went wrong with how Makefile is evaluated if I choose "tristate". Add proxy.c for serving as a bridge between vmlinux and tomoyo.ko . Move callback functions from init.c to proxy.c when building as a loadable LSM module. init.c is built-in part and remains for reserving static call slots. proxy.c contains module's init function and tells init.c location of callback functions, making it possible to use static call for tomoyo.ko . By deferring initialization of "struct tomoyo_task" until tomoyo.ko is loaded, threads created between init.c reserved LSM hooks and proxy.c updates LSM hooks will have NULL "struct tomoyo_task" instances. Assuming that tomoyo.ko is loaded by the moment when the global init process starts, initialize "struct tomoyo_task" instance for current thread as a kernel thread when tomoyo_task(current) is called for the first time. There is a hack for exporting currently not-exported functions. This hack will be removed after all relevant functions are exported. Link: https://bugzilla.redhat.com/show_bug.cgi?id=542986 [1] Link: https://lkml.kernel.org/r/caafb609-8bef-4840-a080-81537356fc60@I-love.SAKURA.ne.jp [2] Signed-off-by: Tetsuo Handa --- security/tomoyo/Kconfig | 15 ++ security/tomoyo/Makefile | 8 +- security/tomoyo/common.c | 14 +- security/tomoyo/common.h | 72 +++++++++ security/tomoyo/init.c | 262 ++++++++++++++++++++++++++++++++ security/tomoyo/load_policy.c | 12 ++ security/tomoyo/proxy.c | 82 ++++++++++ security/tomoyo/securityfs_if.c | 6 +- 8 files changed, 467 insertions(+), 4 deletions(-) create mode 100644 security/tomoyo/proxy.c diff --git a/security/tomoyo/Kconfig b/security/tomoyo/Kconfig index 1e0dd1a6d0b0..90eccc6cd464 100644 --- a/security/tomoyo/Kconfig +++ b/security/tomoyo/Kconfig @@ -13,6 +13,21 @@ config SECURITY_TOMOYO found at . If you are unsure how to answer this question, answer N. +config SECURITY_TOMOYO_LKM + bool "Cut out most of TOMOYO's code to a loadable kernel module" + default n + depends on SECURITY_TOMOYO + depends on MODULES + help + Say Y here if you want to include TOMOYO without bloating + vmlinux file. If you say Y, most of TOMOYO code is cut out to + a loadable kernel module named tomoyo.ko . This option will be + useful for kernels built by Linux distributors where TOMOYO is + included but TOMOYO is not enabled by default. Please be sure + to explicitly load tomoyo.ko if you want to activate TOMOYO + without calling userspace policy loader, for tomoyo.ko is + loaded immediately before calling userspace policy loader. + config SECURITY_TOMOYO_MAX_ACCEPT_ENTRY int "Default maximal count for learning mode" default 2048 diff --git a/security/tomoyo/Makefile b/security/tomoyo/Makefile index e573a5cad732..287a7d16fa15 100644 --- a/security/tomoyo/Makefile +++ b/security/tomoyo/Makefile @@ -1,5 +1,11 @@ # SPDX-License-Identifier: GPL-2.0 -obj-y = audit.o common.o condition.o domain.o environ.o file.o gc.o group.o init.o load_policy.o memory.o mount.o network.o realpath.o securityfs_if.o util.o +tomoyo-objs := audit.o common.o condition.o domain.o environ.o file.o gc.o group.o memory.o mount.o network.o proxy.o realpath.o securityfs_if.o util.o +obj-y += init.o load_policy.o +ifdef CONFIG_SECURITY_TOMOYO_LKM +obj-m += tomoyo.o +else +obj-y += tomoyo.o +endif targets += builtin-policy.h diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c index 5c7b059a332a..c0ef014f8009 100644 --- a/security/tomoyo/common.c +++ b/security/tomoyo/common.c @@ -998,8 +998,13 @@ static bool tomoyo_select_domain(struct tomoyo_io_buffer *head, p = find_task_by_pid_ns(pid, &init_pid_ns); else p = find_task_by_vpid(pid); - if (p) + if (p) { domain = tomoyo_task(p)->domain_info; +#ifdef CONFIG_SECURITY_TOMOYO_LKM + if (!domain) + domain = &tomoyo_kernel_domain; +#endif + } rcu_read_unlock(); } else if (!strncmp(data, "domain=", 7)) { if (tomoyo_domain_def(data + 7)) @@ -1710,8 +1715,13 @@ static void tomoyo_read_pid(struct tomoyo_io_buffer *head) p = find_task_by_pid_ns(pid, &init_pid_ns); else p = find_task_by_vpid(pid); - if (p) + if (p) { domain = tomoyo_task(p)->domain_info; +#ifdef CONFIG_SECURITY_TOMOYO_LKM + if (!domain) + domain = &tomoyo_kernel_domain; +#endif + } rcu_read_unlock(); if (!domain) return; diff --git a/security/tomoyo/common.h b/security/tomoyo/common.h index 0e8e2e959aef..4f6c52a9f478 100644 --- a/security/tomoyo/common.h +++ b/security/tomoyo/common.h @@ -978,6 +978,7 @@ int tomoyo_get_mode(const struct tomoyo_policy_namespace *ns, const u8 profile, int tomoyo_init_request_info(struct tomoyo_request_info *r, struct tomoyo_domain_info *domain, const u8 index); +int __init tomoyo_interface_init(void); int tomoyo_mkdev_perm(const u8 operation, const struct path *path, const unsigned int mode, unsigned int dev); int tomoyo_mount_permission(const char *dev_name, const struct path *path, @@ -1214,10 +1215,14 @@ static inline void tomoyo_put_group(struct tomoyo_group *group) * * Returns pointer to "struct tomoyo_task" for specified thread. */ +#ifdef CONFIG_SECURITY_TOMOYO_LKM +extern struct tomoyo_task *tomoyo_task(struct task_struct *task); +#else static inline struct tomoyo_task *tomoyo_task(struct task_struct *task) { return task->security + tomoyo_blob_sizes.lbs_task; } +#endif /** * tomoyo_same_name_union - Check for duplicated "struct tomoyo_name_union" entry. @@ -1284,4 +1289,71 @@ static inline struct tomoyo_policy_namespace *tomoyo_current_namespace(void) pos = srcu_dereference((head)->next, &tomoyo_ss); \ for ( ; pos != (head); pos = srcu_dereference(pos->next, &tomoyo_ss)) +#ifdef CONFIG_SECURITY_TOMOYO_LKM + +#define LSM_HOOK(RET, DEFAULT, NAME, ...) typedef RET (NAME##_t)(__VA_ARGS__); +#include +#undef LSM_HOOK + +struct tomoyo_hooks { + cred_prepare_t *cred_prepare; + bprm_committed_creds_t *bprm_committed_creds; + task_alloc_t *task_alloc; + task_free_t *task_free; + bprm_check_security_t *bprm_check_security; + file_fcntl_t *file_fcntl; + file_open_t *file_open; + file_truncate_t *file_truncate; + path_truncate_t *path_truncate; + path_unlink_t *path_unlink; + path_mkdir_t *path_mkdir; + path_rmdir_t *path_rmdir; + path_symlink_t *path_symlink; + path_mknod_t *path_mknod; + path_link_t *path_link; + path_rename_t *path_rename; + inode_getattr_t *inode_getattr; + file_ioctl_t *file_ioctl; + file_ioctl_compat_t *file_ioctl_compat; + path_chmod_t *path_chmod; + path_chown_t *path_chown; + path_chroot_t *path_chroot; + sb_mount_t *sb_mount; + sb_umount_t *sb_umount; + sb_pivotroot_t *sb_pivotroot; + socket_bind_t *socket_bind; + socket_connect_t *socket_connect; + socket_listen_t *socket_listen; + socket_sendmsg_t *socket_sendmsg; +}; + +extern void tomoyo_register_hooks(const struct tomoyo_hooks *tomoyo_hooks); + +struct tomoyo_operations { + void (*check_profile)(void); + int enabled; +}; + +extern struct tomoyo_operations tomoyo_ops; + +/* + * Temporary hack: functions needed by tomoyo.ko . This will be removed + * after all functions are marked as EXPORT_STMBOL_GPL(). + */ +struct tomoyo_tmp_exports { + struct task_struct * (*find_task_by_vpid)(pid_t nr); + struct task_struct * (*find_task_by_pid_ns)(pid_t nr, struct pid_namespace *ns); + void (*put_filesystem)(struct file_system_type *fs); + struct file * (*get_mm_exe_file)(struct mm_struct *mm); + char * (*d_absolute_path)(const struct path *path, char *buf, int buflen); +}; +extern const struct tomoyo_tmp_exports tomoyo_tmp_exports; +#define find_task_by_vpid tomoyo_tmp_exports.find_task_by_vpid +#define find_task_by_pid_ns tomoyo_tmp_exports.find_task_by_pid_ns +#define put_filesystem tomoyo_tmp_exports.put_filesystem +#define get_mm_exe_file tomoyo_tmp_exports.get_mm_exe_file +#define d_absolute_path tomoyo_tmp_exports.d_absolute_path + +#endif /* defined(CONFIG_SECURITY_TOMOYO_LKM) */ + #endif /* !defined(_SECURITY_TOMOYO_COMMON_H) */ diff --git a/security/tomoyo/init.c b/security/tomoyo/init.c index 2b38dbc7a559..034e7db22d4e 100644 --- a/security/tomoyo/init.c +++ b/security/tomoyo/init.c @@ -9,8 +9,248 @@ #include #include "common.h" +#ifndef CONFIG_SECURITY_TOMOYO_LKM + #include "hooks.h" +#else + +#define DEFINE_STATIC_CALL_PROXY(NAME) \ + static NAME##_t tomoyo_##NAME; \ + DEFINE_STATIC_CALL_RET0(tomoyo_##NAME, tomoyo_##NAME); +DEFINE_STATIC_CALL_PROXY(cred_prepare) +DEFINE_STATIC_CALL_PROXY(bprm_committed_creds) +DEFINE_STATIC_CALL_PROXY(bprm_check_security) +DEFINE_STATIC_CALL_PROXY(inode_getattr) +DEFINE_STATIC_CALL_PROXY(path_truncate) +DEFINE_STATIC_CALL_PROXY(file_truncate) +DEFINE_STATIC_CALL_PROXY(path_unlink) +DEFINE_STATIC_CALL_PROXY(path_mkdir) +DEFINE_STATIC_CALL_PROXY(path_rmdir) +DEFINE_STATIC_CALL_PROXY(path_symlink) +DEFINE_STATIC_CALL_PROXY(path_mknod) +DEFINE_STATIC_CALL_PROXY(path_link) +DEFINE_STATIC_CALL_PROXY(path_rename) +DEFINE_STATIC_CALL_PROXY(file_fcntl) +DEFINE_STATIC_CALL_PROXY(file_open) +DEFINE_STATIC_CALL_PROXY(file_ioctl) +DEFINE_STATIC_CALL_PROXY(path_chmod) +DEFINE_STATIC_CALL_PROXY(path_chown) +DEFINE_STATIC_CALL_PROXY(path_chroot) +DEFINE_STATIC_CALL_PROXY(sb_mount) +DEFINE_STATIC_CALL_PROXY(sb_umount) +DEFINE_STATIC_CALL_PROXY(sb_pivotroot) +DEFINE_STATIC_CALL_PROXY(socket_listen) +DEFINE_STATIC_CALL_PROXY(socket_connect) +DEFINE_STATIC_CALL_PROXY(socket_bind) +DEFINE_STATIC_CALL_PROXY(socket_sendmsg) +DEFINE_STATIC_CALL_PROXY(task_alloc) +DEFINE_STATIC_CALL_PROXY(task_free) +#undef DEFINE_STATIC_CALL_PROXY + +static int tomoyo_cred_prepare(struct cred *new, const struct cred *old, gfp_t gfp) +{ + return static_call(tomoyo_cred_prepare)(new, old, gfp); +} + +static void tomoyo_bprm_committed_creds(const struct linux_binprm *bprm) +{ + static_call(tomoyo_bprm_committed_creds)(bprm); +} + +static int tomoyo_bprm_check_security(struct linux_binprm *bprm) +{ + return static_call(tomoyo_bprm_check_security)(bprm); +} + +static int tomoyo_inode_getattr(const struct path *path) +{ + return static_call(tomoyo_inode_getattr)(path); +} + +static int tomoyo_path_truncate(const struct path *path) +{ + return static_call(tomoyo_path_truncate)(path); +} + +static int tomoyo_file_truncate(struct file *file) +{ + return static_call(tomoyo_file_truncate)(file); +} + +static int tomoyo_path_unlink(const struct path *parent, struct dentry *dentry) +{ + return static_call(tomoyo_path_unlink)(parent, dentry); +} + +static int tomoyo_path_mkdir(const struct path *parent, struct dentry *dentry, umode_t mode) +{ + return static_call(tomoyo_path_mkdir)(parent, dentry, mode); +} + +static int tomoyo_path_rmdir(const struct path *parent, struct dentry *dentry) +{ + return static_call(tomoyo_path_rmdir)(parent, dentry); +} + +static int tomoyo_path_symlink(const struct path *parent, struct dentry *dentry, + const char *old_name) +{ + return static_call(tomoyo_path_symlink)(parent, dentry, old_name); +} + +static int tomoyo_path_mknod(const struct path *parent, struct dentry *dentry, + umode_t mode, unsigned int dev) +{ + return static_call(tomoyo_path_mknod)(parent, dentry, mode, dev); +} + +static int tomoyo_path_link(struct dentry *old_dentry, const struct path *new_dir, + struct dentry *new_dentry) +{ + return static_call(tomoyo_path_link)(old_dentry, new_dir, new_dentry); +} + +static int tomoyo_path_rename(const struct path *old_parent, struct dentry *old_dentry, + const struct path *new_parent, struct dentry *new_dentry, + const unsigned int flags) +{ + return static_call(tomoyo_path_rename)(old_parent, old_dentry, new_parent, new_dentry, flags); +} + +static int tomoyo_file_fcntl(struct file *file, unsigned int cmd, unsigned long arg) +{ + return static_call(tomoyo_file_fcntl)(file, cmd, arg); +} + +static int tomoyo_file_open(struct file *f) +{ + return static_call(tomoyo_file_open)(f); +} + +static int tomoyo_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + return static_call(tomoyo_file_ioctl)(file, cmd, arg); +} + +static int tomoyo_path_chmod(const struct path *path, umode_t mode) +{ + return static_call(tomoyo_path_chmod)(path, mode); +} + +static int tomoyo_path_chown(const struct path *path, kuid_t uid, kgid_t gid) +{ + return static_call(tomoyo_path_chown)(path, uid, gid); +} + +static int tomoyo_path_chroot(const struct path *path) +{ + return static_call(tomoyo_path_chroot)(path); +} + +static int tomoyo_sb_mount(const char *dev_name, const struct path *path, + const char *type, unsigned long flags, void *data) +{ + return static_call(tomoyo_sb_mount)(dev_name, path, type, flags, data); +} + +static int tomoyo_sb_umount(struct vfsmount *mnt, int flags) +{ + return static_call(tomoyo_sb_umount)(mnt, flags); +} + +static int tomoyo_sb_pivotroot(const struct path *old_path, const struct path *new_path) +{ + return static_call(tomoyo_sb_pivotroot)(old_path, new_path); +} + +static int tomoyo_socket_listen(struct socket *sock, int backlog) +{ + return static_call(tomoyo_socket_listen)(sock, backlog); +} + +static int tomoyo_socket_connect(struct socket *sock, struct sockaddr *addr, int addr_len) +{ + return static_call(tomoyo_socket_connect)(sock, addr, addr_len); +} + +static int tomoyo_socket_bind(struct socket *sock, struct sockaddr *addr, int addr_len) +{ + return static_call(tomoyo_socket_bind)(sock, addr, addr_len); +} + +static int tomoyo_socket_sendmsg(struct socket *sock, struct msghdr *msg, int size) +{ + return static_call(tomoyo_socket_sendmsg)(sock, msg, size); +} + +static int tomoyo_task_alloc(struct task_struct *task, unsigned long clone_flags) +{ + return static_call(tomoyo_task_alloc)(task, clone_flags); +} + +static void tomoyo_task_free(struct task_struct *task) +{ + static_call(tomoyo_task_free)(task); +} + +void tomoyo_register_hooks(const struct tomoyo_hooks *tomoyo_hooks) +{ + static void *registered; + + if (cmpxchg(®istered, NULL, ®istered)) + panic("%s was called twice!\n", __func__); + static_call_update(tomoyo_task_free, tomoyo_hooks->task_free); + static_call_update(tomoyo_task_alloc, tomoyo_hooks->task_alloc); + static_call_update(tomoyo_cred_prepare, tomoyo_hooks->cred_prepare); + static_call_update(tomoyo_bprm_committed_creds, tomoyo_hooks->bprm_committed_creds); + static_call_update(tomoyo_bprm_check_security, tomoyo_hooks->bprm_check_security); + static_call_update(tomoyo_inode_getattr, tomoyo_hooks->inode_getattr); + static_call_update(tomoyo_path_truncate, tomoyo_hooks->path_truncate); + static_call_update(tomoyo_file_truncate, tomoyo_hooks->file_truncate); + static_call_update(tomoyo_path_unlink, tomoyo_hooks->path_unlink); + static_call_update(tomoyo_path_mkdir, tomoyo_hooks->path_mkdir); + static_call_update(tomoyo_path_rmdir, tomoyo_hooks->path_rmdir); + static_call_update(tomoyo_path_symlink, tomoyo_hooks->path_symlink); + static_call_update(tomoyo_path_mknod, tomoyo_hooks->path_mknod); + static_call_update(tomoyo_path_link, tomoyo_hooks->path_link); + static_call_update(tomoyo_path_rename, tomoyo_hooks->path_rename); + static_call_update(tomoyo_file_fcntl, tomoyo_hooks->file_fcntl); + static_call_update(tomoyo_file_open, tomoyo_hooks->file_open); + static_call_update(tomoyo_file_ioctl, tomoyo_hooks->file_ioctl); + static_call_update(tomoyo_path_chmod, tomoyo_hooks->path_chmod); + static_call_update(tomoyo_path_chown, tomoyo_hooks->path_chown); + static_call_update(tomoyo_path_chroot, tomoyo_hooks->path_chroot); + static_call_update(tomoyo_sb_mount, tomoyo_hooks->sb_mount); + static_call_update(tomoyo_sb_umount, tomoyo_hooks->sb_umount); + static_call_update(tomoyo_sb_pivotroot, tomoyo_hooks->sb_pivotroot); + static_call_update(tomoyo_socket_listen, tomoyo_hooks->socket_listen); + static_call_update(tomoyo_socket_connect, tomoyo_hooks->socket_connect); + static_call_update(tomoyo_socket_bind, tomoyo_hooks->socket_bind); + static_call_update(tomoyo_socket_sendmsg, tomoyo_hooks->socket_sendmsg); +} +EXPORT_SYMBOL_GPL(tomoyo_register_hooks); + +/* + * Temporary hack: functions needed by tomoyo.ko . This hack will be removed + * after all functions are marked as EXPORT_STMBOL_GPL(). + */ +#undef find_task_by_vpid +#undef find_task_by_pid_ns +#undef put_filesystem +#undef get_mm_exe_file +#undef d_absolute_path +const struct tomoyo_tmp_exports tomoyo_tmp_exports = { + .find_task_by_vpid = find_task_by_vpid, + .find_task_by_pid_ns = find_task_by_pid_ns, + .put_filesystem = put_filesystem, + .get_mm_exe_file = get_mm_exe_file, + .d_absolute_path = d_absolute_path, +}; +EXPORT_SYMBOL_GPL(tomoyo_tmp_exports); + +#endif + #ifndef CONFIG_SECURITY_TOMOYO_OMIT_USERSPACE_LOADER static int tomoyo_bprm_creds_for_exec(struct linux_binprm *bprm) { @@ -74,6 +314,27 @@ int tomoyo_enabled __ro_after_init = 1; /* Has /sbin/init started? */ bool tomoyo_policy_loaded; +#ifdef CONFIG_SECURITY_TOMOYO_LKM +EXPORT_SYMBOL_GPL(tomoyo_blob_sizes); +EXPORT_SYMBOL_GPL(tomoyo_policy_loaded); + +struct tomoyo_operations tomoyo_ops; +EXPORT_SYMBOL_GPL(tomoyo_ops); + +/** + * tomoyo_init - Reserve hooks for TOMOYO Linux. + * + * Returns 0. + */ +static int __init tomoyo_init(void) +{ + /* register ourselves with the security framework */ + security_add_hooks(tomoyo_hooks, ARRAY_SIZE(tomoyo_hooks), &tomoyo_lsmid); + tomoyo_ops.enabled = tomoyo_enabled; + pr_info("Hooks for initializing TOMOYO Linux are ready\n"); + return 0; +} +#else /** * tomoyo_init - Register TOMOYO Linux as a LSM module. * @@ -94,6 +355,7 @@ static int __init tomoyo_init(void) return 0; } +#endif DEFINE_LSM(tomoyo) = { .name = "tomoyo", diff --git a/security/tomoyo/load_policy.c b/security/tomoyo/load_policy.c index 363b65be87ab..6a2a72354a64 100644 --- a/security/tomoyo/load_policy.c +++ b/security/tomoyo/load_policy.c @@ -97,6 +97,14 @@ void tomoyo_load_policy(const char *filename) if (!tomoyo_policy_loader_exists()) return; done = true; +#ifdef CONFIG_SECURITY_TOMOYO_LKM + /* Load tomoyo.ko if not yet loaded. */ + if (!tomoyo_ops.check_profile) + request_module("tomoyo"); + /* Check if tomoyo.ko was successfully loaded. */ + if (!tomoyo_ops.check_profile) + panic("Failed to load tomoyo module."); +#endif pr_info("Calling %s to load policy. Please wait.\n", tomoyo_loader); argv[0] = (char *) tomoyo_loader; argv[1] = NULL; @@ -104,7 +112,11 @@ void tomoyo_load_policy(const char *filename) envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; envp[2] = NULL; call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); +#ifdef CONFIG_SECURITY_TOMOYO_LKM + tomoyo_ops.check_profile(); +#else tomoyo_check_profile(); +#endif } #endif diff --git a/security/tomoyo/proxy.c b/security/tomoyo/proxy.c new file mode 100644 index 000000000000..1618cc0f2af8 --- /dev/null +++ b/security/tomoyo/proxy.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * security/tomoyo/proxy.c + * + * Copyright (C) 2005-2011 NTT DATA CORPORATION + */ + +#include +#include "common.h" + +#ifdef CONFIG_SECURITY_TOMOYO_LKM + +struct tomoyo_task *tomoyo_task(struct task_struct *task) +{ + struct tomoyo_task *s = task->security + tomoyo_blob_sizes.lbs_task; + + if (unlikely(!s->domain_info)) { + if (likely(task == current)) { + s->domain_info = &tomoyo_kernel_domain; + atomic_inc(&tomoyo_kernel_domain.users); + } else { + /* Caller handles s->domain_info == NULL case. */ + } + } + return s; +} + +#include "hooks.h" + +/** + * tomoyo_runtime_init - Register TOMOYO Linux as a loadable LSM module. + * + * Returns 0 if TOMOYO is enabled, -EINVAL otherwise. + */ +static int __init tomoyo_runtime_init(void) +{ + const struct tomoyo_hooks tomoyo_hooks = { + .cred_prepare = tomoyo_cred_prepare, + .bprm_committed_creds = tomoyo_bprm_committed_creds, + .task_alloc = tomoyo_task_alloc, + .task_free = tomoyo_task_free, + .bprm_check_security = tomoyo_bprm_check_security, + .file_fcntl = tomoyo_file_fcntl, + .file_open = tomoyo_file_open, + .file_truncate = tomoyo_file_truncate, + .path_truncate = tomoyo_path_truncate, + .path_unlink = tomoyo_path_unlink, + .path_mkdir = tomoyo_path_mkdir, + .path_rmdir = tomoyo_path_rmdir, + .path_symlink = tomoyo_path_symlink, + .path_mknod = tomoyo_path_mknod, + .path_link = tomoyo_path_link, + .path_rename = tomoyo_path_rename, + .inode_getattr = tomoyo_inode_getattr, + .file_ioctl = tomoyo_file_ioctl, + .file_ioctl_compat = tomoyo_file_ioctl, + .path_chmod = tomoyo_path_chmod, + .path_chown = tomoyo_path_chown, + .path_chroot = tomoyo_path_chroot, + .sb_mount = tomoyo_sb_mount, + .sb_umount = tomoyo_sb_umount, + .sb_pivotroot = tomoyo_sb_pivotroot, + .socket_bind = tomoyo_socket_bind, + .socket_connect = tomoyo_socket_connect, + .socket_listen = tomoyo_socket_listen, + .socket_sendmsg = tomoyo_socket_sendmsg, + }; + + if (!tomoyo_ops.enabled) + return -EINVAL; + tomoyo_ops.check_profile = tomoyo_check_profile; + pr_info("TOMOYO Linux initialized\n"); + tomoyo_task(current); + tomoyo_mm_init(); + tomoyo_interface_init(); + tomoyo_register_hooks(&tomoyo_hooks); + return 0; +} +module_init(tomoyo_runtime_init); +MODULE_LICENSE("GPL"); + +#endif diff --git a/security/tomoyo/securityfs_if.c b/security/tomoyo/securityfs_if.c index 7e69747b2f77..a3b821b7f477 100644 --- a/security/tomoyo/securityfs_if.c +++ b/security/tomoyo/securityfs_if.c @@ -233,13 +233,15 @@ static void __init tomoyo_create_entry(const char *name, const umode_t mode, * * Returns 0. */ -static int __init tomoyo_interface_init(void) +int __init tomoyo_interface_init(void) { struct tomoyo_domain_info *domain; struct dentry *tomoyo_dir; +#ifndef CONFIG_SECURITY_TOMOYO_LKM if (!tomoyo_enabled) return 0; +#endif domain = tomoyo_domain(); /* Don't create securityfs entries unless registered. */ if (domain != &tomoyo_kernel_domain) @@ -270,4 +272,6 @@ static int __init tomoyo_interface_init(void) return 0; } +#ifndef CONFIG_SECURITY_TOMOYO_LKM fs_initcall(tomoyo_interface_init); +#endif From 5d69d5a00f80488ddcb4dee7d1374a0709398178 Mon Sep 17 00:00:00 2001 From: Kimriver Liu Date: Fri, 13 Sep 2024 11:31:46 +0800 Subject: [PATCH 431/655] i2c: designware: fix controller is holding SCL low while ENABLE bit is disabled It was observed that issuing the ABORT bit (IC_ENABLE[1]) will not work when IC_ENABLE is already disabled. Check if the ENABLE bit (IC_ENABLE[0]) is disabled when the controller is holding SCL low. If the ENABLE bit is disabled, the software needs to enable it before trying to issue the ABORT bit. otherwise, the controller ignores any write to ABORT bit. These kernel logs show up whenever an I2C transaction is attempted after this failure. i2c_designware e95e0000.i2c: timeout waiting for bus ready i2c_designware e95e0000.i2c: timeout in disabling adapter The patch fixes the issue where the controller cannot be disabled while SCL is held low if the ENABLE bit is already disabled. Fixes: 2409205acd3c ("i2c: designware: fix __i2c_dw_disable() in case master is holding SCL low") Signed-off-by: Kimriver Liu Cc: # v6.6+ Reviewed-by: Mika Westerberg Acked-by: Jarkko Nikula Reviewed-by: Andy Shevchenko Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-designware-common.c | 14 ++++++++ drivers/i2c/busses/i2c-designware-core.h | 1 + drivers/i2c/busses/i2c-designware-master.c | 38 ++++++++++++++++++++++ 3 files changed, 53 insertions(+) diff --git a/drivers/i2c/busses/i2c-designware-common.c b/drivers/i2c/busses/i2c-designware-common.c index 080204182bb5..f31d352d98b5 100644 --- a/drivers/i2c/busses/i2c-designware-common.c +++ b/drivers/i2c/busses/i2c-designware-common.c @@ -523,6 +523,7 @@ int i2c_dw_set_sda_hold(struct dw_i2c_dev *dev) void __i2c_dw_disable(struct dw_i2c_dev *dev) { + struct i2c_timings *t = &dev->timings; unsigned int raw_intr_stats; unsigned int enable; int timeout = 100; @@ -535,6 +536,19 @@ void __i2c_dw_disable(struct dw_i2c_dev *dev) abort_needed = raw_intr_stats & DW_IC_INTR_MST_ON_HOLD; if (abort_needed) { + if (!(enable & DW_IC_ENABLE_ENABLE)) { + regmap_write(dev->map, DW_IC_ENABLE, DW_IC_ENABLE_ENABLE); + /* + * Wait 10 times the signaling period of the highest I2C + * transfer supported by the driver (for 400KHz this is + * 25us) to ensure the I2C ENABLE bit is already set + * as described in the DesignWare I2C databook. + */ + fsleep(DIV_ROUND_CLOSEST_ULL(10 * MICRO, t->bus_freq_hz)); + /* Set ENABLE bit before setting ABORT */ + enable |= DW_IC_ENABLE_ENABLE; + } + regmap_write(dev->map, DW_IC_ENABLE, enable | DW_IC_ENABLE_ABORT); ret = regmap_read_poll_timeout(dev->map, DW_IC_ENABLE, enable, !(enable & DW_IC_ENABLE_ABORT), 10, diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h index 1ac2afd03a0a..8e8854ec9882 100644 --- a/drivers/i2c/busses/i2c-designware-core.h +++ b/drivers/i2c/busses/i2c-designware-core.h @@ -108,6 +108,7 @@ DW_IC_INTR_RX_UNDER | \ DW_IC_INTR_RD_REQ) +#define DW_IC_ENABLE_ENABLE BIT(0) #define DW_IC_ENABLE_ABORT BIT(1) #define DW_IC_STATUS_ACTIVITY BIT(0) diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c index e46f1b22c360..e8ac9a7bf0b3 100644 --- a/drivers/i2c/busses/i2c-designware-master.c +++ b/drivers/i2c/busses/i2c-designware-master.c @@ -271,6 +271,34 @@ static void i2c_dw_xfer_init(struct dw_i2c_dev *dev) __i2c_dw_write_intr_mask(dev, DW_IC_INTR_MASTER_MASK); } +/* + * This function waits for the controller to be idle before disabling I2C + * When the controller is not in the IDLE state, the MST_ACTIVITY bit + * (IC_STATUS[5]) is set. + * + * Values: + * 0x1 (ACTIVE): Controller not idle + * 0x0 (IDLE): Controller is idle + * + * The function is called after completing the current transfer. + * + * Returns: + * False when the controller is in the IDLE state. + * True when the controller is in the ACTIVE state. + */ +static bool i2c_dw_is_controller_active(struct dw_i2c_dev *dev) +{ + u32 status; + + regmap_read(dev->map, DW_IC_STATUS, &status); + if (!(status & DW_IC_STATUS_MASTER_ACTIVITY)) + return false; + + return regmap_read_poll_timeout(dev->map, DW_IC_STATUS, status, + !(status & DW_IC_STATUS_MASTER_ACTIVITY), + 1100, 20000) != 0; +} + static int i2c_dw_check_stopbit(struct dw_i2c_dev *dev) { u32 val; @@ -806,6 +834,16 @@ i2c_dw_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) goto done; } + /* + * This happens rarely (~1:500) and is hard to reproduce. Debug trace + * showed that IC_STATUS had value of 0x23 when STOP_DET occurred, + * if disable IC_ENABLE.ENABLE immediately that can result in + * IC_RAW_INTR_STAT.MASTER_ON_HOLD holding SCL low. Check if + * controller is still ACTIVE before disabling I2C. + */ + if (i2c_dw_is_controller_active(dev)) + dev_err(dev->dev, "controller active\n"); + /* * We must disable the adapter before returning and signaling the end * of the current transfer. Otherwise the hardware might continue From f2990f8630531a99cad4dc5c44cb2a11ded42492 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 12 Sep 2024 12:46:31 +0200 Subject: [PATCH 432/655] i2c: synquacer: Deal with optional PCLK correctly ACPI boot does not provide clocks and regulators, but instead, provides the PCLK rate directly, and enables the clock in firmware. So deal gracefully with this. Fixes: 55750148e559 ("i2c: synquacer: Fix an error handling path in synquacer_i2c_probe()") Cc: stable@vger.kernel.org # v6.10+ Cc: Andi Shyti Cc: Christophe JAILLET Signed-off-by: Ard Biesheuvel Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-synquacer.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-synquacer.c b/drivers/i2c/busses/i2c-synquacer.c index 4eccbcd0fbfc..bbb9062669e4 100644 --- a/drivers/i2c/busses/i2c-synquacer.c +++ b/drivers/i2c/busses/i2c-synquacer.c @@ -550,12 +550,13 @@ static int synquacer_i2c_probe(struct platform_device *pdev) device_property_read_u32(&pdev->dev, "socionext,pclk-rate", &i2c->pclkrate); - pclk = devm_clk_get_enabled(&pdev->dev, "pclk"); + pclk = devm_clk_get_optional_enabled(&pdev->dev, "pclk"); if (IS_ERR(pclk)) return dev_err_probe(&pdev->dev, PTR_ERR(pclk), "failed to get and enable clock\n"); - i2c->pclkrate = clk_get_rate(pclk); + if (pclk) + i2c->pclkrate = clk_get_rate(pclk); if (i2c->pclkrate < SYNQUACER_I2C_MIN_CLK_RATE || i2c->pclkrate > SYNQUACER_I2C_MAX_CLK_RATE) From c085f6ca956f75d40422db96eaa6298867db8dca Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Mon, 29 Jul 2024 16:02:02 +0800 Subject: [PATCH 433/655] ceph: rename ceph_flush_cap_releases() to ceph_flush_session_cap_releases() Prepare for adding a helper to flush the cap releases for all sessions. Signed-off-by: Xiubo Li Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 2 +- fs/ceph/mds_client.c | 10 +++++----- fs/ceph/mds_client.h | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 6561a6cd94de..888bf9a290a8 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -4603,7 +4603,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, __ceph_queue_cap_release(session, cap); spin_unlock(&session->s_cap_lock); } - ceph_flush_cap_releases(mdsc, session); + ceph_flush_session_cap_releases(mdsc, session); goto done; bad: diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 276e34ab3e2c..36c5886b5aac 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2266,7 +2266,7 @@ int ceph_trim_caps(struct ceph_mds_client *mdsc, trim_caps - remaining); } - ceph_flush_cap_releases(mdsc, session); + ceph_flush_session_cap_releases(mdsc, session); return 0; } @@ -2420,7 +2420,7 @@ static void ceph_cap_release_work(struct work_struct *work) ceph_put_mds_session(session); } -void ceph_flush_cap_releases(struct ceph_mds_client *mdsc, +void ceph_flush_session_cap_releases(struct ceph_mds_client *mdsc, struct ceph_mds_session *session) { struct ceph_client *cl = mdsc->fsc->client; @@ -2447,7 +2447,7 @@ void __ceph_queue_cap_release(struct ceph_mds_session *session, session->s_num_cap_releases++; if (!(session->s_num_cap_releases % CEPH_CAPS_PER_RELEASE)) - ceph_flush_cap_releases(session->s_mdsc, session); + ceph_flush_session_cap_releases(session->s_mdsc, session); } static void ceph_cap_reclaim_work(struct work_struct *work) @@ -4340,7 +4340,7 @@ static void handle_session(struct ceph_mds_session *session, /* flush cap releases */ spin_lock(&session->s_cap_lock); if (session->s_num_cap_releases) - ceph_flush_cap_releases(mdsc, session); + ceph_flush_session_cap_releases(mdsc, session); spin_unlock(&session->s_cap_lock); send_flushmsg_ack(mdsc, session, seq); @@ -5446,7 +5446,7 @@ static void delayed_work(struct work_struct *work) } mutex_unlock(&mdsc->mutex); - ceph_flush_cap_releases(mdsc, s); + ceph_flush_session_cap_releases(mdsc, s); mutex_lock(&s->s_mutex); if (renew_caps) diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 585ab5a6d87d..3dd54587944a 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -599,8 +599,8 @@ extern void ceph_mdsc_iterate_sessions(struct ceph_mds_client *mdsc, extern struct ceph_msg *ceph_create_session_msg(u32 op, u64 seq); extern void __ceph_queue_cap_release(struct ceph_mds_session *session, struct ceph_cap *cap); -extern void ceph_flush_cap_releases(struct ceph_mds_client *mdsc, - struct ceph_mds_session *session); +extern void ceph_flush_session_cap_releases(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session); extern void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc); extern void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr); extern void ceph_queue_cap_unlink_work(struct ceph_mds_client *mdsc); From adc52461767f675264f2876d61e7220c113023e8 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Mon, 29 Jul 2024 16:04:11 +0800 Subject: [PATCH 434/655] ceph: flush all caps releases when syncing the whole filesystem We have hit a race between cap releases and cap revoke request that will cause the check_caps() to miss sending a cap revoke ack to MDS. And the client will depend on the cap release to release that revoking caps, which could be delayed for some unknown reasons. In Kclient we have figured out the RCA about race and we need a way to explictly trigger this manually could help to get rid of the caps revoke stuck issue. Link: https://tracker.ceph.com/issues/67221 Signed-off-by: Xiubo Li Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 22 ++++++++++++++++++++++ fs/ceph/mds_client.c | 1 + fs/ceph/super.c | 1 + fs/ceph/super.h | 1 + 4 files changed, 25 insertions(+) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 888bf9a290a8..329516b1eaff 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -4702,6 +4702,28 @@ void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc) ceph_mdsc_iterate_sessions(mdsc, flush_dirty_session_caps, true); } +/* + * Flush all cap releases to the mds + */ +static void flush_cap_releases(struct ceph_mds_session *s) +{ + struct ceph_mds_client *mdsc = s->s_mdsc; + struct ceph_client *cl = mdsc->fsc->client; + + doutc(cl, "begin\n"); + spin_lock(&s->s_cap_lock); + if (s->s_num_cap_releases) + ceph_flush_session_cap_releases(mdsc, s); + spin_unlock(&s->s_cap_lock); + doutc(cl, "done\n"); + +} + +void ceph_flush_cap_releases(struct ceph_mds_client *mdsc) +{ + ceph_mdsc_iterate_sessions(mdsc, flush_cap_releases, true); +} + void __ceph_touch_fmode(struct ceph_inode_info *ci, struct ceph_mds_client *mdsc, int fmode) { diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 36c5886b5aac..9682c6ae3866 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -5877,6 +5877,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) mutex_unlock(&mdsc->mutex); ceph_flush_dirty_caps(mdsc); + ceph_flush_cap_releases(mdsc); spin_lock(&mdsc->cap_dirty_lock); want_flush = mdsc->last_cap_flush_tid; if (!list_empty(&mdsc->cap_flush_list)) { diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 0cdf84cd1791..73f321b52895 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -126,6 +126,7 @@ static int ceph_sync_fs(struct super_block *sb, int wait) if (!wait) { doutc(cl, "(non-blocking)\n"); ceph_flush_dirty_caps(fsc->mdsc); + ceph_flush_cap_releases(fsc->mdsc); doutc(cl, "(non-blocking) done\n"); return 0; } diff --git a/fs/ceph/super.h b/fs/ceph/super.h index c88bf53f68e9..0020746622fd 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -1268,6 +1268,7 @@ extern bool __ceph_should_report_size(struct ceph_inode_info *ci); extern void ceph_check_caps(struct ceph_inode_info *ci, int flags); extern unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc); extern void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc); +extern void ceph_flush_cap_releases(struct ceph_mds_client *mdsc); extern int ceph_drop_caps_for_unlink(struct inode *inode); extern int ceph_encode_inode_release(void **p, struct inode *inode, int mds, int drop, int unless, int force); From d97079e97eab20e08afc507f2bed4501e2824717 Mon Sep 17 00:00:00 2001 From: "Luis Henriques (SUSE)" Date: Mon, 19 Aug 2024 10:52:17 +0100 Subject: [PATCH 435/655] ceph: fix a memory leak on cap_auths in MDS client The cap_auths that are allocated during an MDS session opening are never released, causing a memory leak detected by kmemleak. Fix this by freeing the memory allocated when shutting down the MDS client. Fixes: 1d17de9534cb ("ceph: save cap_auths in MDS client when session is opened") Signed-off-by: Luis Henriques (SUSE) Reviewed-by: Xiubo Li Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 9682c6ae3866..59eb13aabc81 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -6016,6 +6016,18 @@ static void ceph_mdsc_stop(struct ceph_mds_client *mdsc) ceph_mdsmap_destroy(mdsc->mdsmap); kfree(mdsc->sessions); ceph_caps_finalize(mdsc); + + if (mdsc->s_cap_auths) { + int i; + + for (i = 0; i < mdsc->s_cap_auths_num; i++) { + kfree(mdsc->s_cap_auths[i].match.gids); + kfree(mdsc->s_cap_auths[i].match.path); + kfree(mdsc->s_cap_auths[i].match.fs_name); + } + kfree(mdsc->s_cap_auths); + } + ceph_pool_perm_destroy(mdsc); } From 0039aebfe87129fae1e3567cb6de7a99dbb3ba28 Mon Sep 17 00:00:00 2001 From: Yan Zhen Date: Thu, 5 Sep 2024 19:32:27 +0800 Subject: [PATCH 436/655] ceph: Fix typo in the comment Correctly spelled comments make it easier for the reader to understand the code. replace 'tagert' with 'target' in the comment & replace 'vaild' with 'valid' in the comment & replace 'carefull' with 'careful' in the comment & replace 'trsaverse' with 'traverse' in the comment. Signed-off-by: Yan Zhen Reviewed-by: Xiubo Li Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 2 +- fs/ceph/dir.c | 2 +- fs/ceph/inode.c | 2 +- fs/ceph/mds_client.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 329516b1eaff..bed34fc11c91 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -4150,7 +4150,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, ceph_remove_cap(mdsc, cap, false); goto out_unlock; } else if (tsession) { - /* add placeholder for the export tagert */ + /* add placeholder for the export target */ int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0; tcap = new_cap; ceph_add_cap(inode, tsession, t_cap_id, issued, 0, diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 18c72b305858..e23f8a40f3e3 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -2059,7 +2059,7 @@ static int ceph_d_delete(const struct dentry *dentry) return 0; if (ceph_snap(d_inode(dentry)) != CEPH_NOSNAP) return 0; - /* vaild lease? */ + /* valid lease? */ di = ceph_dentry(dentry); if (di) { if (__dentry_lease_is_valid(di)) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 71cd70514efa..5bf2bcf38d8c 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1778,7 +1778,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) if (err < 0) goto done; } else if (rinfo->head->is_dentry && req->r_dentry) { - /* parent inode is not locked, be carefull */ + /* parent inode is not locked, be careful */ struct ceph_vino *ptvino = NULL; dvino.ino = le64_to_cpu(rinfo->diri.in->ino); dvino.snap = le64_to_cpu(rinfo->diri.in->snapid); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 59eb13aabc81..c4a5fd94bbbb 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -4910,7 +4910,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, } else { recon_state.msg_version = 2; } - /* trsaverse this session's caps */ + /* traverse this session's caps */ err = ceph_iterate_session_caps(session, reconnect_caps_cb, &recon_state); spin_lock(&session->s_cap_lock); From 74249188f31827cf1eeeee8e06474c2fbe2fc1d2 Mon Sep 17 00:00:00 2001 From: Zhang Zekun Date: Fri, 6 Sep 2024 14:01:34 +0800 Subject: [PATCH 437/655] ceph: Remove empty definition in header file The real definition of ceph_acl_chmod() has been removed since commit 4db658ea0ca2 ("ceph: Fix up after semantic merge conflict"), remain the empty definition untouched in the header files. Let's remove the empty definition. Signed-off-by: Zhang Zekun Reviewed-by: Xiubo Li Signed-off-by: Ilya Dryomov --- fs/ceph/super.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 0020746622fd..2508aa8950b7 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -1206,10 +1206,6 @@ static inline void ceph_init_inode_acls(struct inode *inode, struct ceph_acl_sec_ctx *as_ctx) { } -static inline int ceph_acl_chmod(struct dentry *dentry, struct inode *inode) -{ - return 0; -} static inline void ceph_forget_all_cached_acls(struct inode *inode) { From c08dfb1b49492c09cf13838c71897493ea3b424e Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 5 Sep 2024 06:22:18 +0800 Subject: [PATCH 438/655] ceph: remove the incorrect Fw reference check when dirtying pages When doing the direct-io reads it will also try to mark pages dirty, but for the read path it won't hold the Fw caps and there is case will it get the Fw reference. Fixes: 5dda377cf0a6 ("ceph: set i_head_snapc when getting CEPH_CAP_FILE_WR reference") Signed-off-by: Xiubo Li Reviewed-by: Patrick Donnelly Signed-off-by: Ilya Dryomov --- fs/ceph/addr.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index c4744a02db75..0df4623785dd 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -95,7 +95,6 @@ static bool ceph_dirty_folio(struct address_space *mapping, struct folio *folio) /* dirty the head */ spin_lock(&ci->i_ceph_lock); - BUG_ON(ci->i_wr_ref == 0); // caller should hold Fw reference if (__ceph_have_pending_cap_snap(ci)) { struct ceph_cap_snap *capsnap = list_last_entry(&ci->i_cap_snaps, From 42268ad0eb4142245ea40ab01a5690a40e9c3b41 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 24 Sep 2024 11:10:07 -1000 Subject: [PATCH 439/655] sched_ext: Build fix for !CONFIG_SMP move_remote_task_to_local_dsq() is only defined on SMP configs but scx_disaptch_from_dsq() was calling move_remote_task_to_local_dsq() on UP configs too causing build failures. Add a dummy move_remote_task_to_local_dsq() which triggers a warning. Signed-off-by: Tejun Heo Fixes: 4c30f5ce4f7a ("sched_ext: Implement scx_bpf_dispatch[_vtime]_from_dsq()") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202409241108.jaocHiDJ-lkp@intel.com/ --- kernel/sched/ext.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index c09e3dc38c34..d74d1fe06999 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -2357,6 +2357,7 @@ static bool consume_remote_task(struct rq *this_rq, struct task_struct *p, } } #else /* CONFIG_SMP */ +static inline void move_remote_task_to_local_dsq(struct task_struct *p, u64 enq_flags, struct rq *src_rq, struct rq *dst_rq) { WARN_ON_ONCE(1); } static inline bool task_can_run_on_remote_rq(struct task_struct *p, struct rq *rq, bool trigger_error) { return false; } static inline bool consume_remote_task(struct rq *this_rq, struct task_struct *p, struct scx_dispatch_q *dsq, struct rq *task_rq) { return false; } #endif /* CONFIG_SMP */ From 88801d043b1d16caae76a5e2e5991e8b1f55ce7f Mon Sep 17 00:00:00 2001 From: Jiqian Chen Date: Tue, 24 Sep 2024 14:14:35 +0800 Subject: [PATCH 440/655] xen/pci: Add a function to reset device for xen When device on dom0 side has been reset, the vpci on Xen side won't get notification, so that the cached state in vpci is all out of date with the real device state. To solve that problem, add a new function to clear all vpci device state when device is reset on dom0 side. And call that function in pcistub_init_device. Because when using "pci-assignable-add" to assign a passthrough device in Xen, it will reset passthrough device and the vpci state will out of date, and then device will fail to restore bar state. Signed-off-by: Jiqian Chen Signed-off-by: Huang Rui Signed-off-by: Jiqian Chen Reviewed-by: Stefano Stabellini Message-ID: <20240924061437.2636766-2-Jiqian.Chen@amd.com> Signed-off-by: Juergen Gross --- drivers/xen/pci.c | 13 +++++++++++++ drivers/xen/xen-pciback/pci_stub.c | 18 +++++++++++++++--- include/xen/interface/physdev.h | 17 +++++++++++++++++ include/xen/pci.h | 6 ++++++ 4 files changed, 51 insertions(+), 3 deletions(-) diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c index a2facd8f7e51..416f231809cb 100644 --- a/drivers/xen/pci.c +++ b/drivers/xen/pci.c @@ -173,6 +173,19 @@ static int xen_remove_device(struct device *dev) return r; } +int xen_reset_device(const struct pci_dev *dev) +{ + struct pci_device_reset device = { + .dev.seg = pci_domain_nr(dev->bus), + .dev.bus = dev->bus->number, + .dev.devfn = dev->devfn, + .flags = PCI_DEVICE_RESET_FLR, + }; + + return HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_reset, &device); +} +EXPORT_SYMBOL_GPL(xen_reset_device); + static int xen_pci_notifier(struct notifier_block *nb, unsigned long action, void *data) { diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index 4faebbb84999..3e162c1753e2 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -89,6 +89,16 @@ static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev) return psdev; } +static int pcistub_reset_device_state(struct pci_dev *dev) +{ + __pci_reset_function_locked(dev); + + if (!xen_pv_domain()) + return xen_reset_device(dev); + else + return 0; +} + /* Don't call this directly as it's called by pcistub_device_put */ static void pcistub_device_release(struct kref *kref) { @@ -107,7 +117,7 @@ static void pcistub_device_release(struct kref *kref) /* Call the reset function which does not take lock as this * is called from "unbind" which takes a device_lock mutex. */ - __pci_reset_function_locked(dev); + pcistub_reset_device_state(dev); if (dev_data && pci_load_and_free_saved_state(dev, &dev_data->pci_saved_state)) dev_info(&dev->dev, "Could not reload PCI state\n"); @@ -284,7 +294,7 @@ void pcistub_put_pci_dev(struct pci_dev *dev) * (so it's ready for the next domain) */ device_lock_assert(&dev->dev); - __pci_reset_function_locked(dev); + pcistub_reset_device_state(dev); dev_data = pci_get_drvdata(dev); ret = pci_load_saved_state(dev, dev_data->pci_saved_state); @@ -420,7 +430,9 @@ static int pcistub_init_device(struct pci_dev *dev) dev_err(&dev->dev, "Could not store PCI conf saved state!\n"); else { dev_dbg(&dev->dev, "resetting (FLR, D3, etc) the device\n"); - __pci_reset_function_locked(dev); + err = pcistub_reset_device_state(dev); + if (err) + goto config_release; pci_restore_state(dev); } /* Now disable the device (this also ensures some private device diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h index a237af867873..df74e65a884b 100644 --- a/include/xen/interface/physdev.h +++ b/include/xen/interface/physdev.h @@ -256,6 +256,13 @@ struct physdev_pci_device_add { */ #define PHYSDEVOP_prepare_msix 30 #define PHYSDEVOP_release_msix 31 +/* + * Notify the hypervisor that a PCI device has been reset, so that any + * internally cached state is regenerated. Should be called after any + * device reset performed by the hardware domain. + */ +#define PHYSDEVOP_pci_device_reset 32 + struct physdev_pci_device { /* IN */ uint16_t seg; @@ -263,6 +270,16 @@ struct physdev_pci_device { uint8_t devfn; }; +struct pci_device_reset { + struct physdev_pci_device dev; +#define PCI_DEVICE_RESET_COLD 0x0 +#define PCI_DEVICE_RESET_WARM 0x1 +#define PCI_DEVICE_RESET_HOT 0x2 +#define PCI_DEVICE_RESET_FLR 0x3 +#define PCI_DEVICE_RESET_MASK 0x3 + uint32_t flags; +}; + #define PHYSDEVOP_DBGP_RESET_PREPARE 1 #define PHYSDEVOP_DBGP_RESET_DONE 2 diff --git a/include/xen/pci.h b/include/xen/pci.h index b8337cf85fd1..424b8ea89ca8 100644 --- a/include/xen/pci.h +++ b/include/xen/pci.h @@ -4,10 +4,16 @@ #define __XEN_PCI_H__ #if defined(CONFIG_XEN_DOM0) +int xen_reset_device(const struct pci_dev *dev); int xen_find_device_domain_owner(struct pci_dev *dev); int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain); int xen_unregister_device_domain_owner(struct pci_dev *dev); #else +static inline int xen_reset_device(const struct pci_dev *dev) +{ + return -1; +} + static inline int xen_find_device_domain_owner(struct pci_dev *dev) { return -1; From b166b8ab4189743a717cb93f50d6fcca3a46770d Mon Sep 17 00:00:00 2001 From: Jiqian Chen Date: Tue, 24 Sep 2024 14:14:36 +0800 Subject: [PATCH 441/655] xen/pvh: Setup gsi for passthrough device In PVH dom0, the gsis don't get registered, but the gsi of a passthrough device must be configured for it to be able to be mapped into a domU. When assigning a device to passthrough, proactively setup the gsi of the device during that process. Signed-off-by: Jiqian Chen Signed-off-by: Huang Rui Signed-off-by: Jiqian Chen Reviewed-by: Stefano Stabellini Message-ID: <20240924061437.2636766-3-Jiqian.Chen@amd.com> Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten_pvh.c | 23 ++++++++++++++ drivers/acpi/pci_irq.c | 2 +- drivers/xen/acpi.c | 50 ++++++++++++++++++++++++++++++ drivers/xen/xen-pciback/pci_stub.c | 20 ++++++++++++ include/linux/acpi.h | 1 + include/xen/acpi.h | 18 +++++++++++ 6 files changed, 113 insertions(+), 1 deletion(-) diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c index 728a4366ca85..bf68c329fc01 100644 --- a/arch/x86/xen/enlighten_pvh.c +++ b/arch/x86/xen/enlighten_pvh.c @@ -4,6 +4,7 @@ #include #include +#include #include #include @@ -28,6 +29,28 @@ bool __ro_after_init xen_pvh; EXPORT_SYMBOL_GPL(xen_pvh); +#ifdef CONFIG_XEN_DOM0 +int xen_pvh_setup_gsi(int gsi, int trigger, int polarity) +{ + int ret; + struct physdev_setup_gsi setup_gsi; + + setup_gsi.gsi = gsi; + setup_gsi.triggering = (trigger == ACPI_EDGE_SENSITIVE ? 0 : 1); + setup_gsi.polarity = (polarity == ACPI_ACTIVE_HIGH ? 0 : 1); + + ret = HYPERVISOR_physdev_op(PHYSDEVOP_setup_gsi, &setup_gsi); + if (ret == -EEXIST) { + xen_raw_printk("Already setup the GSI :%d\n", gsi); + ret = 0; + } else if (ret) + xen_raw_printk("Fail to setup GSI (%d)!\n", gsi); + + return ret; +} +EXPORT_SYMBOL_GPL(xen_pvh_setup_gsi); +#endif + /* * Reserve e820 UNUSABLE regions to inflate the memory balloon. * diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c index ff30ceca2203..630fe0a34bc6 100644 --- a/drivers/acpi/pci_irq.c +++ b/drivers/acpi/pci_irq.c @@ -288,7 +288,7 @@ static int acpi_reroute_boot_interrupt(struct pci_dev *dev, } #endif /* CONFIG_X86_IO_APIC */ -static struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin) +struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin) { struct acpi_prt_entry *entry = NULL; struct pci_dev *bridge; diff --git a/drivers/xen/acpi.c b/drivers/xen/acpi.c index 6893c79fd2a1..9e2096524fbc 100644 --- a/drivers/xen/acpi.c +++ b/drivers/xen/acpi.c @@ -30,6 +30,7 @@ * IN THE SOFTWARE. */ +#include #include #include #include @@ -75,3 +76,52 @@ int xen_acpi_notify_hypervisor_extended_sleep(u8 sleep_state, return xen_acpi_notify_hypervisor_state(sleep_state, val_a, val_b, true); } + +struct acpi_prt_entry { + struct acpi_pci_id id; + u8 pin; + acpi_handle link; + u32 index; +}; + +int xen_acpi_get_gsi_info(struct pci_dev *dev, + int *gsi_out, + int *trigger_out, + int *polarity_out) +{ + int gsi; + u8 pin; + struct acpi_prt_entry *entry; + int trigger = ACPI_LEVEL_SENSITIVE; + int polarity = acpi_irq_model == ACPI_IRQ_MODEL_GIC ? + ACPI_ACTIVE_HIGH : ACPI_ACTIVE_LOW; + + if (!dev || !gsi_out || !trigger_out || !polarity_out) + return -EINVAL; + + pin = dev->pin; + if (!pin) + return -EINVAL; + + entry = acpi_pci_irq_lookup(dev, pin); + if (entry) { + if (entry->link) + gsi = acpi_pci_link_allocate_irq(entry->link, + entry->index, + &trigger, &polarity, + NULL); + else + gsi = entry->index; + } else + gsi = -1; + + if (gsi < 0) + return -EINVAL; + + *gsi_out = gsi; + *trigger_out = trigger; + *polarity_out = polarity; + + return 0; +} +EXPORT_SYMBOL_GPL(xen_acpi_get_gsi_info); diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index 3e162c1753e2..8ce27333f54b 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -21,6 +21,9 @@ #include #include #include +#ifdef CONFIG_XEN_ACPI +#include +#endif #include #include #include "pciback.h" @@ -367,6 +370,9 @@ static int pcistub_match(struct pci_dev *dev) static int pcistub_init_device(struct pci_dev *dev) { struct xen_pcibk_dev_data *dev_data; +#ifdef CONFIG_XEN_ACPI + int gsi, trigger, polarity; +#endif int err = 0; dev_dbg(&dev->dev, "initializing...\n"); @@ -435,6 +441,20 @@ static int pcistub_init_device(struct pci_dev *dev) goto config_release; pci_restore_state(dev); } + +#ifdef CONFIG_XEN_ACPI + if (xen_initial_domain() && xen_pvh_domain()) { + err = xen_acpi_get_gsi_info(dev, &gsi, &trigger, &polarity); + if (err) { + dev_err(&dev->dev, "Fail to get gsi info!\n"); + goto config_release; + } + err = xen_pvh_setup_gsi(gsi, trigger, polarity); + if (err) + goto config_release; + } +#endif + /* Now disable the device (this also ensures some private device * data is setup before we export) */ diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 0687a442fec7..02ded9f53a6b 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -362,6 +362,7 @@ void acpi_unregister_gsi (u32 gsi); struct pci_dev; +struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin); int acpi_pci_irq_enable (struct pci_dev *dev); void acpi_penalize_isa_irq(int irq, int active); bool acpi_isa_irq_available(int irq); diff --git a/include/xen/acpi.h b/include/xen/acpi.h index b1e11863144d..3bcfe82d9078 100644 --- a/include/xen/acpi.h +++ b/include/xen/acpi.h @@ -67,10 +67,28 @@ static inline void xen_acpi_sleep_register(void) acpi_suspend_lowlevel = xen_acpi_suspend_lowlevel; } } +int xen_pvh_setup_gsi(int gsi, int trigger, int polarity); +int xen_acpi_get_gsi_info(struct pci_dev *dev, + int *gsi_out, + int *trigger_out, + int *polarity_out); #else static inline void xen_acpi_sleep_register(void) { } + +static inline int xen_pvh_setup_gsi(int gsi, int trigger, int polarity) +{ + return -1; +} + +static inline int xen_acpi_get_gsi_info(struct pci_dev *dev, + int *gsi_out, + int *trigger_out, + int *polarity_out) +{ + return -1; +} #endif #endif /* _XEN_ACPI_H */ From 2fae6bb7be320270801b3c3b040189bd7daa8056 Mon Sep 17 00:00:00 2001 From: Jiqian Chen Date: Tue, 24 Sep 2024 14:14:37 +0800 Subject: [PATCH 442/655] xen/privcmd: Add new syscall to get gsi from dev On PVH dom0, when passthrough a device to domU, QEMU and xl tools want to use gsi number to do pirq mapping, see QEMU code xen_pt_realize->xc_physdev_map_pirq, and xl code pci_add_dm_done->xc_physdev_map_pirq, but in current codes, the gsi number is got from file /sys/bus/pci/devices//irq, that is wrong, because irq is not equal with gsi, they are in different spaces, so pirq mapping fails. And in current linux codes, there is no method to get gsi for userspace. For above purpose, record gsi of pcistub devices when init pcistub and add a new syscall into privcmd to let userspace can get gsi when they have a need. Signed-off-by: Jiqian Chen Signed-off-by: Huang Rui Signed-off-by: Jiqian Chen Reviewed-by: Stefano Stabellini Message-ID: <20240924061437.2636766-4-Jiqian.Chen@amd.com> Signed-off-by: Juergen Gross --- drivers/xen/Kconfig | 1 + drivers/xen/privcmd.c | 32 +++++++++++++++++++++++++ drivers/xen/xen-pciback/pci_stub.c | 38 +++++++++++++++++++++++++++--- include/uapi/xen/privcmd.h | 7 ++++++ include/xen/acpi.h | 9 +++++++ 5 files changed, 84 insertions(+), 3 deletions(-) diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index d5989871dd5d..fd83d51df2f4 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -261,6 +261,7 @@ config XEN_SCSI_BACKEND config XEN_PRIVCMD tristate "Xen hypercall passthrough driver" depends on XEN + imply CONFIG_XEN_PCIDEV_BACKEND default m help The hypercall passthrough driver allows privileged user programs to diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 9563650dfbaf..588f99a2b8df 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -46,6 +46,9 @@ #include #include #include +#ifdef CONFIG_XEN_ACPI +#include +#endif #include "privcmd.h" @@ -844,6 +847,31 @@ static long privcmd_ioctl_mmap_resource(struct file *file, return rc; } +static long privcmd_ioctl_pcidev_get_gsi(struct file *file, void __user *udata) +{ +#if defined(CONFIG_XEN_ACPI) + int rc = -EINVAL; + struct privcmd_pcidev_get_gsi kdata; + + if (copy_from_user(&kdata, udata, sizeof(kdata))) + return -EFAULT; + + if (IS_REACHABLE(CONFIG_XEN_PCIDEV_BACKEND)) + rc = pcistub_get_gsi_from_sbdf(kdata.sbdf); + + if (rc < 0) + return rc; + + kdata.gsi = rc; + if (copy_to_user(udata, &kdata, sizeof(kdata))) + return -EFAULT; + + return 0; +#else + return -EINVAL; +#endif +} + #ifdef CONFIG_XEN_PRIVCMD_EVENTFD /* Irqfd support */ static struct workqueue_struct *irqfd_cleanup_wq; @@ -1543,6 +1571,10 @@ static long privcmd_ioctl(struct file *file, ret = privcmd_ioctl_ioeventfd(file, udata); break; + case IOCTL_PRIVCMD_PCIDEV_GET_GSI: + ret = privcmd_ioctl_pcidev_get_gsi(file, udata); + break; + default: break; } diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index 8ce27333f54b..d003402ce66b 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -56,6 +56,9 @@ struct pcistub_device { struct pci_dev *dev; struct xen_pcibk_device *pdev;/* non-NULL if struct pci_dev is in use */ +#ifdef CONFIG_XEN_ACPI + int gsi; +#endif }; /* Access to pcistub_devices & seized_devices lists and the initialize_devices @@ -88,6 +91,9 @@ static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev) kref_init(&psdev->kref); spin_lock_init(&psdev->lock); +#ifdef CONFIG_XEN_ACPI + psdev->gsi = -1; +#endif return psdev; } @@ -220,6 +226,25 @@ static struct pci_dev *pcistub_device_get_pci_dev(struct xen_pcibk_device *pdev, return pci_dev; } +#ifdef CONFIG_XEN_ACPI +int pcistub_get_gsi_from_sbdf(unsigned int sbdf) +{ + struct pcistub_device *psdev; + int domain = (sbdf >> 16) & 0xffff; + int bus = PCI_BUS_NUM(sbdf); + int slot = PCI_SLOT(sbdf); + int func = PCI_FUNC(sbdf); + + psdev = pcistub_device_find(domain, bus, slot, func); + + if (!psdev) + return -ENODEV; + + return psdev->gsi; +} +EXPORT_SYMBOL_GPL(pcistub_get_gsi_from_sbdf); +#endif + struct pci_dev *pcistub_get_pci_dev_by_slot(struct xen_pcibk_device *pdev, int domain, int bus, int slot, int func) @@ -367,14 +392,20 @@ static int pcistub_match(struct pci_dev *dev) return found; } -static int pcistub_init_device(struct pci_dev *dev) +static int pcistub_init_device(struct pcistub_device *psdev) { struct xen_pcibk_dev_data *dev_data; + struct pci_dev *dev; #ifdef CONFIG_XEN_ACPI int gsi, trigger, polarity; #endif int err = 0; + if (!psdev) + return -EINVAL; + + dev = psdev->dev; + dev_dbg(&dev->dev, "initializing...\n"); /* The PCI backend is not intended to be a module (or to work with @@ -452,6 +483,7 @@ static int pcistub_init_device(struct pci_dev *dev) err = xen_pvh_setup_gsi(gsi, trigger, polarity); if (err) goto config_release; + psdev->gsi = gsi; } #endif @@ -494,7 +526,7 @@ static int __init pcistub_init_devices_late(void) spin_unlock_irqrestore(&pcistub_devices_lock, flags); - err = pcistub_init_device(psdev->dev); + err = pcistub_init_device(psdev); if (err) { dev_err(&psdev->dev->dev, "error %d initializing device\n", err); @@ -564,7 +596,7 @@ static int pcistub_seize(struct pci_dev *dev, spin_unlock_irqrestore(&pcistub_devices_lock, flags); /* don't want irqs disabled when calling pcistub_init_device */ - err = pcistub_init_device(psdev->dev); + err = pcistub_init_device(psdev); spin_lock_irqsave(&pcistub_devices_lock, flags); diff --git a/include/uapi/xen/privcmd.h b/include/uapi/xen/privcmd.h index 8b8c5d1420fe..8e2c8fd44764 100644 --- a/include/uapi/xen/privcmd.h +++ b/include/uapi/xen/privcmd.h @@ -126,6 +126,11 @@ struct privcmd_ioeventfd { __u8 pad[2]; }; +struct privcmd_pcidev_get_gsi { + __u32 sbdf; + __u32 gsi; +}; + /* * @cmd: IOCTL_PRIVCMD_HYPERCALL * @arg: &privcmd_hypercall_t @@ -157,5 +162,7 @@ struct privcmd_ioeventfd { _IOW('P', 8, struct privcmd_irqfd) #define IOCTL_PRIVCMD_IOEVENTFD \ _IOW('P', 9, struct privcmd_ioeventfd) +#define IOCTL_PRIVCMD_PCIDEV_GET_GSI \ + _IOC(_IOC_NONE, 'P', 10, sizeof(struct privcmd_pcidev_get_gsi)) #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */ diff --git a/include/xen/acpi.h b/include/xen/acpi.h index 3bcfe82d9078..daa96a22d257 100644 --- a/include/xen/acpi.h +++ b/include/xen/acpi.h @@ -91,4 +91,13 @@ static inline int xen_acpi_get_gsi_info(struct pci_dev *dev, } #endif +#ifdef CONFIG_XEN_PCI_STUB +int pcistub_get_gsi_from_sbdf(unsigned int sbdf); +#else +static inline int pcistub_get_gsi_from_sbdf(unsigned int sbdf) +{ + return -1; +} +#endif + #endif /* _XEN_ACPI_H */ From e860513f56d8428fcb2bd0282ac8ab691a53fc6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 18 Sep 2024 22:04:39 +0300 Subject: [PATCH 443/655] drm/i915/dp: Fix colorimetry detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit intel_dp_init_connector() is no place for detecting stuff via DPCD (except perhaps for eDP). Move the colorimetry stuff into a more appropriate place. Cc: Jouni Högander Fixes: 00076671a648 ("drm/i915/display: Move colorimetry_support from intel_psr to intel_dp") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240918190441.29071-1-ville.syrjala@linux.intel.com Reviewed-by: Jouni Högander (cherry picked from commit 35dba4834bded843d5416e8caadfe82bd0ce1904) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_dp.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index b1b512e258ab..90fa73575feb 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -4067,6 +4067,9 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp, struct intel_connector *connector drm_dp_is_branch(intel_dp->dpcd)); intel_init_dpcd_quirks(intel_dp, &intel_dp->desc.ident); + intel_dp->colorimetry_support = + intel_dp_get_colorimetry_status(intel_dp); + /* * Read the eDP display control registers. * @@ -4180,6 +4183,9 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp) intel_init_dpcd_quirks(intel_dp, &intel_dp->desc.ident); + intel_dp->colorimetry_support = + intel_dp_get_colorimetry_status(intel_dp); + intel_dp_update_sink_caps(intel_dp); } @@ -6931,9 +6937,6 @@ intel_dp_init_connector(struct intel_digital_port *dig_port, "HDCP init failed, skipping.\n"); } - intel_dp->colorimetry_support = - intel_dp_get_colorimetry_status(intel_dp); - intel_dp->frl.is_trained = false; intel_dp->frl.trained_rate_gbps = 0; From e11daafdbf5b683a5da33a080862769b696b1621 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 25 Sep 2024 10:56:57 +0200 Subject: [PATCH 444/655] Revert "driver core: fix async device shutdown hang" This reverts commit 4f2c346e621624315e2a1405e98616a0c5ac146f. The series is being reverted before -rc1 as there are still reports of lockups on shutdown, so it's not quite ready for "prime time." Reported-by: Andrey Skvortsov Link: https://lore.kernel.org/r/ZvMkkhyJrohaajuk@skv.local Cc: Christoph Hellwig Cc: David Jeffery Cc: Keith Busch Cc: Laurence Oberman Cc: Nathan Chancellor Cc: Sagi Grimberg Cc: Stuart Hayes Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 76513e360496..b69b82da8837 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -4898,16 +4898,8 @@ void device_shutdown(void) idx = device_links_read_lock(); list_for_each_entry_rcu(link, &dev->links.suppliers, c_node, - device_links_read_lock_held()) { - /* - * sync_state_only suppliers don't need to wait, - * aren't reordered on devices_kset, so making them - * wait could result in a hang - */ - if (device_link_flag_is_sync_state_only(link->flags)) - continue; + device_links_read_lock_held()) link->supplier->p->shutdown_after = cookie; - } device_links_read_unlock(idx); put_device(dev); From ec1fcbae1918084b5dea2e72cc6297c32f7792da Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 25 Sep 2024 10:56:59 +0200 Subject: [PATCH 445/655] Revert "nvme-pci: Make driver prefer asynchronous shutdown" This reverts commit ba82e10c3c6b5b5d2c8279a8bd0dae5c2abaacfc. The series is being reverted before -rc1 as there are still reports of lockups on shutdown, so it's not quite ready for "prime time." Reported-by: Andrey Skvortsov Link: https://lore.kernel.org/r/ZvMkkhyJrohaajuk@skv.local Cc: Christoph Hellwig Cc: David Jeffery Cc: Keith Busch Cc: Laurence Oberman Cc: Nathan Chancellor Cc: Sagi Grimberg Cc: Stuart Hayes Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/pci.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 58d0d517fead..6cd9395ba9ec 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3580,7 +3580,6 @@ static struct pci_driver nvme_driver = { .shutdown = nvme_shutdown, .driver = { .probe_type = PROBE_PREFER_ASYNCHRONOUS, - .async_shutdown_enable = true, #ifdef CONFIG_PM_SLEEP .pm = &nvme_dev_pm_ops, #endif From 2efddb5575cd9f5f4d61ad417c92365a5f18d2f1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 25 Sep 2024 10:57:00 +0200 Subject: [PATCH 446/655] Revert "driver core: shut down devices asynchronously" This reverts commit 8064952c65045f05ee2671fe437770e50c151776. The series is being reverted before -rc1 as there are still reports of lockups on shutdown, so it's not quite ready for "prime time." Reported-by: Andrey Skvortsov Link: https://lore.kernel.org/r/ZvMkkhyJrohaajuk@skv.local Cc: Christoph Hellwig Cc: David Jeffery Cc: Keith Busch Cc: Laurence Oberman Cc: Nathan Chancellor Cc: Sagi Grimberg Cc: Stuart Hayes Signed-off-by: Greg Kroah-Hartman --- drivers/base/base.h | 4 --- drivers/base/core.c | 54 +---------------------------------- include/linux/device/driver.h | 2 -- 3 files changed, 1 insertion(+), 59 deletions(-) diff --git a/drivers/base/base.h b/drivers/base/base.h index ea18aa70f151..8cf04a557bdb 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -10,7 +10,6 @@ * shared outside of the drivers/base/ directory. * */ -#include #include /** @@ -98,8 +97,6 @@ struct driver_private { * the device; typically because it depends on another driver getting * probed first. * @async_driver - pointer to device driver awaiting probe via async_probe - * @shutdown_after - used during device shutdown to ensure correct shutdown - * ordering. * @device - pointer back to the struct device that this structure is * associated with. * @dead - This device is currently either in the process of or has been @@ -117,7 +114,6 @@ struct device_private { struct list_head deferred_probe; const struct device_driver *async_driver; char *deferred_probe_reason; - async_cookie_t shutdown_after; struct device *device; u8 dead:1; }; diff --git a/drivers/base/core.c b/drivers/base/core.c index b69b82da8837..4482382fb947 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -9,7 +9,6 @@ */ #include -#include #include #include #include @@ -3525,7 +3524,6 @@ static int device_private_init(struct device *dev) klist_init(&dev->p->klist_children, klist_children_get, klist_children_put); INIT_LIST_HEAD(&dev->p->deferred_probe); - dev->p->shutdown_after = 0; return 0; } @@ -4781,8 +4779,6 @@ int device_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid) } EXPORT_SYMBOL_GPL(device_change_owner); -static ASYNC_DOMAIN(sd_domain); - static void shutdown_one_device(struct device *dev) { /* hold lock to avoid race with probe/release */ @@ -4818,34 +4814,12 @@ static void shutdown_one_device(struct device *dev) put_device(dev->parent); } -/** - * shutdown_one_device_async - * @data: the pointer to the struct device to be shutdown - * @cookie: not used - * - * Shuts down one device, after waiting for shutdown_after to complete. - * shutdown_after should be set to the cookie of the last child or consumer - * of this device to be shutdown (if any), or to the cookie of the previous - * device to be shut down for devices that don't enable asynchronous shutdown. - */ -static void shutdown_one_device_async(void *data, async_cookie_t cookie) -{ - struct device *dev = data; - - async_synchronize_cookie_domain(dev->p->shutdown_after + 1, &sd_domain); - - shutdown_one_device(dev); -} - /** * device_shutdown - call ->shutdown() on each device to shutdown. */ void device_shutdown(void) { struct device *dev, *parent; - async_cookie_t cookie = 0; - struct device_link *link; - int idx; wait_for_device_probe(); device_block_probing(); @@ -4876,37 +4850,11 @@ void device_shutdown(void) list_del_init(&dev->kobj.entry); spin_unlock(&devices_kset->list_lock); - - /* - * Set cookie for devices that will be shut down synchronously - */ - if (!dev->driver || !dev->driver->async_shutdown_enable) - dev->p->shutdown_after = cookie; - - get_device(dev); - get_device(parent); - - cookie = async_schedule_domain(shutdown_one_device_async, - dev, &sd_domain); - /* - * Ensure parent & suppliers wait for this device to shut down - */ - if (parent) { - parent->p->shutdown_after = cookie; - put_device(parent); - } - - idx = device_links_read_lock(); - list_for_each_entry_rcu(link, &dev->links.suppliers, c_node, - device_links_read_lock_held()) - link->supplier->p->shutdown_after = cookie; - device_links_read_unlock(idx); - put_device(dev); + shutdown_one_device(dev); spin_lock(&devices_kset->list_lock); } spin_unlock(&devices_kset->list_lock); - async_synchronize_full_domain(&sd_domain); } /* diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h index 14c9211b82d6..5c04b8e3833b 100644 --- a/include/linux/device/driver.h +++ b/include/linux/device/driver.h @@ -56,7 +56,6 @@ enum probe_type { * @mod_name: Used for built-in modules. * @suppress_bind_attrs: Disables bind/unbind via sysfs. * @probe_type: Type of the probe (synchronous or asynchronous) to use. - * @async_shutdown_enable: Enables devices to be shutdown asynchronously. * @of_match_table: The open firmware table. * @acpi_match_table: The ACPI match table. * @probe: Called to query the existence of a specific device, @@ -103,7 +102,6 @@ struct device_driver { bool suppress_bind_attrs; /* disables bind/unbind via sysfs */ enum probe_type probe_type; - bool async_shutdown_enable; const struct of_device_id *of_match_table; const struct acpi_device_id *acpi_match_table; From 56d16d44fe8d8012dabd32700ea143c7caa35ba3 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 25 Sep 2024 10:57:01 +0200 Subject: [PATCH 447/655] Revert "driver core: separate function to shutdown one device" This reverts commit 95dc7565253a8564911190ebd1e4ffceb4de208a. The series is being reverted before -rc1 as there are still reports of lockups on shutdown, so it's not quite ready for "prime time." Reported-by: Andrey Skvortsov Link: https://lore.kernel.org/r/ZvMkkhyJrohaajuk@skv.local Cc: Christoph Hellwig Cc: David Jeffery Cc: Keith Busch Cc: Laurence Oberman Cc: Nathan Chancellor Cc: Sagi Grimberg Cc: Stuart Hayes Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 66 +++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 36 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 4482382fb947..2bf9730db056 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -4779,41 +4779,6 @@ int device_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid) } EXPORT_SYMBOL_GPL(device_change_owner); -static void shutdown_one_device(struct device *dev) -{ - /* hold lock to avoid race with probe/release */ - if (dev->parent && dev->bus && dev->bus->need_parent_lock) - device_lock(dev->parent); - device_lock(dev); - - /* Don't allow any more runtime suspends */ - pm_runtime_get_noresume(dev); - pm_runtime_barrier(dev); - - if (dev->class && dev->class->shutdown_pre) { - if (initcall_debug) - dev_info(dev, "shutdown_pre\n"); - dev->class->shutdown_pre(dev); - } - if (dev->bus && dev->bus->shutdown) { - if (initcall_debug) - dev_info(dev, "shutdown\n"); - dev->bus->shutdown(dev); - } else if (dev->driver && dev->driver->shutdown) { - if (initcall_debug) - dev_info(dev, "shutdown\n"); - dev->driver->shutdown(dev); - } - - device_unlock(dev); - if (dev->parent && dev->bus && dev->bus->need_parent_lock) - device_unlock(dev->parent); - - put_device(dev); - if (dev->parent) - put_device(dev->parent); -} - /** * device_shutdown - call ->shutdown() on each device to shutdown. */ @@ -4850,7 +4815,36 @@ void device_shutdown(void) list_del_init(&dev->kobj.entry); spin_unlock(&devices_kset->list_lock); - shutdown_one_device(dev); + /* hold lock to avoid race with probe/release */ + if (parent && dev->bus && dev->bus->need_parent_lock) + device_lock(parent); + device_lock(dev); + + /* Don't allow any more runtime suspends */ + pm_runtime_get_noresume(dev); + pm_runtime_barrier(dev); + + if (dev->class && dev->class->shutdown_pre) { + if (initcall_debug) + dev_info(dev, "shutdown_pre\n"); + dev->class->shutdown_pre(dev); + } + if (dev->bus && dev->bus->shutdown) { + if (initcall_debug) + dev_info(dev, "shutdown\n"); + dev->bus->shutdown(dev); + } else if (dev->driver && dev->driver->shutdown) { + if (initcall_debug) + dev_info(dev, "shutdown\n"); + dev->driver->shutdown(dev); + } + + device_unlock(dev); + if (parent && dev->bus && dev->bus->need_parent_lock) + device_unlock(parent); + + put_device(dev); + put_device(parent); spin_lock(&devices_kset->list_lock); } From eb46cb321f1f3f3102f4ad3d61dd5c8c06cdbf17 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 25 Sep 2024 10:57:02 +0200 Subject: [PATCH 448/655] Revert "driver core: don't always lock parent in shutdown" This reverts commit ba6353748e71bd1d7e422fec2b5c2e2dfc2e3bd9. The series is being reverted before -rc1 as there are still reports of lockups on shutdown, so it's not quite ready for "prime time." Reported-by: Andrey Skvortsov Link: https://lore.kernel.org/r/ZvMkkhyJrohaajuk@skv.local Cc: Christoph Hellwig Cc: David Jeffery Cc: Keith Busch Cc: Laurence Oberman Cc: Nathan Chancellor Cc: Sagi Grimberg Cc: Stuart Hayes Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 2bf9730db056..a4c853411a6b 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -4816,7 +4816,7 @@ void device_shutdown(void) spin_unlock(&devices_kset->list_lock); /* hold lock to avoid race with probe/release */ - if (parent && dev->bus && dev->bus->need_parent_lock) + if (parent) device_lock(parent); device_lock(dev); @@ -4840,7 +4840,7 @@ void device_shutdown(void) } device_unlock(dev); - if (parent && dev->bus && dev->bus->need_parent_lock) + if (parent) device_unlock(parent); put_device(dev); From 8f2f74f2f3ebd9bb7159301cb7560db75d2e801a Mon Sep 17 00:00:00 2001 From: Min-Hua Chen Date: Wed, 18 Sep 2024 07:36:50 +0800 Subject: [PATCH 449/655] xen/pciback: fix cast to restricted pci_ers_result_t and pci_power_t This patch fix the following sparse warning by applying __force cast to pci_ers_result_t and pci_power_t. drivers/xen/xen-pciback/pci_stub.c:760:16: sparse: warning: cast to restricted pci_ers_result_t drivers/xen/xen-pciback/conf_space_capability.c:125:22: sparse: warning: cast to restricted pci_power_t No functional changes intended. Signed-off-by: Min-Hua Chen Reviewed-by: Juergen Gross Message-ID: <20240917233653.61630-1-minhuadotchen@gmail.com> Signed-off-by: Juergen Gross --- drivers/xen/xen-pciback/conf_space_capability.c | 2 +- drivers/xen/xen-pciback/pci_stub.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/xen/xen-pciback/conf_space_capability.c b/drivers/xen/xen-pciback/conf_space_capability.c index 1948a9700c8f..cf568e899ee2 100644 --- a/drivers/xen/xen-pciback/conf_space_capability.c +++ b/drivers/xen/xen-pciback/conf_space_capability.c @@ -122,7 +122,7 @@ static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value, if (err) goto out; - new_state = (pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK); + new_state = (__force pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK); new_value &= PM_OK_BITS; if ((old_value & PM_OK_BITS) != new_value) { diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index d003402ce66b..2f3da5ac62cd 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -821,7 +821,7 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev, } clear_bit(_PCIB_op_pending, (unsigned long *)&pdev->flags); - res = (pci_ers_result_t)aer_op->err; + res = (__force pci_ers_result_t)aer_op->err; return res; } From 08377ed24feef66866d0c6aabcae0aec515cf2ca Mon Sep 17 00:00:00 2001 From: Jason Andryuk Date: Fri, 23 Aug 2024 15:36:26 -0400 Subject: [PATCH 450/655] xen: sync elfnote.h from xen tree Sync Xen's elfnote.h header from xen.git to pull in the XEN_ELFNOTE_PHYS32_RELOC define. xen commit dfc9fab00378 ("x86/PVH: Support relocatable dom0 kernels") This is a copy except for the removal of the emacs editor config at the end of the file. Signed-off-by: Jason Andryuk Reviewed-by: Juergen Gross Message-ID: <20240823193630.2583107-2-jason.andryuk@amd.com> Signed-off-by: Juergen Gross --- include/xen/interface/elfnote.h | 93 +++++++++++++++++++++++++++++++-- 1 file changed, 88 insertions(+), 5 deletions(-) diff --git a/include/xen/interface/elfnote.h b/include/xen/interface/elfnote.h index 38deb1214613..918f47d87d7a 100644 --- a/include/xen/interface/elfnote.h +++ b/include/xen/interface/elfnote.h @@ -11,7 +11,9 @@ #define __XEN_PUBLIC_ELFNOTE_H__ /* - * The notes should live in a SHT_NOTE segment and have "Xen" in the + * `incontents 200 elfnotes ELF notes + * + * The notes should live in a PT_NOTE segment and have "Xen" in the * name field. * * Numeric types are either 4 or 8 bytes depending on the content of @@ -22,6 +24,8 @@ * * String values (for non-legacy) are NULL terminated ASCII, also known * as ASCIZ type. + * + * Xen only uses ELF Notes contained in x86 binaries. */ /* @@ -52,7 +56,7 @@ #define XEN_ELFNOTE_VIRT_BASE 3 /* - * The offset of the ELF paddr field from the acutal required + * The offset of the ELF paddr field from the actual required * pseudo-physical address (numeric). * * This is used to maintain backwards compatibility with older kernels @@ -92,7 +96,12 @@ #define XEN_ELFNOTE_LOADER 8 /* - * The kernel supports PAE (x86/32 only, string = "yes" or "no"). + * The kernel supports PAE (x86/32 only, string = "yes", "no" or + * "bimodal"). + * + * For compatibility with Xen 3.0.3 and earlier the "bimodal" setting + * may be given as "yes,bimodal" which will cause older Xen to treat + * this kernel as PAE. * * LEGACY: PAE (n.b. The legacy interface included a provision to * indicate 'extended-cr3' support allowing L3 page tables to be @@ -149,7 +158,9 @@ * The (non-default) location the initial phys-to-machine map should be * placed at by the hypervisor (Dom0) or the tools (DomU). * The kernel must be prepared for this mapping to be established using - * large pages, despite such otherwise not being available to guests. + * large pages, despite such otherwise not being available to guests. Note + * that these large pages may be misaligned in PFN space (they'll obviously + * be aligned in MFN and virtual address spaces). * The kernel must also be able to handle the page table pages used for * this mapping not being accessible through the initial mapping. * (Only x86-64 supports this at present.) @@ -185,9 +196,81 @@ */ #define XEN_ELFNOTE_PHYS32_ENTRY 18 +/* + * Physical loading constraints for PVH kernels + * + * The presence of this note indicates the kernel supports relocating itself. + * + * The note may include up to three 32bit values to place constraints on the + * guest physical loading addresses and alignment for a PVH kernel. Values + * are read in the following order: + * - a required start alignment (default 0x200000) + * - a minimum address for the start of the image (default 0; see below) + * - a maximum address for the last byte of the image (default 0xffffffff) + * + * When this note specifies an alignment value, it is used. Otherwise the + * maximum p_align value from loadable ELF Program Headers is used, if it is + * greater than or equal to 4k (0x1000). Otherwise, the default is used. + */ +#define XEN_ELFNOTE_PHYS32_RELOC 19 + /* * The number of the highest elfnote defined. */ -#define XEN_ELFNOTE_MAX XEN_ELFNOTE_PHYS32_ENTRY +#define XEN_ELFNOTE_MAX XEN_ELFNOTE_PHYS32_RELOC + +/* + * System information exported through crash notes. + * + * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_INFO + * note in case of a system crash. This note will contain various + * information about the system, see xen/include/xen/elfcore.h. + */ +#define XEN_ELFNOTE_CRASH_INFO 0x1000001 + +/* + * System registers exported through crash notes. + * + * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_REGS + * note per cpu in case of a system crash. This note is architecture + * specific and will contain registers not saved in the "CORE" note. + * See xen/include/xen/elfcore.h for more information. + */ +#define XEN_ELFNOTE_CRASH_REGS 0x1000002 + + +/* + * xen dump-core none note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_NONE + * in its dump file to indicate that the file is xen dump-core + * file. This note doesn't have any other information. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_NONE 0x2000000 + +/* + * xen dump-core header note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_HEADER + * in its dump file. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_HEADER 0x2000001 + +/* + * xen dump-core xen version note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_XEN_VERSION + * in its dump file. It contains the xen version obtained via the + * XENVER hypercall. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_XEN_VERSION 0x2000002 + +/* + * xen dump-core format version note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION + * in its dump file. It contains a format version identifier. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION 0x2000003 #endif /* __XEN_PUBLIC_ELFNOTE_H__ */ From 1db29f99edb056d8445876292f53a63459142309 Mon Sep 17 00:00:00 2001 From: Jason Andryuk Date: Fri, 23 Aug 2024 15:36:27 -0400 Subject: [PATCH 451/655] x86/pvh: Make PVH entrypoint PIC for x86-64 The PVH entrypoint is 32bit non-PIC code running the uncompressed vmlinux at its load address CONFIG_PHYSICAL_START - default 0x1000000 (16MB). The kernel is loaded at that physical address inside the VM by the VMM software (Xen/QEMU). When running a Xen PVH Dom0, the host reserved addresses are mapped 1-1 into the PVH container. There exist system firmwares (Coreboot/EDK2) with reserved memory at 16MB. This creates a conflict where the PVH kernel cannot be loaded at that address. Modify the PVH entrypoint to be position-indepedent to allow flexibility in load address. Only the 64bit entry path is converted. A 32bit kernel is not PIC, so calling into other parts of the kernel, like xen_prepare_pvh() and mk_pgtable_32(), don't work properly when relocated. This makes the code PIC, but the page tables need to be updated as well to handle running from the kernel high map. The UNWIND_HINT_END_OF_STACK is to silence: vmlinux.o: warning: objtool: pvh_start_xen+0x7f: unreachable instruction after the lret into 64bit code. Signed-off-by: Jason Andryuk Reviewed-by: Juergen Gross Message-ID: <20240823193630.2583107-3-jason.andryuk@amd.com> Signed-off-by: Juergen Gross --- arch/x86/platform/pvh/head.S | 46 ++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S index f7235ef87bc3..ba4d0eab4436 100644 --- a/arch/x86/platform/pvh/head.S +++ b/arch/x86/platform/pvh/head.S @@ -7,6 +7,7 @@ .code32 .text #define _pa(x) ((x) - __START_KERNEL_map) +#define rva(x) ((x) - pvh_start_xen) #include #include @@ -54,7 +55,25 @@ SYM_CODE_START_LOCAL(pvh_start_xen) UNWIND_HINT_END_OF_STACK cld - lgdt (_pa(gdt)) + /* + * See the comment for startup_32 for more details. We need to + * execute a call to get the execution address to be position + * independent, but we don't have a stack. Save and restore the + * magic field of start_info in ebx, and use that as the stack. + */ + mov (%ebx), %eax + leal 4(%ebx), %esp + ANNOTATE_INTRA_FUNCTION_CALL + call 1f +1: popl %ebp + mov %eax, (%ebx) + subl $rva(1b), %ebp + movl $0, %esp + + leal rva(gdt)(%ebp), %eax + leal rva(gdt_start)(%ebp), %ecx + movl %ecx, 2(%eax) + lgdt (%eax) mov $PVH_DS_SEL,%eax mov %eax,%ds @@ -62,14 +81,14 @@ SYM_CODE_START_LOCAL(pvh_start_xen) mov %eax,%ss /* Stash hvm_start_info. */ - mov $_pa(pvh_start_info), %edi + leal rva(pvh_start_info)(%ebp), %edi mov %ebx, %esi - mov _pa(pvh_start_info_sz), %ecx + movl rva(pvh_start_info_sz)(%ebp), %ecx shr $2,%ecx rep movsl - mov $_pa(early_stack_end), %esp + leal rva(early_stack_end)(%ebp), %esp /* Enable PAE mode. */ mov %cr4, %eax @@ -84,30 +103,33 @@ SYM_CODE_START_LOCAL(pvh_start_xen) wrmsr /* Enable pre-constructed page tables. */ - mov $_pa(init_top_pgt), %eax + leal rva(init_top_pgt)(%ebp), %eax mov %eax, %cr3 mov $(X86_CR0_PG | X86_CR0_PE), %eax mov %eax, %cr0 /* Jump to 64-bit mode. */ - ljmp $PVH_CS_SEL, $_pa(1f) + pushl $PVH_CS_SEL + leal rva(1f)(%ebp), %eax + pushl %eax + lretl /* 64-bit entry point. */ .code64 1: + UNWIND_HINT_END_OF_STACK + /* Set base address in stack canary descriptor. */ mov $MSR_GS_BASE,%ecx - mov $_pa(canary), %eax + leal canary(%rip), %eax xor %edx, %edx wrmsr call xen_prepare_pvh /* startup_64 expects boot_params in %rsi. */ - mov $_pa(pvh_bootparams), %rsi - mov $_pa(startup_64), %rax - ANNOTATE_RETPOLINE_SAFE - jmp *%rax + lea pvh_bootparams(%rip), %rsi + jmp startup_64 #else /* CONFIG_X86_64 */ @@ -143,7 +165,7 @@ SYM_CODE_END(pvh_start_xen) .balign 8 SYM_DATA_START_LOCAL(gdt) .word gdt_end - gdt_start - .long _pa(gdt_start) + .long _pa(gdt_start) /* x86-64 will overwrite if relocated. */ .word 0 SYM_DATA_END(gdt) SYM_DATA_START_LOCAL(gdt_start) From b464b461d27d564125db760938643374864c1b1f Mon Sep 17 00:00:00 2001 From: Jason Andryuk Date: Fri, 23 Aug 2024 15:36:28 -0400 Subject: [PATCH 452/655] x86/pvh: Set phys_base when calling xen_prepare_pvh() phys_base needs to be set for __pa() to work in xen_pvh_init() when finding the hypercall page. Set it before calling into xen_prepare_pvh(), which calls xen_pvh_init(). Clear it afterward to avoid __startup_64() adding to it and creating an incorrect value. Signed-off-by: Jason Andryuk Reviewed-by: Juergen Gross Message-ID: <20240823193630.2583107-4-jason.andryuk@amd.com> Signed-off-by: Juergen Gross --- arch/x86/platform/pvh/head.S | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S index ba4d0eab4436..14b4345d9bae 100644 --- a/arch/x86/platform/pvh/head.S +++ b/arch/x86/platform/pvh/head.S @@ -125,7 +125,20 @@ SYM_CODE_START_LOCAL(pvh_start_xen) xor %edx, %edx wrmsr + /* + * Calculate load offset and store in phys_base. __pa() needs + * phys_base set to calculate the hypercall page in xen_pvh_init(). + */ + movq %rbp, %rbx + subq $_pa(pvh_start_xen), %rbx + movq %rbx, phys_base(%rip) call xen_prepare_pvh + /* + * Clear phys_base. __startup_64 will *add* to its value, + * so reset to 0. + */ + xor %rbx, %rbx + movq %rbx, phys_base(%rip) /* startup_64 expects boot_params in %rsi. */ lea pvh_bootparams(%rip), %rsi From ada1986d07976d60bed5017aa38b7f7cf27883f7 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 25 Sep 2024 22:30:59 +0900 Subject: [PATCH 453/655] tomoyo: fallback to realpath if symlink's pathname does not exist Alfred Agrell found that TOMOYO cannot handle execveat(AT_EMPTY_PATH) inside chroot environment where /dev and /proc are not mounted, for commit 51f39a1f0cea ("syscalls: implement execveat() system call") missed that TOMOYO tries to canonicalize argv[0] when the filename fed to the executed program as argv[0] is supplied using potentially nonexistent pathname. Since "/dev/fd/" already lost symlink information used for obtaining that , it is too late to reconstruct symlink's pathname. Although part of "/dev/fd//" might not be canonicalized, TOMOYO cannot use tomoyo_realpath_nofollow() when /dev or /proc is not mounted. Therefore, fallback to tomoyo_realpath_from_path() when tomoyo_realpath_nofollow() failed. Reported-by: Alfred Agrell Closes: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1082001 Fixes: 51f39a1f0cea ("syscalls: implement execveat() system call") Cc: stable@vger.kernel.org # v3.19+ Signed-off-by: Tetsuo Handa --- security/tomoyo/domain.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c index 90b53500a236..aed9e3ef2c9e 100644 --- a/security/tomoyo/domain.c +++ b/security/tomoyo/domain.c @@ -723,10 +723,13 @@ int tomoyo_find_next_domain(struct linux_binprm *bprm) ee->r.obj = &ee->obj; ee->obj.path1 = bprm->file->f_path; /* Get symlink's pathname of program. */ - retval = -ENOENT; exename.name = tomoyo_realpath_nofollow(original_name); - if (!exename.name) - goto out; + if (!exename.name) { + /* Fallback to realpath if symlink's pathname does not exist. */ + exename.name = tomoyo_realpath_from_path(&bprm->file->f_path); + if (!exename.name) + goto out; + } tomoyo_fill_path_info(&exename); retry: /* Check 'aggregator' directive. */ From e3e8cd90f8e2eef67ded38f9c1a5f5520a407a62 Mon Sep 17 00:00:00 2001 From: Jason Andryuk Date: Fri, 23 Aug 2024 15:36:29 -0400 Subject: [PATCH 454/655] x86/kernel: Move page table macros to header The PVH entry point will need an additional set of prebuild page tables. Move the macros and defines to pgtable_64.h, so they can be re-used. Signed-off-by: Jason Andryuk Reviewed-by: Juergen Gross Acked-by: Dave Hansen Message-ID: <20240823193630.2583107-5-jason.andryuk@amd.com> Signed-off-by: Juergen Gross --- arch/x86/include/asm/pgtable_64.h | 23 ++++++++++++++++++++++- arch/x86/kernel/head_64.S | 20 -------------------- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 3c4407271d08..72912b8edfdf 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -271,5 +271,26 @@ static inline bool gup_fast_permitted(unsigned long start, unsigned long end) #include -#endif /* !__ASSEMBLY__ */ +#else /* __ASSEMBLY__ */ + +#define l4_index(x) (((x) >> 39) & 511) +#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) + +L4_PAGE_OFFSET = l4_index(__PAGE_OFFSET_BASE_L4) +L4_START_KERNEL = l4_index(__START_KERNEL_map) + +L3_START_KERNEL = pud_index(__START_KERNEL_map) + +#define SYM_DATA_START_PAGE_ALIGNED(name) \ + SYM_START(name, SYM_L_GLOBAL, .balign PAGE_SIZE) + +/* Automate the creation of 1 to 1 mapping pmd entries */ +#define PMDS(START, PERM, COUNT) \ + i = 0 ; \ + .rept (COUNT) ; \ + .quad (START) + (i << PMD_SHIFT) + (PERM) ; \ + i = i + 1 ; \ + .endr + +#endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_PGTABLE_64_H */ diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 330922b328bf..16752b8dfa89 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -32,13 +32,6 @@ * We are not able to switch in one step to the final KERNEL ADDRESS SPACE * because we need identity-mapped pages. */ -#define l4_index(x) (((x) >> 39) & 511) -#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) - -L4_PAGE_OFFSET = l4_index(__PAGE_OFFSET_BASE_L4) -L4_START_KERNEL = l4_index(__START_KERNEL_map) - -L3_START_KERNEL = pud_index(__START_KERNEL_map) __HEAD .code64 @@ -577,9 +570,6 @@ SYM_CODE_START_NOALIGN(vc_no_ghcb) SYM_CODE_END(vc_no_ghcb) #endif -#define SYM_DATA_START_PAGE_ALIGNED(name) \ - SYM_START(name, SYM_L_GLOBAL, .balign PAGE_SIZE) - #ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION /* * Each PGD needs to be 8k long and 8k aligned. We do not @@ -601,14 +591,6 @@ SYM_CODE_END(vc_no_ghcb) #define PTI_USER_PGD_FILL 0 #endif -/* Automate the creation of 1 to 1 mapping pmd entries */ -#define PMDS(START, PERM, COUNT) \ - i = 0 ; \ - .rept (COUNT) ; \ - .quad (START) + (i << PMD_SHIFT) + (PERM) ; \ - i = i + 1 ; \ - .endr - __INITDATA .balign 4 @@ -708,8 +690,6 @@ SYM_DATA_START_PAGE_ALIGNED(level1_fixmap_pgt) .endr SYM_DATA_END(level1_fixmap_pgt) -#undef PMDS - .data .align 16 From 47ffe0578aee45fed3a06d5dcff76cdebb303163 Mon Sep 17 00:00:00 2001 From: Jason Andryuk Date: Fri, 23 Aug 2024 15:36:30 -0400 Subject: [PATCH 455/655] x86/pvh: Add 64bit relocation page tables The PVH entry point is 32bit. For a 64bit kernel, the entry point must switch to 64bit mode, which requires a set of page tables. In the past, PVH used init_top_pgt. This works fine when the kernel is loaded at LOAD_PHYSICAL_ADDR, as the page tables are prebuilt for this address. If the kernel is loaded at a different address, they need to be adjusted. __startup_64() adjusts the prebuilt page tables for the physical load address, but it is 64bit code. The 32bit PVH entry code can't call it to adjust the page tables, so it can't readily be re-used. 64bit PVH entry needs page tables set up for identity map, the kernel high map and the direct map. pvh_start_xen() enters identity mapped. Inside xen_prepare_pvh(), it jumps through a pv_ops function pointer into the highmap. The direct map is used for __va() on the initramfs and other guest physical addresses. Add a dedicated set of prebuild page tables for PVH entry. They are adjusted in assembly before loading. Add XEN_ELFNOTE_PHYS32_RELOC to indicate support for relocation along with the kernel's loading constraints. The maximum load address, KERNEL_IMAGE_SIZE - 1, is determined by a single pvh_level2_ident_pgt page. It could be larger with more pages. Signed-off-by: Jason Andryuk Reviewed-by: Juergen Gross Message-ID: <20240823193630.2583107-6-jason.andryuk@amd.com> Signed-off-by: Juergen Gross --- arch/x86/platform/pvh/head.S | 104 ++++++++++++++++++++++++++++++++++- 1 file changed, 103 insertions(+), 1 deletion(-) diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S index 14b4345d9bae..64fca49cd88f 100644 --- a/arch/x86/platform/pvh/head.S +++ b/arch/x86/platform/pvh/head.S @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -102,8 +103,47 @@ SYM_CODE_START_LOCAL(pvh_start_xen) btsl $_EFER_LME, %eax wrmsr + mov %ebp, %ebx + subl $_pa(pvh_start_xen), %ebx /* offset */ + jz .Lpagetable_done + + /* Fixup page-tables for relocation. */ + leal rva(pvh_init_top_pgt)(%ebp), %edi + movl $PTRS_PER_PGD, %ecx +2: + testl $_PAGE_PRESENT, 0x00(%edi) + jz 1f + addl %ebx, 0x00(%edi) +1: + addl $8, %edi + decl %ecx + jnz 2b + + /* L3 ident has a single entry. */ + leal rva(pvh_level3_ident_pgt)(%ebp), %edi + addl %ebx, 0x00(%edi) + + leal rva(pvh_level3_kernel_pgt)(%ebp), %edi + addl %ebx, (PAGE_SIZE - 16)(%edi) + addl %ebx, (PAGE_SIZE - 8)(%edi) + + /* pvh_level2_ident_pgt is fine - large pages */ + + /* pvh_level2_kernel_pgt needs adjustment - large pages */ + leal rva(pvh_level2_kernel_pgt)(%ebp), %edi + movl $PTRS_PER_PMD, %ecx +2: + testl $_PAGE_PRESENT, 0x00(%edi) + jz 1f + addl %ebx, 0x00(%edi) +1: + addl $8, %edi + decl %ecx + jnz 2b + +.Lpagetable_done: /* Enable pre-constructed page tables. */ - leal rva(init_top_pgt)(%ebp), %eax + leal rva(pvh_init_top_pgt)(%ebp), %eax mov %eax, %cr3 mov $(X86_CR0_PG | X86_CR0_PE), %eax mov %eax, %cr0 @@ -198,5 +238,67 @@ SYM_DATA_START_LOCAL(early_stack) .fill BOOT_STACK_SIZE, 1, 0 SYM_DATA_END_LABEL(early_stack, SYM_L_LOCAL, early_stack_end) +#ifdef CONFIG_X86_64 +/* + * Xen PVH needs a set of identity mapped and kernel high mapping + * page tables. pvh_start_xen starts running on the identity mapped + * page tables, but xen_prepare_pvh calls into the high mapping. + * These page tables need to be relocatable and are only used until + * startup_64 transitions to init_top_pgt. + */ +SYM_DATA_START_PAGE_ALIGNED(pvh_init_top_pgt) + .quad pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC + .org pvh_init_top_pgt + L4_PAGE_OFFSET * 8, 0 + .quad pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC + .org pvh_init_top_pgt + L4_START_KERNEL * 8, 0 + /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ + .quad pvh_level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC +SYM_DATA_END(pvh_init_top_pgt) + +SYM_DATA_START_PAGE_ALIGNED(pvh_level3_ident_pgt) + .quad pvh_level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC + .fill 511, 8, 0 +SYM_DATA_END(pvh_level3_ident_pgt) +SYM_DATA_START_PAGE_ALIGNED(pvh_level2_ident_pgt) + /* + * Since I easily can, map the first 1G. + * Don't set NX because code runs from these pages. + * + * Note: This sets _PAGE_GLOBAL despite whether + * the CPU supports it or it is enabled. But, + * the CPU should ignore the bit. + */ + PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) +SYM_DATA_END(pvh_level2_ident_pgt) +SYM_DATA_START_PAGE_ALIGNED(pvh_level3_kernel_pgt) + .fill L3_START_KERNEL, 8, 0 + /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */ + .quad pvh_level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC + .quad 0 /* no fixmap */ +SYM_DATA_END(pvh_level3_kernel_pgt) + +SYM_DATA_START_PAGE_ALIGNED(pvh_level2_kernel_pgt) + /* + * Kernel high mapping. + * + * The kernel code+data+bss must be located below KERNEL_IMAGE_SIZE in + * virtual address space, which is 1 GiB if RANDOMIZE_BASE is enabled, + * 512 MiB otherwise. + * + * (NOTE: after that starts the module area, see MODULES_VADDR.) + * + * This table is eventually used by the kernel during normal runtime. + * Care must be taken to clear out undesired bits later, like _PAGE_RW + * or _PAGE_GLOBAL in some cases. + */ + PMDS(0, __PAGE_KERNEL_LARGE_EXEC, KERNEL_IMAGE_SIZE / PMD_SIZE) +SYM_DATA_END(pvh_level2_kernel_pgt) + + ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_RELOC, + .long CONFIG_PHYSICAL_ALIGN; + .long LOAD_PHYSICAL_ADDR; + .long KERNEL_IMAGE_SIZE - 1) +#endif + ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, _ASM_PTR (pvh_start_xen - __START_KERNEL_map)) From 4771d2ecb7b9e4c2c73ede2908d7e7c989460981 Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Wed, 18 Sep 2024 17:07:13 +0800 Subject: [PATCH 456/655] drm/amdgpu/mes12: set enable_level_process_quantum_check enable_level_process_quantum_check is requried to enable process quantum based scheduling. Signed-off-by: Jack Xiao Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.11.x --- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 186f77813397..8d27421689c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -609,6 +609,7 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) mes_set_hw_res_pkt.disable_mes_log = 1; mes_set_hw_res_pkt.use_different_vmid_compute = 1; mes_set_hw_res_pkt.enable_reg_active_poll = 1; + mes_set_hw_res_pkt.enable_level_process_quantum_check = 1; /* * Keep oversubscribe timer for sdma . When we have unmapped doorbell From 126be9b2bef9c7068fdd464790d82e6d70f9d8e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 21 Aug 2024 13:55:41 +0200 Subject: [PATCH 457/655] drm/amdgpu: sync to KFD fences before clearing PTEs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch tries to solve the basic problem we also need to sync to the KFD fences of the BO because otherwise it can be that we clear PTEs while the KFD queues are still running. Signed-off-by: Christian König Acked-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 30 ++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 +++++ 3 files changed, 37 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index bdf1ef825d89..c586ab4c911b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -260,6 +260,36 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, return 0; } +/** + * amdgpu_sync_kfd - sync to KFD fences + * + * @sync: sync object to add KFD fences to + * @resv: reservation object with KFD fences + * + * Extract all KFD fences and add them to the sync object. + */ +int amdgpu_sync_kfd(struct amdgpu_sync *sync, struct dma_resv *resv) +{ + struct dma_resv_iter cursor; + struct dma_fence *f; + int r = 0; + + dma_resv_iter_begin(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP); + dma_resv_for_each_fence_unlocked(&cursor, f) { + void *fence_owner = amdgpu_sync_get_owner(f); + + if (fence_owner != AMDGPU_FENCE_OWNER_KFD) + continue; + + r = amdgpu_sync_fence(sync, f); + if (r) + break; + } + dma_resv_iter_end(&cursor); + + return r; +} + /* Free the entry back to the slab */ static void amdgpu_sync_entry_free(struct amdgpu_sync_entry *e) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h index cf1e9e858efd..e3272dce798d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h @@ -51,6 +51,7 @@ int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f); int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct dma_resv *resv, enum amdgpu_sync_mode mode, void *owner); +int amdgpu_sync_kfd(struct amdgpu_sync *sync, struct dma_resv *resv); struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, struct amdgpu_ring *ring); struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 985b89242b44..6005280f5f38 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1169,6 +1169,12 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, AMDGPU_SYNC_EQ_OWNER, vm); if (r) goto error_free; + if (bo) { + r = amdgpu_sync_kfd(&sync, bo->tbo.base.resv); + if (r) + goto error_free; + } + } else { struct drm_gem_object *obj = &bo->tbo.base; From e1d27f7a9cea1e0c06699164e3b177862e7b4096 Mon Sep 17 00:00:00 2001 From: ZhenGuo Yin Date: Thu, 19 Sep 2024 11:38:04 +0800 Subject: [PATCH 458/655] drm/amdgpu: skip coredump after job timeout in SRIOV VF FLR will be triggered by host driver before job timeout, hence the error status of GPU get cleared. Performing a coredump here is unnecessary. Signed-off-by: ZhenGuo Yin Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index ad6bf5d4e0a9..16f2605ac50b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -107,8 +107,11 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) /* * Do the coredump immediately after a job timeout to get a very * close dump/snapshot/representation of GPU's current error status + * Skip it for SRIOV, since VF FLR will be triggered by host driver + * before job timeout */ - amdgpu_job_core_dump(adev, job); + if (!amdgpu_sriov_vf(adev)) + amdgpu_job_core_dump(adev, job); if (amdgpu_gpu_recovery && amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) { From 6ae9e1aba97e4cdaa31a0bfdc07497ad0e915c84 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Mon, 16 Sep 2024 14:33:58 -0400 Subject: [PATCH 459/655] drm/amdkfd: Update logic for CU occupancy calculations Currently, the code uses the IH_VMID_X_LUT register to map a queue's vmid to the corresponding PASID. This logic is racy since CP can update the VMID-PASID mapping anytime especially when there are more processes than number of vmids. Update the logic to calculate CU occupancy by matching doorbell offset of the queue with valid wave counts against the process's queues. Signed-off-by: Mukul Joshi Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 98 ++++++++----------- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h | 5 +- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 20 ++++ .../drm/amd/amdkfd/kfd_device_queue_manager.h | 3 + drivers/gpu/drm/amd/amdkfd/kfd_process.c | 14 ++- .../gpu/drm/amd/include/kgd_kfd_interface.h | 10 +- 6 files changed, 87 insertions(+), 63 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 1254a43ec96b..26b1f37c316e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -950,28 +950,30 @@ static void unlock_spi_csq_mutexes(struct amdgpu_device *adev) * @inst: xcc's instance number on a multi-XCC setup */ static void get_wave_count(struct amdgpu_device *adev, int queue_idx, - int *wave_cnt, int *vmid, uint32_t inst) + struct kfd_cu_occupancy *queue_cnt, uint32_t inst) { int pipe_idx; int queue_slot; unsigned int reg_val; - + unsigned int wave_cnt; /* * Program GRBM with appropriate MEID, PIPEID, QUEUEID and VMID * parameters to read out waves in flight. Get VMID if there are * non-zero waves in flight. */ - *vmid = 0xFF; - *wave_cnt = 0; pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe; queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe; soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0, inst); - reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, inst, mmSPI_CSQ_WF_ACTIVE_COUNT_0) + - queue_slot); - *wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK; - if (*wave_cnt != 0) - *vmid = (RREG32_SOC15(GC, inst, mmCP_HQD_VMID) & - CP_HQD_VMID__VMID_MASK) >> CP_HQD_VMID__VMID__SHIFT; + reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, inst, + mmSPI_CSQ_WF_ACTIVE_COUNT_0) + queue_slot); + wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK; + if (wave_cnt != 0) { + queue_cnt->wave_cnt += wave_cnt; + queue_cnt->doorbell_off = + (RREG32_SOC15(GC, inst, mmCP_HQD_PQ_DOORBELL_CONTROL) & + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK) >> + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT; + } } /** @@ -981,9 +983,8 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx, * or more queues running and submitting waves to compute units. * * @adev: Handle of device from which to get number of waves in flight - * @pasid: Identifies the process for which this query call is invoked - * @pasid_wave_cnt: Output parameter updated with number of waves in flight that - * belong to process with given pasid + * @cu_occupancy: Array that gets filled with wave_cnt and doorbell offset + * for comparison later. * @max_waves_per_cu: Output parameter updated with maximum number of waves * possible per Compute Unit * @inst: xcc's instance number on a multi-XCC setup @@ -1011,30 +1012,24 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx, * number of waves that are in flight for the queue at specified index. The * index ranges from 0 to 7. * - * If non-zero waves are in flight, read CP_HQD_VMID register to obtain VMID - * of the wave(s). + * If non-zero waves are in flight, store the corresponding doorbell offset + * of the queue, along with the wave count. * - * Determine if VMID from above step maps to pasid provided as parameter. If - * it matches agrregate the wave count. That the VMID will not match pasid is - * a normal condition i.e. a device is expected to support multiple queues - * from multiple proceses. + * Determine if the queue belongs to the process by comparing the doorbell + * offset against the process's queues. If it matches, aggregate the wave + * count for the process. * * Reading registers referenced above involves programming GRBM appropriately */ -void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, - int *pasid_wave_cnt, int *max_waves_per_cu, uint32_t inst) +void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, + struct kfd_cu_occupancy *cu_occupancy, + int *max_waves_per_cu, uint32_t inst) { int qidx; - int vmid; int se_idx; - int sh_idx; int se_cnt; - int sh_cnt; - int wave_cnt; int queue_map; - int pasid_tmp; int max_queue_cnt; - int vmid_wave_cnt = 0; DECLARE_BITMAP(cp_queue_bitmap, AMDGPU_MAX_QUEUES); lock_spi_csq_mutexes(adev); @@ -1048,42 +1043,30 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, AMDGPU_MAX_QUEUES); max_queue_cnt = adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe; - sh_cnt = adev->gfx.config.max_sh_per_se; se_cnt = adev->gfx.config.max_shader_engines; for (se_idx = 0; se_idx < se_cnt; se_idx++) { - for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) { + amdgpu_gfx_select_se_sh(adev, se_idx, 0, 0xffffffff, inst); + queue_map = RREG32_SOC15(GC, inst, mmSPI_CSQ_WF_ACTIVE_STATUS); - amdgpu_gfx_select_se_sh(adev, se_idx, sh_idx, 0xffffffff, inst); - queue_map = RREG32_SOC15(GC, inst, mmSPI_CSQ_WF_ACTIVE_STATUS); - - /* - * Assumption: queue map encodes following schema: four - * pipes per each micro-engine, with each pipe mapping - * eight queues. This schema is true for GFX9 devices - * and must be verified for newer device families + /* + * Assumption: queue map encodes following schema: four + * pipes per each micro-engine, with each pipe mapping + * eight queues. This schema is true for GFX9 devices + * and must be verified for newer device families + */ + for (qidx = 0; qidx < max_queue_cnt; qidx++) { + /* Skip qeueus that are not associated with + * compute functions */ - for (qidx = 0; qidx < max_queue_cnt; qidx++) { + if (!test_bit(qidx, cp_queue_bitmap)) + continue; - /* Skip qeueus that are not associated with - * compute functions - */ - if (!test_bit(qidx, cp_queue_bitmap)) - continue; + if (!(queue_map & (1 << qidx))) + continue; - if (!(queue_map & (1 << qidx))) - continue; - - /* Get number of waves in flight and aggregate them */ - get_wave_count(adev, qidx, &wave_cnt, &vmid, - inst); - if (wave_cnt != 0) { - pasid_tmp = - RREG32(SOC15_REG_OFFSET(OSSSYS, inst, - mmIH_VMID_0_LUT) + vmid); - if (pasid_tmp == pasid) - vmid_wave_cnt += wave_cnt; - } - } + /* Get number of waves in flight and aggregate them */ + get_wave_count(adev, qidx, &cu_occupancy[qidx], + inst); } } @@ -1092,7 +1075,6 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, unlock_spi_csq_mutexes(adev); /* Update the output parameters and return */ - *pasid_wave_cnt = vmid_wave_cnt; *max_waves_per_cu = adev->gfx.cu_info.simd_per_cu * adev->gfx.cu_info.max_waves_per_simd; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h index 988c50ac3be0..b6a91a552aa4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -52,8 +52,9 @@ bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, uint8_t vmid, uint16_t *p_pasid); void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base); -void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, - int *pasid_wave_cnt, int *max_waves_per_cu, uint32_t inst); +void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, + struct kfd_cu_occupancy *cu_occupancy, + int *max_waves_per_cu, uint32_t inst); void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev, uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr, uint32_t inst); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 71b465f8d83e..29578550b478 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -3540,6 +3540,26 @@ int debug_refresh_runlist(struct device_queue_manager *dqm) return debug_map_and_unlock(dqm); } +bool kfd_dqm_is_queue_in_process(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + int doorbell_off) +{ + struct queue *q; + bool r = false; + + dqm_lock(dqm); + + list_for_each_entry(q, &qpd->queues_list, list) { + if (q->properties.doorbell_off == doorbell_off) { + r = true; + goto out; + } + } + +out: + dqm_unlock(dqm); + return r; +} #if defined(CONFIG_DEBUG_FS) static void seq_reg_dump(struct seq_file *m, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 08b40826ad1e..80be2036abea 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -324,6 +324,9 @@ void set_queue_snapshot_entry(struct queue *q, int debug_lock_and_unmap(struct device_queue_manager *dqm); int debug_map_and_unlock(struct device_queue_manager *dqm); int debug_refresh_runlist(struct device_queue_manager *dqm); +bool kfd_dqm_is_queue_in_process(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + int doorbell_off); static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index a902950cc060..d73841268c9b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -270,6 +270,10 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer) struct kfd_node *dev = NULL; struct kfd_process *proc = NULL; struct kfd_process_device *pdd = NULL; + int i; + struct kfd_cu_occupancy cu_occupancy[AMDGPU_MAX_QUEUES]; + + memset(cu_occupancy, 0x0, sizeof(cu_occupancy)); pdd = container_of(attr, struct kfd_process_device, attr_cu_occupancy); dev = pdd->dev; @@ -287,9 +291,17 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer) /* Collect wave count from device if it supports */ wave_cnt = 0; max_waves_per_cu = 0; - dev->kfd2kgd->get_cu_occupancy(dev->adev, proc->pasid, &wave_cnt, + + dev->kfd2kgd->get_cu_occupancy(dev->adev, cu_occupancy, &max_waves_per_cu, 0); + for (i = 0; i < AMDGPU_MAX_QUEUES; i++) { + if (cu_occupancy[i].wave_cnt != 0 && + kfd_dqm_is_queue_in_process(dev->dqm, &pdd->qpd, + cu_occupancy[i].doorbell_off)) + wave_cnt += cu_occupancy[i].wave_cnt; + } + /* Translate wave count to number of compute units */ cu_cnt = (wave_cnt + (max_waves_per_cu - 1)) / max_waves_per_cu; return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt); diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 7744ca3ef4b1..e3e635a31b8a 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -71,6 +71,11 @@ enum kgd_memory_pool { KGD_POOL_FRAMEBUFFER = 3, }; +struct kfd_cu_occupancy { + u32 wave_cnt; + u32 doorbell_off; +}; + /** * enum kfd_sched_policy * @@ -313,8 +318,9 @@ struct kfd2kgd_calls { uint32_t grace_period, uint32_t *reg_offset, uint32_t *reg_data); - void (*get_cu_occupancy)(struct amdgpu_device *adev, int pasid, - int *wave_cnt, int *max_waves_per_cu, uint32_t inst); + void (*get_cu_occupancy)(struct amdgpu_device *adev, + struct kfd_cu_occupancy *cu_occupancy, + int *max_waves_per_cu, uint32_t inst); void (*program_trap_handler_settings)(struct amdgpu_device *adev, uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr, uint32_t inst); From e45b011d2c4146442a388113657b70f0c7cad09b Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Fri, 20 Sep 2024 14:59:29 -0400 Subject: [PATCH 460/655] drm/amdkfd: Fix CU occupancy for GFX 9.4.3 Make CU occupancy calculations work on GFX 9.4.3 by updating the logic to handle multiple XCCs correctly. Signed-off-by: Mukul Joshi Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 12 +++++------ .../drm/amd/amdkfd/kfd_device_queue_manager.c | 6 +++++- .../drm/amd/amdkfd/kfd_device_queue_manager.h | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 20 ++++++++++++++++--- 4 files changed, 29 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 26b1f37c316e..3bc0cbf45bc5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -963,14 +963,14 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx, */ pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe; queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe; - soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0, inst); - reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, inst, + soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0, GET_INST(GC, inst)); + reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmSPI_CSQ_WF_ACTIVE_COUNT_0) + queue_slot); wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK; if (wave_cnt != 0) { queue_cnt->wave_cnt += wave_cnt; queue_cnt->doorbell_off = - (RREG32_SOC15(GC, inst, mmCP_HQD_PQ_DOORBELL_CONTROL) & + (RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_DOORBELL_CONTROL) & CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK) >> CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT; } @@ -1033,7 +1033,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, DECLARE_BITMAP(cp_queue_bitmap, AMDGPU_MAX_QUEUES); lock_spi_csq_mutexes(adev); - soc15_grbm_select(adev, 1, 0, 0, 0, inst); + soc15_grbm_select(adev, 1, 0, 0, 0, GET_INST(GC, inst)); /* * Iterate through the shader engines and arrays of the device @@ -1046,7 +1046,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, se_cnt = adev->gfx.config.max_shader_engines; for (se_idx = 0; se_idx < se_cnt; se_idx++) { amdgpu_gfx_select_se_sh(adev, se_idx, 0, 0xffffffff, inst); - queue_map = RREG32_SOC15(GC, inst, mmSPI_CSQ_WF_ACTIVE_STATUS); + queue_map = RREG32_SOC15(GC, GET_INST(GC, inst), mmSPI_CSQ_WF_ACTIVE_STATUS); /* * Assumption: queue map encodes following schema: four @@ -1071,7 +1071,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, } amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, inst); - soc15_grbm_select(adev, 0, 0, 0, 0, inst); + soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, inst)); unlock_spi_csq_mutexes(adev); /* Update the output parameters and return */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 29578550b478..648f40091aa3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -3542,15 +3542,19 @@ int debug_refresh_runlist(struct device_queue_manager *dqm) bool kfd_dqm_is_queue_in_process(struct device_queue_manager *dqm, struct qcm_process_device *qpd, - int doorbell_off) + int doorbell_off, u32 *queue_format) { struct queue *q; bool r = false; + if (!queue_format) + return r; + dqm_lock(dqm); list_for_each_entry(q, &qpd->queues_list, list) { if (q->properties.doorbell_off == doorbell_off) { + *queue_format = q->properties.format; r = true; goto out; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 80be2036abea..09ab36f8e8c6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -326,7 +326,7 @@ int debug_map_and_unlock(struct device_queue_manager *dqm); int debug_refresh_runlist(struct device_queue_manager *dqm); bool kfd_dqm_is_queue_in_process(struct device_queue_manager *dqm, struct qcm_process_device *qpd, - int doorbell_off); + int doorbell_off, u32 *queue_format); static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index d73841268c9b..d07acf1b2f93 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -272,6 +272,7 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer) struct kfd_process_device *pdd = NULL; int i; struct kfd_cu_occupancy cu_occupancy[AMDGPU_MAX_QUEUES]; + u32 queue_format; memset(cu_occupancy, 0x0, sizeof(cu_occupancy)); @@ -292,14 +293,27 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer) wave_cnt = 0; max_waves_per_cu = 0; + /* + * For GFX 9.4.3, fetch the CU occupancy from the first XCC in the partition. + * For AQL queues, because of cooperative dispatch we multiply the wave count + * by number of XCCs in the partition to get the total wave counts across all + * XCCs in the partition. + * For PM4 queues, there is no cooperative dispatch so wave_cnt stay as it is. + */ dev->kfd2kgd->get_cu_occupancy(dev->adev, cu_occupancy, - &max_waves_per_cu, 0); + &max_waves_per_cu, ffs(dev->xcc_mask) - 1); for (i = 0; i < AMDGPU_MAX_QUEUES; i++) { if (cu_occupancy[i].wave_cnt != 0 && kfd_dqm_is_queue_in_process(dev->dqm, &pdd->qpd, - cu_occupancy[i].doorbell_off)) - wave_cnt += cu_occupancy[i].wave_cnt; + cu_occupancy[i].doorbell_off, + &queue_format)) { + if (unlikely(queue_format == KFD_QUEUE_FORMAT_PM4)) + wave_cnt += cu_occupancy[i].wave_cnt; + else + wave_cnt += (NUM_XCC(dev->xcc_mask) * + cu_occupancy[i].wave_cnt); + } } /* Translate wave count to number of compute units */ From 8048e5ade8224969023902b0b3f64470f9c250a7 Mon Sep 17 00:00:00 2001 From: Saleemkhan Jamadar Date: Fri, 20 Sep 2024 18:40:18 +0530 Subject: [PATCH 461/655] drm/amdgpu/vcn: enable AV1 on both instances v1 - remove cs parse code (Christian) On VCN v4_0_6 AV1 is supported on both the instances. Remove cs IB parse code since explict handling of AV1 schedule is not required. Signed-off-by: Saleemkhan Jamadar Reviewed-by: Leo Liu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c | 165 ------------------------ 1 file changed, 165 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index b1fd226b7efb..9d4f5352a62c 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -1395,170 +1395,6 @@ static void vcn_v4_0_5_unified_ring_set_wptr(struct amdgpu_ring *ring) } } -static int vcn_v4_0_5_limit_sched(struct amdgpu_cs_parser *p, - struct amdgpu_job *job) -{ - struct drm_gpu_scheduler **scheds; - - /* The create msg must be in the first IB submitted */ - if (atomic_read(&job->base.entity->fence_seq)) - return -EINVAL; - - /* if VCN0 is harvested, we can't support AV1 */ - if (p->adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) - return -EINVAL; - - scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_ENC] - [AMDGPU_RING_PRIO_0].sched; - drm_sched_entity_modify_sched(job->base.entity, scheds, 1); - return 0; -} - -static int vcn_v4_0_5_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, - uint64_t addr) -{ - struct ttm_operation_ctx ctx = { false, false }; - struct amdgpu_bo_va_mapping *map; - uint32_t *msg, num_buffers; - struct amdgpu_bo *bo; - uint64_t start, end; - unsigned int i; - void *ptr; - int r; - - addr &= AMDGPU_GMC_HOLE_MASK; - r = amdgpu_cs_find_mapping(p, addr, &bo, &map); - if (r) { - DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr); - return r; - } - - start = map->start * AMDGPU_GPU_PAGE_SIZE; - end = (map->last + 1) * AMDGPU_GPU_PAGE_SIZE; - if (addr & 0x7) { - DRM_ERROR("VCN messages must be 8 byte aligned!\n"); - return -EINVAL; - } - - bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; - amdgpu_bo_placement_from_domain(bo, bo->allowed_domains); - r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); - if (r) { - DRM_ERROR("Failed validating the VCN message BO (%d)!\n", r); - return r; - } - - r = amdgpu_bo_kmap(bo, &ptr); - if (r) { - DRM_ERROR("Failed mapping the VCN message (%d)!\n", r); - return r; - } - - msg = ptr + addr - start; - - /* Check length */ - if (msg[1] > end - addr) { - r = -EINVAL; - goto out; - } - - if (msg[3] != RDECODE_MSG_CREATE) - goto out; - - num_buffers = msg[2]; - for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) { - uint32_t offset, size, *create; - - if (msg[0] != RDECODE_MESSAGE_CREATE) - continue; - - offset = msg[1]; - size = msg[2]; - - if (offset + size > end) { - r = -EINVAL; - goto out; - } - - create = ptr + addr + offset - start; - - /* H264, HEVC and VP9 can run on any instance */ - if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11) - continue; - - r = vcn_v4_0_5_limit_sched(p, job); - if (r) - goto out; - } - -out: - amdgpu_bo_kunmap(bo); - return r; -} - -#define RADEON_VCN_ENGINE_TYPE_ENCODE (0x00000002) -#define RADEON_VCN_ENGINE_TYPE_DECODE (0x00000003) - -#define RADEON_VCN_ENGINE_INFO (0x30000001) -#define RADEON_VCN_ENGINE_INFO_MAX_OFFSET 16 - -#define RENCODE_ENCODE_STANDARD_AV1 2 -#define RENCODE_IB_PARAM_SESSION_INIT 0x00000003 -#define RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET 64 - -/* return the offset in ib if id is found, -1 otherwise - * to speed up the searching we only search upto max_offset - */ -static int vcn_v4_0_5_enc_find_ib_param(struct amdgpu_ib *ib, uint32_t id, int max_offset) -{ - int i; - - for (i = 0; i < ib->length_dw && i < max_offset && ib->ptr[i] >= 8; i += ib->ptr[i]/4) { - if (ib->ptr[i + 1] == id) - return i; - } - return -1; -} - -static int vcn_v4_0_5_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, - struct amdgpu_job *job, - struct amdgpu_ib *ib) -{ - struct amdgpu_ring *ring = amdgpu_job_ring(job); - struct amdgpu_vcn_decode_buffer *decode_buffer; - uint64_t addr; - uint32_t val; - int idx; - - /* The first instance can decode anything */ - if (!ring->me) - return 0; - - /* RADEON_VCN_ENGINE_INFO is at the top of ib block */ - idx = vcn_v4_0_5_enc_find_ib_param(ib, RADEON_VCN_ENGINE_INFO, - RADEON_VCN_ENGINE_INFO_MAX_OFFSET); - if (idx < 0) /* engine info is missing */ - return 0; - - val = amdgpu_ib_get_value(ib, idx + 2); /* RADEON_VCN_ENGINE_TYPE */ - if (val == RADEON_VCN_ENGINE_TYPE_DECODE) { - decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[idx + 6]; - - if (!(decode_buffer->valid_buf_flag & 0x1)) - return 0; - - addr = ((u64)decode_buffer->msg_buffer_address_hi) << 32 | - decode_buffer->msg_buffer_address_lo; - return vcn_v4_0_5_dec_msg(p, job, addr); - } else if (val == RADEON_VCN_ENGINE_TYPE_ENCODE) { - idx = vcn_v4_0_5_enc_find_ib_param(ib, RENCODE_IB_PARAM_SESSION_INIT, - RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET); - if (idx >= 0 && ib->ptr[idx + 2] == RENCODE_ENCODE_STANDARD_AV1) - return vcn_v4_0_5_limit_sched(p, job); - } - return 0; -} - static const struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, @@ -1566,7 +1402,6 @@ static const struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = { .get_rptr = vcn_v4_0_5_unified_ring_get_rptr, .get_wptr = vcn_v4_0_5_unified_ring_get_wptr, .set_wptr = vcn_v4_0_5_unified_ring_set_wptr, - .patch_cs_in_place = vcn_v4_0_5_ring_patch_cs_in_place, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + From d52ac79053a2f3eba04c1e7b56334df84d1d289f Mon Sep 17 00:00:00 2001 From: Sreekant Somasekharan Date: Fri, 20 Sep 2024 01:53:17 -0400 Subject: [PATCH 462/655] drm/amdkfd: Add SDMA queue quantum support for GFX12 program SDMAx_QUEUEx_SCHEDULE_CNTL for context switch due to quantum in KFD for GFX12. Signed-off-by: Sreekant Somasekharan Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.11.x --- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c index d163d92a692f..2b72d5b4949b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c @@ -341,6 +341,10 @@ static void update_mqd_sdma(struct mqd_manager *mm, void *mqd, m->sdmax_rlcx_doorbell_offset = q->doorbell_off << SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT; + m->sdmax_rlcx_sched_cntl = (amdgpu_sdma_phase_quantum + << SDMA0_QUEUE0_SCHEDULE_CNTL__CONTEXT_QUANTUM__SHIFT) + & SDMA0_QUEUE0_SCHEDULE_CNTL__CONTEXT_QUANTUM_MASK; + m->sdma_engine_id = q->sdma_engine_id; m->sdma_queue_id = q->sdma_queue_id; From 2bcae12c795f32ddfbf8c80d1b5f1d3286341c32 Mon Sep 17 00:00:00 2001 From: Gerd Bayer Date: Tue, 10 Sep 2024 10:53:51 +0200 Subject: [PATCH 463/655] net/mlx5: Fix error path in multi-packet WQE transmit Remove the erroneous unmap in case no DMA mapping was established The multi-packet WQE transmit code attempts to obtain a DMA mapping for the skb. This could fail, e.g. under memory pressure, when the IOMMU driver just can't allocate more memory for page tables. While the code tries to handle this in the path below the err_unmap label it erroneously unmaps one entry from the sq's FIFO list of active mappings. Since the current map attempt failed this unmap is removing some random DMA mapping that might still be required. If the PCI function now presents that IOVA, the IOMMU may assumes a rogue DMA access and e.g. on s390 puts the PCI function in error state. The erroneous behavior was seen in a stress-test environment that created memory pressure. Fixes: 5af75c747e2a ("net/mlx5e: Enhanced TX MPWQE for SKBs") Signed-off-by: Gerd Bayer Reviewed-by: Zhu Yanjun Acked-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index b09e9abd39f3..f8c7912abe0e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -642,7 +642,6 @@ mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, return; err_unmap: - mlx5e_dma_unmap_wqe_err(sq, 1); sq->stats->dropped++; dev_kfree_skb_any(skb); mlx5e_tx_flush(sq); From ec793155894140df7421d25903de2e6bc12c695b Mon Sep 17 00:00:00 2001 From: Mohamed Khalfella Date: Wed, 4 Sep 2024 22:02:48 -0600 Subject: [PATCH 464/655] net/mlx5: Added cond_resched() to crdump collection Collecting crdump involves reading vsc registers from pci config space of mlx device, which can take long time to complete. This might result in starving other threads waiting to run on the cpu. Numbers I got from testing ConnectX-5 Ex MCX516A-CDAT in the lab: - mlx5_vsc_gw_read_block_fast() was called with length = 1310716. - mlx5_vsc_gw_read_fast() reads 4 bytes at a time. It was not used to read the entire 1310716 bytes. It was called 53813 times because there are jumps in read_addr. - On average mlx5_vsc_gw_read_fast() took 35284.4ns. - In total mlx5_vsc_wait_on_flag() called vsc_read() 54707 times. The average time for each call was 17548.3ns. In some instances vsc_read() was called more than one time when the flag was not set. As expected the thread released the cpu after 16 iterations in mlx5_vsc_wait_on_flag(). - Total time to read crdump was 35284.4ns * 53813 ~= 1.898s. It was seen in the field that crdump can take more than 5 seconds to complete. During that time mlx5_vsc_wait_on_flag() did not release the cpu because it did not complete 16 iterations. It is believed that pci config reads were slow. Adding cond_resched() every 128 register read improves the situation. In the common case the, crdump takes ~1.8989s, the thread yields the cpu every ~4.51ms. If crdump takes ~5s, the thread yields the cpu every ~18.0ms. Fixes: 8b9d8baae1de ("net/mlx5: Add Crdump support") Reviewed-by: Yuanyuan Zhong Signed-off-by: Mohamed Khalfella Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c index d0b595ba6110..432c98f2626d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c @@ -24,6 +24,11 @@ pci_write_config_dword((dev)->pdev, (dev)->vsc_addr + (offset), (val)) #define VSC_MAX_RETRIES 2048 +/* Reading VSC registers can take relatively long time. + * Yield the cpu every 128 registers read. + */ +#define VSC_GW_READ_BLOCK_COUNT 128 + enum { VSC_CTRL_OFFSET = 0x4, VSC_COUNTER_OFFSET = 0x8, @@ -273,6 +278,7 @@ int mlx5_vsc_gw_read_block_fast(struct mlx5_core_dev *dev, u32 *data, { unsigned int next_read_addr = 0; unsigned int read_addr = 0; + unsigned int count = 0; while (read_addr < length) { if (mlx5_vsc_gw_read_fast(dev, read_addr, &next_read_addr, @@ -280,6 +286,10 @@ int mlx5_vsc_gw_read_block_fast(struct mlx5_core_dev *dev, u32 *data, return read_addr; read_addr = next_read_addr; + if (++count == VSC_GW_READ_BLOCK_COUNT) { + cond_resched(); + count = 0; + } } return length; } From f25389e779500cf4a59ef9804534237841bce536 Mon Sep 17 00:00:00 2001 From: Elena Salomatkina Date: Tue, 24 Sep 2024 19:00:18 +0300 Subject: [PATCH 465/655] net/mlx5e: Fix NULL deref in mlx5e_tir_builder_alloc() In mlx5e_tir_builder_alloc() kvzalloc() may return NULL which is dereferenced on the next line in a reference to the modify field. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: a6696735d694 ("net/mlx5e: Convert TIR to a dedicated object") Signed-off-by: Elena Salomatkina Reviewed-by: Simon Horman Reviewed-by: Kalesh AP Reviewed-by: Tariq Toukan Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tir.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c index d4239e3b3c88..11f724ad90db 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c @@ -23,6 +23,9 @@ struct mlx5e_tir_builder *mlx5e_tir_builder_alloc(bool modify) struct mlx5e_tir_builder *builder; builder = kvzalloc(sizeof(*builder), GFP_KERNEL); + if (!builder) + return NULL; + builder->modify = modify; return builder; From 19da17010a55924f2b5540b0f61652cc5781af85 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Mon, 23 Sep 2024 11:44:30 +0300 Subject: [PATCH 466/655] net/mlx5: Fix wrong reserved field in hca_cap_2 in mlx5_ifc Fixing the wrong size of a field in hca_cap_2. The bug was introduced by adding new fields for HWS and not fixing the reserved field size. Fixes: 34c626c3004a ("net/mlx5: Added missing mlx5_ifc definition for HW Steering") Signed-off-by: Yevgeny Kliteynik Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 620a5c305123..04df1610736e 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2115,7 +2115,7 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 ts_cqe_metadata_size2wqe_counter[0x5]; u8 reserved_at_250[0x10]; - u8 reserved_at_260[0x120]; + u8 reserved_at_260[0x20]; u8 format_select_dw_gtpu_dw_0[0x8]; u8 format_select_dw_gtpu_dw_1[0x8]; From d8c561741ef83980114b3f7f95ffac54600f3f16 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Thu, 19 Sep 2024 12:17:59 +0300 Subject: [PATCH 467/655] net/mlx5: HWS, fixed double-free in error flow of creating SQ When SQ creation fails, call the appropriate mlx5_core destroy function. This fixes the following smatch warnings: divers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_send.c:739 hws_send_ring_open_sq() warn: 'sq->dep_wqe' double freed hws_send_ring_open_sq() warn: 'sq->wq_ctrl.buf.frags' double freed hws_send_ring_open_sq() warn: 'sq->wr_priv' double freed Fixes: 2ca62599aa0b ("net/mlx5: HWS, added send engine and context handling") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/e4ebc227-4b25-49bf-9e4c-14b7ea5c6a07@stanley.mountain/ Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/steering/hws/mlx5hws_send.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_send.c index a1adbb48735c..0c7989184c30 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_send.c @@ -653,6 +653,12 @@ static int hws_send_ring_create_sq(struct mlx5_core_dev *mdev, u32 pdn, return err; } +static void hws_send_ring_destroy_sq(struct mlx5_core_dev *mdev, + struct mlx5hws_send_ring_sq *sq) +{ + mlx5_core_destroy_sq(mdev, sq->sqn); +} + static int hws_send_ring_set_sq_rdy(struct mlx5_core_dev *mdev, u32 sqn) { void *in, *sqc; @@ -696,7 +702,7 @@ static int hws_send_ring_create_sq_rdy(struct mlx5_core_dev *mdev, u32 pdn, err = hws_send_ring_set_sq_rdy(mdev, sq->sqn); if (err) - hws_send_ring_close_sq(sq); + hws_send_ring_destroy_sq(mdev, sq); return err; } From d15525f300109fac5477dce1b8fef244c5dc9ec3 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Mon, 16 Sep 2024 14:13:39 +0300 Subject: [PATCH 468/655] net/mlx5: HWS, changed E2BIG error to a negative return code Fixed all the 'E2BIG' returns in error flow of functions to the negative '-E2BIG' as we are using negative error codes everywhere in HWS code. This also fixes the following smatch warnings: "warn: was negative '-E2BIG' intended?" Fixes: 74a778b4a63f ("net/mlx5: HWS, added definers handling") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/f8c77688-7d83-4937-baba-ac844dfe2e0b@stanley.mountain/ Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/steering/hws/mlx5hws_bwc_complex.c | 2 +- .../mellanox/mlx5/core/steering/hws/mlx5hws_definer.c | 4 ++-- .../mellanox/mlx5/core/steering/hws/mlx5hws_matcher.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_bwc_complex.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_bwc_complex.c index bb563f50ef09..601fad5fc54a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_bwc_complex.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_bwc_complex.c @@ -33,7 +33,7 @@ bool mlx5hws_bwc_match_params_is_complex(struct mlx5hws_context *ctx, * and let the usual match creation path handle it, * both for good and bad flows. */ - if (ret == E2BIG) { + if (ret == -E2BIG) { is_complex = true; mlx5hws_dbg(ctx, "Matcher definer layout: need complex matcher\n"); } else { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_definer.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_definer.c index 3bdb5c90efff..d566d2ddf424 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_definer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_definer.c @@ -1845,7 +1845,7 @@ hws_definer_find_best_match_fit(struct mlx5hws_context *ctx, return 0; } - return E2BIG; + return -E2BIG; } static void @@ -1931,7 +1931,7 @@ mlx5hws_definer_calc_layout(struct mlx5hws_context *ctx, /* Find the match definer layout for header layout match union */ ret = hws_definer_find_best_match_fit(ctx, match_definer, match_hl); if (ret) { - if (ret == E2BIG) + if (ret == -E2BIG) mlx5hws_dbg(ctx, "Failed to create match definer from header layout - E2BIG\n"); else diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_matcher.c index 33d2b31e4b46..61a1155d4b4f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_matcher.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_matcher.c @@ -675,7 +675,7 @@ static int hws_matcher_bind_mt(struct mlx5hws_matcher *matcher) if (!(matcher->flags & MLX5HWS_MATCHER_FLAGS_COLLISION)) { ret = mlx5hws_definer_mt_init(ctx, matcher->mt); if (ret) { - if (ret == E2BIG) + if (ret == -E2BIG) mlx5hws_err(ctx, "Failed to set matcher templates with match definers\n"); return ret; } From 023d2a43ed0d9ab73d4a35757121e4c8e01298e5 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Tue, 13 Aug 2024 13:34:54 +0300 Subject: [PATCH 469/655] net/mlx5e: SHAMPO, Fix overflow of hd_per_wq When having larger RQ sizes and small MTUs sizes, the hd_per_wq variable can overflow. Like in the following case: $> ethtool --set-ring eth1 rx 8192 $> ip link set dev eth1 mtu 144 $> ethtool --features eth1 rx-gro-hw on ... yields in dmesg: mlx5_core 0000:08:00.1: mlx5_cmd_out_err:808:(pid 194797): CREATE_MKEY(0x200) op_mod(0x0) failed, status bad parameter(0x3), syndrome (0x3bf6f), err(-22) because hd_per_wq is 64K which overflows to 0 and makes the command fail. This patch increases the variable size to 32 bit. Fixes: 99be56171fa9 ("net/mlx5e: SHAMPO, Re-enable HW-GRO") Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index da0a1c65ec4a..57b7298a0e79 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -627,7 +627,7 @@ struct mlx5e_shampo_hd { struct mlx5e_dma_info *info; struct mlx5e_frag_page *pages; u16 curr_page_index; - u16 hd_per_wq; + u32 hd_per_wq; u16 hd_per_wqe; unsigned long *bitmap; u16 pi; From 7b124695db40d5c9c5295a94ae928a8d67a01c3d Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Mon, 2 Sep 2024 09:40:58 +0300 Subject: [PATCH 470/655] net/mlx5e: Fix crash caused by calling __xfrm_state_delete() twice The km.state is not checked in driver's delayed work. When xfrm_state_check_expire() is called, the state can be reset to XFRM_STATE_EXPIRED, even if it is XFRM_STATE_DEAD already. This happens when xfrm state is deleted, but not freed yet. As __xfrm_state_delete() is called again in xfrm timer, the following crash occurs. To fix this issue, skip xfrm_state_check_expire() if km.state is not XFRM_STATE_VALID. Oops: general protection fault, probably for non-canonical address 0xdead000000000108: 0000 [#1] SMP CPU: 5 UID: 0 PID: 7448 Comm: kworker/u102:2 Not tainted 6.11.0-rc2+ #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Workqueue: mlx5e_ipsec: eth%d mlx5e_ipsec_handle_sw_limits [mlx5_core] RIP: 0010:__xfrm_state_delete+0x3d/0x1b0 Code: 0f 84 8b 01 00 00 48 89 fd c6 87 c8 00 00 00 05 48 8d bb 40 10 00 00 e8 11 04 1a 00 48 8b 95 b8 00 00 00 48 8b 85 c0 00 00 00 <48> 89 42 08 48 89 10 48 8b 55 10 48 b8 00 01 00 00 00 00 ad de 48 RSP: 0018:ffff88885f945ec8 EFLAGS: 00010246 RAX: dead000000000122 RBX: ffffffff82afa940 RCX: 0000000000000036 RDX: dead000000000100 RSI: 0000000000000000 RDI: ffffffff82afb980 RBP: ffff888109a20340 R08: ffff88885f945ea0 R09: 0000000000000000 R10: 0000000000000000 R11: ffff88885f945ff8 R12: 0000000000000246 R13: ffff888109a20340 R14: ffff88885f95f420 R15: ffff88885f95f400 FS: 0000000000000000(0000) GS:ffff88885f940000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f2163102430 CR3: 00000001128d6001 CR4: 0000000000370eb0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? die_addr+0x33/0x90 ? exc_general_protection+0x1a2/0x390 ? asm_exc_general_protection+0x22/0x30 ? __xfrm_state_delete+0x3d/0x1b0 ? __xfrm_state_delete+0x2f/0x1b0 xfrm_timer_handler+0x174/0x350 ? __xfrm_state_delete+0x1b0/0x1b0 __hrtimer_run_queues+0x121/0x270 hrtimer_run_softirq+0x88/0xd0 handle_softirqs+0xcc/0x270 do_softirq+0x3c/0x50 __local_bh_enable_ip+0x47/0x50 mlx5e_ipsec_handle_sw_limits+0x7d/0x90 [mlx5_core] process_one_work+0x137/0x2d0 worker_thread+0x28d/0x3a0 ? rescuer_thread+0x480/0x480 kthread+0xb8/0xe0 ? kthread_park+0x80/0x80 ret_from_fork+0x2d/0x50 ? kthread_park+0x80/0x80 ret_from_fork_asm+0x11/0x20 Fixes: b2f7b01d36a9 ("net/mlx5e: Simulate missing IPsec TX limits hardware functionality") Signed-off-by: Jianbo Liu Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 3d274599015b..ca92e518be76 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -67,7 +67,6 @@ static void mlx5e_ipsec_handle_sw_limits(struct work_struct *_work) return; spin_lock_bh(&x->lock); - xfrm_state_check_expire(x); if (x->km.state == XFRM_STATE_EXPIRED) { sa_entry->attrs.drop = true; spin_unlock_bh(&x->lock); @@ -75,6 +74,13 @@ static void mlx5e_ipsec_handle_sw_limits(struct work_struct *_work) mlx5e_accel_ipsec_fs_modify(sa_entry); return; } + + if (x->km.state != XFRM_STATE_VALID) { + spin_unlock_bh(&x->lock); + return; + } + + xfrm_state_check_expire(x); spin_unlock_bh(&x->lock); queue_delayed_work(sa_entry->ipsec->wq, &dwork->dwork, From 4c411cca33cf1c21946b710b2eb59aca9f646703 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 13 Sep 2024 19:51:43 +0300 Subject: [PATCH 471/655] intel_idle: fix ACPI _CST matching for newer Xeon platforms Background ~~~~~~~~~~ The driver uses 'use_acpi = true' in C-state custom table for all Xeon platforms. The meaning of this flag is as follows. 1. If a C-state from the custom table is defined in ACPI _CST (matched by the mwait hint), then enable this C-state. 2. Otherwise, disable this C-state, unless the C-sate definition in the custom table has the 'CPUIDLE_FLAG_ALWAYS_ENABLE' flag set, in which case enabled it. The goal is to honor BIOS C6 settings - If BIOS disables C6, disable it by default in the OS too (but it can be enabled via sysfs). This works well on Xeons that expose only one flavor of C6. This are all Xeons except for the newest Granite Rapids (GNR) and Sierra Forest (SRF). The problem ~~~~~~~~~~~ GNR and SRF have 2 flavors of C6: C6/C6P on GNR, C6S/C6SP on SRF. The the "P" flavor allows for the package C6, while the "non-P" flavor allows only for core/module C6. As far as this patch is concerned, both GNR and SRF platforms are handled the same way. Therefore, further discussion is focused on GNR, but it applies to SRF as well. On Intel Xeon platforms, BIOS exposes only 2 ACPI C-states: C1 and C2. Well, depending on BIOS settings, C2 may be named as C3. But there still will be only 2 states - C1 and C3. But this is a non-essential detail, so further discussion is focused on the ACPI C1 and C2 case. On pre-GNR/SRF Xeon platforms, ACPI C1 is mapped to C1 or C1E, and ACPI C2 is mapped to C6. The 'use_acpi' flag works just fine: * If ACPI C2 enabled, enable C6. * Otherwise, disable C6. However, on GNR there are 2 flavors of C6, so BIOS maps ACPI C2 to either C6 or C6P, depending on the user settings. As a result, due to the 'use_acpi' flag, 'intel_idle' disables least one of the C6 flavors. BIOS | OS | Verdict ----------------------------------------------------|--------- ACPI C2 disabled | C6 disabled, C6P disabled | OK ACPI C2 mapped to C6 | C6 enabled, C6P disabled | Not OK ACPI C2 mapped to C6P | C6 disabled, C6P enabled | Not OK The goal of 'use_acpi' is to honor BIOS ACPI C2 disabled case, which works fine. But if ACPI C2 is enabled, the goal is to enable all flavors of C6, not just one of the flavors. This was overlooked when enabling GNR/SRF platforms. In other words, before GNR/SRF, the ACPI C2 status was binary - enabled or disabled. But it is not binary on GNR/SRF, however the goal is to continue treat it as binary. The fix ~~~~~~~ Notice, that current algorithm matches ACPI and custom table C-states by the mwait hint. However, mwait hint consists of the 'state' and 'sub-state' parts, and all C6 flavors have the same state value of 0x20, but different sub-state values. Introduce new C-state table flag - CPUIDLE_FLAG_PARTIAL_HINT_MATCH and add it to both C6 flavors of the GNR/SRF platforms. When matching ACPI _CST and custom table C-states, match only the start part if the C-state has CPUIDLE_FLAG_PARTIAL_HINT_MATCH, other wise match both state and sub-state parts (as before). With this fix, GNR C-states enabled/disabled status looks like this. BIOS | OS ---------------------------------------------------- ACPI C2 disabled | C6 disabled, C6P disabled ACPI C2 mapped to C6 | C6 enabled, C6P enabled ACPI C2 mapped to C6P | C6 enabled, C6P enabled Possible alternative ~~~~~~~~~~~~~~~~~~~~ The alternative would be to remove 'use_acpi' flag for GNR and SRF. This would be a simpler solution, but it would violate the principle of least surprise - users of Xeon platforms are used to the fact that intel_idle honors C6 enabled/disabled flag. It is more consistent user experience if GNR/SRF continue doing so. How tested ~~~~~~~~~~ Tested on GNR and SRF platform with all the 3 BIOS configurations: ACPI C2 disabled, mapped to C6/C6S, mapped to C6P/C6SP. Tested on Ice lake Xeon and Sapphire Rapids Xeon platforms with ACPI C2 enabled and disabled, just to verify that the patch does not break older Xeons. Fixes: 92813fd5b156 ("intel_idle: add Sierra Forest SoC support") Fixes: 370406bf5738 ("intel_idle: add Granite Rapids Xeon support") Cc: 6.8+ # 6.8+ Signed-off-by: Artem Bityutskiy Link: https://patch.msgid.link/20240913165143.4140073-1-dedekind1@gmail.com [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 9457e34b9e32..67aebfe0fed6 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -120,6 +120,12 @@ static unsigned int mwait_substates __initdata; */ #define CPUIDLE_FLAG_INIT_XSTATE BIT(17) +/* + * Ignore the sub-state when matching mwait hints between the ACPI _CST and + * custom tables. + */ +#define CPUIDLE_FLAG_PARTIAL_HINT_MATCH BIT(18) + /* * MWAIT takes an 8-bit "hint" in EAX "suggesting" * the C-state (top nibble) and sub-state (bottom nibble) @@ -1043,7 +1049,8 @@ static struct cpuidle_state gnr_cstates[] __initdata = { .name = "C6", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | - CPUIDLE_FLAG_INIT_XSTATE, + CPUIDLE_FLAG_INIT_XSTATE | + CPUIDLE_FLAG_PARTIAL_HINT_MATCH, .exit_latency = 170, .target_residency = 650, .enter = &intel_idle, @@ -1052,7 +1059,8 @@ static struct cpuidle_state gnr_cstates[] __initdata = { .name = "C6P", .desc = "MWAIT 0x21", .flags = MWAIT2flg(0x21) | CPUIDLE_FLAG_TLB_FLUSHED | - CPUIDLE_FLAG_INIT_XSTATE, + CPUIDLE_FLAG_INIT_XSTATE | + CPUIDLE_FLAG_PARTIAL_HINT_MATCH, .exit_latency = 210, .target_residency = 1000, .enter = &intel_idle, @@ -1354,7 +1362,8 @@ static struct cpuidle_state srf_cstates[] __initdata = { { .name = "C6S", .desc = "MWAIT 0x22", - .flags = MWAIT2flg(0x22) | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x22) | CPUIDLE_FLAG_TLB_FLUSHED | + CPUIDLE_FLAG_PARTIAL_HINT_MATCH, .exit_latency = 270, .target_residency = 700, .enter = &intel_idle, @@ -1362,7 +1371,8 @@ static struct cpuidle_state srf_cstates[] __initdata = { { .name = "C6SP", .desc = "MWAIT 0x23", - .flags = MWAIT2flg(0x23) | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x23) | CPUIDLE_FLAG_TLB_FLUSHED | + CPUIDLE_FLAG_PARTIAL_HINT_MATCH, .exit_latency = 310, .target_residency = 900, .enter = &intel_idle, @@ -1744,7 +1754,7 @@ static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) } } -static bool __init intel_idle_off_by_default(u32 mwait_hint) +static bool __init intel_idle_off_by_default(unsigned int flags, u32 mwait_hint) { int cstate, limit; @@ -1761,7 +1771,15 @@ static bool __init intel_idle_off_by_default(u32 mwait_hint) * the interesting states are ACPI_CSTATE_FFH. */ for (cstate = 1; cstate < limit; cstate++) { - if (acpi_state_table.states[cstate].address == mwait_hint) + u32 acpi_hint = acpi_state_table.states[cstate].address; + u32 table_hint = mwait_hint; + + if (flags & CPUIDLE_FLAG_PARTIAL_HINT_MATCH) { + acpi_hint &= ~MWAIT_SUBSTATE_MASK; + table_hint &= ~MWAIT_SUBSTATE_MASK; + } + + if (acpi_hint == table_hint) return false; } return true; @@ -1771,7 +1789,10 @@ static bool __init intel_idle_off_by_default(u32 mwait_hint) static inline bool intel_idle_acpi_cst_extract(void) { return false; } static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { } -static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; } +static inline bool intel_idle_off_by_default(unsigned int flags, u32 mwait_hint) +{ + return false; +} #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */ /** @@ -2098,7 +2119,7 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) if ((disabled_states_mask & BIT(drv->state_count)) || ((icpu->use_acpi || force_use_acpi) && - intel_idle_off_by_default(mwait_hint) && + intel_idle_off_by_default(state->flags, mwait_hint) && !(state->flags & CPUIDLE_FLAG_ALWAYS_ENABLE))) state->flags |= CPUIDLE_FLAG_OFF; From 1e123fd73deb16cb362ecefb55c90c9196f4a6c2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 25 Sep 2024 11:58:44 -1000 Subject: [PATCH 472/655] sched_ext: Add __COMPAT helpers for features added during v6.12 devel cycle cgroup support and scx_bpf_dispatch[_vtime]_from_dsq() are newly added since 8bb30798fd6e ("sched_ext: Fixes incorrect type in bpf_scx_init()") which is the current earliest commit targeted by BPF schedulers. Add compat helpers for them and apply them in the example schedulers. These will be dropped after a few kernel releases. The exact backward compatibility window hasn't been decided yet. Signed-off-by: Tejun Heo --- tools/sched_ext/include/scx/compat.bpf.h | 19 +++++++++++++++++++ tools/sched_ext/scx_flatcg.bpf.c | 10 +++++----- tools/sched_ext/scx_qmap.bpf.c | 12 ++++++------ 3 files changed, 30 insertions(+), 11 deletions(-) diff --git a/tools/sched_ext/include/scx/compat.bpf.h b/tools/sched_ext/include/scx/compat.bpf.h index 3d2fe1208900..e5afe9efd3f3 100644 --- a/tools/sched_ext/include/scx/compat.bpf.h +++ b/tools/sched_ext/include/scx/compat.bpf.h @@ -15,6 +15,25 @@ __ret; \ }) +/* v6.12: 819513666966 ("sched_ext: Add cgroup support") */ +#define __COMPAT_scx_bpf_task_cgroup(p) \ + (bpf_ksym_exists(scx_bpf_task_cgroup) ? \ + scx_bpf_task_cgroup((p)) : NULL) + +/* v6.12: 4c30f5ce4f7a ("sched_ext: Implement scx_bpf_dispatch[_vtime]_from_dsq()") */ +#define __COMPAT_scx_bpf_dispatch_from_dsq_set_slice(it, slice) \ + (bpf_ksym_exists(scx_bpf_dispatch_from_dsq_set_slice) ? \ + scx_bpf_dispatch_from_dsq_set_slice((it), (slice)) : (void)0) +#define __COMPAT_scx_bpf_dispatch_from_dsq_set_vtime(it, vtime) \ + (bpf_ksym_exists(scx_bpf_dispatch_from_dsq_set_vtime) ? \ + scx_bpf_dispatch_from_dsq_set_vtime((it), (vtime)) : (void)0) +#define __COMPAT_scx_bpf_dispatch_from_dsq(it, p, dsq_id, enq_flags) \ + (bpf_ksym_exists(scx_bpf_dispatch_from_dsq) ? \ + scx_bpf_dispatch_from_dsq((it), (p), (dsq_id), (enq_flags)) : false) +#define __COMPAT_scx_bpf_dispatch_vtime_from_dsq(it, p, dsq_id, enq_flags) \ + (bpf_ksym_exists(scx_bpf_dispatch_vtime_from_dsq) ? \ + scx_bpf_dispatch_vtime_from_dsq((it), (p), (dsq_id), (enq_flags)) : false) + /* * Define sched_ext_ops. This may be expanded to define multiple variants for * backward compatibility. See compat.h::SCX_OPS_LOAD/ATTACH(). diff --git a/tools/sched_ext/scx_flatcg.bpf.c b/tools/sched_ext/scx_flatcg.bpf.c index 3ab2b60781a0..936415b98ae7 100644 --- a/tools/sched_ext/scx_flatcg.bpf.c +++ b/tools/sched_ext/scx_flatcg.bpf.c @@ -383,7 +383,7 @@ void BPF_STRUCT_OPS(fcg_enqueue, struct task_struct *p, u64 enq_flags) return; } - cgrp = scx_bpf_task_cgroup(p); + cgrp = __COMPAT_scx_bpf_task_cgroup(p); cgc = find_cgrp_ctx(cgrp); if (!cgc) goto out_release; @@ -509,7 +509,7 @@ void BPF_STRUCT_OPS(fcg_runnable, struct task_struct *p, u64 enq_flags) { struct cgroup *cgrp; - cgrp = scx_bpf_task_cgroup(p); + cgrp = __COMPAT_scx_bpf_task_cgroup(p); update_active_weight_sums(cgrp, true); bpf_cgroup_release(cgrp); } @@ -522,7 +522,7 @@ void BPF_STRUCT_OPS(fcg_running, struct task_struct *p) if (fifo_sched) return; - cgrp = scx_bpf_task_cgroup(p); + cgrp = __COMPAT_scx_bpf_task_cgroup(p); cgc = find_cgrp_ctx(cgrp); if (cgc) { /* @@ -565,7 +565,7 @@ void BPF_STRUCT_OPS(fcg_stopping, struct task_struct *p, bool runnable) if (!taskc->bypassed_at) return; - cgrp = scx_bpf_task_cgroup(p); + cgrp = __COMPAT_scx_bpf_task_cgroup(p); cgc = find_cgrp_ctx(cgrp); if (cgc) { __sync_fetch_and_add(&cgc->cvtime_delta, @@ -579,7 +579,7 @@ void BPF_STRUCT_OPS(fcg_quiescent, struct task_struct *p, u64 deq_flags) { struct cgroup *cgrp; - cgrp = scx_bpf_task_cgroup(p); + cgrp = __COMPAT_scx_bpf_task_cgroup(p); update_active_weight_sums(cgrp, false); bpf_cgroup_release(cgrp); } diff --git a/tools/sched_ext/scx_qmap.bpf.c b/tools/sched_ext/scx_qmap.bpf.c index 83c8f54c1e31..67e2a7968cc9 100644 --- a/tools/sched_ext/scx_qmap.bpf.c +++ b/tools/sched_ext/scx_qmap.bpf.c @@ -318,11 +318,11 @@ static bool dispatch_highpri(bool from_timer) if (tctx->highpri) { /* exercise the set_*() and vtime interface too */ - scx_bpf_dispatch_from_dsq_set_slice( + __COMPAT_scx_bpf_dispatch_from_dsq_set_slice( BPF_FOR_EACH_ITER, slice_ns * 2); - scx_bpf_dispatch_from_dsq_set_vtime( + __COMPAT_scx_bpf_dispatch_from_dsq_set_vtime( BPF_FOR_EACH_ITER, highpri_seq++); - scx_bpf_dispatch_vtime_from_dsq( + __COMPAT_scx_bpf_dispatch_vtime_from_dsq( BPF_FOR_EACH_ITER, p, HIGHPRI_DSQ, 0); } } @@ -340,9 +340,9 @@ static bool dispatch_highpri(bool from_timer) else cpu = scx_bpf_pick_any_cpu(p->cpus_ptr, 0); - if (scx_bpf_dispatch_from_dsq(BPF_FOR_EACH_ITER, p, - SCX_DSQ_LOCAL_ON | cpu, - SCX_ENQ_PREEMPT)) { + if (__COMPAT_scx_bpf_dispatch_from_dsq(BPF_FOR_EACH_ITER, p, + SCX_DSQ_LOCAL_ON | cpu, + SCX_ENQ_PREEMPT)) { if (cpu == this_cpu) { dispatched = true; __sync_fetch_and_add(&nr_expedited_local, 1); From a748db0c8c6a88da66c3ab3791bd1a229f4a7fee Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 25 Sep 2024 12:22:37 -1000 Subject: [PATCH 473/655] tools/sched_ext: Receive misc updates from SCX repo Receive misc tools/sched_ext updates from https://github.com/sched-ext/scx to sync userspace bits. - LSP macros to help language servers. - bpf_cpumask_weight() declaration and cast_mask() helper. - Cosmetic updates to scx_flatcg.bpf.c. Signed-off-by: Tejun Heo --- tools/sched_ext/include/scx/common.bpf.h | 15 +++++++++++++++ tools/sched_ext/include/scx/user_exit_info.h | 4 ++++ tools/sched_ext/scx_flatcg.bpf.c | 5 ++--- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h index f538c75db183..225f61f9bfca 100644 --- a/tools/sched_ext/include/scx/common.bpf.h +++ b/tools/sched_ext/include/scx/common.bpf.h @@ -7,7 +7,13 @@ #ifndef __SCX_COMMON_BPF_H #define __SCX_COMMON_BPF_H +#ifdef LSP +#define __bpf__ +#include "../vmlinux/vmlinux.h" +#else #include "vmlinux.h" +#endif + #include #include #include @@ -309,6 +315,15 @@ void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src) __ksym u32 bpf_cpumask_any_distribute(const struct cpumask *cpumask) __ksym; u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1, const struct cpumask *src2) __ksym; +u32 bpf_cpumask_weight(const struct cpumask *cpumask) __ksym; + +/* + * Access a cpumask in read-only mode (typically to check bits). + */ +const struct cpumask *cast_mask(struct bpf_cpumask *mask) +{ + return (const struct cpumask *)mask; +} /* rcu */ void bpf_rcu_read_lock(void) __ksym; diff --git a/tools/sched_ext/include/scx/user_exit_info.h b/tools/sched_ext/include/scx/user_exit_info.h index 891693ee604e..8ce2734402e1 100644 --- a/tools/sched_ext/include/scx/user_exit_info.h +++ b/tools/sched_ext/include/scx/user_exit_info.h @@ -25,7 +25,11 @@ struct user_exit_info { #ifdef __bpf__ +#ifdef LSP +#include "../vmlinux/vmlinux.h" +#else #include "vmlinux.h" +#endif #include #define UEI_DEFINE(__name) \ diff --git a/tools/sched_ext/scx_flatcg.bpf.c b/tools/sched_ext/scx_flatcg.bpf.c index 936415b98ae7..e272bc39bbbd 100644 --- a/tools/sched_ext/scx_flatcg.bpf.c +++ b/tools/sched_ext/scx_flatcg.bpf.c @@ -225,7 +225,7 @@ static void cgrp_refresh_hweight(struct cgroup *cgrp, struct fcg_cgrp_ctx *cgc) break; /* - * We can be oppotunistic here and not grab the + * We can be opportunistic here and not grab the * cgv_tree_lock and deal with the occasional races. * However, hweight updates are already cached and * relatively low-frequency. Let's just do the @@ -258,8 +258,7 @@ static void cgrp_cap_budget(struct cgv_node *cgv_node, struct fcg_cgrp_ctx *cgc) * and thus can't be updated and repositioned. Instead, we collect the * vtime deltas separately and apply it asynchronously here. */ - delta = cgc->cvtime_delta; - __sync_fetch_and_sub(&cgc->cvtime_delta, delta); + delta = __sync_fetch_and_sub(&cgc->cvtime_delta, cgc->cvtime_delta); cvtime = cgv_node->cvtime + delta; /* From d782d6e1d9078d6b82f8468dd6421050165e7d75 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Mon, 23 Sep 2024 22:39:11 +0900 Subject: [PATCH 474/655] ksmbd: remove unsafe_memcpy use in session setup Kees pointed out to just use directly ->Buffer instead of pointing ->Buffer using offset not to use unsafe_memcpy(). Suggested-by: Kees Cook Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 7121266daa02..72af3ab40b5c 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -1335,8 +1335,7 @@ static int ntlm_negotiate(struct ksmbd_work *work, return rc; sz = le16_to_cpu(rsp->SecurityBufferOffset); - chgblob = - (struct challenge_message *)((char *)&rsp->hdr.ProtocolId + sz); + chgblob = (struct challenge_message *)rsp->Buffer; memset(chgblob, 0, sizeof(struct challenge_message)); if (!work->conn->use_spnego) { @@ -1369,9 +1368,7 @@ static int ntlm_negotiate(struct ksmbd_work *work, goto out; } - sz = le16_to_cpu(rsp->SecurityBufferOffset); - unsafe_memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob, spnego_blob_len, - /* alloc is larger than blob, see smb2_allocate_rsp_buf() */); + memcpy(rsp->Buffer, spnego_blob, spnego_blob_len); rsp->SecurityBufferLength = cpu_to_le16(spnego_blob_len); out: @@ -1453,10 +1450,7 @@ static int ntlm_authenticate(struct ksmbd_work *work, if (rc) return -ENOMEM; - sz = le16_to_cpu(rsp->SecurityBufferOffset); - unsafe_memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob, - spnego_blob_len, - /* alloc is larger than blob, see smb2_allocate_rsp_buf() */); + memcpy(rsp->Buffer, spnego_blob, spnego_blob_len); rsp->SecurityBufferLength = cpu_to_le16(spnego_blob_len); kfree(spnego_blob); } From 8e2f6a0e2dc9db663b4ba2225822e7a3c4047bfb Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 24 Sep 2024 22:39:29 +0900 Subject: [PATCH 475/655] ksmbd: fix open failure from block and char device file char/block device file can't be opened with dentry_open() if device driver is not loaded. Use O_PATH flags for fake opening file to handle it if file is a block or char file. Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 72af3ab40b5c..7460089c186f 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -2052,18 +2052,20 @@ int smb2_tree_connect(struct ksmbd_work *work) * @access: file access flags * @disposition: file disposition flags * @may_flags: set with MAY_ flags - * @is_dir: is creating open flags for directory + * @coptions: file creation options + * @mode: file mode * * Return: file open flags */ static int smb2_create_open_flags(bool file_present, __le32 access, __le32 disposition, int *may_flags, - bool is_dir) + __le32 coptions, + umode_t mode) { int oflags = O_NONBLOCK | O_LARGEFILE; - if (is_dir) { + if (coptions & FILE_DIRECTORY_FILE_LE || S_ISDIR(mode)) { access &= ~FILE_WRITE_DESIRE_ACCESS_LE; ksmbd_debug(SMB, "Discard write access to a directory\n"); } @@ -2080,7 +2082,7 @@ static int smb2_create_open_flags(bool file_present, __le32 access, *may_flags = MAY_OPEN | MAY_READ; } - if (access == FILE_READ_ATTRIBUTES_LE) + if (access == FILE_READ_ATTRIBUTES_LE || S_ISBLK(mode) || S_ISCHR(mode)) oflags |= O_PATH; if (file_present) { @@ -3175,8 +3177,8 @@ int smb2_open(struct ksmbd_work *work) open_flags = smb2_create_open_flags(file_present, daccess, req->CreateDisposition, &may_flags, - req->CreateOptions & FILE_DIRECTORY_FILE_LE || - (file_present && S_ISDIR(d_inode(path.dentry)->i_mode))); + req->CreateOptions, + file_present ? d_inode(path.dentry)->i_mode : 0); if (!test_tree_conn_flag(tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) { if (open_flags & (O_CREAT | O_TRUNC)) { From 9e676e571d39eb6189bf6d55a9c401ba2dd13410 Mon Sep 17 00:00:00 2001 From: Shen Lichuan Date: Wed, 25 Sep 2024 15:43:23 +0800 Subject: [PATCH 476/655] ksmbd: Correct typos in multiple comments across various files Fixed some confusing typos that were currently identified witch codespell, the details are as follows: -in the code comments: fs/smb/common/smb2pdu.h:9: specfication ==> specification fs/smb/common/smb2pdu.h:494: usally ==> usually fs/smb/common/smb2pdu.h:1064: Attrubutes ==> Attributes fs/smb/server/connection.c:28: cleand ==> cleaned fs/smb/server/ksmbd_netlink.h:216: struture ==> structure fs/smb/server/oplock.c:799: conains ==> contains fs/smb/server/oplock.c:1487: containted ==> contained fs/smb/server/server.c:282: proccessing ==> processing fs/smb/server/smb_common.c:491: comforms ==> conforms fs/smb/server/xattr.h:102: ATTRIBUITE ==> ATTRIBUTE Signed-off-by: Shen Lichuan Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smb2pdu.h | 6 +++--- fs/smb/server/connection.c | 2 +- fs/smb/server/ksmbd_netlink.h | 2 +- fs/smb/server/oplock.c | 4 ++-- fs/smb/server/server.c | 2 +- fs/smb/server/smb_common.c | 2 +- fs/smb/server/xattr.h | 2 +- 7 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h index c769f9dbc0b4..9f272cc8f566 100644 --- a/fs/smb/common/smb2pdu.h +++ b/fs/smb/common/smb2pdu.h @@ -6,7 +6,7 @@ * Note that, due to trying to use names similar to the protocol specifications, * there are many mixed case field names in the structures below. Although * this does not match typical Linux kernel style, it is necessary to be - * able to match against the protocol specfication. + * able to match against the protocol specification. * * SMB2 commands * Some commands have minimal (wct=0,bcc=0), or uninteresting, responses @@ -491,7 +491,7 @@ struct smb2_encryption_neg_context { __le16 ContextType; /* 2 */ __le16 DataLength; __le32 Reserved; - /* CipherCount usally 2, but can be 3 when AES256-GCM enabled */ + /* CipherCount usually 2, but can be 3 when AES256-GCM enabled */ __le16 CipherCount; /* AES128-GCM and AES128-CCM by default */ __le16 Ciphers[]; } __packed; @@ -1061,7 +1061,7 @@ struct smb2_server_client_notification { #define IL_IMPERSONATION cpu_to_le32(0x00000002) #define IL_DELEGATE cpu_to_le32(0x00000003) -/* File Attrubutes */ +/* File Attributes */ #define FILE_ATTRIBUTE_READONLY 0x00000001 #define FILE_ATTRIBUTE_HIDDEN 0x00000002 #define FILE_ATTRIBUTE_SYSTEM 0x00000004 diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c index cac80e7bfefc..aa2a37a7ce84 100644 --- a/fs/smb/server/connection.c +++ b/fs/smb/server/connection.c @@ -25,7 +25,7 @@ DECLARE_RWSEM(conn_list_lock); /** * ksmbd_conn_free() - free resources of the connection instance * - * @conn: connection instance to be cleand up + * @conn: connection instance to be cleaned up * * During the thread termination, the corresponding conn instance * resources(sock/memory) are released and finally the conn object is freed. diff --git a/fs/smb/server/ksmbd_netlink.h b/fs/smb/server/ksmbd_netlink.h index f4e55199938d..38e6fd2da3b8 100644 --- a/fs/smb/server/ksmbd_netlink.h +++ b/fs/smb/server/ksmbd_netlink.h @@ -213,7 +213,7 @@ struct ksmbd_tree_connect_response { }; /* - * IPC Request struture to disconnect tree connection. + * IPC Request structure to disconnect tree connection. */ struct ksmbd_tree_disconnect_request { __u64 session_id; /* session id */ diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 246cde380dfb..4142c7ad5fa9 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -796,7 +796,7 @@ static void __smb2_lease_break_noti(struct work_struct *wk) /** * smb2_lease_break_noti() - break lease when a new client request * write lease - * @opinfo: conains lease state information + * @opinfo: contains lease state information * * Return: 0 on success, otherwise error */ @@ -1484,7 +1484,7 @@ void create_lease_buf(u8 *rbuf, struct lease *lease) } /** - * parse_lease_state() - parse lease context containted in file open request + * parse_lease_state() - parse lease context contained in file open request * @open_req: buffer containing smb2 file open(create) request * * Return: allocated lease context object on success, otherwise NULL diff --git a/fs/smb/server/server.c b/fs/smb/server/server.c index c402d4abe826..231d2d224656 100644 --- a/fs/smb/server/server.c +++ b/fs/smb/server/server.c @@ -279,7 +279,7 @@ static void handle_ksmbd_work(struct work_struct *wk) /** * queue_ksmbd_work() - queue a smb request to worker thread queue - * for proccessing smb command and sending response + * for processing smb command and sending response * @conn: connection instance * * read remaining data from socket create and submit work. diff --git a/fs/smb/server/smb_common.c b/fs/smb/server/smb_common.c index cc4bb2377cbd..5b8d75e78ffb 100644 --- a/fs/smb/server/smb_common.c +++ b/fs/smb/server/smb_common.c @@ -488,7 +488,7 @@ int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work, int info_level, * @shortname: destination short filename * * Return: shortname length or 0 when source long name is '.' or '..' - * TODO: Though this function comforms the restriction of 8.3 Filename spec, + * TODO: Though this function conforms the restriction of 8.3 Filename spec, * but the result is different with Windows 7's one. need to check. */ int ksmbd_extract_shortname(struct ksmbd_conn *conn, const char *longname, diff --git a/fs/smb/server/xattr.h b/fs/smb/server/xattr.h index fa3e27d6971b..505101a8104c 100644 --- a/fs/smb/server/xattr.h +++ b/fs/smb/server/xattr.h @@ -99,7 +99,7 @@ struct xattr_ntacl { __u8 posix_acl_hash[XATTR_SD_HASH_SIZE]; /* 64bytes hash for posix acl */ }; -/* DOS ATTRIBUITE XATTR PREFIX */ +/* DOS ATTRIBUTE XATTR PREFIX */ #define DOS_ATTRIBUTE_PREFIX "DOSATTRIB" #define DOS_ATTRIBUTE_PREFIX_LEN (sizeof(DOS_ATTRIBUTE_PREFIX) - 1) #define XATTR_NAME_DOS_ATTRIBUTE (XATTR_USER_PREFIX DOS_ATTRIBUTE_PREFIX) From 84db6f27b26b5bebeeb85d1b6f6c035daa6f2ac2 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Tue, 24 Sep 2024 11:24:23 +0200 Subject: [PATCH 477/655] soc: ep93xx: drop reference to removed EP93XX_SOC_COMMON config Commit 6eab0ce6e1c6 ("soc: Add SoC driver for Cirrus ep93xx") adds the config EP93XX_SOC referring to the config EP93XX_SOC_COMMON. Within the same patch series of the commit above, the commit 046322f1e1d9 ("ARM: ep93xx: DT for the Cirrus ep93xx SoC platforms") then removes the config EP93XX_SOC_COMMON. With that the reference to this config is obsolete. Simplify the expression in the EP93XX_SOC config definition. Signed-off-by: Lukas Bulwahn Reviewed-by: Nikita Shubin Signed-off-by: Arnd Bergmann --- drivers/soc/cirrus/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/cirrus/Kconfig b/drivers/soc/cirrus/Kconfig index f2fd0e16a196..d8b3b1e68998 100644 --- a/drivers/soc/cirrus/Kconfig +++ b/drivers/soc/cirrus/Kconfig @@ -6,7 +6,7 @@ config EP93XX_SOC bool "Cirrus EP93xx chips SoC" select SOC_BUS select AUXILIARY_BUS - default y if !EP93XX_SOC_COMMON + default y help Enable support SoC for Cirrus EP93xx chips. From a481b9d2baf77d8153361ff19634530bc7272899 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Tue, 17 Sep 2024 19:34:01 +0300 Subject: [PATCH 478/655] MAINTAINERS: Update EP93XX ARM ARCHITECTURE maintainer Add myself as maintainer of EP93XX ARCHITECTURE. CC: Alexander Sverdlin CC: Arnd Bergmann Signed-off-by: Nikita Shubin Acked-by: Alexander Sverdlin Signed-off-by: Arnd Bergmann --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index f328373463b0..61a0e0abaa2f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2171,6 +2171,7 @@ N: clps711x ARM/CIRRUS LOGIC EP93XX ARM ARCHITECTURE M: Hartley Sweeten M: Alexander Sverdlin +M: Nikita Shubin L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: Documentation/devicetree/bindings/iio/adc/cirrus,ep9301-adc.yaml From e3eb39e6bab564ed430172f37be835f84e923c23 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 25 Sep 2024 12:35:10 -0500 Subject: [PATCH 479/655] dt-bindings: gpio: ep9301: Add missing "#interrupt-cells" to examples Enabling dtc interrupt_provider check reveals the examples are missing the "#interrupt-cells" property as it is a dependency of "interrupt-controller". Some of the indentation is off, so fix that too. Signed-off-by: Rob Herring (Arm) Reviewed-by: Nikita Shubin Signed-off-by: Arnd Bergmann --- Documentation/devicetree/bindings/gpio/gpio-ep9301.yaml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/gpio/gpio-ep9301.yaml b/Documentation/devicetree/bindings/gpio/gpio-ep9301.yaml index daadfb4926c3..3a1079d6ee20 100644 --- a/Documentation/devicetree/bindings/gpio/gpio-ep9301.yaml +++ b/Documentation/devicetree/bindings/gpio/gpio-ep9301.yaml @@ -73,9 +73,10 @@ examples: reg-names = "data", "dir", "intr"; gpio-controller; #gpio-cells = <2>; - interrupt-controller; - interrupt-parent = <&vic1>; - interrupts = <27>; + interrupt-controller; + #interrupt-cells = <2>; + interrupt-parent = <&vic1>; + interrupts = <27>; }; gpio@80840004 { @@ -87,6 +88,7 @@ examples: gpio-controller; #gpio-cells = <2>; interrupt-controller; + #interrupt-cells = <2>; interrupt-parent = <&vic1>; interrupts = <27>; }; @@ -127,6 +129,7 @@ examples: gpio-controller; #gpio-cells = <2>; interrupt-controller; + #interrupt-cells = <2>; interrupts-extended = <&vic0 19>, <&vic0 20>, <&vic0 21>, <&vic0 22>, <&vic1 15>, <&vic1 16>, From 76f1ed087b562a469f2153076f179854b749c09a Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 25 Sep 2024 20:01:20 +0200 Subject: [PATCH 480/655] netfilter: uapi: NFTA_FLOWTABLE_HOOK is NLA_NESTED Fix the comment which incorrectly defines it as NLA_U32. Fixes: 3b49e2e94e6e ("netfilter: nf_tables: add flow table netlink frontend") Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index d6476ca5d7a6..9e9079321380 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1694,7 +1694,7 @@ enum nft_flowtable_flags { * * @NFTA_FLOWTABLE_TABLE: name of the table containing the expression (NLA_STRING) * @NFTA_FLOWTABLE_NAME: name of this flow table (NLA_STRING) - * @NFTA_FLOWTABLE_HOOK: netfilter hook configuration(NLA_U32) + * @NFTA_FLOWTABLE_HOOK: netfilter hook configuration (NLA_NESTED) * @NFTA_FLOWTABLE_USE: number of references to this flow table (NLA_U32) * @NFTA_FLOWTABLE_HANDLE: object handle (NLA_U64) * @NFTA_FLOWTABLE_FLAGS: flags (NLA_U32) From 977fae6d611764d41de19b9ba01699b1618148f7 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Sat, 24 Aug 2024 20:06:09 +0800 Subject: [PATCH 481/655] sh: Remove unused declarations for make_maskreg_irq() and irq_mask_register make_maskreg_irq() and irq_mask_register have been removed since commit 5a4053b23262 ("sh: Kill off dead boards."), so remove the unused declarations. Signed-off-by: Gaosheng Cui Reviewed-by: John Paul Adrian Glaubitz Signed-off-by: John Paul Adrian Glaubitz --- arch/sh/include/asm/irq.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/sh/include/asm/irq.h b/arch/sh/include/asm/irq.h index 0f384b1f45ca..53fc18a3d4c2 100644 --- a/arch/sh/include/asm/irq.h +++ b/arch/sh/include/asm/irq.h @@ -13,12 +13,6 @@ */ #define NO_IRQ_IGNORE ((unsigned int)-1) -/* - * Simple Mask Register Support - */ -extern void make_maskreg_irq(unsigned int irq); -extern unsigned short *irq_mask_register; - /* * PINT IRQs */ From c3e878ca7b6663d2ad77a6e17460fc47a2347f4a Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Mon, 2 Sep 2024 10:45:34 +0800 Subject: [PATCH 482/655] sh: intc: Replace simple_strtoul() with kstrtoul() The function simple_strtoul() performs no error checking in scenarios where the input value overflows the intended output variable. We can replace the use of simple_strtoul() with the safer alternative kstrtoul(). This also allows us to print an error message in case of failure. Signed-off-by: Hongbo Li Reviewed-by: Geert Uytterhoeven Reviewed-by: John Paul Adrian Glaubitz Signed-off-by: John Paul Adrian Glaubitz --- drivers/sh/intc/userimask.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/sh/intc/userimask.c b/drivers/sh/intc/userimask.c index abe9091827cd..a363f77881d1 100644 --- a/drivers/sh/intc/userimask.c +++ b/drivers/sh/intc/userimask.c @@ -32,8 +32,11 @@ store_intc_userimask(struct device *dev, const char *buf, size_t count) { unsigned long level; + int ret; - level = simple_strtoul(buf, NULL, 10); + ret = kstrtoul(buf, 10, &level); + if (ret != 0) + return ret; /* * Minimal acceptable IRQ levels are in the 2 - 16 range, but From 4feb014bc79a42485b15bc3912dd3b0bca592520 Mon Sep 17 00:00:00 2001 From: Dipendra Khadka Date: Sun, 22 Sep 2024 16:47:01 +0000 Subject: [PATCH 483/655] dm-cache: remove pointless error check Smatch reported following: ''' drivers/md/dm-cache-target.c:3204 parse_cblock_range() warn: sscanf doesn't return error codes drivers/md/dm-cache-target.c:3217 parse_cblock_range() warn: sscanf doesn't return error codes ''' Sscanf doesn't return negative values at all. Signed-off-by: Dipendra Khadka Signed-off-by: Mikulas Patocka --- drivers/md/dm-cache-target.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 17f0fab1e254..621be35c2b00 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -3200,8 +3200,6 @@ static int parse_cblock_range(struct cache *cache, const char *str, * Try and parse form (ii) first. */ r = sscanf(str, "%llu-%llu%c", &b, &e, &dummy); - if (r < 0) - return r; if (r == 2) { result->begin = to_cblock(b); @@ -3213,8 +3211,6 @@ static int parse_cblock_range(struct cache *cache, const char *str, * That didn't work, try form (i). */ r = sscanf(str, "%llu%c", &b, &dummy); - if (r < 0) - return r; if (r == 1) { result->begin = to_cblock(b); From 0a92e5cdeef9fa4cba8bef6cd1d91cff6b5d300b Mon Sep 17 00:00:00 2001 From: Shen Lichuan Date: Tue, 24 Sep 2024 15:21:11 +0200 Subject: [PATCH 484/655] dm: fix spelling errors Fixed some confusing spelling errors that were currently identified, the details are as follows: -in the code comments: dm-cache-target.c: 1371: exclussive ==> exclusive dm-raid.c: 2522: repective ==> respective Signed-off-by: Shen Lichuan Signed-off-by: Mikulas Patocka --- drivers/md/dm-cache-target.c | 2 +- drivers/md/dm-raid.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 621be35c2b00..aaeeabfab09b 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -1368,7 +1368,7 @@ static void mg_copy(struct work_struct *ws) */ bool rb = bio_detain_shared(mg->cache, mg->op->oblock, mg->overwrite_bio); - BUG_ON(rb); /* An exclussive lock must _not_ be held for this block */ + BUG_ON(rb); /* An exclusive lock must _not_ be held for this block */ mg->overwrite_bio = NULL; inc_io_migrations(mg->cache); mg_full_copy(ws); diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 0c3323e0adb2..32c8240c1873 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -2519,7 +2519,7 @@ static int super_validate(struct raid_set *rs, struct md_rdev *rdev) rdev->saved_raid_disk = rdev->raid_disk; } - /* Reshape support -> restore repective data offsets */ + /* Reshape support -> restore respective data offsets */ rdev->data_offset = le64_to_cpu(sb->data_offset); rdev->new_data_offset = le64_to_cpu(sb->new_data_offset); From e6a3531dd542cb127c8de32ab1e54a48ae19962b Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 24 Sep 2024 15:18:29 +0200 Subject: [PATCH 485/655] dm-verity: restart or panic on an I/O error Maxim Suhanov reported that dm-verity doesn't crash if an I/O error happens. In theory, this could be used to subvert security, because an attacker can create sectors that return error with the Write Uncorrectable command. Some programs may misbehave if they have to deal with EIO. This commit fixes dm-verity, so that if "panic_on_corruption" or "restart_on_corruption" was specified and an I/O error happens, the machine will panic or restart. This commit also changes kernel_restart to emergency_restart - kernel_restart calls reboot notifiers and these reboot notifiers may wait for the bio that failed. emergency_restart doesn't call the notifiers. Reported-by: Maxim Suhanov Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org --- drivers/md/dm-verity-target.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index cf659c8feb29..a95c1b9cc5b5 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -272,8 +272,10 @@ static int verity_handle_err(struct dm_verity *v, enum verity_block_type type, if (v->mode == DM_VERITY_MODE_LOGGING) return 0; - if (v->mode == DM_VERITY_MODE_RESTART) - kernel_restart("dm-verity device corrupted"); + if (v->mode == DM_VERITY_MODE_RESTART) { + pr_emerg("dm-verity device corrupted\n"); + emergency_restart(); + } if (v->mode == DM_VERITY_MODE_PANIC) panic("dm-verity device corrupted"); @@ -596,6 +598,23 @@ static void verity_finish_io(struct dm_verity_io *io, blk_status_t status) if (!static_branch_unlikely(&use_bh_wq_enabled) || !io->in_bh) verity_fec_finish_io(io); + if (unlikely(status != BLK_STS_OK) && + unlikely(!(bio->bi_opf & REQ_RAHEAD)) && + !verity_is_system_shutting_down()) { + if (v->mode == DM_VERITY_MODE_RESTART || + v->mode == DM_VERITY_MODE_PANIC) + DMERR_LIMIT("%s has error: %s", v->data_dev->name, + blk_status_to_str(status)); + + if (v->mode == DM_VERITY_MODE_RESTART) { + pr_emerg("dm-verity device corrupted\n"); + emergency_restart(); + } + + if (v->mode == DM_VERITY_MODE_PANIC) + panic("dm-verity device corrupted"); + } + bio_endio(bio); } From 579b2ba40ece57f3f9150f59dfe327e60a5445b5 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Sun, 22 Sep 2024 18:17:53 +0200 Subject: [PATCH 486/655] dm verity: fallback to platform keyring also if key in trusted keyring is rejected If enabled, we fallback to the platform keyring if the trusted keyring doesn't have the key used to sign the roothash. But if pkcs7_verify() rejects the key for other reasons, such as usage restrictions, we do not fallback. Do so. Follow-up for 6fce1f40e95182ebbfe1ee3096b8fc0b37903269 Suggested-by: Serge Hallyn Signed-off-by: Luca Boccassi Acked-by: Jarkko Sakkinen Signed-off-by: Mikulas Patocka --- drivers/md/dm-verity-verify-sig.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-verity-verify-sig.c b/drivers/md/dm-verity-verify-sig.c index d351d7d39c60..a9e2c6c0a33c 100644 --- a/drivers/md/dm-verity-verify-sig.c +++ b/drivers/md/dm-verity-verify-sig.c @@ -127,7 +127,7 @@ int verity_verify_root_hash(const void *root_hash, size_t root_hash_len, #endif VERIFYING_UNSPECIFIED_SIGNATURE, NULL, NULL); #ifdef CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG_PLATFORM_KEYRING - if (ret == -ENOKEY) + if (ret == -ENOKEY || ret == -EKEYREJECTED) ret = verify_pkcs7_signature(root_hash, root_hash_len, sig_data, sig_len, VERIFY_USE_PLATFORM_KEYRING, From 5b97eebcce1b4f3f07a71f635d6aa3af96c236e7 Mon Sep 17 00:00:00 2001 From: Qianqiang Liu Date: Wed, 25 Sep 2024 13:29:36 +0800 Subject: [PATCH 487/655] fbcon: Fix a NULL pointer dereference issue in fbcon_putcs syzbot has found a NULL pointer dereference bug in fbcon. Here is the simplified C reproducer: struct param { uint8_t type; struct tiocl_selection ts; }; int main() { struct fb_con2fbmap con2fb; struct param param; int fd = open("/dev/fb1", 0, 0); con2fb.console = 0x19; con2fb.framebuffer = 0; ioctl(fd, FBIOPUT_CON2FBMAP, &con2fb); param.type = 2; param.ts.xs = 0; param.ts.ys = 0; param.ts.xe = 0; param.ts.ye = 0; param.ts.sel_mode = 0; int fd1 = open("/dev/tty1", O_RDWR, 0); ioctl(fd1, TIOCLINUX, ¶m); con2fb.console = 1; con2fb.framebuffer = 0; ioctl(fd, FBIOPUT_CON2FBMAP, &con2fb); return 0; } After calling ioctl(fd1, TIOCLINUX, ¶m), the subsequent ioctl(fd, FBIOPUT_CON2FBMAP, &con2fb) causes the kernel to follow a different execution path: set_con2fb_map -> con2fb_init_display -> fbcon_set_disp -> redraw_screen -> hide_cursor -> clear_selection -> highlight -> invert_screen -> do_update_region -> fbcon_putcs -> ops->putcs Since ops->putcs is a NULL pointer, this leads to a kernel panic. To prevent this, we need to call set_blitting_type() within set_con2fb_map() to properly initialize ops->putcs. Reported-by: syzbot+3d613ae53c031502687a@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=3d613ae53c031502687a Tested-by: syzbot+3d613ae53c031502687a@syzkaller.appspotmail.com Signed-off-by: Qianqiang Liu Signed-off-by: Helge Deller --- drivers/video/fbdev/core/fbcon.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 2e093535884b..d9abae2516d8 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -861,6 +861,8 @@ static int set_con2fb_map(int unit, int newidx, int user) return err; fbcon_add_cursor_work(info); + } else if (vc) { + set_blitting_type(vc, info); } con2fb_map[unit] = newidx; From f1ebbe4cd07d058f42174cc5b8c5efcf83de8ffa Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Wed, 25 Sep 2024 21:12:36 +0200 Subject: [PATCH 488/655] fbdev: omapfb: Call of_node_put(ep) only once in omapdss_of_find_source_for_first_ep() An of_node_put(ep) call was immediately used after a pointer check for a of_graph_get_remote_port() call in this function implementation. Thus call such a function only once instead directly before the check. This issue was transformed by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Helge Deller --- drivers/video/fbdev/omap2/omapfb/dss/dss-of.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/video/fbdev/omap2/omapfb/dss/dss-of.c b/drivers/video/fbdev/omap2/omapfb/dss/dss-of.c index 4040e247e026..d5a43b3bf45e 100644 --- a/drivers/video/fbdev/omap2/omapfb/dss/dss-of.c +++ b/drivers/video/fbdev/omap2/omapfb/dss/dss-of.c @@ -129,12 +129,9 @@ omapdss_of_find_source_for_first_ep(struct device_node *node) return ERR_PTR(-EINVAL); src_port = of_graph_get_remote_port(ep); - if (!src_port) { - of_node_put(ep); - return ERR_PTR(-EINVAL); - } - of_node_put(ep); + if (!src_port) + return ERR_PTR(-EINVAL); src = omap_dss_find_output_by_port_node(src_port); From 2555906fd53e0a5239431d44fad695b420e94fdd Mon Sep 17 00:00:00 2001 From: Qianqiang Liu Date: Thu, 26 Sep 2024 19:59:11 +0800 Subject: [PATCH 489/655] fbcon: break earlier in search_fb_in_map and search_for_mapped_con Break the for loop immediately upon finding the target, making the process more efficient. Signed-off-by: Qianqiang Liu Signed-off-by: Helge Deller --- drivers/video/fbdev/core/fbcon.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index d9abae2516d8..e8b4e8c119b5 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -512,8 +512,10 @@ static int search_fb_in_map(int idx) int i, retval = 0; for (i = first_fb_vc; i <= last_fb_vc; i++) { - if (con2fb_map[i] == idx) + if (con2fb_map[i] == idx) { retval = 1; + break; + } } return retval; } @@ -523,8 +525,10 @@ static int search_for_mapped_con(void) int i, retval = 0; for (i = first_fb_vc; i <= last_fb_vc; i++) { - if (con2fb_map[i] != -1) + if (con2fb_map[i] != -1) { retval = 1; + break; + } } return retval; } From d4fc4d01471528da8a9797a065982e05090e1d81 Mon Sep 17 00:00:00 2001 From: "Alexey Gladkov (Intel)" Date: Fri, 13 Sep 2024 19:05:56 +0200 Subject: [PATCH 490/655] x86/tdx: Fix "in-kernel MMIO" check TDX only supports kernel-initiated MMIO operations. The handle_mmio() function checks if the #VE exception occurred in the kernel and rejects the operation if it did not. However, userspace can deceive the kernel into performing MMIO on its behalf. For example, if userspace can point a syscall to an MMIO address, syscall does get_user() or put_user() on it, triggering MMIO #VE. The kernel will treat the #VE as in-kernel MMIO. Ensure that the target MMIO address is within the kernel before decoding instruction. Fixes: 31d58c4e557d ("x86/tdx: Handle in-kernel MMIO") Signed-off-by: Alexey Gladkov (Intel) Signed-off-by: Dave Hansen Reviewed-by: Kirill A. Shutemov Acked-by: Dave Hansen Cc:stable@vger.kernel.org Link: https://lore.kernel.org/all/565a804b80387970460a4ebc67c88d1380f61ad1.1726237595.git.legion%40kernel.org --- arch/x86/coco/tdx/tdx.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index da8b66dce0da..327c45c5013f 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -16,6 +16,7 @@ #include #include #include +#include /* MMIO direction */ #define EPT_READ 0 @@ -433,6 +434,11 @@ static int handle_mmio(struct pt_regs *regs, struct ve_info *ve) return -EINVAL; } + if (!fault_in_kernel_space(ve->gla)) { + WARN_ONCE(1, "Access to userspace address is not supported"); + return -EINVAL; + } + /* * Reject EPT violation #VEs that split pages. * From d1fb034b75a8a96fcb4bf01a7c0e1421eef833a3 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 23 Sep 2024 10:37:50 -0700 Subject: [PATCH 491/655] x86/cpu: Add two Intel CPU model numbers Pantherlake is a mobile CPU. Diamond Rapids next generation Xeon. Signed-off-by: Tony Luck Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20240923173750.16874-1-tony.luck%40intel.com --- arch/x86/include/asm/intel-family.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index f81a851c46dc..17d899da4650 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -191,6 +191,8 @@ #define INTEL_FAM6_LUNARLAKE_M 0xBD #define INTEL_LUNARLAKE_M IFM(6, 0xBD) +#define INTEL_PANTHERLAKE_L IFM(6, 0xCC) + /* "Small Core" Processors (Atom/E-Core) */ #define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ @@ -257,4 +259,7 @@ #define INTEL_FAM5_QUARK_X1000 0x09 /* Quark X1000 SoC */ #define INTEL_QUARK_X1000 IFM(5, 0x09) /* Quark X1000 SoC */ +/* Family 19 */ +#define INTEL_PANTHERCOVE_X IFM(19, 0x01) /* Diamond Rapids */ + #endif /* _ASM_X86_INTEL_FAMILY_H */ From bfc4a245a794841cba5cf287034a0f60d3087402 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 26 Sep 2024 08:35:24 +0200 Subject: [PATCH 492/655] dma-mapping: fix DMA API tracing for chained scatterlists scatterlist allocations can be chained, and thus all iterations need to use the chain-aware iterators. Switch the newly added tracing to use the proper iterators so that they work with chained scatterlists. Fixes: 038eb433dc14 ("dma-mapping: add tracing for dma-mapping API calls") Reported-by: syzbot+95e4ef83a3024384ec7a@syzkaller.appspotmail.com Signed-off-by: Christoph Hellwig Reviewed-by: Sean Anderson Tested-by: syzbot+95e4ef83a3024384ec7a@syzkaller.appspotmail.com --- include/trace/events/dma.h | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h index f57f05331d73..569f86a44aaa 100644 --- a/include/trace/events/dma.h +++ b/include/trace/events/dma.h @@ -176,9 +176,9 @@ TRACE_EVENT(dma_free, ); TRACE_EVENT(dma_map_sg, - TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, + TP_PROTO(struct device *dev, struct scatterlist *sgl, int nents, int ents, enum dma_data_direction dir, unsigned long attrs), - TP_ARGS(dev, sg, nents, ents, dir, attrs), + TP_ARGS(dev, sgl, nents, ents, dir, attrs), TP_STRUCT__entry( __string(device, dev_name(dev)) @@ -190,17 +190,17 @@ TRACE_EVENT(dma_map_sg, ), TP_fast_assign( + struct scatterlist *sg; int i; __assign_str(device); - for (i = 0; i < nents; i++) - ((u64 *)__get_dynamic_array(phys_addrs))[i] = - sg_phys(sg + i); - for (i = 0; i < ents; i++) { + for_each_sg(sgl, sg, nents, i) + ((u64 *)__get_dynamic_array(phys_addrs))[i] = sg_phys(sg); + for_each_sg(sgl, sg, ents, i) { ((u64 *)__get_dynamic_array(dma_addrs))[i] = - sg_dma_address(sg + i); + sg_dma_address(sg); ((unsigned int *)__get_dynamic_array(lengths))[i] = - sg_dma_len(sg + i); + sg_dma_len(sg); } __entry->dir = dir; __entry->attrs = attrs; @@ -222,9 +222,9 @@ TRACE_EVENT(dma_map_sg, ); TRACE_EVENT(dma_unmap_sg, - TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, + TP_PROTO(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs), - TP_ARGS(dev, sg, nents, dir, attrs), + TP_ARGS(dev, sgl, nents, dir, attrs), TP_STRUCT__entry( __string(device, dev_name(dev)) @@ -234,12 +234,12 @@ TRACE_EVENT(dma_unmap_sg, ), TP_fast_assign( + struct scatterlist *sg; int i; __assign_str(device); - for (i = 0; i < nents; i++) - ((u64 *)__get_dynamic_array(addrs))[i] = - sg_phys(sg + i); + for_each_sg(sgl, sg, nents, i) + ((u64 *)__get_dynamic_array(addrs))[i] = sg_phys(sg); __entry->dir = dir; __entry->attrs = attrs; ), @@ -290,9 +290,9 @@ DEFINE_EVENT(dma_sync_single, dma_sync_single_for_device, TP_ARGS(dev, dma_addr, size, dir)); DECLARE_EVENT_CLASS(dma_sync_sg, - TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, + TP_PROTO(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir), - TP_ARGS(dev, sg, nents, dir), + TP_ARGS(dev, sgl, nents, dir), TP_STRUCT__entry( __string(device, dev_name(dev)) @@ -302,14 +302,15 @@ DECLARE_EVENT_CLASS(dma_sync_sg, ), TP_fast_assign( + struct scatterlist *sg; int i; __assign_str(device); - for (i = 0; i < nents; i++) { + for_each_sg(sgl, sg, nents, i) { ((u64 *)__get_dynamic_array(dma_addrs))[i] = - sg_dma_address(sg + i); + sg_dma_address(sg); ((unsigned int *)__get_dynamic_array(lengths))[i] = - sg_dma_len(sg + i); + sg_dma_len(sg); } __entry->dir = dir; ), From a78282e2c94f4ca80a2d7c56e4d1e9546be5596d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 26 Sep 2024 11:39:02 -0700 Subject: [PATCH 493/655] Revert "binfmt_elf, coredump: Log the reason of the failed core dumps" This reverts commit fb97d2eb542faf19a8725afbd75cbc2518903210. The logging was questionable to begin with, but it seems to actively deadlock on the task lock. "On second thought, let's not log core dump failures. 'Tis a silly place" because if you can't tell your core dump is truncated, maybe you should just fix your debugger instead of adding bugs to the kernel. Reported-by: Vegard Nossum Link: https://lore.kernel.org/all/d122ece6-3606-49de-ae4d-8da88846bef2@oracle.com/ Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 48 +++++------------- fs/coredump.c | 107 +++++++-------------------------------- include/linux/coredump.h | 8 +-- kernel/signal.c | 21 +------- 4 files changed, 34 insertions(+), 150 deletions(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 34d0d1e43f36..06dc4a57ba78 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -2032,10 +2032,8 @@ static int elf_core_dump(struct coredump_params *cprm) * Collect all the non-memory information about the process for the * notes. This also sets up the file header. */ - if (!fill_note_info(&elf, e_phnum, &info, cprm)) { - coredump_report_failure("Error collecting note info"); + if (!fill_note_info(&elf, e_phnum, &info, cprm)) goto end_coredump; - } has_dumped = 1; @@ -2050,10 +2048,8 @@ static int elf_core_dump(struct coredump_params *cprm) sz += elf_coredump_extra_notes_size(); phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL); - if (!phdr4note) { - coredump_report_failure("Error allocating program headers note entry"); + if (!phdr4note) goto end_coredump; - } fill_elf_note_phdr(phdr4note, sz, offset); offset += sz; @@ -2067,24 +2063,18 @@ static int elf_core_dump(struct coredump_params *cprm) if (e_phnum == PN_XNUM) { shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL); - if (!shdr4extnum) { - coredump_report_failure("Error allocating extra program headers"); + if (!shdr4extnum) goto end_coredump; - } fill_extnum_info(&elf, shdr4extnum, e_shoff, segs); } offset = dataoff; - if (!dump_emit(cprm, &elf, sizeof(elf))) { - coredump_report_failure("Error emitting the ELF headers"); + if (!dump_emit(cprm, &elf, sizeof(elf))) goto end_coredump; - } - if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note))) { - coredump_report_failure("Error emitting the program header for notes"); + if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note))) goto end_coredump; - } /* Write program headers for segments dump */ for (i = 0; i < cprm->vma_count; i++) { @@ -2107,28 +2097,20 @@ static int elf_core_dump(struct coredump_params *cprm) phdr.p_flags |= PF_X; phdr.p_align = ELF_EXEC_PAGESIZE; - if (!dump_emit(cprm, &phdr, sizeof(phdr))) { - coredump_report_failure("Error emitting program headers"); + if (!dump_emit(cprm, &phdr, sizeof(phdr))) goto end_coredump; - } } - if (!elf_core_write_extra_phdrs(cprm, offset)) { - coredump_report_failure("Error writing out extra program headers"); + if (!elf_core_write_extra_phdrs(cprm, offset)) goto end_coredump; - } /* write out the notes section */ - if (!write_note_info(&info, cprm)) { - coredump_report_failure("Error writing out notes"); + if (!write_note_info(&info, cprm)) goto end_coredump; - } /* For cell spufs and x86 xstate */ - if (elf_coredump_extra_notes_write(cprm)) { - coredump_report_failure("Error writing out extra notes"); + if (elf_coredump_extra_notes_write(cprm)) goto end_coredump; - } /* Align to page */ dump_skip_to(cprm, dataoff); @@ -2136,22 +2118,16 @@ static int elf_core_dump(struct coredump_params *cprm) for (i = 0; i < cprm->vma_count; i++) { struct core_vma_metadata *meta = cprm->vma_meta + i; - if (!dump_user_range(cprm, meta->start, meta->dump_size)) { - coredump_report_failure("Error writing out the process memory"); + if (!dump_user_range(cprm, meta->start, meta->dump_size)) goto end_coredump; - } } - if (!elf_core_write_extra_data(cprm)) { - coredump_report_failure("Error writing out extra data"); + if (!elf_core_write_extra_data(cprm)) goto end_coredump; - } if (e_phnum == PN_XNUM) { - if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum))) { - coredump_report_failure("Error emitting extra program headers"); + if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum))) goto end_coredump; - } } end_coredump: diff --git a/fs/coredump.c b/fs/coredump.c index 53a78b6bbb5b..45737b43dda5 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -465,17 +465,7 @@ static bool dump_interrupted(void) * but then we need to teach dump_write() to restart and clear * TIF_SIGPENDING. */ - if (fatal_signal_pending(current)) { - coredump_report_failure("interrupted: fatal signal pending"); - return true; - } - - if (freezing(current)) { - coredump_report_failure("interrupted: freezing"); - return true; - } - - return false; + return fatal_signal_pending(current) || freezing(current); } static void wait_for_dump_helpers(struct file *file) @@ -530,7 +520,7 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) return err; } -int do_coredump(const kernel_siginfo_t *siginfo) +void do_coredump(const kernel_siginfo_t *siginfo) { struct core_state core_state; struct core_name cn; @@ -538,7 +528,7 @@ int do_coredump(const kernel_siginfo_t *siginfo) struct linux_binfmt * binfmt; const struct cred *old_cred; struct cred *cred; - int retval; + int retval = 0; int ispipe; size_t *argv = NULL; int argc = 0; @@ -562,20 +552,14 @@ int do_coredump(const kernel_siginfo_t *siginfo) audit_core_dumps(siginfo->si_signo); binfmt = mm->binfmt; - if (!binfmt || !binfmt->core_dump) { - retval = -ENOEXEC; + if (!binfmt || !binfmt->core_dump) goto fail; - } - if (!__get_dumpable(cprm.mm_flags)) { - retval = -EACCES; + if (!__get_dumpable(cprm.mm_flags)) goto fail; - } cred = prepare_creds(); - if (!cred) { - retval = -EPERM; + if (!cred) goto fail; - } /* * We cannot trust fsuid as being the "true" uid of the process * nor do we know its entire history. We only know it was tainted @@ -604,7 +588,6 @@ int do_coredump(const kernel_siginfo_t *siginfo) if (ispipe < 0) { coredump_report_failure("format_corename failed, aborting core"); - retval = ispipe; goto fail_unlock; } @@ -625,7 +608,6 @@ int do_coredump(const kernel_siginfo_t *siginfo) * core_pattern process dies. */ coredump_report_failure("RLIMIT_CORE is set to 1, aborting core"); - retval = -EPERM; goto fail_unlock; } cprm.limit = RLIM_INFINITY; @@ -633,7 +615,6 @@ int do_coredump(const kernel_siginfo_t *siginfo) dump_count = atomic_inc_return(&core_dump_count); if (core_pipe_limit && (core_pipe_limit < dump_count)) { coredump_report_failure("over core_pipe_limit, skipping core dump"); - retval = -E2BIG; goto fail_dropcount; } @@ -641,7 +622,6 @@ int do_coredump(const kernel_siginfo_t *siginfo) GFP_KERNEL); if (!helper_argv) { coredump_report_failure("%s failed to allocate memory", __func__); - retval = -ENOMEM; goto fail_dropcount; } for (argi = 0; argi < argc; argi++) @@ -667,16 +647,12 @@ int do_coredump(const kernel_siginfo_t *siginfo) int open_flags = O_CREAT | O_WRONLY | O_NOFOLLOW | O_LARGEFILE | O_EXCL; - if (cprm.limit < binfmt->min_coredump) { - coredump_report_failure("over coredump resource limit, skipping core dump"); - retval = -E2BIG; + if (cprm.limit < binfmt->min_coredump) goto fail_unlock; - } if (need_suid_safe && cn.corename[0] != '/') { coredump_report_failure( "this process can only dump core to a fully qualified path, skipping core dump"); - retval = -EPERM; goto fail_unlock; } @@ -722,28 +698,20 @@ int do_coredump(const kernel_siginfo_t *siginfo) } else { cprm.file = filp_open(cn.corename, open_flags, 0600); } - if (IS_ERR(cprm.file)) { - retval = PTR_ERR(cprm.file); + if (IS_ERR(cprm.file)) goto fail_unlock; - } inode = file_inode(cprm.file); - if (inode->i_nlink > 1) { - retval = -EMLINK; + if (inode->i_nlink > 1) goto close_fail; - } - if (d_unhashed(cprm.file->f_path.dentry)) { - retval = -EEXIST; + if (d_unhashed(cprm.file->f_path.dentry)) goto close_fail; - } /* * AK: actually i see no reason to not allow this for named * pipes etc, but keep the previous behaviour for now. */ - if (!S_ISREG(inode->i_mode)) { - retval = -EISDIR; + if (!S_ISREG(inode->i_mode)) goto close_fail; - } /* * Don't dump core if the filesystem changed owner or mode * of the file during file creation. This is an issue when @@ -755,22 +723,17 @@ int do_coredump(const kernel_siginfo_t *siginfo) current_fsuid())) { coredump_report_failure("Core dump to %s aborted: " "cannot preserve file owner", cn.corename); - retval = -EPERM; goto close_fail; } if ((inode->i_mode & 0677) != 0600) { coredump_report_failure("Core dump to %s aborted: " "cannot preserve file permissions", cn.corename); - retval = -EPERM; goto close_fail; } - if (!(cprm.file->f_mode & FMODE_CAN_WRITE)) { - retval = -EACCES; + if (!(cprm.file->f_mode & FMODE_CAN_WRITE)) goto close_fail; - } - retval = do_truncate(idmap, cprm.file->f_path.dentry, - 0, 0, cprm.file); - if (retval) + if (do_truncate(idmap, cprm.file->f_path.dentry, + 0, 0, cprm.file)) goto close_fail; } @@ -786,15 +749,10 @@ int do_coredump(const kernel_siginfo_t *siginfo) */ if (!cprm.file) { coredump_report_failure("Core dump to |%s disabled", cn.corename); - retval = -EPERM; goto close_fail; } - if (!dump_vma_snapshot(&cprm)) { - coredump_report_failure("Can't get VMA snapshot for core dump |%s", - cn.corename); - retval = -EACCES; + if (!dump_vma_snapshot(&cprm)) goto close_fail; - } file_start_write(cprm.file); core_dumped = binfmt->core_dump(&cprm); @@ -810,21 +768,9 @@ int do_coredump(const kernel_siginfo_t *siginfo) } file_end_write(cprm.file); free_vma_snapshot(&cprm); - } else { - coredump_report_failure("Core dump to %s%s has been interrupted", - ispipe ? "|" : "", cn.corename); - retval = -EAGAIN; - goto fail; } - coredump_report( - "written to %s%s: VMAs: %d, size %zu; core: %lld bytes, pos %lld", - ispipe ? "|" : "", cn.corename, - cprm.vma_count, cprm.vma_data_size, cprm.written, cprm.pos); if (ispipe && core_pipe_limit) wait_for_dump_helpers(cprm.file); - - retval = 0; - close_fail: if (cprm.file) filp_close(cprm.file, NULL); @@ -839,7 +785,7 @@ int do_coredump(const kernel_siginfo_t *siginfo) fail_creds: put_cred(cred); fail: - return retval; + return; } /* @@ -859,16 +805,8 @@ static int __dump_emit(struct coredump_params *cprm, const void *addr, int nr) if (dump_interrupted()) return 0; n = __kernel_write(file, addr, nr, &pos); - if (n != nr) { - if (n < 0) - coredump_report_failure("failed when writing out, error %zd", n); - else - coredump_report_failure( - "partially written out, only %zd(of %d) bytes written", - n, nr); - + if (n != nr) return 0; - } file->f_pos = pos; cprm->written += n; cprm->pos += n; @@ -881,16 +819,9 @@ static int __dump_skip(struct coredump_params *cprm, size_t nr) static char zeroes[PAGE_SIZE]; struct file *file = cprm->file; if (file->f_mode & FMODE_LSEEK) { - int ret; - - if (dump_interrupted()) + if (dump_interrupted() || + vfs_llseek(file, nr, SEEK_CUR) < 0) return 0; - - ret = vfs_llseek(file, nr, SEEK_CUR); - if (ret < 0) { - coredump_report_failure("failed when seeking, error %d", ret); - return 0; - } cprm->pos += nr; return 1; } else { diff --git a/include/linux/coredump.h b/include/linux/coredump.h index edeb8532ce0f..45e598fe3476 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -42,7 +42,7 @@ extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr); extern int dump_align(struct coredump_params *cprm, int align); int dump_user_range(struct coredump_params *cprm, unsigned long start, unsigned long len); -extern int do_coredump(const kernel_siginfo_t *siginfo); +extern void do_coredump(const kernel_siginfo_t *siginfo); /* * Logging for the coredump code, ratelimited. @@ -62,11 +62,7 @@ extern int do_coredump(const kernel_siginfo_t *siginfo); #define coredump_report_failure(fmt, ...) __COREDUMP_PRINTK(KERN_WARNING, fmt, ##__VA_ARGS__) #else -static inline int do_coredump(const kernel_siginfo_t *siginfo) -{ - /* Coredump support is not available, can't fail. */ - return 0; -} +static inline void do_coredump(const kernel_siginfo_t *siginfo) {} #define coredump_report(...) #define coredump_report_failure(...) diff --git a/kernel/signal.c b/kernel/signal.c index 6e57036f947f..4344860ffcac 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2888,8 +2888,6 @@ bool get_signal(struct ksignal *ksig) current->flags |= PF_SIGNALED; if (sig_kernel_coredump(signr)) { - int ret; - if (print_fatal_signals) print_fatal_signal(signr); proc_coredump_connector(current); @@ -2901,24 +2899,7 @@ bool get_signal(struct ksignal *ksig) * first and our do_group_exit call below will use * that value and ignore the one we pass it. */ - ret = do_coredump(&ksig->info); - if (ret) - coredump_report_failure("coredump has not been created, error %d", - ret); - else if (!IS_ENABLED(CONFIG_COREDUMP)) { - /* - * Coredumps are not available, can't fail collecting - * the coredump. - * - * Leave a note though that the coredump is going to be - * not created. This is not an error or a warning as disabling - * support in the kernel for coredumps isn't commonplace, and - * the user must've built the kernel with the custom config so - * let them know all works as desired. - */ - coredump_report("no coredump collected as " - "that is disabled in the kernel configuration"); - } + do_coredump(&ksig->info); } /* From c234c6534040b1c1f8adcaf44702fc3e584cb1fe Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Tue, 24 Sep 2024 19:07:24 +0100 Subject: [PATCH 494/655] tools: fix shared radix-tree build The shared radix-tree build is not correctly recompiling when lib/maple_tree.c and lib/test_maple_tree.c are modified - fix this by adding these core components to the SHARED_DEPS list. Additionally, add missing header guards to shared header files. Link: https://lkml.kernel.org/r/20240924180724.112169-1-lorenzo.stoakes@oracle.com Fixes: 74579d8dab47 ("tools: separate out shared radix-tree components") Signed-off-by: Lorenzo Stoakes Tested-by: Sidhartha Kumar Cc: "Liam R. Howlett" Cc: Matthew Wilcox Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- tools/testing/shared/maple-shared.h | 4 ++++ tools/testing/shared/shared.h | 4 ++++ tools/testing/shared/shared.mk | 4 +++- tools/testing/shared/xarray-shared.h | 4 ++++ 4 files changed, 15 insertions(+), 1 deletion(-) diff --git a/tools/testing/shared/maple-shared.h b/tools/testing/shared/maple-shared.h index 3d847edd149d..dc4d30f3860b 100644 --- a/tools/testing/shared/maple-shared.h +++ b/tools/testing/shared/maple-shared.h @@ -1,4 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0+ */ +#ifndef __MAPLE_SHARED_H__ +#define __MAPLE_SHARED_H__ #define CONFIG_DEBUG_MAPLE_TREE #define CONFIG_MAPLE_SEARCH @@ -7,3 +9,5 @@ #include #include #include "linux/init.h" + +#endif /* __MAPLE_SHARED_H__ */ diff --git a/tools/testing/shared/shared.h b/tools/testing/shared/shared.h index f08f683812ad..13fb4d39966b 100644 --- a/tools/testing/shared/shared.h +++ b/tools/testing/shared/shared.h @@ -1,4 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __SHARED_H__ +#define __SHARED_H__ #include #include @@ -31,3 +33,5 @@ #ifndef dump_stack #define dump_stack() assert(0) #endif + +#endif /* __SHARED_H__ */ diff --git a/tools/testing/shared/shared.mk b/tools/testing/shared/shared.mk index a05f0588513a..a6bc51d0b0bf 100644 --- a/tools/testing/shared/shared.mk +++ b/tools/testing/shared/shared.mk @@ -15,7 +15,9 @@ SHARED_DEPS = Makefile ../shared/shared.mk ../shared/*.h generated/map-shift.h \ ../../../include/linux/maple_tree.h \ ../../../include/linux/radix-tree.h \ ../../../lib/radix-tree.h \ - ../../../include/linux/idr.h + ../../../include/linux/idr.h \ + ../../../lib/maple_tree.c \ + ../../../lib/test_maple_tree.c ifndef SHIFT SHIFT=3 diff --git a/tools/testing/shared/xarray-shared.h b/tools/testing/shared/xarray-shared.h index ac2d16ff53ae..d50de7884803 100644 --- a/tools/testing/shared/xarray-shared.h +++ b/tools/testing/shared/xarray-shared.h @@ -1,4 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0+ */ +#ifndef __XARRAY_SHARED_H__ +#define __XARRAY_SHARED_H__ #define XA_DEBUG #include "shared.h" + +#endif /* __XARRAY_SHARED_H__ */ From a3344078101ceee46d14a93f7e3a3b91a55d215b Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 24 Sep 2024 08:42:05 -0700 Subject: [PATCH 495/655] mm: make SPLIT_PTE_PTLOCKS depend on SMP SPLIT_PTE_PTLOCKS depends on "NR_CPUS >= 4". Unfortunately, that evaluates to true if there is no NR_CPUS configuration option. This results in CONFIG_SPLIT_PTE_PTLOCKS=y for mac_defconfig. This in turn causes the m68k "q800" and "virt" machines to crash in qemu if debugging options are enabled. Making CONFIG_SPLIT_PTE_PTLOCKS dependent on the existence of NR_CPUS does not work since a dependency on the existence of a numeric Kconfig entry always evaluates to false. Example: config HAVE_NO_NR_CPUS def_bool y depends on !NR_CPUS After adding this to a Kconfig file, "make defconfig" includes: $ grep NR_CPUS .config CONFIG_NR_CPUS=64 CONFIG_HAVE_NO_NR_CPUS=y Defining NR_CPUS for m68k does not help either since many architectures define NR_CPUS only for SMP configurations. Make SPLIT_PTE_PTLOCKS depend on SMP instead to solve the problem. Link: https://lkml.kernel.org/r/20240924154205.1491376-1-linux@roeck-us.net Fixes: 394290cba966 ("mm: turn USE_SPLIT_PTE_PTLOCKS / USE_SPLIT_PTE_PTLOCKS into Kconfig options") Signed-off-by: Guenter Roeck Acked-by: David Hildenbrand Reviewed-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven Signed-off-by: Andrew Morton --- mm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/Kconfig b/mm/Kconfig index 09aebca1cae3..4c9f5ea13271 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -595,6 +595,7 @@ config ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE config SPLIT_PTE_PTLOCKS def_bool y depends on MMU + depends on SMP depends on NR_CPUS >= 4 depends on !ARM || CPU_CACHE_VIPT depends on !PARISC || PA20 From c225c4f6056b46a8a5bf2ed35abf17a2d6887691 Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Tue, 3 Sep 2024 07:25:17 -0700 Subject: [PATCH 496/655] mm/filemap: fix filemap_get_folios_contig THP panic Patch series "memfd-pin huge page fixes". Fix multiple bugs that occur when using memfd_pin_folios with hugetlb pages and THP. The hugetlb bugs only bite when the page is not yet faulted in when memfd_pin_folios is called. The THP bug bites when the starting offset passed to memfd_pin_folios is not huge page aligned. See the commit messages for details. This patch (of 5): memfd_pin_folios on memory backed by THP panics if the requested start offset is not huge page aligned: BUG: kernel NULL pointer dereference, address: 0000000000000036 RIP: 0010:filemap_get_folios_contig+0xdf/0x290 RSP: 0018:ffffc9002092fbe8 EFLAGS: 00010202 RAX: 0000000000000002 RBX: 0000000000000002 RCX: 0000000000000002 The fault occurs here, because xas_load returns a folio with value 2: filemap_get_folios_contig() for (folio = xas_load(&xas); folio && xas.xa_index <= end; folio = xas_next(&xas)) { ... if (!folio_try_get(folio)) <-- BOOM "2" is an xarray sibling entry. We get it because memfd_pin_folios does not round the indices passed to filemap_get_folios_contig to huge page boundaries for THP, so we load from the middle of a huge page range see a sibling. (It does round for hugetlbfs, at the is_file_hugepages test). To fix, if the folio is a sibling, then return the next index as the starting point for the next call to filemap_get_folios_contig. Link: https://lkml.kernel.org/r/1725373521-451395-1-git-send-email-steven.sistare@oracle.com Link: https://lkml.kernel.org/r/1725373521-451395-2-git-send-email-steven.sistare@oracle.com Fixes: 89c1905d9c14 ("mm/gup: introduce memfd_pin_folios() for pinning memfd folios") Signed-off-by: Steve Sistare Cc: David Hildenbrand Cc: Jason Gunthorpe Cc: Matthew Wilcox Cc: Muchun Song Cc: Peter Xu Cc: Vivek Kasireddy Cc: Signed-off-by: Andrew Morton --- mm/filemap.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/filemap.c b/mm/filemap.c index bbaed3dd5049..36d22968be9a 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2196,6 +2196,10 @@ unsigned filemap_get_folios_contig(struct address_space *mapping, if (xa_is_value(folio)) goto update_start; + /* If we landed in the middle of a THP, continue at its end. */ + if (xa_is_sibling(folio)) + goto update_start; + if (!folio_try_get(folio)) goto retry; From c56b6f3d801d7ec8965993342bdd9e2972b6cb8e Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Tue, 3 Sep 2024 07:25:18 -0700 Subject: [PATCH 497/655] mm/hugetlb: fix memfd_pin_folios free_huge_pages leak memfd_pin_folios followed by unpin_folios fails to restore free_huge_pages if the pages were not already faulted in, because the folio refcount for pages created by memfd_alloc_folio never goes to 0. memfd_pin_folios needs another folio_put to undo the folio_try_get below: memfd_alloc_folio() alloc_hugetlb_folio_nodemask() dequeue_hugetlb_folio_nodemask() dequeue_hugetlb_folio_node_exact() folio_ref_unfreeze(folio, 1); ; adds 1 refcount folio_try_get() ; adds 1 refcount hugetlb_add_to_page_cache() ; adds 512 refcount (on x86) With the fix, after memfd_pin_folios + unpin_folios, the refcount for the (unfaulted) page is 512, which is correct, as the refcount for a faulted unpinned page is 513. Link: https://lkml.kernel.org/r/1725373521-451395-3-git-send-email-steven.sistare@oracle.com Fixes: 89c1905d9c14 ("mm/gup: introduce memfd_pin_folios() for pinning memfd folios") Signed-off-by: Steve Sistare Acked-by: Vivek Kasireddy Cc: David Hildenbrand Cc: Jason Gunthorpe Cc: Matthew Wilcox Cc: Muchun Song Cc: Peter Xu Cc: Signed-off-by: Andrew Morton --- mm/gup.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/gup.c b/mm/gup.c index 8232c8c9c372..3e7eac9a4f2b 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -3615,7 +3615,7 @@ long memfd_pin_folios(struct file *memfd, loff_t start, loff_t end, pgoff_t start_idx, end_idx, next_idx; struct folio *folio = NULL; struct folio_batch fbatch; - struct hstate *h; + struct hstate *h = NULL; long ret = -EINVAL; if (start < 0 || start > end || !max_folios) @@ -3659,6 +3659,8 @@ long memfd_pin_folios(struct file *memfd, loff_t start, loff_t end, &fbatch); if (folio) { folio_put(folio); + if (h) + folio_put(folio); folio = NULL; } From 26a8ea80929c518bdec5e53a5776f95919b7c88e Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Tue, 3 Sep 2024 07:25:19 -0700 Subject: [PATCH 498/655] mm/hugetlb: fix memfd_pin_folios resv_huge_pages leak memfd_pin_folios followed by unpin_folios leaves resv_huge_pages elevated if the pages were not already faulted in. During a normal page fault, resv_huge_pages is consumed here: hugetlb_fault() alloc_hugetlb_folio() dequeue_hugetlb_folio_vma() dequeue_hugetlb_folio_nodemask() dequeue_hugetlb_folio_node_exact() free_huge_pages-- resv_huge_pages-- During memfd_pin_folios, the page is created by calling alloc_hugetlb_folio_nodemask instead of alloc_hugetlb_folio, and resv_huge_pages is not modified: memfd_alloc_folio() alloc_hugetlb_folio_nodemask() dequeue_hugetlb_folio_nodemask() dequeue_hugetlb_folio_node_exact() free_huge_pages-- alloc_hugetlb_folio_nodemask has other callers that must not modify resv_huge_pages. Therefore, to fix, define an alternate version of alloc_hugetlb_folio_nodemask for this call site that adjusts resv_huge_pages. Link: https://lkml.kernel.org/r/1725373521-451395-4-git-send-email-steven.sistare@oracle.com Fixes: 89c1905d9c14 ("mm/gup: introduce memfd_pin_folios() for pinning memfd folios") Signed-off-by: Steve Sistare Acked-by: Vivek Kasireddy Cc: David Hildenbrand Cc: Jason Gunthorpe Cc: Matthew Wilcox Cc: Muchun Song Cc: Peter Xu Cc: Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 10 ++++++++++ mm/hugetlb.c | 17 +++++++++++++++++ mm/memfd.c | 9 ++++----- 3 files changed, 31 insertions(+), 5 deletions(-) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 98c47c394b89..e4697539b665 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -692,6 +692,9 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask, gfp_t gfp_mask, bool allow_alloc_fallback); +struct folio *alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid, + nodemask_t *nmask, gfp_t gfp_mask); + int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping, pgoff_t idx); void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, @@ -1059,6 +1062,13 @@ static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, return NULL; } +static inline struct folio * +alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid, + nodemask_t *nmask, gfp_t gfp_mask) +{ + return NULL; +} + static inline struct folio * alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask, gfp_t gfp_mask, diff --git a/mm/hugetlb.c b/mm/hugetlb.c index def84d8bcf2d..190fa05635f4 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2390,6 +2390,23 @@ struct folio *alloc_buddy_hugetlb_folio_with_mpol(struct hstate *h, return folio; } +struct folio *alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid, + nodemask_t *nmask, gfp_t gfp_mask) +{ + struct folio *folio; + + spin_lock_irq(&hugetlb_lock); + folio = dequeue_hugetlb_folio_nodemask(h, gfp_mask, preferred_nid, + nmask); + if (folio) { + VM_BUG_ON(!h->resv_huge_pages); + h->resv_huge_pages--; + } + + spin_unlock_irq(&hugetlb_lock); + return folio; +} + /* folio migration callback function */ struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask, gfp_t gfp_mask, bool allow_alloc_fallback) diff --git a/mm/memfd.c b/mm/memfd.c index e7b7c5294d59..bfe0e7189a37 100644 --- a/mm/memfd.c +++ b/mm/memfd.c @@ -82,11 +82,10 @@ struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx) gfp_mask = htlb_alloc_mask(hstate_file(memfd)); gfp_mask &= ~(__GFP_HIGHMEM | __GFP_MOVABLE); - folio = alloc_hugetlb_folio_nodemask(hstate_file(memfd), - numa_node_id(), - NULL, - gfp_mask, - false); + folio = alloc_hugetlb_folio_reserve(hstate_file(memfd), + numa_node_id(), + NULL, + gfp_mask); if (folio && folio_try_get(folio)) { err = hugetlb_add_to_page_cache(folio, memfd->f_mapping, From 9289f020da47ef04b28865589eeee3d56d4bafea Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Tue, 3 Sep 2024 07:25:20 -0700 Subject: [PATCH 499/655] mm/gup: fix memfd_pin_folios hugetlb page allocation When memfd_pin_folios -> memfd_alloc_folio creates a hugetlb page, the index is wrong. The subsequent call to filemap_get_folios_contig thus cannot find it, and fails, and memfd_pin_folios loops forever. To fix, adjust the index for the huge_page_order. memfd_alloc_folio also forgets to unlock the folio, so the next touch of the page calls hugetlb_fault which blocks forever trying to take the lock. Unlock it. Link: https://lkml.kernel.org/r/1725373521-451395-5-git-send-email-steven.sistare@oracle.com Fixes: 89c1905d9c14 ("mm/gup: introduce memfd_pin_folios() for pinning memfd folios") Signed-off-by: Steve Sistare Acked-by: Vivek Kasireddy Cc: David Hildenbrand Cc: Jason Gunthorpe Cc: Matthew Wilcox Cc: Muchun Song Cc: Peter Xu Cc: Signed-off-by: Andrew Morton --- mm/memfd.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/mm/memfd.c b/mm/memfd.c index bfe0e7189a37..bcb131db829d 100644 --- a/mm/memfd.c +++ b/mm/memfd.c @@ -79,10 +79,13 @@ struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx) * alloc from. Also, the folio will be pinned for an indefinite * amount of time, so it is not expected to be migrated away. */ - gfp_mask = htlb_alloc_mask(hstate_file(memfd)); - gfp_mask &= ~(__GFP_HIGHMEM | __GFP_MOVABLE); + struct hstate *h = hstate_file(memfd); - folio = alloc_hugetlb_folio_reserve(hstate_file(memfd), + gfp_mask = htlb_alloc_mask(h); + gfp_mask &= ~(__GFP_HIGHMEM | __GFP_MOVABLE); + idx >>= huge_page_order(h); + + folio = alloc_hugetlb_folio_reserve(h, numa_node_id(), NULL, gfp_mask); @@ -95,6 +98,7 @@ struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx) free_huge_folio(folio); return ERR_PTR(err); } + folio_unlock(folio); return folio; } return ERR_PTR(-ENOMEM); From ce645b9fdc78ec5d28067286e92871ddae6817d5 Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Tue, 3 Sep 2024 07:25:21 -0700 Subject: [PATCH 500/655] mm/gup: fix memfd_pin_folios alloc race panic If memfd_pin_folios tries to create a hugetlb page, but someone else already did, then folio gets the value -EEXIST here: folio = memfd_alloc_folio(memfd, start_idx); if (IS_ERR(folio)) { ret = PTR_ERR(folio); if (ret != -EEXIST) goto err; then on the next trip through the "while start_idx" loop we panic here: if (folio) { folio_put(folio); To fix, set the folio to NULL on error. Link: https://lkml.kernel.org/r/1725373521-451395-6-git-send-email-steven.sistare@oracle.com Fixes: 89c1905d9c14 ("mm/gup: introduce memfd_pin_folios() for pinning memfd folios") Signed-off-by: Steve Sistare Acked-by: Vivek Kasireddy Cc: David Hildenbrand Cc: Jason Gunthorpe Cc: Matthew Wilcox Cc: Muchun Song Cc: Peter Xu Cc: Signed-off-by: Andrew Morton --- mm/gup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/gup.c b/mm/gup.c index 3e7eac9a4f2b..8d9b560a0fec 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -3702,6 +3702,7 @@ long memfd_pin_folios(struct file *memfd, loff_t start, loff_t end, ret = PTR_ERR(folio); if (ret != -EEXIST) goto err; + folio = NULL; } } } From dc677b5f3765cfd0944c8873d1ea57f1a3439676 Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 4 Sep 2024 12:41:08 -0700 Subject: [PATCH 501/655] mm/hugetlb: simplify refs in memfd_alloc_folio The folio_try_get in memfd_alloc_folio is not necessary. Delete it, and delete the matching folio_put in memfd_pin_folios. This also avoids leaking a ref if the memfd_alloc_folio call to hugetlb_add_to_page_cache fails. That error path is also broken in a second way -- when its folio_put causes the ref to become 0, it will implicitly call free_huge_folio, but then the path *explicitly* calls free_huge_folio. Delete the latter. This is a continuation of the fix "mm/hugetlb: fix memfd_pin_folios free_huge_pages leak" [steven.sistare@oracle.com: remove explicit call to free_huge_folio(), per Matthew] Link: https://lkml.kernel.org/r/Zti-7nPVMcGgpcbi@casper.infradead.org Link: https://lkml.kernel.org/r/1725481920-82506-1-git-send-email-steven.sistare@oracle.com Link: https://lkml.kernel.org/r/1725478868-61732-1-git-send-email-steven.sistare@oracle.com Fixes: 89c1905d9c14 ("mm/gup: introduce memfd_pin_folios() for pinning memfd folios") Signed-off-by: Steve Sistare Suggested-by: Vivek Kasireddy Cc: David Hildenbrand Cc: Jason Gunthorpe Cc: Matthew Wilcox Cc: Muchun Song Cc: Peter Xu Cc: Signed-off-by: Andrew Morton --- mm/gup.c | 4 +--- mm/memfd.c | 3 +-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index 8d9b560a0fec..a82890b46a36 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -3615,7 +3615,7 @@ long memfd_pin_folios(struct file *memfd, loff_t start, loff_t end, pgoff_t start_idx, end_idx, next_idx; struct folio *folio = NULL; struct folio_batch fbatch; - struct hstate *h = NULL; + struct hstate *h; long ret = -EINVAL; if (start < 0 || start > end || !max_folios) @@ -3659,8 +3659,6 @@ long memfd_pin_folios(struct file *memfd, loff_t start, loff_t end, &fbatch); if (folio) { folio_put(folio); - if (h) - folio_put(folio); folio = NULL; } diff --git a/mm/memfd.c b/mm/memfd.c index bcb131db829d..c17c3ea701a1 100644 --- a/mm/memfd.c +++ b/mm/memfd.c @@ -89,13 +89,12 @@ struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx) numa_node_id(), NULL, gfp_mask); - if (folio && folio_try_get(folio)) { + if (folio) { err = hugetlb_add_to_page_cache(folio, memfd->f_mapping, idx); if (err) { folio_put(folio); - free_huge_folio(folio); return ERR_PTR(err); } folio_unlock(folio); From 8001070cfbec5cd4ea00b8b48ea51df91122f265 Mon Sep 17 00:00:00 2001 From: Jeongjun Park Date: Tue, 24 Sep 2024 22:00:53 +0900 Subject: [PATCH 502/655] mm: migrate: annotate data-race in migrate_folio_unmap() I found a report from syzbot [1] This report shows that the value can be changed, but in reality, the value of __folio_set_movable() cannot be changed because it holds the folio refcount. Therefore, it is appropriate to add an annotate to make KCSAN ignore that data-race. [1] ================================================================== BUG: KCSAN: data-race in __filemap_remove_folio / migrate_pages_batch write to 0xffffea0004b81dd8 of 8 bytes by task 6348 on cpu 0: page_cache_delete mm/filemap.c:153 [inline] __filemap_remove_folio+0x1ac/0x2c0 mm/filemap.c:233 filemap_remove_folio+0x6b/0x1f0 mm/filemap.c:265 truncate_inode_folio+0x42/0x50 mm/truncate.c:178 shmem_undo_range+0x25b/0xa70 mm/shmem.c:1028 shmem_truncate_range mm/shmem.c:1144 [inline] shmem_evict_inode+0x14d/0x530 mm/shmem.c:1272 evict+0x2f0/0x580 fs/inode.c:731 iput_final fs/inode.c:1883 [inline] iput+0x42a/0x5b0 fs/inode.c:1909 dentry_unlink_inode+0x24f/0x260 fs/dcache.c:412 __dentry_kill+0x18b/0x4c0 fs/dcache.c:615 dput+0x5c/0xd0 fs/dcache.c:857 __fput+0x3fb/0x6d0 fs/file_table.c:439 ____fput+0x1c/0x30 fs/file_table.c:459 task_work_run+0x13a/0x1a0 kernel/task_work.c:228 resume_user_mode_work include/linux/resume_user_mode.h:50 [inline] exit_to_user_mode_loop kernel/entry/common.c:114 [inline] exit_to_user_mode_prepare include/linux/entry-common.h:328 [inline] __syscall_exit_to_user_mode_work kernel/entry/common.c:207 [inline] syscall_exit_to_user_mode+0xbe/0x130 kernel/entry/common.c:218 do_syscall_64+0xd6/0x1c0 arch/x86/entry/common.c:89 entry_SYSCALL_64_after_hwframe+0x77/0x7f read to 0xffffea0004b81dd8 of 8 bytes by task 6342 on cpu 1: __folio_test_movable include/linux/page-flags.h:699 [inline] migrate_folio_unmap mm/migrate.c:1199 [inline] migrate_pages_batch+0x24c/0x1940 mm/migrate.c:1797 migrate_pages_sync mm/migrate.c:1963 [inline] migrate_pages+0xff1/0x1820 mm/migrate.c:2072 do_mbind mm/mempolicy.c:1390 [inline] kernel_mbind mm/mempolicy.c:1533 [inline] __do_sys_mbind mm/mempolicy.c:1607 [inline] __se_sys_mbind+0xf76/0x1160 mm/mempolicy.c:1603 __x64_sys_mbind+0x78/0x90 mm/mempolicy.c:1603 x64_sys_call+0x2b4d/0x2d60 arch/x86/include/generated/asm/syscalls_64.h:238 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xc9/0x1c0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f value changed: 0xffff888127601078 -> 0x0000000000000000 Link: https://lkml.kernel.org/r/20240924130053.107490-1-aha310510@gmail.com Fixes: 7e2a5e5ab217 ("mm: migrate: use __folio_test_movable()") Signed-off-by: Jeongjun Park Reported-by: syzbot Acked-by: David Hildenbrand Cc: Kefeng Wang Cc: Matthew Wilcox Cc: Zi Yan Cc: Signed-off-by: Andrew Morton --- mm/migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/migrate.c b/mm/migrate.c index dfdb3a136bf8..df91248755e4 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1196,7 +1196,7 @@ static int migrate_folio_unmap(new_folio_t get_new_folio, int rc = -EAGAIN; int old_page_state = 0; struct anon_vma *anon_vma = NULL; - bool is_lru = !__folio_test_movable(src); + bool is_lru = data_race(!__folio_test_movable(src)); bool locked = false; bool dst_locked = false; From 5ca60b86f57a4d9648f68418a725b3a7de2816b0 Mon Sep 17 00:00:00 2001 From: Gautham Ananthakrishna Date: Wed, 18 Sep 2024 06:38:44 +0000 Subject: [PATCH 503/655] ocfs2: reserve space for inline xattr before attaching reflink tree One of our customers reported a crash and a corrupted ocfs2 filesystem. The crash was due to the detection of corruption. Upon troubleshooting, the fsck -fn output showed the below corruption [EXTENT_LIST_FREE] Extent list in owner 33080590 claims 230 as the next free chain record, but fsck believes the largest valid value is 227. Clamp the next record value? n The stat output from the debugfs.ocfs2 showed the following corruption where the "Next Free Rec:" had overshot the "Count:" in the root metadata block. Inode: 33080590 Mode: 0640 Generation: 2619713622 (0x9c25a856) FS Generation: 904309833 (0x35e6ac49) CRC32: 00000000 ECC: 0000 Type: Regular Attr: 0x0 Flags: Valid Dynamic Features: (0x16) HasXattr InlineXattr Refcounted Extended Attributes Block: 0 Extended Attributes Inline Size: 256 User: 0 (root) Group: 0 (root) Size: 281320357888 Links: 1 Clusters: 141738 ctime: 0x66911b56 0x316edcb8 -- Fri Jul 12 06:02:30.829349048 2024 atime: 0x66911d6b 0x7f7a28d -- Fri Jul 12 06:11:23.133669517 2024 mtime: 0x66911b56 0x12ed75d7 -- Fri Jul 12 06:02:30.317552087 2024 dtime: 0x0 -- Wed Dec 31 17:00:00 1969 Refcount Block: 2777346 Last Extblk: 2886943 Orphan Slot: 0 Sub Alloc Slot: 0 Sub Alloc Bit: 14 Tree Depth: 1 Count: 227 Next Free Rec: 230 ## Offset Clusters Block# 0 0 2310 2776351 1 2310 2139 2777375 2 4449 1221 2778399 3 5670 731 2779423 4 6401 566 2780447 ....... .... ....... ....... .... ....... The issue was in the reflink workfow while reserving space for inline xattr. The problematic function is ocfs2_reflink_xattr_inline(). By the time this function is called the reflink tree is already recreated at the destination inode from the source inode. At this point, this function reserves space for inline xattrs at the destination inode without even checking if there is space at the root metadata block. It simply reduces the l_count from 243 to 227 thereby making space of 256 bytes for inline xattr whereas the inode already has extents beyond this index (in this case up to 230), thereby causing corruption. The fix for this is to reserve space for inline metadata at the destination inode before the reflink tree gets recreated. The customer has verified the fix. Link: https://lkml.kernel.org/r/20240918063844.1830332-1-gautham.ananthakrishna@oracle.com Fixes: ef962df057aa ("ocfs2: xattr: fix inlined xattr reflink") Signed-off-by: Gautham Ananthakrishna Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Cc: Signed-off-by: Andrew Morton --- fs/ocfs2/refcounttree.c | 26 ++++++++++++++++++++++++-- fs/ocfs2/xattr.c | 11 +---------- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 4f85508538fc..004393b13c0a 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -25,6 +25,7 @@ #include "namei.h" #include "ocfs2_trace.h" #include "file.h" +#include "symlink.h" #include #include @@ -4148,8 +4149,9 @@ static int __ocfs2_reflink(struct dentry *old_dentry, int ret; struct inode *inode = d_inode(old_dentry); struct buffer_head *new_bh = NULL; + struct ocfs2_inode_info *oi = OCFS2_I(inode); - if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) { + if (oi->ip_flags & OCFS2_INODE_SYSTEM_FILE) { ret = -EINVAL; mlog_errno(ret); goto out; @@ -4175,6 +4177,26 @@ static int __ocfs2_reflink(struct dentry *old_dentry, goto out_unlock; } + if ((oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) && + (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { + /* + * Adjust extent record count to reserve space for extended attribute. + * Inline data count had been adjusted in ocfs2_duplicate_inline_data(). + */ + struct ocfs2_inode_info *new_oi = OCFS2_I(new_inode); + + if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) && + !(ocfs2_inode_is_fast_symlink(new_inode))) { + struct ocfs2_dinode *new_di = (struct ocfs2_dinode *)new_bh->b_data; + struct ocfs2_dinode *old_di = (struct ocfs2_dinode *)old_bh->b_data; + struct ocfs2_extent_list *el = &new_di->id2.i_list; + int inline_size = le16_to_cpu(old_di->i_xattr_inline_size); + + le16_add_cpu(&el->l_count, -(inline_size / + sizeof(struct ocfs2_extent_rec))); + } + } + ret = ocfs2_create_reflink_node(inode, old_bh, new_inode, new_bh, preserve); if (ret) { @@ -4182,7 +4204,7 @@ static int __ocfs2_reflink(struct dentry *old_dentry, goto inode_unlock; } - if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_XATTR_FL) { + if (oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) { ret = ocfs2_reflink_xattrs(inode, old_bh, new_inode, new_bh, preserve); diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 0e58a5ce539e..dd0a05365e79 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -6511,16 +6511,7 @@ static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args) } new_oi = OCFS2_I(args->new_inode); - /* - * Adjust extent record count to reserve space for extended attribute. - * Inline data count had been adjusted in ocfs2_duplicate_inline_data(). - */ - if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) && - !(ocfs2_inode_is_fast_symlink(args->new_inode))) { - struct ocfs2_extent_list *el = &new_di->id2.i_list; - le16_add_cpu(&el->l_count, -(inline_size / - sizeof(struct ocfs2_extent_rec))); - } + spin_lock(&new_oi->ip_lock); new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL; new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); From 7bf1823e010e8db2fb649c790bd1b449a75f52d8 Mon Sep 17 00:00:00 2001 From: Mohammed Anees Date: Tue, 24 Sep 2024 09:32:57 +0000 Subject: [PATCH 504/655] ocfs2: fix deadlock in ocfs2_get_system_file_inode syzbot has found a possible deadlock in ocfs2_get_system_file_inode [1]. The scenario is depicted here, CPU0 CPU1 lock(&ocfs2_file_ip_alloc_sem_key); lock(&osb->system_file_mutex); lock(&ocfs2_file_ip_alloc_sem_key); lock(&osb->system_file_mutex); The function calls which could lead to this are: CPU0 ocfs2_mknod - lock(&ocfs2_file_ip_alloc_sem_key); . . . ocfs2_get_system_file_inode - lock(&osb->system_file_mutex); CPU1 - ocfs2_fill_super - lock(&osb->system_file_mutex); . . . ocfs2_read_virt_blocks - lock(&ocfs2_file_ip_alloc_sem_key); This issue can be resolved by making the down_read -> down_read_try in the ocfs2_read_virt_blocks. [1] https://syzkaller.appspot.com/bug?extid=e0055ea09f1f5e6fabdd Link: https://lkml.kernel.org/r/20240924093257.7181-1-pvmohammedanees2003@gmail.com Signed-off-by: Mohammed Anees Reviewed-by: Joseph Qi Reported-by: Closes: https://syzkaller.appspot.com/bug?extid=e0055ea09f1f5e6fabdd Tested-by: syzbot+e0055ea09f1f5e6fabdd@syzkaller.appspotmail.com Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Cc: Signed-off-by: Andrew Morton --- fs/ocfs2/extent_map.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index 70a768b623cf..f7672472fa82 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -973,7 +973,13 @@ int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr, } while (done < nr) { - down_read(&OCFS2_I(inode)->ip_alloc_sem); + if (!down_read_trylock(&OCFS2_I(inode)->ip_alloc_sem)) { + rc = -EAGAIN; + mlog(ML_ERROR, + "Inode #%llu ip_alloc_sem is temporarily unavailable\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno); + break; + } rc = ocfs2_extent_map_get_blocks(inode, v_block + done, &p_block, &p_count, NULL); up_read(&OCFS2_I(inode)->ip_alloc_sem); From ff7f5ad7bce4fd14f8ed057f1f593ade2840e84d Mon Sep 17 00:00:00 2001 From: "qiwu.chen" Date: Tue, 24 Sep 2024 16:50:04 +0800 Subject: [PATCH 505/655] mm: kfence: fix elapsed time for allocated/freed track Fix elapsed time for the allocated/freed track introduced by commit 62e73fd85d7bf. Link: https://lkml.kernel.org/r/20240924085004.75401-1-qiwu.chen@transsion.com Fixes: 62e73fd85d7b ("mm: kfence: print the elapsed time for allocated/freed track") Signed-off-by: qiwu.chen Reviewed-by: Marco Elver Cc: Alexander Potapenko Cc: Dmitry Vyukov Signed-off-by: Andrew Morton --- mm/kfence/report.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/kfence/report.c b/mm/kfence/report.c index 451991a3a8f2..6370c5207d1a 100644 --- a/mm/kfence/report.c +++ b/mm/kfence/report.c @@ -109,7 +109,7 @@ static void kfence_print_stack(struct seq_file *seq, const struct kfence_metadat const struct kfence_track *track = show_alloc ? &meta->alloc_track : &meta->free_track; u64 ts_sec = track->ts_nsec; unsigned long rem_nsec = do_div(ts_sec, NSEC_PER_SEC); - u64 interval_nsec = local_clock() - meta->alloc_track.ts_nsec; + u64 interval_nsec = local_clock() - track->ts_nsec; unsigned long rem_interval_nsec = do_div(interval_nsec, NSEC_PER_SEC); /* Timestamp matches printk timestamp format. */ From 6901cf55de224b2ca51a5675b86c8ef241ae640c Mon Sep 17 00:00:00 2001 From: Diederik de Haas Date: Tue, 24 Sep 2024 10:21:46 +0200 Subject: [PATCH 506/655] mm/damon/Kconfig: update DAMON doc URL The old URL doesn't really work anymore and as the documentation has been integrated in the main kernel documentation site, change the URL to point to that. Link: https://lkml.kernel.org/r/20240924082331.11499-1-didi.debian@cknow.org Signed-off-by: Diederik de Haas Reviewed-by: SeongJae Park Signed-off-by: Andrew Morton --- mm/damon/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/damon/Kconfig b/mm/damon/Kconfig index fecb8172410c..35b72f88983a 100644 --- a/mm/damon/Kconfig +++ b/mm/damon/Kconfig @@ -9,7 +9,7 @@ config DAMON access frequency of each memory region. The information can be useful for performance-centric DRAM level memory management. - See https://damonitor.github.io/doc/html/latest-damon/index.html for + See https://www.kernel.org/doc/html/latest/mm/damon/index.html for more information. config DAMON_KUNIT_TEST From c5b1184decc819756ae549ba54c63b6790c4ddfd Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 24 Sep 2024 14:27:10 +0800 Subject: [PATCH 507/655] compiler.h: specify correct attribute for .rodata..c_jump_table Currently, there is an assembler message when generating kernel/bpf/core.o under CONFIG_OBJTOOL with LoongArch compiler toolchain: Warning: setting incorrect section attributes for .rodata..c_jump_table This is because the section ".rodata..c_jump_table" should be readonly, but there is a "W" (writable) part of the flags: $ readelf -S kernel/bpf/core.o | grep -A 1 "rodata..c" [34] .rodata..c_j[...] PROGBITS 0000000000000000 0000d2e0 0000000000000800 0000000000000000 WA 0 0 8 There is no above issue on x86 due to the generated section flag is only "A" (allocatable). In order to silence the warning on LoongArch, specify the attribute like ".rodata..c_jump_table,\"a\",@progbits #" explicitly, then the section attribute of ".rodata..c_jump_table" must be readonly in the kernel/bpf/core.o file. Before: $ objdump -h kernel/bpf/core.o | grep -A 1 "rodata..c" 21 .rodata..c_jump_table 00000800 0000000000000000 0000000000000000 0000d2e0 2**3 CONTENTS, ALLOC, LOAD, RELOC, DATA After: $ objdump -h kernel/bpf/core.o | grep -A 1 "rodata..c" 21 .rodata..c_jump_table 00000800 0000000000000000 0000000000000000 0000d2e0 2**3 CONTENTS, ALLOC, LOAD, RELOC, READONLY, DATA By the way, AFAICT, maybe the root cause is related with the different compiler behavior of various archs, so to some extent this change is a workaround for LoongArch, and also there is no effect for x86 which is the only port supported by objtool before LoongArch with this patch. Link: https://lkml.kernel.org/r/20240924062710.1243-1-yangtiezhu@loongson.cn Signed-off-by: Tiezhu Yang Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: [6.9+] Signed-off-by: Andrew Morton --- include/linux/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index ec55bcce4146..4d4e23b6e3e7 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -133,7 +133,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, #define annotate_unreachable() __annotate_unreachable(__COUNTER__) /* Annotate a C jump table to allow objtool to follow the code flow */ -#define __annotate_jump_table __section(".rodata..c_jump_table") +#define __annotate_jump_table __section(".rodata..c_jump_table,\"a\",@progbits #") #else /* !CONFIG_OBJTOOL */ #define annotate_reachable() From f30beffd977e98c33550bbeb6f278d157ff54844 Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Mon, 23 Sep 2024 10:38:36 +0500 Subject: [PATCH 508/655] kselftests: mm: fix wrong __NR_userfaultfd value grep -rnIF "#define __NR_userfaultfd" tools/include/uapi/asm-generic/unistd.h:681:#define __NR_userfaultfd 282 arch/x86/include/generated/uapi/asm/unistd_32.h:374:#define __NR_userfaultfd 374 arch/x86/include/generated/uapi/asm/unistd_64.h:327:#define __NR_userfaultfd 323 arch/x86/include/generated/uapi/asm/unistd_x32.h:282:#define __NR_userfaultfd (__X32_SYSCALL_BIT + 323) arch/arm/include/generated/uapi/asm/unistd-eabi.h:347:#define __NR_userfaultfd (__NR_SYSCALL_BASE + 388) arch/arm/include/generated/uapi/asm/unistd-oabi.h:359:#define __NR_userfaultfd (__NR_SYSCALL_BASE + 388) include/uapi/asm-generic/unistd.h:681:#define __NR_userfaultfd 282 The number is dependent on the architecture. The above data shows that: x86 374 x86_64 323 The value of __NR_userfaultfd was changed to 282 when asm-generic/unistd.h was included. It makes the test to fail every time as the correct number of this syscall on x86_64 is 323. Fix the header to asm/unistd.h. Link: https://lkml.kernel.org/r/20240923053836.3270393-1-usama.anjum@collabora.com Fixes: a5c6bc590094 ("selftests/mm: remove local __NR_* definitions") Signed-off-by: Muhammad Usama Anjum Reviewed-by: Shuah Khan Reviewed-by: David Hildenbrand Cc: John Hubbard Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/pagemap_ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c index fc90af2a97b8..bcc73b4e805c 100644 --- a/tools/testing/selftests/mm/pagemap_ioctl.c +++ b/tools/testing/selftests/mm/pagemap_ioctl.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include From c509f67df398f985717601563db577e91e67787f Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 22 Sep 2024 08:05:07 -0700 Subject: [PATCH 509/655] Revert "list: test: fix tests for list_cut_position()" This reverts commit e620799c414a035dea1208bcb51c869744931dbb. The commit introduces unit test failures. Expected cur == &entries[i], but cur == 0000037fffadfd80 &entries[i] == 0000037fffadfd60 # list_test_list_cut_position: pass:0 fail:1 skip:0 total:1 not ok 21 list_test_list_cut_position # list_test_list_cut_before: EXPECTATION FAILED at lib/list-test.c:444 Expected cur == &entries[i], but cur == 0000037fffa9fd70 &entries[i] == 0000037fffa9fd60 # list_test_list_cut_before: EXPECTATION FAILED at lib/list-test.c:444 Expected cur == &entries[i], but cur == 0000037fffa9fd80 &entries[i] == 0000037fffa9fd70 Revert it. Link: https://lkml.kernel.org/r/20240922150507.553814-1-linux@roeck-us.net Fixes: e620799c414a ("list: test: fix tests for list_cut_position()") Signed-off-by: Guenter Roeck Cc: I Hsin Cheng Cc: David Gow Signed-off-by: Andrew Morton --- lib/list-test.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/lib/list-test.c b/lib/list-test.c index 4f3dc75baec1..e207c4c98d70 100644 --- a/lib/list-test.c +++ b/lib/list-test.c @@ -408,13 +408,10 @@ static void list_test_list_cut_position(struct kunit *test) KUNIT_EXPECT_EQ(test, i, 2); - i = 0; list_for_each(cur, &list1) { KUNIT_EXPECT_PTR_EQ(test, cur, &entries[i]); i++; } - - KUNIT_EXPECT_EQ(test, i, 1); } static void list_test_list_cut_before(struct kunit *test) @@ -439,13 +436,10 @@ static void list_test_list_cut_before(struct kunit *test) KUNIT_EXPECT_EQ(test, i, 1); - i = 0; list_for_each(cur, &list1) { KUNIT_EXPECT_PTR_EQ(test, cur, &entries[i]); i++; } - - KUNIT_EXPECT_EQ(test, i, 2); } static void list_test_list_splice(struct kunit *test) From a530bbc53826c607f64e8ee466c3351efaf6aea5 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Fri, 20 Sep 2024 09:47:40 +0800 Subject: [PATCH 510/655] memory tiers: use default_dram_perf_ref_source in log message Commit 3718c02dbd4c ("acpi, hmat: calculate abstract distance with HMAT") added a default_dram_perf_ref_source variable that was initialized but never used. This causes kmemleak to report the following memory leak: unreferenced object 0xff11000225a47b60 (size 16): comm "swapper/0", pid 1, jiffies 4294761654 hex dump (first 16 bytes): 41 43 50 49 20 48 4d 41 54 00 c1 4b 7d b7 75 7c ACPI HMAT..K}.u| backtrace (crc e6d0e7b2): [] __kmalloc_node_track_caller_noprof+0x36b/0x440 [] kstrdup+0x36/0x60 [] mt_set_default_dram_perf+0x23a/0x2c0 [] hmat_init+0x2b3/0x660 [] do_one_initcall+0x11c/0x5c0 [] do_initcalls+0x1b4/0x1f0 [] kernel_init_freeable+0x4ae/0x520 [] kernel_init+0x1c/0x150 [] ret_from_fork+0x31/0x70 [] ret_from_fork_asm+0x1a/0x30 This reminds us that we forget to use the performance data source information. So, use the variable in the error log message to help identify the root cause of inconsistent performance number. Link: https://lkml.kernel.org/r/87y13mvo0n.fsf@yhuang6-desk2.ccr.corp.intel.com Fixes: 3718c02dbd4c ("acpi, hmat: calculate abstract distance with HMAT") Signed-off-by: "Huang, Ying" Reported-by: Waiman Long Acked-by: Waiman Long Cc: Alistair Popple Cc: Dave Jiang Signed-off-by: Andrew Morton --- mm/memory-tiers.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c index 9842acebd05e..fc14fe53e9b7 100644 --- a/mm/memory-tiers.c +++ b/mm/memory-tiers.c @@ -768,10 +768,10 @@ int mt_set_default_dram_perf(int nid, struct access_coordinate *perf, pr_info( "memory-tiers: the performance of DRAM node %d mismatches that of the reference\n" "DRAM node %d.\n", nid, default_dram_perf_ref_nid); - pr_info(" performance of reference DRAM node %d:\n", - default_dram_perf_ref_nid); + pr_info(" performance of reference DRAM node %d from %s:\n", + default_dram_perf_ref_nid, default_dram_perf_ref_source); dump_hmem_attrs(&default_dram_perf, " "); - pr_info(" performance of DRAM node %d:\n", nid); + pr_info(" performance of DRAM node %d from %s:\n", nid, source); dump_hmem_attrs(perf, " "); pr_info( " disable default DRAM node performance based abstract distance algorithm.\n"); From 486fd58af7ac1098b68370b1d4d9f94a2a1c7124 Mon Sep 17 00:00:00 2001 From: Andrey Skvortsov Date: Mon, 23 Sep 2024 19:48:43 +0300 Subject: [PATCH 511/655] zram: don't free statically defined names When CONFIG_ZRAM_MULTI_COMP isn't set ZRAM_SECONDARY_COMP can hold default_compressor, because it's the same offset as ZRAM_PRIMARY_COMP, so we need to make sure that we don't attempt to kfree() the statically defined compressor name. This is detected by KASAN. ================================================================== Call trace: kfree+0x60/0x3a0 zram_destroy_comps+0x98/0x198 [zram] zram_reset_device+0x22c/0x4a8 [zram] reset_store+0x1bc/0x2d8 [zram] dev_attr_store+0x44/0x80 sysfs_kf_write+0xfc/0x188 kernfs_fop_write_iter+0x28c/0x428 vfs_write+0x4dc/0x9b8 ksys_write+0x100/0x1f8 __arm64_sys_write+0x74/0xb8 invoke_syscall+0xd8/0x260 el0_svc_common.constprop.0+0xb4/0x240 do_el0_svc+0x48/0x68 el0_svc+0x40/0xc8 el0t_64_sync_handler+0x120/0x130 el0t_64_sync+0x190/0x198 ================================================================== Link: https://lkml.kernel.org/r/20240923164843.1117010-1-andrej.skvortzov@gmail.com Fixes: 684826f8271a ("zram: free secondary algorithms names") Signed-off-by: Andrey Skvortsov Reviewed-by: Sergey Senozhatsky Reported-by: Venkat Rao Bagalkote Closes: https://lore.kernel.org/lkml/57130e48-dbb6-4047-a8c7-ebf5aaea93f4@linux.vnet.ibm.com/ Tested-by: Venkat Rao Bagalkote Cc: Christophe JAILLET Cc: Jens Axboe Cc: Minchan Kim Cc: Sergey Senozhatsky Cc: Venkat Rao Bagalkote Cc: Chris Li Signed-off-by: Andrew Morton --- drivers/block/zram/zram_drv.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index c3d245617083..ad9c9bc3ccfc 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -2115,8 +2115,10 @@ static void zram_destroy_comps(struct zram *zram) zram->num_active_comps--; } - for (prio = ZRAM_SECONDARY_COMP; prio < ZRAM_MAX_COMPS; prio++) { - kfree(zram->comp_algs[prio]); + for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) { + /* Do not free statically defined compression algorithms */ + if (zram->comp_algs[prio] != default_compressor) + kfree(zram->comp_algs[prio]); zram->comp_algs[prio] = NULL; } From 2af148ef8549a12f8025286b8825c2833ee6bcb8 Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Wed, 25 Sep 2024 17:06:00 +0800 Subject: [PATCH 512/655] ocfs2: fix uninit-value in ocfs2_get_block() syzbot reported an uninit-value BUG: BUG: KMSAN: uninit-value in ocfs2_get_block+0xed2/0x2710 fs/ocfs2/aops.c:159 ocfs2_get_block+0xed2/0x2710 fs/ocfs2/aops.c:159 do_mpage_readpage+0xc45/0x2780 fs/mpage.c:225 mpage_readahead+0x43f/0x840 fs/mpage.c:374 ocfs2_readahead+0x269/0x320 fs/ocfs2/aops.c:381 read_pages+0x193/0x1110 mm/readahead.c:160 page_cache_ra_unbounded+0x901/0x9f0 mm/readahead.c:273 do_page_cache_ra mm/readahead.c:303 [inline] force_page_cache_ra+0x3b1/0x4b0 mm/readahead.c:332 force_page_cache_readahead mm/internal.h:347 [inline] generic_fadvise+0x6b0/0xa90 mm/fadvise.c:106 vfs_fadvise mm/fadvise.c:185 [inline] ksys_fadvise64_64 mm/fadvise.c:199 [inline] __do_sys_fadvise64 mm/fadvise.c:214 [inline] __se_sys_fadvise64 mm/fadvise.c:212 [inline] __x64_sys_fadvise64+0x1fb/0x3a0 mm/fadvise.c:212 x64_sys_call+0xe11/0x3ba0 arch/x86/include/generated/asm/syscalls_64.h:222 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcd/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f This is because when ocfs2_extent_map_get_blocks() fails, p_blkno is uninitialized. So the error log will trigger the above uninit-value access. The error log is out-of-date since get_blocks() was removed long time ago. And the error code will be logged in ocfs2_extent_map_get_blocks() once ocfs2_get_cluster() fails, so fix this by only logging inode and block. Link: https://syzkaller.appspot.com/bug?extid=9709e73bae885b05314b Link: https://lkml.kernel.org/r/20240925090600.3643376-1-joseph.qi@linux.alibaba.com Fixes: ccd979bdbce9 ("[PATCH] OCFS2: The Second Oracle Cluster Filesystem") Signed-off-by: Joseph Qi Reported-by: syzbot+9709e73bae885b05314b@syzkaller.appspotmail.com Tested-by: syzbot+9709e73bae885b05314b@syzkaller.appspotmail.com Cc: Heming Zhao Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Cc: Signed-off-by: Andrew Morton --- fs/ocfs2/aops.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 1fea43c33b6b..db72b3e924b3 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -156,9 +156,8 @@ int ocfs2_get_block(struct inode *inode, sector_t iblock, err = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, &count, &ext_flags); if (err) { - mlog(ML_ERROR, "Error %d from get_blocks(0x%p, %llu, 1, " - "%llu, NULL)\n", err, inode, (unsigned long long)iblock, - (unsigned long long)p_blkno); + mlog(ML_ERROR, "get_blocks() failed, inode: 0x%p, " + "block: %llu\n", inode, (unsigned long long)iblock); goto bail; } From 3cb576bc6dfb8940228b8130638860b631dd428a Mon Sep 17 00:00:00 2001 From: Frank Min Date: Wed, 25 Sep 2024 11:39:06 +0800 Subject: [PATCH 513/655] drm/amdgpu: fix PTE copy corruption for sdma 7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without setting dcc bit, there is ramdon PTE copy corruption on sdma 7. so add this bit and update the packet format accordingly. Acked-by: Alex Deucher Signed-off-by: Frank Min Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.11.x --- drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index cfd8e183ad50..a8763496aed3 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -1080,13 +1080,16 @@ static void sdma_v7_0_vm_copy_pte(struct amdgpu_ib *ib, unsigned bytes = count * 8; ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) | - SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); + SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | + SDMA_PKT_COPY_LINEAR_HEADER_CPV(1); + ib->ptr[ib->length_dw++] = bytes - 1; ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ ib->ptr[ib->length_dw++] = lower_32_bits(src); ib->ptr[ib->length_dw++] = upper_32_bits(src); ib->ptr[ib->length_dw++] = lower_32_bits(pe); ib->ptr[ib->length_dw++] = upper_32_bits(pe); + ib->ptr[ib->length_dw++] = 0; } @@ -1744,7 +1747,7 @@ static void sdma_v7_0_set_buffer_funcs(struct amdgpu_device *adev) } static const struct amdgpu_vm_pte_funcs sdma_v7_0_vm_pte_funcs = { - .copy_pte_num_dw = 7, + .copy_pte_num_dw = 8, .copy_pte = sdma_v7_0_vm_copy_pte, .write_pte = sdma_v7_0_vm_write_pte, .set_pte_pde = sdma_v7_0_vm_set_pte_pde, From a8387ddc0d15a365dd04baaa325a863d3612e020 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 25 Sep 2024 14:17:53 -0400 Subject: [PATCH 514/655] drm/amdgpu: fix vbios fetching for SR-IOV SR-IOV fetches the vbios from VRAM in some cases. Re-enable the VRAM path for dGPUs and rename the function to make it clear that it is not IGP specific. Fixes: 042658d17a54 ("drm/amdgpu: clean up vbios fetching code") Reviewed-by: Yang Wang Tested-by: Yang Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c index 46bf623919d7..45affc02548c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c @@ -87,8 +87,9 @@ static bool check_atom_bios(uint8_t *bios, size_t size) * part of the system bios. On boot, the system bios puts a * copy of the igp rom at the start of vram if a discrete card is * present. + * For SR-IOV, the vbios image is also put in VRAM in the VF. */ -static bool igp_read_bios_from_vram(struct amdgpu_device *adev) +static bool amdgpu_read_bios_from_vram(struct amdgpu_device *adev) { uint8_t __iomem *bios; resource_size_t vram_base; @@ -414,7 +415,7 @@ static bool amdgpu_get_bios_apu(struct amdgpu_device *adev) goto success; } - if (igp_read_bios_from_vram(adev)) { + if (amdgpu_read_bios_from_vram(adev)) { dev_info(adev->dev, "Fetched VBIOS from VRAM BAR\n"); goto success; } @@ -448,6 +449,12 @@ static bool amdgpu_get_bios_dgpu(struct amdgpu_device *adev) goto success; } + /* this is required for SR-IOV */ + if (amdgpu_read_bios_from_vram(adev)) { + dev_info(adev->dev, "Fetched VBIOS from VRAM BAR\n"); + goto success; + } + if (amdgpu_read_platform_bios(adev)) { dev_info(adev->dev, "Fetched VBIOS from platform\n"); goto success; From 34ad56a467c320d07db22146cfb99ee01704a5de Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 6 Sep 2024 13:51:06 -0400 Subject: [PATCH 515/655] drm/amdgpu: bump driver version for cleared VRAM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Driver now clears VRAM on allocation. Bump the driver version so mesa knows when it will get cleared vram by default. Reviewed-by: Marek Olšák Reviewed-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.11.x --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 86cff30d5c4e..db0763ffeff7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -117,9 +117,10 @@ * - 3.56.0 - Update IB start address and size alignment for decode and encode * - 3.57.0 - Compute tunneling on GFX10+ * - 3.58.0 - Add GFX12 DCC support + * - 3.59.0 - Cleared VRAM */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 58 +#define KMS_DRIVER_MINOR 59 #define KMS_DRIVER_PATCHLEVEL 0 /* From d7d2688bf4ea58734d73e18edcbf4684b1496d30 Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Fri, 20 Sep 2024 19:05:37 +0800 Subject: [PATCH 516/655] drm/amd/pm: update workload mask after the setting update workload mask after the setting. Link: https://gitlab.freedesktop.org/drm/amd/-/issues/3625 Signed-off-by: Kenneth Feng Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 6 +++++- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 3 +++ drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 6 +++++- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index a887ab945dfa..1d024b122b0c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2569,10 +2569,14 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, } } - return smu_cmn_send_smc_msg_with_param(smu, + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, workload_mask, NULL); + if (!ret) + smu->workload_mask = workload_mask; + + return ret; } static bool smu_v13_0_0_is_mode1_reset_supported(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 7bc95c404377..b891a5e0a396 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -2501,8 +2501,11 @@ static int smu_v13_0_7_set_power_profile_mode(struct smu_context *smu, long *inp return -EINVAL; ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, 1 << workload_type, NULL); + if (ret) dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__); + else + smu->workload_mask = (1 << workload_type); return ret; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index 43820d7d2c54..5899d01fa73d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -1861,10 +1861,14 @@ static int smu_v14_0_2_set_power_profile_mode(struct smu_context *smu, if (workload_type < 0) return -EINVAL; - return smu_cmn_send_smc_msg_with_param(smu, + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, 1 << workload_type, NULL); + if (!ret) + smu->workload_mask = 1 << workload_type; + + return ret; } static int smu_v14_0_2_baco_enter(struct smu_context *smu) From df9b455633aee0bad3e5c3dc9fc1c860b13c96d2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 26 Sep 2024 14:58:30 +0100 Subject: [PATCH 517/655] netfs: Fix write oops in generic/346 (9p) and generic/074 (cifs) In netfslib, a buffered writeback operation has a 'write queue' of folios that are being written, held in a linear sequence of folio_queue structs. The 'issuer' adds new folio_queues on the leading edge of the queue and populates each one progressively; the 'collector' pops them off the trailing edge and discards them and the folios they point to as they are consumed. The queue is required to always retain at least one folio_queue structure. This allows the queue to be accessed without locking and with just a bit of barriering. When a new subrequest is prepared, its ->io_iter iterator is pointed at the current end of the write queue and then the iterator is extended as more data is added to the queue until the subrequest is committed. Now, the problem is that the folio_queue at the leading edge of the write queue when a subrequest is prepared might have been entirely consumed - but not yet removed from the queue as it is the only remaining one and is preventing the queue from collapsing. So, what happens is that subreq->io_iter is pointed at the spent folio_queue, then a new folio_queue is added, and, at that point, the collector is at entirely at liberty to immediately delete the spent folio_queue. This leaves the subreq->io_iter pointing at a freed object. If the system is lucky, iterate_folioq() sees ->io_iter, sees the as-yet uncorrupted freed object and advances to the next folio_queue in the queue. In the case seen, however, the freed object gets recycled and put back onto the queue at the tail and filled to the end. This confuses iterate_folioq() and it tries to step ->next, which may be NULL - resulting in an oops. Fix this by the following means: (1) When preparing a write subrequest, make sure there's a folio_queue struct with space in it at the leading edge of the queue. A function to make space is split out of the function to append a folio so that it can be called for this purpose. (2) If the request struct iterator is pointing to a completely spent folio_queue when we make space, then advance the iterator to the newly allocated folio_queue. The subrequest's iterator will then be set from this. The oops could be triggered using the generic/346 xfstest with a filesystem on9P over TCP with cache=loose. The oops looked something like: BUG: kernel NULL pointer dereference, address: 0000000000000008 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page ... RIP: 0010:_copy_from_iter+0x2db/0x530 ... Call Trace: ... p9pdu_vwritef+0x3d8/0x5d0 p9_client_prepare_req+0xa8/0x140 p9_client_rpc+0x81/0x280 p9_client_write+0xcf/0x1c0 v9fs_issue_write+0x87/0xc0 netfs_advance_write+0xa0/0xb0 netfs_write_folio.isra.0+0x42d/0x500 netfs_writepages+0x15a/0x1f0 do_writepages+0xd1/0x220 filemap_fdatawrite_wbc+0x5c/0x80 v9fs_mmap_vm_close+0x7d/0xb0 remove_vma+0x35/0x70 vms_complete_munmap_vmas+0x11a/0x170 do_vmi_align_munmap+0x17d/0x1c0 do_vmi_munmap+0x13e/0x150 __vm_munmap+0x92/0xd0 __x64_sys_munmap+0x17/0x20 do_syscall_64+0x80/0xe0 entry_SYSCALL_64_after_hwframe+0x71/0x79 This also fixed a similar-looking issue with cifs and generic/074. Fixes: cd0277ed0c18 ("netfs: Use new folio_queue data type and iterator instead of xarray iter") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202409180928.f20b5a08-oliver.sang@intel.com Closes: https://lore.kernel.org/oe-lkp/202409131438.3f225fbf-oliver.sang@intel.com Signed-off-by: David Howells Tested-by: kernel test robot cc: Eric Van Hensbergen cc: Latchesar Ionkov cc: Dominique Martinet cc: Christian Schoenebeck cc: Paulo Alcantara cc: Jeff Layton cc: v9fs@lists.linux.dev cc: linux-cifs@vger.kernel.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Steve French --- fs/netfs/internal.h | 1 + fs/netfs/misc.c | 74 ++++++++++++++++++++++++++++++------------ fs/netfs/write_issue.c | 12 ++++++- 3 files changed, 65 insertions(+), 22 deletions(-) diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h index c9f0ed24cb7b..c562aec3b483 100644 --- a/fs/netfs/internal.h +++ b/fs/netfs/internal.h @@ -58,6 +58,7 @@ static inline void netfs_proc_del_rreq(struct netfs_io_request *rreq) {} /* * misc.c */ +struct folio_queue *netfs_buffer_make_space(struct netfs_io_request *rreq); int netfs_buffer_append_folio(struct netfs_io_request *rreq, struct folio *folio, bool needs_put); struct folio_queue *netfs_delete_buffer_head(struct netfs_io_request *wreq); diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c index 0ad0982ce0e2..63280791de3b 100644 --- a/fs/netfs/misc.c +++ b/fs/netfs/misc.c @@ -8,35 +8,67 @@ #include #include "internal.h" +/* + * Make sure there's space in the rolling queue. + */ +struct folio_queue *netfs_buffer_make_space(struct netfs_io_request *rreq) +{ + struct folio_queue *tail = rreq->buffer_tail, *prev; + unsigned int prev_nr_slots = 0; + + if (WARN_ON_ONCE(!rreq->buffer && tail) || + WARN_ON_ONCE(rreq->buffer && !tail)) + return ERR_PTR(-EIO); + + prev = tail; + if (prev) { + if (!folioq_full(tail)) + return tail; + prev_nr_slots = folioq_nr_slots(tail); + } + + tail = kmalloc(sizeof(*tail), GFP_NOFS); + if (!tail) + return ERR_PTR(-ENOMEM); + netfs_stat(&netfs_n_folioq); + folioq_init(tail); + tail->prev = prev; + if (prev) + /* [!] NOTE: After we set prev->next, the consumer is entirely + * at liberty to delete prev. + */ + WRITE_ONCE(prev->next, tail); + + rreq->buffer_tail = tail; + if (!rreq->buffer) { + rreq->buffer = tail; + iov_iter_folio_queue(&rreq->io_iter, ITER_SOURCE, tail, 0, 0, 0); + } else { + /* Make sure we don't leave the master iterator pointing to a + * block that might get immediately consumed. + */ + if (rreq->io_iter.folioq == prev && + rreq->io_iter.folioq_slot == prev_nr_slots) { + rreq->io_iter.folioq = tail; + rreq->io_iter.folioq_slot = 0; + } + } + rreq->buffer_tail_slot = 0; + return tail; +} + /* * Append a folio to the rolling queue. */ int netfs_buffer_append_folio(struct netfs_io_request *rreq, struct folio *folio, bool needs_put) { - struct folio_queue *tail = rreq->buffer_tail; + struct folio_queue *tail; unsigned int slot, order = folio_order(folio); - if (WARN_ON_ONCE(!rreq->buffer && tail) || - WARN_ON_ONCE(rreq->buffer && !tail)) - return -EIO; - - if (!tail || folioq_full(tail)) { - tail = kmalloc(sizeof(*tail), GFP_NOFS); - if (!tail) - return -ENOMEM; - netfs_stat(&netfs_n_folioq); - folioq_init(tail); - tail->prev = rreq->buffer_tail; - if (tail->prev) - tail->prev->next = tail; - rreq->buffer_tail = tail; - if (!rreq->buffer) { - rreq->buffer = tail; - iov_iter_folio_queue(&rreq->io_iter, ITER_SOURCE, tail, 0, 0, 0); - } - rreq->buffer_tail_slot = 0; - } + tail = netfs_buffer_make_space(rreq); + if (IS_ERR(tail)) + return PTR_ERR(tail); rreq->io_iter.count += PAGE_SIZE << order; diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c index 04e66d587f77..0929d9fd4ce7 100644 --- a/fs/netfs/write_issue.c +++ b/fs/netfs/write_issue.c @@ -153,12 +153,22 @@ static void netfs_prepare_write(struct netfs_io_request *wreq, loff_t start) { struct netfs_io_subrequest *subreq; + struct iov_iter *wreq_iter = &wreq->io_iter; + + /* Make sure we don't point the iterator at a used-up folio_queue + * struct being used as a placeholder to prevent the queue from + * collapsing. In such a case, extend the queue. + */ + if (iov_iter_is_folioq(wreq_iter) && + wreq_iter->folioq_slot >= folioq_nr_slots(wreq_iter->folioq)) { + netfs_buffer_make_space(wreq); + } subreq = netfs_alloc_subrequest(wreq); subreq->source = stream->source; subreq->start = start; subreq->stream_nr = stream->stream_nr; - subreq->io_iter = wreq->io_iter; + subreq->io_iter = *wreq_iter; _enter("R=%x[%x]", wreq->debug_id, subreq->debug_index); From c9c809f4137c3c0f962226c10d245d3ce2fd5b7c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 26 Sep 2024 12:56:46 -1000 Subject: [PATCH 518/655] scx_flatcg: Use a user DSQ for fallback instead of SCX_DSQ_GLOBAL scx_flatcg was using SCX_DSQ_GLOBAL for fallback handling. However, it is assuming that SCX_DSQ_GLOBAL isn't automatically consumed, which was true a while ago but is no longer the case. Also, there are further changes planned for SCX_DSQ_GLOBAL which will disallow explicit consumption from it. Switch to a user DSQ for fallback. Signed-off-by: Tejun Heo Acked-by: David Vernet --- tools/sched_ext/scx_flatcg.bpf.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/tools/sched_ext/scx_flatcg.bpf.c b/tools/sched_ext/scx_flatcg.bpf.c index e272bc39bbbd..b722baf6da4b 100644 --- a/tools/sched_ext/scx_flatcg.bpf.c +++ b/tools/sched_ext/scx_flatcg.bpf.c @@ -49,7 +49,10 @@ /* * Maximum amount of retries to find a valid cgroup. */ -#define CGROUP_MAX_RETRIES 1024 +enum { + FALLBACK_DSQ = 0, + CGROUP_MAX_RETRIES = 1024, +}; char _license[] SEC("license") = "GPL"; @@ -377,7 +380,7 @@ void BPF_STRUCT_OPS(fcg_enqueue, struct task_struct *p, u64 enq_flags) scx_bpf_dispatch(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, enq_flags); } else { stat_inc(FCG_STAT_GLOBAL); - scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags); + scx_bpf_dispatch(p, FALLBACK_DSQ, SCX_SLICE_DFL, enq_flags); } return; } @@ -780,7 +783,7 @@ void BPF_STRUCT_OPS(fcg_dispatch, s32 cpu, struct task_struct *prev) pick_next_cgroup: cpuc->cur_at = now; - if (scx_bpf_consume(SCX_DSQ_GLOBAL)) { + if (scx_bpf_consume(FALLBACK_DSQ)) { cpuc->cur_cgid = 0; return; } @@ -837,7 +840,7 @@ int BPF_STRUCT_OPS_SLEEPABLE(fcg_cgroup_init, struct cgroup *cgrp, int ret; /* - * Technically incorrect as cgroup ID is full 64bit while dq ID is + * Technically incorrect as cgroup ID is full 64bit while dsq ID is * 63bit. Should not be a problem in practice and easy to spot in the * unlikely case that it breaks. */ @@ -925,6 +928,11 @@ void BPF_STRUCT_OPS(fcg_cgroup_move, struct task_struct *p, p->scx.dsq_vtime = to_cgc->tvtime_now + vtime_delta; } +s32 BPF_STRUCT_OPS_SLEEPABLE(fcg_init) +{ + return scx_bpf_create_dsq(FALLBACK_DSQ, -1); +} + void BPF_STRUCT_OPS(fcg_exit, struct scx_exit_info *ei) { UEI_RECORD(uei, ei); @@ -943,6 +951,7 @@ SCX_OPS_DEFINE(flatcg_ops, .cgroup_init = (void *)fcg_cgroup_init, .cgroup_exit = (void *)fcg_cgroup_exit, .cgroup_move = (void *)fcg_cgroup_move, + .init = (void *)fcg_init, .exit = (void *)fcg_exit, .flags = SCX_OPS_HAS_CGROUP_WEIGHT | SCX_OPS_ENQ_EXITING, .name = "flatcg"); From 63fb3ec80516b256e9fc91de48567f5eda61d135 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 26 Sep 2024 12:56:46 -1000 Subject: [PATCH 519/655] sched_ext: Allow only user DSQs for scx_bpf_consume(), scx_bpf_dsq_nr_queued() and bpf_iter_scx_dsq_new() SCX_DSQ_GLOBAL is special in that it can't be used as a priority queue and is consumed implicitly, but all BPF DSQ related kfuncs could be used on it. SCX_DSQ_GLOBAL will be split per-node for scalability and those operations won't make sense anymore. Disallow SCX_DSQ_GLOBAL on scx_bpf_consume(), scx_bpf_dsq_nr_queued() and bpf_iter_scx_dsq_new(). This means that SCX_DSQ_GLOBAL can only be used as a dispatch target from BPF schedulers. With scx_flatcg, which was using SCX_DSQ_GLOBAL as the fallback DSQ, updated, this shouldn't affect any schedulers. This leaves find_dsq_for_dispatch() the only user of find_non_local_dsq(). Open code and remove find_non_local_dsq(). Signed-off-by: tejun heo Acked-by: David Vernet --- kernel/sched/ext.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index d74d1fe06999..ed2b914c42d1 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -1808,16 +1808,6 @@ static struct scx_dispatch_q *find_user_dsq(u64 dsq_id) return rhashtable_lookup_fast(&dsq_hash, &dsq_id, dsq_hash_params); } -static struct scx_dispatch_q *find_non_local_dsq(u64 dsq_id) -{ - lockdep_assert(rcu_read_lock_any_held()); - - if (dsq_id == SCX_DSQ_GLOBAL) - return &scx_dsq_global; - else - return find_user_dsq(dsq_id); -} - static struct scx_dispatch_q *find_dsq_for_dispatch(struct rq *rq, u64 dsq_id, struct task_struct *p) { @@ -1835,7 +1825,11 @@ static struct scx_dispatch_q *find_dsq_for_dispatch(struct rq *rq, u64 dsq_id, return &cpu_rq(cpu)->scx.local_dsq; } - dsq = find_non_local_dsq(dsq_id); + if (dsq_id == SCX_DSQ_GLOBAL) + dsq = &scx_dsq_global; + else + dsq = find_user_dsq(dsq_id); + if (unlikely(!dsq)) { scx_ops_error("non-existent DSQ 0x%llx for %s[%d]", dsq_id, p->comm, p->pid); @@ -6176,7 +6170,7 @@ __bpf_kfunc bool scx_bpf_consume(u64 dsq_id) flush_dispatch_buf(dspc->rq); - dsq = find_non_local_dsq(dsq_id); + dsq = find_user_dsq(dsq_id); if (unlikely(!dsq)) { scx_ops_error("invalid DSQ ID 0x%016llx", dsq_id); return false; @@ -6497,7 +6491,7 @@ __bpf_kfunc s32 scx_bpf_dsq_nr_queued(u64 dsq_id) goto out; } } else { - dsq = find_non_local_dsq(dsq_id); + dsq = find_user_dsq(dsq_id); if (dsq) { ret = READ_ONCE(dsq->nr); goto out; @@ -6546,7 +6540,7 @@ __bpf_kfunc int bpf_iter_scx_dsq_new(struct bpf_iter_scx_dsq *it, u64 dsq_id, if (flags & ~__SCX_DSQ_ITER_USER_FLAGS) return -EINVAL; - kit->dsq = find_non_local_dsq(dsq_id); + kit->dsq = find_user_dsq(dsq_id); if (!kit->dsq) return -ENOENT; From bba26bf356d1c1314a7bb24041c64c5784febbb0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 26 Sep 2024 12:56:46 -1000 Subject: [PATCH 520/655] sched_ext: Relocate find_user_dsq() To prepare for the addition of find_global_dsq(). No functional changes. Signed-off-by: tejun heo Acked-by: David Vernet --- kernel/sched/ext.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index ed2b914c42d1..acb4db7827a4 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -1029,6 +1029,11 @@ static bool u32_before(u32 a, u32 b) return (s32)(a - b) < 0; } +static struct scx_dispatch_q *find_user_dsq(u64 dsq_id) +{ + return rhashtable_lookup_fast(&dsq_hash, &dsq_id, dsq_hash_params); +} + /* * scx_kf_mask enforcement. Some kfuncs can only be called from specific SCX * ops. When invoking SCX ops, SCX_CALL_OP[_RET]() should be used to indicate @@ -1803,11 +1808,6 @@ static void dispatch_dequeue(struct rq *rq, struct task_struct *p) raw_spin_unlock(&dsq->lock); } -static struct scx_dispatch_q *find_user_dsq(u64 dsq_id) -{ - return rhashtable_lookup_fast(&dsq_hash, &dsq_id, dsq_hash_params); -} - static struct scx_dispatch_q *find_dsq_for_dispatch(struct rq *rq, u64 dsq_id, struct task_struct *p) { From b7b3b2dbae73b412c2d24b3d0ebf1110991e4510 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 26 Sep 2024 12:56:46 -1000 Subject: [PATCH 521/655] sched_ext: Split the global DSQ per NUMA node In the bypass mode, the global DSQ is used to schedule all tasks in simple FIFO order. All tasks are queued into the global DSQ and all CPUs try to execute tasks from it. This creates a lot of cross-node cacheline accesses and scheduling across the node boundaries, and can lead to live-lock conditions where the system takes tens of minutes to disable the BPF scheduler while executing in the bypass mode. Split the global DSQ per NUMA node. Each node has its own global DSQ. When a task is dispatched to SCX_DSQ_GLOBAL, it's put into the global DSQ local to the task's CPU and all CPUs in a node only consume its node-local global DSQ. This resolves a livelock condition which could be reliably triggered on an 2x EPYC 7642 system by running `stress-ng --race-sched 1024` together with `stress-ng --workload 80 --workload-threads 10` while repeatedly enabling and disabling a SCX scheduler. Signed-off-by: Tejun Heo Acked-by: David Vernet --- kernel/sched/ext.c | 73 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 60 insertions(+), 13 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index acb4db7827a4..949a3c43000a 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -925,8 +925,15 @@ static unsigned long __percpu *scx_kick_cpus_pnt_seqs; */ static DEFINE_PER_CPU(struct task_struct *, direct_dispatch_task); -/* dispatch queues */ -static struct scx_dispatch_q __cacheline_aligned_in_smp scx_dsq_global; +/* + * Dispatch queues. + * + * The global DSQ (%SCX_DSQ_GLOBAL) is split per-node for scalability. This is + * to avoid live-locking in bypass mode where all tasks are dispatched to + * %SCX_DSQ_GLOBAL and all CPUs consume from it. If per-node split isn't + * sufficient, it can be further split. + */ +static struct scx_dispatch_q **global_dsqs; static const struct rhashtable_params dsq_hash_params = { .key_len = 8, @@ -1029,6 +1036,11 @@ static bool u32_before(u32 a, u32 b) return (s32)(a - b) < 0; } +static struct scx_dispatch_q *find_global_dsq(struct task_struct *p) +{ + return global_dsqs[cpu_to_node(task_cpu(p))]; +} + static struct scx_dispatch_q *find_user_dsq(u64 dsq_id) { return rhashtable_lookup_fast(&dsq_hash, &dsq_id, dsq_hash_params); @@ -1642,7 +1654,7 @@ static void dispatch_enqueue(struct scx_dispatch_q *dsq, struct task_struct *p, scx_ops_error("attempting to dispatch to a destroyed dsq"); /* fall back to the global dsq */ raw_spin_unlock(&dsq->lock); - dsq = &scx_dsq_global; + dsq = find_global_dsq(p); raw_spin_lock(&dsq->lock); } } @@ -1820,20 +1832,20 @@ static struct scx_dispatch_q *find_dsq_for_dispatch(struct rq *rq, u64 dsq_id, s32 cpu = dsq_id & SCX_DSQ_LOCAL_CPU_MASK; if (!ops_cpu_valid(cpu, "in SCX_DSQ_LOCAL_ON dispatch verdict")) - return &scx_dsq_global; + return find_global_dsq(p); return &cpu_rq(cpu)->scx.local_dsq; } if (dsq_id == SCX_DSQ_GLOBAL) - dsq = &scx_dsq_global; + dsq = find_global_dsq(p); else dsq = find_user_dsq(dsq_id); if (unlikely(!dsq)) { scx_ops_error("non-existent DSQ 0x%llx for %s[%d]", dsq_id, p->comm, p->pid); - return &scx_dsq_global; + return find_global_dsq(p); } return dsq; @@ -2005,7 +2017,7 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags, global: touch_core_sched(rq, p); /* see the comment in local: */ p->scx.slice = SCX_SLICE_DFL; - dispatch_enqueue(&scx_dsq_global, p, enq_flags); + dispatch_enqueue(find_global_dsq(p), p, enq_flags); } static bool task_runnable(const struct task_struct *p) @@ -2391,6 +2403,13 @@ static bool consume_dispatch_q(struct rq *rq, struct scx_dispatch_q *dsq) return false; } +static bool consume_global_dsq(struct rq *rq) +{ + int node = cpu_to_node(cpu_of(rq)); + + return consume_dispatch_q(rq, global_dsqs[node]); +} + /** * dispatch_to_local_dsq - Dispatch a task to a local dsq * @rq: current rq which is locked @@ -2424,7 +2443,8 @@ static void dispatch_to_local_dsq(struct rq *rq, struct scx_dispatch_q *dst_dsq, #ifdef CONFIG_SMP if (unlikely(!task_can_run_on_remote_rq(p, dst_rq, true))) { - dispatch_enqueue(&scx_dsq_global, p, enq_flags | SCX_ENQ_CLEAR_OPSS); + dispatch_enqueue(find_global_dsq(p), p, + enq_flags | SCX_ENQ_CLEAR_OPSS); return; } @@ -2624,7 +2644,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev) if (rq->scx.local_dsq.nr) goto has_tasks; - if (consume_dispatch_q(rq, &scx_dsq_global)) + if (consume_global_dsq(rq)) goto has_tasks; if (!SCX_HAS_OP(dispatch) || scx_rq_bypassing(rq) || !scx_rq_online(rq)) @@ -2649,7 +2669,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev) if (rq->scx.local_dsq.nr) goto has_tasks; - if (consume_dispatch_q(rq, &scx_dsq_global)) + if (consume_global_dsq(rq)) goto has_tasks; /* @@ -4924,7 +4944,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link) struct scx_task_iter sti; struct task_struct *p; unsigned long timeout; - int i, cpu, ret; + int i, cpu, node, ret; if (!cpumask_equal(housekeeping_cpumask(HK_TYPE_DOMAIN), cpu_possible_mask)) { @@ -4943,6 +4963,34 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link) } } + if (!global_dsqs) { + struct scx_dispatch_q **dsqs; + + dsqs = kcalloc(nr_node_ids, sizeof(dsqs[0]), GFP_KERNEL); + if (!dsqs) { + ret = -ENOMEM; + goto err_unlock; + } + + for_each_node_state(node, N_POSSIBLE) { + struct scx_dispatch_q *dsq; + + dsq = kzalloc_node(sizeof(*dsq), GFP_KERNEL, node); + if (!dsq) { + for_each_node_state(node, N_POSSIBLE) + kfree(dsqs[node]); + kfree(dsqs); + ret = -ENOMEM; + goto err_unlock; + } + + init_dsq(dsq, SCX_DSQ_GLOBAL); + dsqs[node] = dsq; + } + + global_dsqs = dsqs; + } + if (scx_ops_enable_state() != SCX_OPS_DISABLED) { ret = -EBUSY; goto err_unlock; @@ -5777,7 +5825,6 @@ void __init init_sched_ext_class(void) SCX_TG_ONLINE); BUG_ON(rhashtable_init(&dsq_hash, &dsq_hash_params)); - init_dsq(&scx_dsq_global, SCX_DSQ_GLOBAL); #ifdef CONFIG_SMP BUG_ON(!alloc_cpumask_var(&idle_masks.cpu, GFP_KERNEL)); BUG_ON(!alloc_cpumask_var(&idle_masks.smt, GFP_KERNEL)); @@ -6053,7 +6100,7 @@ static bool scx_dispatch_from_dsq(struct bpf_iter_scx_dsq_kern *kit, if (dst_dsq->id == SCX_DSQ_LOCAL) { dst_rq = container_of(dst_dsq, struct rq, scx.local_dsq); if (!task_can_run_on_remote_rq(p, dst_rq, true)) { - dst_dsq = &scx_dsq_global; + dst_dsq = find_global_dsq(p); dst_rq = src_rq; } } else { From 6f34d8d382d64e7d8e77f5a9ddfd06f4c04937b0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 26 Sep 2024 12:56:46 -1000 Subject: [PATCH 522/655] sched_ext: Use shorter slice while bypassing While bypassing, tasks are scheduled in FIFO order which favors tasks that hog CPUs. This can slow down e.g. unloading of the BPF scheduler. While bypassing, guaranteeing timely forward progress is the main goal. There's no point in giving long slices. Shorten the time slice used while bypassing from 20ms to 5ms. Signed-off-by: Tejun Heo Acked-by: David Vernet --- kernel/sched/ext.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 949a3c43000a..d6f6bf6caecc 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -9,6 +9,7 @@ #define SCX_OP_IDX(op) (offsetof(struct sched_ext_ops, op) / sizeof(void (*)(void))) enum scx_consts { + SCX_SLICE_BYPASS = SCX_SLICE_DFL / 4, SCX_DSP_DFL_MAX_BATCH = 32, SCX_DSP_MAX_LOOPS = 32, SCX_WATCHDOG_MAX_TIMEOUT = 30 * HZ, @@ -1944,6 +1945,7 @@ static bool scx_rq_online(struct rq *rq) static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags, int sticky_cpu) { + bool bypassing = scx_rq_bypassing(rq); struct task_struct **ddsp_taskp; unsigned long qseq; @@ -1961,7 +1963,7 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags, if (!scx_rq_online(rq)) goto local; - if (scx_rq_bypassing(rq)) + if (bypassing) goto global; if (p->scx.ddsp_dsq_id != SCX_DSQ_INVALID) @@ -2016,7 +2018,7 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags, global: touch_core_sched(rq, p); /* see the comment in local: */ - p->scx.slice = SCX_SLICE_DFL; + p->scx.slice = bypassing ? SCX_SLICE_BYPASS : SCX_SLICE_DFL; dispatch_enqueue(find_global_dsq(p), p, enq_flags); } From b0abcd65ec545701b8793e12bc27dc98042b151a Mon Sep 17 00:00:00 2001 From: Enzo Matsumiya Date: Thu, 26 Sep 2024 14:46:13 -0300 Subject: [PATCH 523/655] smb: client: fix UAF in async decryption Doing an async decryption (large read) crashes with a slab-use-after-free way down in the crypto API. Reproducer: # mount.cifs -o ...,seal,esize=1 //srv/share /mnt # dd if=/mnt/largefile of=/dev/null ... [ 194.196391] ================================================================== [ 194.196844] BUG: KASAN: slab-use-after-free in gf128mul_4k_lle+0xc1/0x110 [ 194.197269] Read of size 8 at addr ffff888112bd0448 by task kworker/u77:2/899 [ 194.197707] [ 194.197818] CPU: 12 UID: 0 PID: 899 Comm: kworker/u77:2 Not tainted 6.11.0-lku-00028-gfca3ca14a17a-dirty #43 [ 194.198400] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-3-gd478f380-prebuilt.qemu.org 04/01/2014 [ 194.199046] Workqueue: smb3decryptd smb2_decrypt_offload [cifs] [ 194.200032] Call Trace: [ 194.200191] [ 194.200327] dump_stack_lvl+0x4e/0x70 [ 194.200558] ? gf128mul_4k_lle+0xc1/0x110 [ 194.200809] print_report+0x174/0x505 [ 194.201040] ? __pfx__raw_spin_lock_irqsave+0x10/0x10 [ 194.201352] ? srso_return_thunk+0x5/0x5f [ 194.201604] ? __virt_addr_valid+0xdf/0x1c0 [ 194.201868] ? gf128mul_4k_lle+0xc1/0x110 [ 194.202128] kasan_report+0xc8/0x150 [ 194.202361] ? gf128mul_4k_lle+0xc1/0x110 [ 194.202616] gf128mul_4k_lle+0xc1/0x110 [ 194.202863] ghash_update+0x184/0x210 [ 194.203103] shash_ahash_update+0x184/0x2a0 [ 194.203377] ? __pfx_shash_ahash_update+0x10/0x10 [ 194.203651] ? srso_return_thunk+0x5/0x5f [ 194.203877] ? crypto_gcm_init_common+0x1ba/0x340 [ 194.204142] gcm_hash_assoc_remain_continue+0x10a/0x140 [ 194.204434] crypt_message+0xec1/0x10a0 [cifs] [ 194.206489] ? __pfx_crypt_message+0x10/0x10 [cifs] [ 194.208507] ? srso_return_thunk+0x5/0x5f [ 194.209205] ? srso_return_thunk+0x5/0x5f [ 194.209925] ? srso_return_thunk+0x5/0x5f [ 194.210443] ? srso_return_thunk+0x5/0x5f [ 194.211037] decrypt_raw_data+0x15f/0x250 [cifs] [ 194.212906] ? __pfx_decrypt_raw_data+0x10/0x10 [cifs] [ 194.214670] ? srso_return_thunk+0x5/0x5f [ 194.215193] smb2_decrypt_offload+0x12a/0x6c0 [cifs] This is because TFM is being used in parallel. Fix this by allocating a new AEAD TFM for async decryption, but keep the existing one for synchronous READ cases (similar to what is done in smb3_calc_signature()). Also remove the calls to aead_request_set_callback() and crypto_wait_req() since it's always going to be a synchronous operation. Signed-off-by: Enzo Matsumiya Signed-off-by: Steve French --- fs/smb/client/smb2ops.c | 47 ++++++++++++++++++++++++----------------- fs/smb/client/smb2pdu.c | 6 ++++++ 2 files changed, 34 insertions(+), 19 deletions(-) diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 1ee2dd4a1cae..177173072bfa 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -4309,7 +4309,7 @@ smb2_get_enc_key(struct TCP_Server_Info *server, __u64 ses_id, int enc, u8 *key) */ static int crypt_message(struct TCP_Server_Info *server, int num_rqst, - struct smb_rqst *rqst, int enc) + struct smb_rqst *rqst, int enc, struct crypto_aead *tfm) { struct smb2_transform_hdr *tr_hdr = (struct smb2_transform_hdr *)rqst[0].rq_iov[0].iov_base; @@ -4320,8 +4320,6 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst, u8 key[SMB3_ENC_DEC_KEY_SIZE]; struct aead_request *req; u8 *iv; - DECLARE_CRYPTO_WAIT(wait); - struct crypto_aead *tfm; unsigned int crypt_len = le32_to_cpu(tr_hdr->OriginalMessageSize); void *creq; size_t sensitive_size; @@ -4333,14 +4331,6 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst, return rc; } - rc = smb3_crypto_aead_allocate(server); - if (rc) { - cifs_server_dbg(VFS, "%s: crypto alloc failed\n", __func__); - return rc; - } - - tfm = enc ? server->secmech.enc : server->secmech.dec; - if ((server->cipher_type == SMB2_ENCRYPTION_AES256_CCM) || (server->cipher_type == SMB2_ENCRYPTION_AES256_GCM)) rc = crypto_aead_setkey(tfm, key, SMB3_GCM256_CRYPTKEY_SIZE); @@ -4380,11 +4370,7 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst, aead_request_set_crypt(req, sg, sg, crypt_len, iv); aead_request_set_ad(req, assoc_data_len); - aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, - crypto_req_done, &wait); - - rc = crypto_wait_req(enc ? crypto_aead_encrypt(req) - : crypto_aead_decrypt(req), &wait); + rc = enc ? crypto_aead_encrypt(req) : crypto_aead_decrypt(req); if (!rc && enc) memcpy(&tr_hdr->Signature, sign, SMB2_SIGNATURE_SIZE); @@ -4526,7 +4512,7 @@ smb3_init_transform_rq(struct TCP_Server_Info *server, int num_rqst, /* fill the 1st iov with a transform header */ fill_transform_hdr(tr_hdr, orig_len, old_rq, server->cipher_type); - rc = crypt_message(server, num_rqst, new_rq, 1); + rc = crypt_message(server, num_rqst, new_rq, 1, server->secmech.enc); cifs_dbg(FYI, "Encrypt message returned %d\n", rc); if (rc) goto err_free; @@ -4551,8 +4537,9 @@ decrypt_raw_data(struct TCP_Server_Info *server, char *buf, unsigned int buf_data_size, struct iov_iter *iter, bool is_offloaded) { - struct kvec iov[2]; + struct crypto_aead *tfm; struct smb_rqst rqst = {NULL}; + struct kvec iov[2]; size_t iter_size = 0; int rc; @@ -4568,9 +4555,31 @@ decrypt_raw_data(struct TCP_Server_Info *server, char *buf, iter_size = iov_iter_count(iter); } - rc = crypt_message(server, 1, &rqst, 0); + if (is_offloaded) { + if ((server->cipher_type == SMB2_ENCRYPTION_AES128_GCM) || + (server->cipher_type == SMB2_ENCRYPTION_AES256_GCM)) + tfm = crypto_alloc_aead("gcm(aes)", 0, 0); + else + tfm = crypto_alloc_aead("ccm(aes)", 0, 0); + if (IS_ERR(tfm)) { + rc = PTR_ERR(tfm); + cifs_server_dbg(VFS, "%s: Failed alloc decrypt TFM, rc=%d\n", __func__, rc); + + return rc; + } + } else { + if (unlikely(!server->secmech.dec)) + return -EIO; + + tfm = server->secmech.dec; + } + + rc = crypt_message(server, 1, &rqst, 0, tfm); cifs_dbg(FYI, "Decrypt message returned %d\n", rc); + if (is_offloaded) + crypto_free_aead(tfm); + if (rc) return rc; diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index bb225758448a..cb423bfe7377 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -1266,6 +1266,12 @@ SMB2_negotiate(const unsigned int xid, else cifs_server_dbg(VFS, "Missing expected negotiate contexts\n"); } + + if (server->cipher_type && !rc) { + rc = smb3_crypto_aead_allocate(server); + if (rc) + cifs_server_dbg(VFS, "%s: crypto alloc failed, rc=%d\n", __func__, rc); + } neg_exit: free_rsp_buf(resp_buftype, rsp); return rc; From f7025d861694362348efc14eaad6a17840c4e9a4 Mon Sep 17 00:00:00 2001 From: Enzo Matsumiya Date: Thu, 26 Sep 2024 14:46:14 -0300 Subject: [PATCH 524/655] smb: client: allocate crypto only for primary server For extra channels, point ->secmech.{enc,dec} to the primary server ones. Signed-off-by: Enzo Matsumiya Signed-off-by: Steve French --- fs/smb/client/cifsencrypt.c | 17 +++++++++++------ fs/smb/client/smb2pdu.c | 10 +++++++--- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/fs/smb/client/cifsencrypt.c b/fs/smb/client/cifsencrypt.c index 7481b21a0489..15aa75e7f1c3 100644 --- a/fs/smb/client/cifsencrypt.c +++ b/fs/smb/client/cifsencrypt.c @@ -735,13 +735,18 @@ cifs_crypto_secmech_release(struct TCP_Server_Info *server) cifs_free_hash(&server->secmech.sha512); cifs_free_hash(&server->secmech.hmacmd5); - if (server->secmech.enc) { - crypto_free_aead(server->secmech.enc); - server->secmech.enc = NULL; - } + if (!SERVER_IS_CHAN(server)) { + if (server->secmech.enc) { + crypto_free_aead(server->secmech.enc); + server->secmech.enc = NULL; + } - if (server->secmech.dec) { - crypto_free_aead(server->secmech.dec); + if (server->secmech.dec) { + crypto_free_aead(server->secmech.dec); + server->secmech.dec = NULL; + } + } else { + server->secmech.enc = NULL; server->secmech.dec = NULL; } } diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index cb423bfe7377..02828b9c3cb3 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -1268,9 +1268,13 @@ SMB2_negotiate(const unsigned int xid, } if (server->cipher_type && !rc) { - rc = smb3_crypto_aead_allocate(server); - if (rc) - cifs_server_dbg(VFS, "%s: crypto alloc failed, rc=%d\n", __func__, rc); + if (!SERVER_IS_CHAN(server)) { + rc = smb3_crypto_aead_allocate(server); + } else { + /* For channels, just reuse the primary server crypto secmech. */ + server->secmech.enc = server->primary_server->secmech.enc; + server->secmech.dec = server->primary_server->secmech.dec; + } } neg_exit: free_rsp_buf(resp_buftype, rsp); From a13ca780afab350f37f8be9eda2bf79d1aed9bdd Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Wed, 18 Sep 2024 02:04:01 -0300 Subject: [PATCH 525/655] smb: client: stop flooding dmesg in smb2_calc_signature() When having several mounts that share same credential and the client couldn't re-establish an SMB session due to an expired kerberos ticket or rotated password, smb2_calc_signature() will end up flooding dmesg when not finding SMB sessions to calculate signatures. Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French --- fs/smb/client/smb2transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index e4636fca821d..c8bf0000f73b 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -242,7 +242,7 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, ses = smb2_find_smb_ses(server, le64_to_cpu(shdr->SessionId)); if (unlikely(!ses)) { - cifs_server_dbg(VFS, "%s: Could not find session\n", __func__); + cifs_server_dbg(FYI, "%s: Could not find session\n", __func__); return -ENOENT; } From db44ca9f7bc00a368d345b9fa1ecee0c4e75ac48 Mon Sep 17 00:00:00 2001 From: Enzo Matsumiya Date: Thu, 26 Sep 2024 14:46:15 -0300 Subject: [PATCH 526/655] smb: client: make HMAC-MD5 TFM ephemeral The HMAC-MD5 shash TFM is used only briefly during Session Setup stage, when computing NTLMv2 hashes. There's no need to keep it allocated in servers' secmech the whole time, so keep its lifetime inside setup_ntlmv2_rsp(). Signed-off-by: Enzo Matsumiya Signed-off-by: Steve French --- fs/smb/client/cifsencrypt.c | 133 ++++++++++++++---------------------- fs/smb/client/cifsglob.h | 1 - 2 files changed, 50 insertions(+), 84 deletions(-) diff --git a/fs/smb/client/cifsencrypt.c b/fs/smb/client/cifsencrypt.c index 15aa75e7f1c3..464e6ccdfa5f 100644 --- a/fs/smb/client/cifsencrypt.c +++ b/fs/smb/client/cifsencrypt.c @@ -416,7 +416,7 @@ find_timestamp(struct cifs_ses *ses) } static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, - const struct nls_table *nls_cp) + const struct nls_table *nls_cp, struct shash_desc *hmacmd5) { int rc = 0; int len; @@ -425,34 +425,26 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, wchar_t *domain; wchar_t *server; - if (!ses->server->secmech.hmacmd5) { - cifs_dbg(VFS, "%s: can't generate ntlmv2 hash\n", __func__); - return -1; - } - /* calculate md4 hash of password */ E_md4hash(ses->password, nt_hash, nls_cp); - rc = crypto_shash_setkey(ses->server->secmech.hmacmd5->tfm, nt_hash, - CIFS_NTHASH_SIZE); + rc = crypto_shash_setkey(hmacmd5->tfm, nt_hash, CIFS_NTHASH_SIZE); if (rc) { - cifs_dbg(VFS, "%s: Could not set NT Hash as a key\n", __func__); + cifs_dbg(VFS, "%s: Could not set NT hash as a key, rc=%d\n", __func__, rc); return rc; } - rc = crypto_shash_init(ses->server->secmech.hmacmd5); + rc = crypto_shash_init(hmacmd5); if (rc) { - cifs_dbg(VFS, "%s: Could not init hmacmd5\n", __func__); + cifs_dbg(VFS, "%s: Could not init HMAC-MD5, rc=%d\n", __func__, rc); return rc; } /* convert ses->user_name to unicode */ len = ses->user_name ? strlen(ses->user_name) : 0; user = kmalloc(2 + (len * 2), GFP_KERNEL); - if (user == NULL) { - rc = -ENOMEM; - return rc; - } + if (user == NULL) + return -ENOMEM; if (len) { len = cifs_strtoUTF16(user, ses->user_name, len, nls_cp); @@ -461,11 +453,10 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, *(u16 *)user = 0; } - rc = crypto_shash_update(ses->server->secmech.hmacmd5, - (char *)user, 2 * len); + rc = crypto_shash_update(hmacmd5, (char *)user, 2 * len); kfree(user); if (rc) { - cifs_dbg(VFS, "%s: Could not update with user\n", __func__); + cifs_dbg(VFS, "%s: Could not update with user, rc=%d\n", __func__, rc); return rc; } @@ -474,19 +465,15 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, len = strlen(ses->domainName); domain = kmalloc(2 + (len * 2), GFP_KERNEL); - if (domain == NULL) { - rc = -ENOMEM; - return rc; - } + if (domain == NULL) + return -ENOMEM; + len = cifs_strtoUTF16((__le16 *)domain, ses->domainName, len, nls_cp); - rc = - crypto_shash_update(ses->server->secmech.hmacmd5, - (char *)domain, 2 * len); + rc = crypto_shash_update(hmacmd5, (char *)domain, 2 * len); kfree(domain); if (rc) { - cifs_dbg(VFS, "%s: Could not update with domain\n", - __func__); + cifs_dbg(VFS, "%s: Could not update with domain, rc=%d\n", __func__, rc); return rc; } } else { @@ -494,33 +481,27 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, len = strlen(ses->ip_addr); server = kmalloc(2 + (len * 2), GFP_KERNEL); - if (server == NULL) { - rc = -ENOMEM; - return rc; - } - len = cifs_strtoUTF16((__le16 *)server, ses->ip_addr, len, - nls_cp); - rc = - crypto_shash_update(ses->server->secmech.hmacmd5, - (char *)server, 2 * len); + if (server == NULL) + return -ENOMEM; + + len = cifs_strtoUTF16((__le16 *)server, ses->ip_addr, len, nls_cp); + rc = crypto_shash_update(hmacmd5, (char *)server, 2 * len); kfree(server); if (rc) { - cifs_dbg(VFS, "%s: Could not update with server\n", - __func__); + cifs_dbg(VFS, "%s: Could not update with server, rc=%d\n", __func__, rc); return rc; } } - rc = crypto_shash_final(ses->server->secmech.hmacmd5, - ntlmv2_hash); + rc = crypto_shash_final(hmacmd5, ntlmv2_hash); if (rc) - cifs_dbg(VFS, "%s: Could not generate md5 hash\n", __func__); + cifs_dbg(VFS, "%s: Could not generate MD5 hash, rc=%d\n", __func__, rc); return rc; } static int -CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash) +CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash, struct shash_desc *hmacmd5) { int rc; struct ntlmv2_resp *ntlmv2 = (struct ntlmv2_resp *) @@ -531,43 +512,33 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash) hash_len = ses->auth_key.len - (CIFS_SESS_KEY_SIZE + offsetof(struct ntlmv2_resp, challenge.key[0])); - if (!ses->server->secmech.hmacmd5) { - cifs_dbg(VFS, "%s: can't generate ntlmv2 hash\n", __func__); - return -1; - } - - rc = crypto_shash_setkey(ses->server->secmech.hmacmd5->tfm, - ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE); + rc = crypto_shash_setkey(hmacmd5->tfm, ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE); if (rc) { - cifs_dbg(VFS, "%s: Could not set NTLMV2 Hash as a key\n", - __func__); + cifs_dbg(VFS, "%s: Could not set NTLMv2 hash as a key, rc=%d\n", __func__, rc); return rc; } - rc = crypto_shash_init(ses->server->secmech.hmacmd5); + rc = crypto_shash_init(hmacmd5); if (rc) { - cifs_dbg(VFS, "%s: Could not init hmacmd5\n", __func__); + cifs_dbg(VFS, "%s: Could not init HMAC-MD5, rc=%d\n", __func__, rc); return rc; } if (ses->server->negflavor == CIFS_NEGFLAVOR_EXTENDED) - memcpy(ntlmv2->challenge.key, - ses->ntlmssp->cryptkey, CIFS_SERVER_CHALLENGE_SIZE); + memcpy(ntlmv2->challenge.key, ses->ntlmssp->cryptkey, CIFS_SERVER_CHALLENGE_SIZE); else - memcpy(ntlmv2->challenge.key, - ses->server->cryptkey, CIFS_SERVER_CHALLENGE_SIZE); - rc = crypto_shash_update(ses->server->secmech.hmacmd5, - ntlmv2->challenge.key, hash_len); + memcpy(ntlmv2->challenge.key, ses->server->cryptkey, CIFS_SERVER_CHALLENGE_SIZE); + + rc = crypto_shash_update(hmacmd5, ntlmv2->challenge.key, hash_len); if (rc) { - cifs_dbg(VFS, "%s: Could not update with response\n", __func__); + cifs_dbg(VFS, "%s: Could not update with response, rc=%d\n", __func__, rc); return rc; } /* Note that the MD5 digest over writes anon.challenge_key.key */ - rc = crypto_shash_final(ses->server->secmech.hmacmd5, - ntlmv2->ntlmv2_hash); + rc = crypto_shash_final(hmacmd5, ntlmv2->ntlmv2_hash); if (rc) - cifs_dbg(VFS, "%s: Could not generate md5 hash\n", __func__); + cifs_dbg(VFS, "%s: Could not generate MD5 hash, rc=%d\n", __func__, rc); return rc; } @@ -575,6 +546,7 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash) int setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) { + struct shash_desc *hmacmd5 = NULL; int rc; int baselen; unsigned int tilen; @@ -640,55 +612,51 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) cifs_server_lock(ses->server); - rc = cifs_alloc_hash("hmac(md5)", &ses->server->secmech.hmacmd5); + rc = cifs_alloc_hash("hmac(md5)", &hmacmd5); if (rc) { + cifs_dbg(VFS, "Could not allocate HMAC-MD5, rc=%d\n", rc); goto unlock; } /* calculate ntlmv2_hash */ - rc = calc_ntlmv2_hash(ses, ntlmv2_hash, nls_cp); + rc = calc_ntlmv2_hash(ses, ntlmv2_hash, nls_cp, hmacmd5); if (rc) { - cifs_dbg(VFS, "Could not get v2 hash rc %d\n", rc); + cifs_dbg(VFS, "Could not get NTLMv2 hash, rc=%d\n", rc); goto unlock; } /* calculate first part of the client response (CR1) */ - rc = CalcNTLMv2_response(ses, ntlmv2_hash); + rc = CalcNTLMv2_response(ses, ntlmv2_hash, hmacmd5); if (rc) { - cifs_dbg(VFS, "Could not calculate CR1 rc: %d\n", rc); + cifs_dbg(VFS, "Could not calculate CR1, rc=%d\n", rc); goto unlock; } /* now calculate the session key for NTLMv2 */ - rc = crypto_shash_setkey(ses->server->secmech.hmacmd5->tfm, - ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE); + rc = crypto_shash_setkey(hmacmd5->tfm, ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE); if (rc) { - cifs_dbg(VFS, "%s: Could not set NTLMV2 Hash as a key\n", - __func__); + cifs_dbg(VFS, "%s: Could not set NTLMv2 hash as a key, rc=%d\n", __func__, rc); goto unlock; } - rc = crypto_shash_init(ses->server->secmech.hmacmd5); + rc = crypto_shash_init(hmacmd5); if (rc) { - cifs_dbg(VFS, "%s: Could not init hmacmd5\n", __func__); + cifs_dbg(VFS, "%s: Could not init HMAC-MD5, rc=%d\n", __func__, rc); goto unlock; } - rc = crypto_shash_update(ses->server->secmech.hmacmd5, - ntlmv2->ntlmv2_hash, - CIFS_HMAC_MD5_HASH_SIZE); + rc = crypto_shash_update(hmacmd5, ntlmv2->ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE); if (rc) { - cifs_dbg(VFS, "%s: Could not update with response\n", __func__); + cifs_dbg(VFS, "%s: Could not update with response, rc=%d\n", __func__, rc); goto unlock; } - rc = crypto_shash_final(ses->server->secmech.hmacmd5, - ses->auth_key.response); + rc = crypto_shash_final(hmacmd5, ses->auth_key.response); if (rc) - cifs_dbg(VFS, "%s: Could not generate md5 hash\n", __func__); - + cifs_dbg(VFS, "%s: Could not generate MD5 hash, rc=%d\n", __func__, rc); unlock: cifs_server_unlock(ses->server); + cifs_free_hash(&hmacmd5); setup_ntlmv2_rsp_ret: kfree_sensitive(tiblob); @@ -733,7 +701,6 @@ cifs_crypto_secmech_release(struct TCP_Server_Info *server) cifs_free_hash(&server->secmech.hmacsha256); cifs_free_hash(&server->secmech.md5); cifs_free_hash(&server->secmech.sha512); - cifs_free_hash(&server->secmech.hmacmd5); if (!SERVER_IS_CHAN(server)) { if (server->secmech.enc) { diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 15571cf0ba63..da35c160e7dd 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -178,7 +178,6 @@ struct session_key { /* crypto hashing related structure/fields, not specific to a sec mech */ struct cifs_secmech { - struct shash_desc *hmacmd5; /* hmacmd5 hash function, for NTLMv2/CR1 hashes */ struct shash_desc *md5; /* md5 hash function, for CIFS/SMB1 signatures */ struct shash_desc *hmacsha256; /* hmac-sha256 hash function, for SMB2 signatures */ struct shash_desc *sha512; /* sha512 hash function, for SMB3.1.1 preauth hash */ From 220d83b52c7d16ec3c168b82f4e6ce59c645f7ab Mon Sep 17 00:00:00 2001 From: Enzo Matsumiya Date: Thu, 26 Sep 2024 14:46:16 -0300 Subject: [PATCH 527/655] smb: client: make SHA-512 TFM ephemeral The SHA-512 shash TFM is used only briefly during Session Setup stage, when computing SMB 3.1.1 preauth hash. There's no need to keep it allocated in servers' secmech the whole time, so keep its lifetime inside smb311_update_preauth_hash(). This also makes smb311_crypto_shash_allocate() redundant, so expose smb3_crypto_shash_allocate() and use that. Signed-off-by: Enzo Matsumiya Signed-off-by: Steve French --- fs/smb/client/cifsencrypt.c | 1 - fs/smb/client/cifsglob.h | 1 - fs/smb/client/sess.c | 2 +- fs/smb/client/smb2misc.c | 28 ++++++++++++++-------------- fs/smb/client/smb2proto.h | 2 +- fs/smb/client/smb2transport.c | 30 +----------------------------- 6 files changed, 17 insertions(+), 47 deletions(-) diff --git a/fs/smb/client/cifsencrypt.c b/fs/smb/client/cifsencrypt.c index 464e6ccdfa5f..2d851f596a72 100644 --- a/fs/smb/client/cifsencrypt.c +++ b/fs/smb/client/cifsencrypt.c @@ -700,7 +700,6 @@ cifs_crypto_secmech_release(struct TCP_Server_Info *server) cifs_free_hash(&server->secmech.aes_cmac); cifs_free_hash(&server->secmech.hmacsha256); cifs_free_hash(&server->secmech.md5); - cifs_free_hash(&server->secmech.sha512); if (!SERVER_IS_CHAN(server)) { if (server->secmech.enc) { diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index da35c160e7dd..315aac5dec05 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -180,7 +180,6 @@ struct session_key { struct cifs_secmech { struct shash_desc *md5; /* md5 hash function, for CIFS/SMB1 signatures */ struct shash_desc *hmacsha256; /* hmac-sha256 hash function, for SMB2 signatures */ - struct shash_desc *sha512; /* sha512 hash function, for SMB3.1.1 preauth hash */ struct shash_desc *aes_cmac; /* block-cipher based MAC function, for SMB3 signatures */ struct crypto_aead *enc; /* smb3 encryption AEAD TFM (AES-CCM and AES-GCM) */ diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index 3216f786908f..03c0b484a4b5 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -624,7 +624,7 @@ cifs_ses_add_channel(struct cifs_ses *ses, * to sign packets before we generate the channel signing key * (we sign with the session key) */ - rc = smb311_crypto_shash_allocate(chan->server); + rc = smb3_crypto_shash_allocate(chan->server); if (rc) { cifs_dbg(VFS, "%s: crypto alloc failed\n", __func__); mutex_unlock(&ses->session_mutex); diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c index f3c4b70b77b9..bdeb12ff53e3 100644 --- a/fs/smb/client/smb2misc.c +++ b/fs/smb/client/smb2misc.c @@ -906,41 +906,41 @@ smb311_update_preauth_hash(struct cifs_ses *ses, struct TCP_Server_Info *server, || (hdr->Status != cpu_to_le32(NT_STATUS_MORE_PROCESSING_REQUIRED)))) return 0; - ok: - rc = smb311_crypto_shash_allocate(server); - if (rc) + rc = cifs_alloc_hash("sha512", &sha512); + if (rc) { + cifs_dbg(VFS, "%s: Could not allocate SHA512 shash, rc=%d\n", __func__, rc); return rc; + } - sha512 = server->secmech.sha512; rc = crypto_shash_init(sha512); if (rc) { - cifs_dbg(VFS, "%s: Could not init sha512 shash\n", __func__); - return rc; + cifs_dbg(VFS, "%s: Could not init SHA512 shash, rc=%d\n", __func__, rc); + goto err_free; } rc = crypto_shash_update(sha512, ses->preauth_sha_hash, SMB2_PREAUTH_HASH_SIZE); if (rc) { - cifs_dbg(VFS, "%s: Could not update sha512 shash\n", __func__); - return rc; + cifs_dbg(VFS, "%s: Could not update SHA512 shash, rc=%d\n", __func__, rc); + goto err_free; } for (i = 0; i < nvec; i++) { rc = crypto_shash_update(sha512, iov[i].iov_base, iov[i].iov_len); if (rc) { - cifs_dbg(VFS, "%s: Could not update sha512 shash\n", - __func__); - return rc; + cifs_dbg(VFS, "%s: Could not update SHA512 shash, rc=%d\n", __func__, rc); + goto err_free; } } rc = crypto_shash_final(sha512, ses->preauth_sha_hash); if (rc) { - cifs_dbg(VFS, "%s: Could not finalize sha512 shash\n", - __func__); - return rc; + cifs_dbg(VFS, "%s: Could not finalize SHA12 shash, rc=%d\n", __func__, rc); + goto err_free; } +err_free: + cifs_free_hash(&sha512); return 0; } diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h index c7e1b149877a..56a896ff7cd9 100644 --- a/fs/smb/client/smb2proto.h +++ b/fs/smb/client/smb2proto.h @@ -291,7 +291,7 @@ extern int smb2_validate_and_copy_iov(unsigned int offset, extern void smb2_copy_fs_info_to_kstatfs( struct smb2_fs_full_size_info *pfs_inf, struct kstatfs *kst); -extern int smb311_crypto_shash_allocate(struct TCP_Server_Info *server); +extern int smb3_crypto_shash_allocate(struct TCP_Server_Info *server); extern int smb311_update_preauth_hash(struct cifs_ses *ses, struct TCP_Server_Info *server, struct kvec *iov, int nvec); diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index c8bf0000f73b..f7e04c40d22e 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -26,8 +26,7 @@ #include "../common/smb2status.h" #include "smb2glob.h" -static int -smb3_crypto_shash_allocate(struct TCP_Server_Info *server) +int smb3_crypto_shash_allocate(struct TCP_Server_Info *server) { struct cifs_secmech *p = &server->secmech; int rc; @@ -46,33 +45,6 @@ smb3_crypto_shash_allocate(struct TCP_Server_Info *server) return rc; } -int -smb311_crypto_shash_allocate(struct TCP_Server_Info *server) -{ - struct cifs_secmech *p = &server->secmech; - int rc = 0; - - rc = cifs_alloc_hash("hmac(sha256)", &p->hmacsha256); - if (rc) - return rc; - - rc = cifs_alloc_hash("cmac(aes)", &p->aes_cmac); - if (rc) - goto err; - - rc = cifs_alloc_hash("sha512", &p->sha512); - if (rc) - goto err; - - return 0; - -err: - cifs_free_hash(&p->aes_cmac); - cifs_free_hash(&p->hmacsha256); - return rc; -} - - static int smb2_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key) { From 09573b1cc76e7ff8f056ab29ea1cdc152ec8c653 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Wed, 11 Sep 2024 17:42:34 +0800 Subject: [PATCH 528/655] net: ieee802154: mcr20a: Use IRQF_NO_AUTOEN flag in request_irq() disable_irq() after request_irq() still has a time gap in which interrupts can come. request_irq() with IRQF_NO_AUTOEN flag will disable IRQ auto-enable when request IRQ. Fixes: 8c6ad9cc5157 ("ieee802154: Add NXP MCR20A IEEE 802.15.4 transceiver driver") Reviewed-by: Miquel Raynal Signed-off-by: Jinjie Ruan Link: https://lore.kernel.org/20240911094234.1922418-1-ruanjinjie@huawei.com Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/mcr20a.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ieee802154/mcr20a.c b/drivers/net/ieee802154/mcr20a.c index 433fb5839203..020d392a98b6 100644 --- a/drivers/net/ieee802154/mcr20a.c +++ b/drivers/net/ieee802154/mcr20a.c @@ -1302,16 +1302,13 @@ mcr20a_probe(struct spi_device *spi) irq_type = IRQF_TRIGGER_FALLING; ret = devm_request_irq(&spi->dev, spi->irq, mcr20a_irq_isr, - irq_type, dev_name(&spi->dev), lp); + irq_type | IRQF_NO_AUTOEN, dev_name(&spi->dev), lp); if (ret) { dev_err(&spi->dev, "could not request_irq for mcr20a\n"); ret = -ENODEV; goto free_dev; } - /* disable_irq by default and wait for starting hardware */ - disable_irq(spi->irq); - ret = ieee802154_register_hw(hw); if (ret) { dev_crit(&spi->dev, "ieee802154_register_hw failed\n"); From 6d3405415f887aef5774c04ae9fefae63d82bdaf Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 24 Sep 2024 11:34:18 +0200 Subject: [PATCH 529/655] i2c: keba: I2C_KEBA should depend on KEBA_CP500 The KEBA I2C controller is only present on KEBA PLC devices. Hence add a dependency on KEBA_CP500, to prevent asking the user about this driver when configuring a kernel without KEBA CP500 system FPGA support. Fixes: c7e08c816cd2fdf8 ("i2c: keba: Add KEBA I2C controller support") Signed-off-by: Geert Uytterhoeven Reviewed-by: Gerhard Engleder Signed-off-by: Andi Shyti --- drivers/i2c/busses/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 53f18b351f53..6b3ba7e5723a 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -782,6 +782,7 @@ config I2C_JZ4780 config I2C_KEBA tristate "KEBA I2C controller support" depends on HAS_IOMEM + depends on KEBA_CP500 || COMPILE_TEST select AUXILIARY_BUS help This driver supports the I2C controller found in KEBA system FPGA From 0c8d604dea437b69a861479b413d629bc9b3da70 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 23 Sep 2024 11:42:50 +0800 Subject: [PATCH 530/655] i2c: xiic: Fix pm_runtime_set_suspended() with runtime pm enabled It is not valid to call pm_runtime_set_suspended() for devices with runtime PM enabled because it returns -EAGAIN if it is enabled already and working. So, call pm_runtime_disable() before to fix it. Fixes: 36ecbcab84d0 ("i2c: xiic: Implement power management") Cc: # v4.6+ Signed-off-by: Jinjie Ruan Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-xiic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c index 4c89aad02451..1d68177241a6 100644 --- a/drivers/i2c/busses/i2c-xiic.c +++ b/drivers/i2c/busses/i2c-xiic.c @@ -1337,8 +1337,8 @@ static int xiic_i2c_probe(struct platform_device *pdev) return 0; err_pm_disable: - pm_runtime_set_suspended(&pdev->dev); pm_runtime_disable(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); return ret; } From d505d3593b52b6c43507f119572409087416ba28 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 23 Sep 2024 19:57:43 +0800 Subject: [PATCH 531/655] net: wwan: qcom_bam_dmux: Fix missing pm_runtime_disable() It's important to undo pm_runtime_use_autosuspend() with pm_runtime_dont_use_autosuspend() at driver exit time. But the pm_runtime_disable() and pm_runtime_dont_use_autosuspend() is missing in the error path for bam_dmux_probe(). So add it. Found by code review. Compile-tested only. Fixes: 21a0ffd9b38c ("net: wwan: Add Qualcomm BAM-DMUX WWAN network driver") Suggested-by: Stephan Gerhold Signed-off-by: Jinjie Ruan Reviewed-by: Stephan Gerhold Reviewed-by: Sergey Ryazanov Signed-off-by: David S. Miller --- drivers/net/wwan/qcom_bam_dmux.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/wwan/qcom_bam_dmux.c b/drivers/net/wwan/qcom_bam_dmux.c index 26ca719fa0de..5dcb9a84a12e 100644 --- a/drivers/net/wwan/qcom_bam_dmux.c +++ b/drivers/net/wwan/qcom_bam_dmux.c @@ -823,17 +823,17 @@ static int bam_dmux_probe(struct platform_device *pdev) ret = devm_request_threaded_irq(dev, pc_ack_irq, NULL, bam_dmux_pc_ack_irq, IRQF_ONESHOT, NULL, dmux); if (ret) - return ret; + goto err_disable_pm; ret = devm_request_threaded_irq(dev, dmux->pc_irq, NULL, bam_dmux_pc_irq, IRQF_ONESHOT, NULL, dmux); if (ret) - return ret; + goto err_disable_pm; ret = irq_get_irqchip_state(dmux->pc_irq, IRQCHIP_STATE_LINE_LEVEL, &dmux->pc_state); if (ret) - return ret; + goto err_disable_pm; /* Check if remote finished initialization before us */ if (dmux->pc_state) { @@ -844,6 +844,11 @@ static int bam_dmux_probe(struct platform_device *pdev) } return 0; + +err_disable_pm: + pm_runtime_disable(dev); + pm_runtime_dont_use_autosuspend(dev); + return ret; } static void bam_dmux_remove(struct platform_device *pdev) From 8a89015644513ef69193a037eb966f2d55fe385a Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 26 Sep 2024 18:56:31 +0200 Subject: [PATCH 532/655] selftests: netfilter: Fix nft_audit.sh for newer nft binaries As a side-effect of nftables' commit dbff26bfba833 ("cache: consolidate reset command"), audit logs changed when more objects were reset than fit into a single netlink message. Since the objects' distribution in netlink messages is not relevant, implement a summarizing function which combines repeated audit logs into a single one with summed up 'entries=' value. Fixes: 203bb9d39866 ("selftests: netfilter: Extend nft_audit.sh") Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- .../selftests/net/netfilter/nft_audit.sh | 57 ++++++++++--------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/tools/testing/selftests/net/netfilter/nft_audit.sh b/tools/testing/selftests/net/netfilter/nft_audit.sh index 902f8114bc80..87f2b4c725aa 100755 --- a/tools/testing/selftests/net/netfilter/nft_audit.sh +++ b/tools/testing/selftests/net/netfilter/nft_audit.sh @@ -48,12 +48,31 @@ logread_pid=$! trap 'kill $logread_pid; rm -f $logfile $rulefile' EXIT exec 3<"$logfile" +lsplit='s/^\(.*\) entries=\([^ ]*\) \(.*\)$/pfx="\1"\nval="\2"\nsfx="\3"/' +summarize_logs() { + sum=0 + while read line; do + eval $(sed "$lsplit" <<< "$line") + [[ $sum -gt 0 ]] && { + [[ "$pfx $sfx" == "$tpfx $tsfx" ]] && { + let "sum += val" + continue + } + echo "$tpfx entries=$sum $tsfx" + } + tpfx="$pfx" + tsfx="$sfx" + sum=$val + done + echo "$tpfx entries=$sum $tsfx" +} + do_test() { # (cmd, log) echo -n "testing for cmd: $1 ... " cat <&3 >/dev/null $1 >/dev/null || exit 1 sleep 0.1 - res=$(diff -a -u <(echo "$2") - <&3) + res=$(diff -a -u <(echo "$2") <(summarize_logs <&3)) [ $? -eq 0 ] && { echo "OK"; return; } echo "FAIL" grep -v '^\(---\|+++\|@@\)' <<< "$res" @@ -152,31 +171,17 @@ do_test 'nft reset rules t1 c2' \ 'table=t1 family=2 entries=3 op=nft_reset_rule' do_test 'nft reset rules table t1' \ -'table=t1 family=2 entries=3 op=nft_reset_rule -table=t1 family=2 entries=3 op=nft_reset_rule -table=t1 family=2 entries=3 op=nft_reset_rule' +'table=t1 family=2 entries=9 op=nft_reset_rule' do_test 'nft reset rules t2 c3' \ -'table=t2 family=2 entries=189 op=nft_reset_rule -table=t2 family=2 entries=188 op=nft_reset_rule -table=t2 family=2 entries=126 op=nft_reset_rule' +'table=t2 family=2 entries=503 op=nft_reset_rule' do_test 'nft reset rules t2' \ -'table=t2 family=2 entries=3 op=nft_reset_rule -table=t2 family=2 entries=3 op=nft_reset_rule -table=t2 family=2 entries=186 op=nft_reset_rule -table=t2 family=2 entries=188 op=nft_reset_rule -table=t2 family=2 entries=129 op=nft_reset_rule' +'table=t2 family=2 entries=509 op=nft_reset_rule' do_test 'nft reset rules' \ -'table=t1 family=2 entries=3 op=nft_reset_rule -table=t1 family=2 entries=3 op=nft_reset_rule -table=t1 family=2 entries=3 op=nft_reset_rule -table=t2 family=2 entries=3 op=nft_reset_rule -table=t2 family=2 entries=3 op=nft_reset_rule -table=t2 family=2 entries=180 op=nft_reset_rule -table=t2 family=2 entries=188 op=nft_reset_rule -table=t2 family=2 entries=135 op=nft_reset_rule' +'table=t1 family=2 entries=9 op=nft_reset_rule +table=t2 family=2 entries=509 op=nft_reset_rule' # resetting sets and elements @@ -200,13 +205,11 @@ do_test 'nft reset counters t1' \ 'table=t1 family=2 entries=1 op=nft_reset_obj' do_test 'nft reset counters t2' \ -'table=t2 family=2 entries=342 op=nft_reset_obj -table=t2 family=2 entries=158 op=nft_reset_obj' +'table=t2 family=2 entries=500 op=nft_reset_obj' do_test 'nft reset counters' \ 'table=t1 family=2 entries=1 op=nft_reset_obj -table=t2 family=2 entries=341 op=nft_reset_obj -table=t2 family=2 entries=159 op=nft_reset_obj' +table=t2 family=2 entries=500 op=nft_reset_obj' # resetting quotas @@ -217,13 +220,11 @@ do_test 'nft reset quotas t1' \ 'table=t1 family=2 entries=1 op=nft_reset_obj' do_test 'nft reset quotas t2' \ -'table=t2 family=2 entries=315 op=nft_reset_obj -table=t2 family=2 entries=185 op=nft_reset_obj' +'table=t2 family=2 entries=500 op=nft_reset_obj' do_test 'nft reset quotas' \ 'table=t1 family=2 entries=1 op=nft_reset_obj -table=t2 family=2 entries=314 op=nft_reset_obj -table=t2 family=2 entries=186 op=nft_reset_obj' +table=t2 family=2 entries=500 op=nft_reset_obj' # deleting rules From 92ceba94de6fb4cee2bf40b485979c342f44a492 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Sep 2024 18:56:11 +0000 Subject: [PATCH 533/655] netfilter: nf_tables: prevent nf_skb_duplicated corruption syzbot found that nf_dup_ipv4() or nf_dup_ipv6() could write per-cpu variable nf_skb_duplicated in an unsafe way [1]. Disabling preemption as hinted by the splat is not enough, we have to disable soft interrupts as well. [1] BUG: using __this_cpu_write() in preemptible [00000000] code: syz.4.282/6316 caller is nf_dup_ipv4+0x651/0x8f0 net/ipv4/netfilter/nf_dup_ipv4.c:87 CPU: 0 UID: 0 PID: 6316 Comm: syz.4.282 Not tainted 6.11.0-rc7-syzkaller-00104-g7052622fccb1 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/06/2024 Call Trace: __dump_stack lib/dump_stack.c:93 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:119 check_preemption_disabled+0x10e/0x120 lib/smp_processor_id.c:49 nf_dup_ipv4+0x651/0x8f0 net/ipv4/netfilter/nf_dup_ipv4.c:87 nft_dup_ipv4_eval+0x1db/0x300 net/ipv4/netfilter/nft_dup_ipv4.c:30 expr_call_ops_eval net/netfilter/nf_tables_core.c:240 [inline] nft_do_chain+0x4ad/0x1da0 net/netfilter/nf_tables_core.c:288 nft_do_chain_ipv4+0x202/0x320 net/netfilter/nft_chain_filter.c:23 nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] nf_hook_slow+0xc3/0x220 net/netfilter/core.c:626 nf_hook+0x2c4/0x450 include/linux/netfilter.h:269 NF_HOOK_COND include/linux/netfilter.h:302 [inline] ip_output+0x185/0x230 net/ipv4/ip_output.c:433 ip_local_out net/ipv4/ip_output.c:129 [inline] ip_send_skb+0x74/0x100 net/ipv4/ip_output.c:1495 udp_send_skb+0xacf/0x1650 net/ipv4/udp.c:981 udp_sendmsg+0x1c21/0x2a60 net/ipv4/udp.c:1269 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x1a6/0x270 net/socket.c:745 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2597 ___sys_sendmsg net/socket.c:2651 [inline] __sys_sendmmsg+0x3b2/0x740 net/socket.c:2737 __do_sys_sendmmsg net/socket.c:2766 [inline] __se_sys_sendmmsg net/socket.c:2763 [inline] __x64_sys_sendmmsg+0xa0/0xb0 net/socket.c:2763 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f4ce4f7def9 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f4ce5d4a038 EFLAGS: 00000246 ORIG_RAX: 0000000000000133 RAX: ffffffffffffffda RBX: 00007f4ce5135f80 RCX: 00007f4ce4f7def9 RDX: 0000000000000001 RSI: 0000000020005d40 RDI: 0000000000000006 RBP: 00007f4ce4ff0b76 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 00007f4ce5135f80 R15: 00007ffd4cbc6d68 Fixes: d877f07112f1 ("netfilter: nf_tables: add nft_dup expression") Reported-by: syzbot Signed-off-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_dup_ipv4.c | 7 +++++-- net/ipv6/netfilter/nf_dup_ipv6.c | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c index f4aed0789d69..ec94ee1051c7 100644 --- a/net/ipv4/netfilter/nf_dup_ipv4.c +++ b/net/ipv4/netfilter/nf_dup_ipv4.c @@ -53,8 +53,9 @@ void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum, { struct iphdr *iph; + local_bh_disable(); if (this_cpu_read(nf_skb_duplicated)) - return; + goto out; /* * Copy the skb, and route the copy. Will later return %XT_CONTINUE for * the original skb, which should continue on its way as if nothing has @@ -62,7 +63,7 @@ void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum, */ skb = pskb_copy(skb, GFP_ATOMIC); if (skb == NULL) - return; + goto out; #if IS_ENABLED(CONFIG_NF_CONNTRACK) /* Avoid counting cloned packets towards the original connection. */ @@ -91,6 +92,8 @@ void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum, } else { kfree_skb(skb); } +out: + local_bh_enable(); } EXPORT_SYMBOL_GPL(nf_dup_ipv4); diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c index a0a2de30be3e..0c39c77fe8a8 100644 --- a/net/ipv6/netfilter/nf_dup_ipv6.c +++ b/net/ipv6/netfilter/nf_dup_ipv6.c @@ -47,11 +47,12 @@ static bool nf_dup_ipv6_route(struct net *net, struct sk_buff *skb, void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum, const struct in6_addr *gw, int oif) { + local_bh_disable(); if (this_cpu_read(nf_skb_duplicated)) - return; + goto out; skb = pskb_copy(skb, GFP_ATOMIC); if (skb == NULL) - return; + goto out; #if IS_ENABLED(CONFIG_NF_CONNTRACK) nf_reset_ct(skb); @@ -69,6 +70,8 @@ void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum, } else { kfree_skb(skb); } +out: + local_bh_enable(); } EXPORT_SYMBOL_GPL(nf_dup_ipv6); From 10dbd23633f0433f8d13c2803d687b36a675ef60 Mon Sep 17 00:00:00 2001 From: zhang jiao Date: Fri, 27 Sep 2024 11:22:05 +0800 Subject: [PATCH 534/655] selftests: netfilter: Add missing return value There is no return value in count_entries, just add it. Fixes: eff3c558bb7e ("netfilter: ctnetlink: support filtering by zone") Signed-off-by: zhang jiao Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/net/netfilter/conntrack_dump_flush.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c index bd9317bf5ada..dc056fec993b 100644 --- a/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c +++ b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c @@ -207,6 +207,7 @@ static int conntrack_data_generate_v6(struct mnl_socket *sock, static int count_entries(const struct nlmsghdr *nlh, void *data) { reply_counter++; + return MNL_CB_OK; } static int conntracK_count_zone(struct mnl_socket *sock, uint16_t zone) From 54595f2807d203770ee50486cb23dc5763916d72 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 9 Sep 2024 20:38:09 +0000 Subject: [PATCH 535/655] mailbox, remoteproc: omap2+: fix compile testing Selecting CONFIG_OMAP2PLUS_MBOX while compile testing causes a build failure: WARNING: unmet direct dependencies detected for OMAP2PLUS_MBOX Depends on [n]: MAILBOX [=y] && (ARCH_OMAP2PLUS || ARCH_K3) Selected by [m]: - TI_K3_M4_REMOTEPROC [=m] && REMOTEPROC [=y] && (ARCH_K3 || COMPILE_TEST [=y]) Using 'select' to force-enable another subsystem is generally a mistake and causes problems such as this one, so change the three drivers that link against this driver to use 'depends on' instead, and ensure the driver itself can be compile tested regardless of the platform. When compile-testing without CONFIG_TI_SCI_PROTOCOL=m, there is a chance for a link failure, so add a careful dependency on that. arm-linux-gnueabi-ld: drivers/remoteproc/ti_k3_m4_remoteproc.o: in function `k3_m4_rproc_probe': ti_k3_m4_remoteproc.c:(.text.k3_m4_rproc_probe+0x76): undefined reference to `devm_ti_sci_get_by_phandle' Fixes: ebcf9008a895 ("remoteproc: k3-m4: Add a remoteproc driver for M4F subsystem") Signed-off-by: Arnd Bergmann Reviewed-by: Mathieu Poirier Reviewed-by: Andrew Davis Reviewed-by: Martyn Welch Signed-off-by: Jassi Brar --- drivers/mailbox/Kconfig | 2 +- drivers/mailbox/omap-mailbox.c | 2 +- drivers/remoteproc/Kconfig | 6 ++---- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/mailbox/Kconfig b/drivers/mailbox/Kconfig index cbd9206cd7de..6fb995778636 100644 --- a/drivers/mailbox/Kconfig +++ b/drivers/mailbox/Kconfig @@ -74,7 +74,7 @@ config ARMADA_37XX_RWTM_MBOX config OMAP2PLUS_MBOX tristate "OMAP2+ Mailbox framework support" - depends on ARCH_OMAP2PLUS || ARCH_K3 + depends on ARCH_OMAP2PLUS || ARCH_K3 || COMPILE_TEST help Mailbox implementation for OMAP family chips with hardware for interprocessor communication involving DSP, IVA1.0 and IVA2 in diff --git a/drivers/mailbox/omap-mailbox.c b/drivers/mailbox/omap-mailbox.c index 7a87424657a1..6797770474a5 100644 --- a/drivers/mailbox/omap-mailbox.c +++ b/drivers/mailbox/omap-mailbox.c @@ -603,7 +603,7 @@ static struct platform_driver omap_mbox_driver = { .driver = { .name = "omap-mailbox", .pm = &omap_mbox_pm_ops, - .of_match_table = of_match_ptr(omap_mailbox_of_match), + .of_match_table = omap_mailbox_of_match, }, }; module_platform_driver(omap_mbox_driver); diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig index dda2ada215b7..7ab80fe511a7 100644 --- a/drivers/remoteproc/Kconfig +++ b/drivers/remoteproc/Kconfig @@ -330,8 +330,7 @@ config STM32_RPROC config TI_K3_DSP_REMOTEPROC tristate "TI K3 DSP remoteproc support" depends on ARCH_K3 - select MAILBOX - select OMAP2PLUS_MBOX + depends on OMAP2PLUS_MBOX help Say m here to support TI's C66x and C71x DSP remote processor subsystems on various TI K3 family of SoCs through the remote @@ -343,8 +342,7 @@ config TI_K3_DSP_REMOTEPROC config TI_K3_R5_REMOTEPROC tristate "TI K3 R5 remoteproc support" depends on ARCH_K3 - select MAILBOX - select OMAP2PLUS_MBOX + depends on OMAP2PLUS_MBOX help Say m here to support TI's R5F remote processor subsystems on various TI K3 family of SoCs through the remote processor From f53e1c9c726d83092167f2226f32bd3b73f26c21 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 12 Sep 2024 12:34:42 -0400 Subject: [PATCH 536/655] Bluetooth: MGMT: Fix possible crash on mgmt_index_removed If mgmt_index_removed is called while there are commands queued on cmd_sync it could lead to crashes like the bellow trace: 0x0000053D: __list_del_entry_valid_or_report+0x98/0xdc 0x0000053D: mgmt_pending_remove+0x18/0x58 [bluetooth] 0x0000053E: mgmt_remove_adv_monitor_complete+0x80/0x108 [bluetooth] 0x0000053E: hci_cmd_sync_work+0xbc/0x164 [bluetooth] So while handling mgmt_index_removed this attempts to dequeue commands passed as user_data to cmd_sync. Fixes: 7cf5c2978f23 ("Bluetooth: hci_sync: Refactor remove Adv Monitor") Reported-by: jiaymao Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/mgmt.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index e4f564d6f6fb..4157d9f23f46 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1453,10 +1453,15 @@ static void cmd_status_rsp(struct mgmt_pending_cmd *cmd, void *data) static void cmd_complete_rsp(struct mgmt_pending_cmd *cmd, void *data) { - if (cmd->cmd_complete) { - u8 *status = data; + struct cmd_lookup *match = data; - cmd->cmd_complete(cmd, *status); + /* dequeue cmd_sync entries using cmd as data as that is about to be + * removed/freed. + */ + hci_cmd_sync_dequeue(match->hdev, NULL, cmd, NULL); + + if (cmd->cmd_complete) { + cmd->cmd_complete(cmd, match->mgmt_status); mgmt_pending_remove(cmd); return; @@ -9394,12 +9399,12 @@ void mgmt_index_added(struct hci_dev *hdev) void mgmt_index_removed(struct hci_dev *hdev) { struct mgmt_ev_ext_index ev; - u8 status = MGMT_STATUS_INVALID_INDEX; + struct cmd_lookup match = { NULL, hdev, MGMT_STATUS_INVALID_INDEX }; if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) return; - mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status); + mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &match); if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { mgmt_index_event(MGMT_EV_UNCONF_INDEX_REMOVED, hdev, NULL, 0, @@ -9450,7 +9455,7 @@ void mgmt_power_on(struct hci_dev *hdev, int err) void __mgmt_power_off(struct hci_dev *hdev) { struct cmd_lookup match = { NULL, hdev }; - u8 status, zero_cod[] = { 0, 0, 0 }; + u8 zero_cod[] = { 0, 0, 0 }; mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); @@ -9462,11 +9467,11 @@ void __mgmt_power_off(struct hci_dev *hdev) * status responses. */ if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) - status = MGMT_STATUS_INVALID_INDEX; + match.mgmt_status = MGMT_STATUS_INVALID_INDEX; else - status = MGMT_STATUS_NOT_POWERED; + match.mgmt_status = MGMT_STATUS_NOT_POWERED; - mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status); + mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &match); if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) { mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, From 333b4fd11e89b29c84c269123f871883a30be586 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 23 Sep 2024 12:47:39 -0400 Subject: [PATCH 537/655] Bluetooth: L2CAP: Fix uaf in l2cap_connect [Syzbot reported] BUG: KASAN: slab-use-after-free in l2cap_connect.constprop.0+0x10d8/0x1270 net/bluetooth/l2cap_core.c:3949 Read of size 8 at addr ffff8880241e9800 by task kworker/u9:0/54 CPU: 0 UID: 0 PID: 54 Comm: kworker/u9:0 Not tainted 6.11.0-rc6-syzkaller-00268-g788220eee30d #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/06/2024 Workqueue: hci2 hci_rx_work Call Trace: __dump_stack lib/dump_stack.c:93 [inline] dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:119 print_address_description mm/kasan/report.c:377 [inline] print_report+0xc3/0x620 mm/kasan/report.c:488 kasan_report+0xd9/0x110 mm/kasan/report.c:601 l2cap_connect.constprop.0+0x10d8/0x1270 net/bluetooth/l2cap_core.c:3949 l2cap_connect_req net/bluetooth/l2cap_core.c:4080 [inline] l2cap_bredr_sig_cmd net/bluetooth/l2cap_core.c:4772 [inline] l2cap_sig_channel net/bluetooth/l2cap_core.c:5543 [inline] l2cap_recv_frame+0xf0b/0x8eb0 net/bluetooth/l2cap_core.c:6825 l2cap_recv_acldata+0x9b4/0xb70 net/bluetooth/l2cap_core.c:7514 hci_acldata_packet net/bluetooth/hci_core.c:3791 [inline] hci_rx_work+0xaab/0x1610 net/bluetooth/hci_core.c:4028 process_one_work+0x9c5/0x1b40 kernel/workqueue.c:3231 process_scheduled_works kernel/workqueue.c:3312 [inline] worker_thread+0x6c8/0xed0 kernel/workqueue.c:3389 kthread+0x2c1/0x3a0 kernel/kthread.c:389 ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 ... Freed by task 5245: kasan_save_stack+0x33/0x60 mm/kasan/common.c:47 kasan_save_track+0x14/0x30 mm/kasan/common.c:68 kasan_save_free_info+0x3b/0x60 mm/kasan/generic.c:579 poison_slab_object+0xf7/0x160 mm/kasan/common.c:240 __kasan_slab_free+0x32/0x50 mm/kasan/common.c:256 kasan_slab_free include/linux/kasan.h:184 [inline] slab_free_hook mm/slub.c:2256 [inline] slab_free mm/slub.c:4477 [inline] kfree+0x12a/0x3b0 mm/slub.c:4598 l2cap_conn_free net/bluetooth/l2cap_core.c:1810 [inline] kref_put include/linux/kref.h:65 [inline] l2cap_conn_put net/bluetooth/l2cap_core.c:1822 [inline] l2cap_conn_del+0x59d/0x730 net/bluetooth/l2cap_core.c:1802 l2cap_connect_cfm+0x9e6/0xf80 net/bluetooth/l2cap_core.c:7241 hci_connect_cfm include/net/bluetooth/hci_core.h:1960 [inline] hci_conn_failed+0x1c3/0x370 net/bluetooth/hci_conn.c:1265 hci_abort_conn_sync+0x75a/0xb50 net/bluetooth/hci_sync.c:5583 abort_conn_sync+0x197/0x360 net/bluetooth/hci_conn.c:2917 hci_cmd_sync_work+0x1a4/0x410 net/bluetooth/hci_sync.c:328 process_one_work+0x9c5/0x1b40 kernel/workqueue.c:3231 process_scheduled_works kernel/workqueue.c:3312 [inline] worker_thread+0x6c8/0xed0 kernel/workqueue.c:3389 kthread+0x2c1/0x3a0 kernel/kthread.c:389 ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 Reported-by: syzbot+c12e2f941af1feb5632c@syzkaller.appspotmail.com Tested-by: syzbot+c12e2f941af1feb5632c@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=c12e2f941af1feb5632c Fixes: 7b064edae38d ("Bluetooth: Fix authentication if acl data comes before remote feature evt") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_core.c | 2 ++ net/bluetooth/hci_event.c | 2 +- net/bluetooth/l2cap_core.c | 8 -------- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index d6976db02c06..b2f8f9c5b610 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3782,6 +3782,8 @@ static void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_lock(hdev); conn = hci_conn_hash_lookup_handle(hdev, handle); + if (conn && hci_dev_test_flag(hdev, HCI_MGMT)) + mgmt_device_connected(hdev, conn, NULL, 0); hci_dev_unlock(hdev); if (conn) { diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 1c82dcdf6e8f..b87c0f1dab9e 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3706,7 +3706,7 @@ static void hci_remote_features_evt(struct hci_dev *hdev, void *data, goto unlock; } - if (!ev->status && !test_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags)) { + if (!ev->status) { struct hci_cp_remote_name_req cp; memset(&cp, 0, sizeof(cp)); bacpy(&cp.bdaddr, &conn->dst); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 9988ba382b68..6544c1ed7143 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -4066,17 +4066,9 @@ static void l2cap_connect(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, static int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) { - struct hci_dev *hdev = conn->hcon->hdev; - struct hci_conn *hcon = conn->hcon; - if (cmd_len < sizeof(struct l2cap_conn_req)) return -EPROTO; - hci_dev_lock(hdev); - if (hci_dev_test_flag(hdev, HCI_MGMT)) - mgmt_device_connected(hdev, hcon, NULL, 0); - hci_dev_unlock(hdev); - l2cap_connect(conn, cmd, data, L2CAP_CONN_RSP); return 0; } From 7b1ab460592ca818e7b52f27cd3ec86af79220d1 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Thu, 12 Sep 2024 11:12:04 +0800 Subject: [PATCH 538/655] Bluetooth: btmrvl: Use IRQF_NO_AUTOEN flag in request_irq() disable_irq() after request_irq() still has a time gap in which interrupts can come. request_irq() with IRQF_NO_AUTOEN flag will disable IRQ auto-enable when request IRQ. Fixes: bb7f4f0bcee6 ("btmrvl: add platform specific wakeup interrupt support") Signed-off-by: Jinjie Ruan Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btmrvl_sdio.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/bluetooth/btmrvl_sdio.c b/drivers/bluetooth/btmrvl_sdio.c index 85b7f2bb4259..07cd308f7abf 100644 --- a/drivers/bluetooth/btmrvl_sdio.c +++ b/drivers/bluetooth/btmrvl_sdio.c @@ -92,7 +92,7 @@ static int btmrvl_sdio_probe_of(struct device *dev, } else { ret = devm_request_irq(dev, cfg->irq_bt, btmrvl_wake_irq_bt, - 0, "bt_wake", card); + IRQF_NO_AUTOEN, "bt_wake", card); if (ret) { dev_err(dev, "Failed to request irq_bt %d (%d)\n", @@ -101,7 +101,6 @@ static int btmrvl_sdio_probe_of(struct device *dev, /* Configure wakeup (enabled by default) */ device_init_wakeup(dev, true); - disable_irq(cfg->irq_bt); } } From b25e11f978b63cb7857890edb3a698599cddb10e Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 12 Sep 2024 12:17:00 -0400 Subject: [PATCH 539/655] Bluetooth: hci_event: Align BR/EDR JUST_WORKS paring with LE This aligned BR/EDR JUST_WORKS method with LE which since 92516cd97fd4 ("Bluetooth: Always request for user confirmation for Just Works") always request user confirmation with confirm_hint set since the likes of bluetoothd have dedicated policy around JUST_WORKS method (e.g. main.conf:JustWorksRepairing). CVE: CVE-2024-8805 Cc: stable@vger.kernel.org Fixes: ba15a58b179e ("Bluetooth: Fix SSP acceptor just-works confirmation without MITM") Signed-off-by: Luiz Augusto von Dentz Tested-by: Kiran K --- net/bluetooth/hci_event.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index b87c0f1dab9e..561c8cb87473 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -5324,19 +5324,16 @@ static void hci_user_confirm_request_evt(struct hci_dev *hdev, void *data, goto unlock; } - /* If no side requires MITM protection; auto-accept */ + /* If no side requires MITM protection; use JUST_CFM method */ if ((!loc_mitm || conn->remote_cap == HCI_IO_NO_INPUT_OUTPUT) && (!rem_mitm || conn->io_capability == HCI_IO_NO_INPUT_OUTPUT)) { - /* If we're not the initiators request authorization to - * proceed from user space (mgmt_user_confirm with - * confirm_hint set to 1). The exception is if neither - * side had MITM or if the local IO capability is - * NoInputNoOutput, in which case we do auto-accept + /* If we're not the initiator of request authorization and the + * local IO capability is not NoInputNoOutput, use JUST_WORKS + * method (mgmt_user_confirm with confirm_hint set to 1). */ if (!test_bit(HCI_CONN_AUTH_PEND, &conn->flags) && - conn->io_capability != HCI_IO_NO_INPUT_OUTPUT && - (loc_mitm || rem_mitm)) { + conn->io_capability != HCI_IO_NO_INPUT_OUTPUT) { bt_dev_dbg(hdev, "Confirming auto-accept as acceptor"); confirm_hint = 1; goto confirm; From cb787f4ac0c2e439ea8d7e6387b925f74576bdf8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 27 Sep 2024 02:56:11 +0100 Subject: [PATCH 540/655] [tree-wide] finally take no_llseek out no_llseek had been defined to NULL two years ago, in commit 868941b14441 ("fs: remove no_llseek") To quote that commit, At -rc1 we'll need do a mechanical removal of no_llseek - git grep -l -w no_llseek | grep -v porting.rst | while read i; do sed -i '/\/d' $i done would do it. Unfortunately, that hadn't been done. Linus, could you do that now, so that we could finally put that thing to rest? All instances are of the form .llseek = no_llseek, so it's obviously safe. Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- Documentation/watchdog/convert_drivers_to_kernel_api.rst | 1 - arch/parisc/kernel/perf.c | 1 - arch/s390/hypfs/hypfs_dbfs.c | 1 - arch/s390/hypfs/inode.c | 1 - arch/s390/kernel/debug.c | 1 - arch/s390/kernel/perf_cpum_cf.c | 1 - arch/s390/kernel/sysinfo.c | 1 - arch/s390/pci/pci_clp.c | 1 - arch/um/drivers/harddog_kern.c | 1 - arch/um/drivers/hostaudio_kern.c | 2 -- arch/x86/kernel/cpu/mce/dev-mcelog.c | 1 - arch/x86/kernel/cpu/resctrl/pseudo_lock.c | 1 - drivers/acpi/apei/erst-dbg.c | 1 - drivers/auxdisplay/charlcd.c | 1 - drivers/block/mtip32xx/mtip32xx.c | 2 -- drivers/block/pktcdvd.c | 1 - drivers/block/ublk_drv.c | 1 - drivers/bluetooth/hci_vhci.c | 1 - drivers/bus/moxtet.c | 2 -- drivers/char/applicom.c | 1 - drivers/char/ds1620.c | 1 - drivers/char/dtlk.c | 1 - drivers/char/hpet.c | 1 - drivers/char/ipmi/ipmi_watchdog.c | 1 - drivers/char/pc8736x_gpio.c | 1 - drivers/char/ppdev.c | 1 - drivers/char/scx200_gpio.c | 1 - drivers/char/sonypi.c | 1 - drivers/char/tpm/tpm-dev.c | 1 - drivers/char/tpm/tpm_vtpm_proxy.c | 1 - drivers/char/tpm/tpmrm-dev.c | 1 - drivers/char/virtio_console.c | 1 - drivers/counter/counter-chrdev.c | 1 - drivers/firewire/core-cdev.c | 1 - drivers/firmware/arm_scmi/driver.c | 1 - drivers/firmware/arm_scmi/raw_mode.c | 5 ----- drivers/firmware/efi/capsule-loader.c | 1 - drivers/firmware/efi/test/efi_test.c | 1 - drivers/firmware/turris-mox-rwtm.c | 1 - drivers/gnss/core.c | 1 - drivers/gpio/gpio-mockup.c | 1 - drivers/gpio/gpio-sloppy-logic-analyzer.c | 1 - drivers/gpio/gpiolib-cdev.c | 1 - drivers/gpu/drm/drm_file.c | 1 - drivers/gpu/drm/i915/i915_perf.c | 1 - drivers/gpu/drm/msm/msm_perf.c | 1 - drivers/gpu/drm/msm/msm_rd.c | 1 - drivers/gpu/drm/xe/xe_oa.c | 1 - drivers/hid/uhid.c | 1 - drivers/hwmon/asus_atk0110.c | 1 - drivers/hwmon/fschmd.c | 1 - drivers/hwmon/w83793.c | 1 - drivers/hwtracing/coresight/coresight-etb10.c | 1 - drivers/hwtracing/coresight/coresight-tmc-core.c | 1 - drivers/hwtracing/coresight/ultrasoc-smb.c | 1 - drivers/hwtracing/intel_th/msu.c | 1 - drivers/hwtracing/stm/core.c | 1 - drivers/i2c/i2c-dev.c | 1 - drivers/infiniband/core/ucma.c | 1 - drivers/infiniband/core/user_mad.c | 2 -- drivers/infiniband/core/uverbs_main.c | 4 ---- drivers/infiniband/hw/hfi1/fault.c | 1 - drivers/infiniband/hw/mlx5/devx.c | 2 -- drivers/input/evdev.c | 1 - drivers/input/joydev.c | 1 - drivers/input/keyboard/applespi.c | 1 - drivers/input/misc/uinput.c | 1 - drivers/input/serio/userio.c | 1 - drivers/iommu/iommufd/fault.c | 1 - drivers/isdn/capi/capi.c | 1 - drivers/isdn/mISDN/timerdev.c | 1 - drivers/leds/uleds.c | 1 - drivers/macintosh/adb.c | 1 - drivers/macintosh/smu.c | 1 - drivers/media/cec/core/cec-api.c | 1 - drivers/media/mc/mc-devnode.c | 1 - drivers/media/rc/lirc_dev.c | 1 - drivers/media/usb/uvc/uvc_debugfs.c | 1 - drivers/media/v4l2-core/v4l2-dev.c | 1 - drivers/message/fusion/mptctl.c | 1 - drivers/misc/lis3lv02d/lis3lv02d.c | 1 - drivers/misc/mei/main.c | 1 - drivers/misc/ntsync.c | 2 -- drivers/misc/phantom.c | 1 - drivers/mmc/core/block.c | 1 - drivers/mtd/ubi/cdev.c | 2 -- drivers/mtd/ubi/debug.c | 1 - drivers/net/netdevsim/fib.c | 1 - drivers/net/tap.c | 1 - drivers/net/tun.c | 1 - drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c | 1 - drivers/net/wireless/intel/iwlwifi/mvm/d3.c | 1 - drivers/platform/chrome/cros_ec_debugfs.c | 1 - drivers/platform/chrome/wilco_ec/debugfs.c | 1 - drivers/platform/chrome/wilco_ec/event.c | 1 - drivers/platform/chrome/wilco_ec/telemetry.c | 1 - drivers/platform/surface/surface_aggregator_cdev.c | 1 - drivers/platform/surface/surface_dtx.c | 1 - drivers/pps/pps.c | 1 - drivers/rtc/dev.c | 1 - drivers/rtc/rtc-m41t80.c | 1 - drivers/s390/char/fs3270.c | 1 - drivers/s390/char/sclp_ctl.c | 1 - drivers/s390/char/tape_char.c | 1 - drivers/s390/char/uvdevice.c | 1 - drivers/s390/char/vmcp.c | 1 - drivers/s390/char/vmlogrdr.c | 1 - drivers/s390/char/zcore.c | 2 -- drivers/s390/cio/chsc_sch.c | 1 - drivers/s390/cio/css.c | 1 - drivers/s390/crypto/pkey_api.c | 1 - drivers/s390/crypto/zcrypt_api.c | 1 - drivers/sbus/char/openprom.c | 1 - drivers/sbus/char/uctrl.c | 1 - drivers/scsi/sg.c | 1 - drivers/spi/spidev.c | 1 - drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c | 1 - drivers/tty/tty_io.c | 3 --- drivers/usb/gadget/function/f_fs.c | 2 -- drivers/usb/gadget/legacy/inode.c | 2 -- drivers/usb/gadget/legacy/raw_gadget.c | 1 - drivers/usb/gadget/udc/atmel_usba_udc.c | 1 - drivers/usb/misc/ldusb.c | 1 - drivers/usb/mon/mon_bin.c | 1 - drivers/usb/mon/mon_stat.c | 1 - drivers/usb/mon/mon_text.c | 2 -- drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 2 -- drivers/vfio/pci/mlx5/main.c | 2 -- drivers/vfio/pci/pds/lm.c | 2 -- drivers/vfio/pci/qat/main.c | 2 -- drivers/virt/coco/tdx-guest/tdx-guest.c | 1 - drivers/watchdog/acquirewdt.c | 1 - drivers/watchdog/advantechwdt.c | 1 - drivers/watchdog/alim1535_wdt.c | 1 - drivers/watchdog/alim7101_wdt.c | 1 - drivers/watchdog/at91rm9200_wdt.c | 1 - drivers/watchdog/ath79_wdt.c | 1 - drivers/watchdog/cpu5wdt.c | 1 - drivers/watchdog/cpwd.c | 1 - drivers/watchdog/eurotechwdt.c | 1 - drivers/watchdog/gef_wdt.c | 1 - drivers/watchdog/geodewdt.c | 1 - drivers/watchdog/ib700wdt.c | 1 - drivers/watchdog/ibmasr.c | 1 - drivers/watchdog/indydog.c | 1 - drivers/watchdog/it8712f_wdt.c | 1 - drivers/watchdog/m54xx_wdt.c | 1 - drivers/watchdog/machzwd.c | 1 - drivers/watchdog/mixcomwd.c | 1 - drivers/watchdog/mtx-1_wdt.c | 1 - drivers/watchdog/nv_tco.c | 1 - drivers/watchdog/pc87413_wdt.c | 1 - drivers/watchdog/pcwd.c | 2 -- drivers/watchdog/pcwd_pci.c | 2 -- drivers/watchdog/pcwd_usb.c | 2 -- drivers/watchdog/pika_wdt.c | 1 - drivers/watchdog/rc32434_wdt.c | 1 - drivers/watchdog/rdc321x_wdt.c | 1 - drivers/watchdog/riowd.c | 1 - drivers/watchdog/sa1100_wdt.c | 1 - drivers/watchdog/sb_wdog.c | 1 - drivers/watchdog/sbc60xxwdt.c | 1 - drivers/watchdog/sbc7240_wdt.c | 1 - drivers/watchdog/sbc8360.c | 1 - drivers/watchdog/sbc_epx_c3.c | 1 - drivers/watchdog/sbc_fitpc2_wdt.c | 1 - drivers/watchdog/sc1200wdt.c | 1 - drivers/watchdog/sc520_wdt.c | 1 - drivers/watchdog/sch311x_wdt.c | 1 - drivers/watchdog/scx200_wdt.c | 1 - drivers/watchdog/smsc37b787_wdt.c | 1 - drivers/watchdog/w83877f_wdt.c | 1 - drivers/watchdog/w83977f_wdt.c | 1 - drivers/watchdog/wafer5823wdt.c | 1 - drivers/watchdog/wdrtas.c | 2 -- drivers/watchdog/wdt.c | 2 -- drivers/watchdog/wdt285.c | 1 - drivers/watchdog/wdt977.c | 1 - drivers/watchdog/wdt_pci.c | 2 -- drivers/xen/evtchn.c | 1 - drivers/xen/mcelog.c | 1 - drivers/xen/xenbus/xenbus_dev_frontend.c | 1 - fs/bcachefs/chardev.c | 1 - fs/bcachefs/thread_with_file.c | 2 -- fs/debugfs/file.c | 1 - fs/dlm/debug_fs.c | 1 - fs/efivarfs/file.c | 1 - fs/fsopen.c | 1 - fs/fuse/control.c | 4 ---- fs/fuse/dev.c | 1 - fs/nsfs.c | 1 - fs/pipe.c | 1 - fs/ubifs/debug.c | 2 -- include/linux/debugfs.h | 1 - include/linux/fs.h | 1 - kernel/bpf/bpf_iter.c | 1 - kernel/events/core.c | 1 - kernel/power/user.c | 1 - kernel/relay.c | 1 - kernel/time/posix-clock.c | 1 - kernel/trace/rv/rv.c | 3 --- kernel/trace/rv/rv_reactors.c | 1 - kernel/trace/trace.c | 3 --- mm/huge_memory.c | 1 - net/mac80211/rc80211_minstrel_ht_debugfs.c | 2 -- net/rfkill/core.c | 1 - net/socket.c | 1 - net/sunrpc/cache.c | 4 ---- net/sunrpc/rpc_pipe.c | 1 - samples/vfio-mdev/mtty.c | 2 -- scripts/coccinelle/api/stream_open.cocci | 1 - sound/core/control.c | 1 - sound/core/oss/mixer_oss.c | 1 - sound/core/oss/pcm_oss.c | 1 - sound/core/pcm_native.c | 2 -- sound/core/rawmidi.c | 1 - sound/core/seq/seq_clientmgr.c | 1 - sound/core/timer.c | 1 - sound/oss/dmasound/dmasound_core.c | 3 --- sound/soc/intel/avs/debugfs.c | 3 --- virt/kvm/kvm_main.c | 1 - 221 files changed, 270 deletions(-) diff --git a/Documentation/watchdog/convert_drivers_to_kernel_api.rst b/Documentation/watchdog/convert_drivers_to_kernel_api.rst index a1c3f038ce0e..e83609a5d007 100644 --- a/Documentation/watchdog/convert_drivers_to_kernel_api.rst +++ b/Documentation/watchdog/convert_drivers_to_kernel_api.rst @@ -75,7 +75,6 @@ Example conversion:: -static const struct file_operations s3c2410wdt_fops = { - .owner = THIS_MODULE, - - .llseek = no_llseek, - .write = s3c2410wdt_write, - .unlocked_ioctl = s3c2410wdt_ioctl, - .open = s3c2410wdt_open, diff --git a/arch/parisc/kernel/perf.c b/arch/parisc/kernel/perf.c index b0f0816879df..5e8e37a722ef 100644 --- a/arch/parisc/kernel/perf.c +++ b/arch/parisc/kernel/perf.c @@ -466,7 +466,6 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) } static const struct file_operations perf_fops = { - .llseek = no_llseek, .read = perf_read, .write = perf_write, .unlocked_ioctl = perf_ioctl, diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c index 0e855c5e91c5..5d9effb0867c 100644 --- a/arch/s390/hypfs/hypfs_dbfs.c +++ b/arch/s390/hypfs/hypfs_dbfs.c @@ -76,7 +76,6 @@ static long dbfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) static const struct file_operations dbfs_ops = { .read = dbfs_read, - .llseek = no_llseek, .unlocked_ioctl = dbfs_ioctl, }; diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 858beaf4a8cb..d428635abf08 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -443,7 +443,6 @@ static const struct file_operations hypfs_file_ops = { .release = hypfs_release, .read_iter = hypfs_read_iter, .write_iter = hypfs_write_iter, - .llseek = no_llseek, }; static struct file_system_type hypfs_type = { diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index bce50ca75ea7..e62bea9ab21e 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -163,7 +163,6 @@ static const struct file_operations debug_file_ops = { .write = debug_input, .open = debug_open, .release = debug_close, - .llseek = no_llseek, }; static struct dentry *debug_debugfs_root_entry; diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 18b0d025f3a2..e2e0aa463fbd 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -1698,7 +1698,6 @@ static const struct file_operations cfset_fops = { .release = cfset_release, .unlocked_ioctl = cfset_ioctl, .compat_ioctl = cfset_ioctl, - .llseek = no_llseek }; static struct miscdevice cfset_dev = { diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index 2be30a96696a..88055f58fbda 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -498,7 +498,6 @@ static const struct file_operations stsi_##fc##_##s1##_##s2##_fs_ops = { \ .open = stsi_open_##fc##_##s1##_##s2, \ .release = stsi_release, \ .read = stsi_read, \ - .llseek = no_llseek, \ }; static int stsi_release(struct inode *inode, struct file *file) diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index ee90a91ed888..6f55a59a0871 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -657,7 +657,6 @@ static const struct file_operations clp_misc_fops = { .release = clp_misc_release, .unlocked_ioctl = clp_misc_ioctl, .compat_ioctl = clp_misc_ioctl, - .llseek = no_llseek, }; static struct miscdevice clp_misc_device = { diff --git a/arch/um/drivers/harddog_kern.c b/arch/um/drivers/harddog_kern.c index 99a7144b229f..819aabb4ecdc 100644 --- a/arch/um/drivers/harddog_kern.c +++ b/arch/um/drivers/harddog_kern.c @@ -164,7 +164,6 @@ static const struct file_operations harddog_fops = { .compat_ioctl = compat_ptr_ioctl, .open = harddog_open, .release = harddog_release, - .llseek = no_llseek, }; static struct miscdevice harddog_miscdev = { diff --git a/arch/um/drivers/hostaudio_kern.c b/arch/um/drivers/hostaudio_kern.c index c42b793bce65..9d228878cea2 100644 --- a/arch/um/drivers/hostaudio_kern.c +++ b/arch/um/drivers/hostaudio_kern.c @@ -291,7 +291,6 @@ static int hostmixer_release(struct inode *inode, struct file *file) static const struct file_operations hostaudio_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .read = hostaudio_read, .write = hostaudio_write, .poll = hostaudio_poll, @@ -304,7 +303,6 @@ static const struct file_operations hostaudio_fops = { static const struct file_operations hostmixer_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .unlocked_ioctl = hostmixer_ioctl_mixdev, .open = hostmixer_open_mixdev, .release = hostmixer_release, diff --git a/arch/x86/kernel/cpu/mce/dev-mcelog.c b/arch/x86/kernel/cpu/mce/dev-mcelog.c index a3aa0199222e..af44fd5dbd7c 100644 --- a/arch/x86/kernel/cpu/mce/dev-mcelog.c +++ b/arch/x86/kernel/cpu/mce/dev-mcelog.c @@ -331,7 +331,6 @@ static const struct file_operations mce_chrdev_ops = { .poll = mce_chrdev_poll, .unlocked_ioctl = mce_chrdev_ioctl, .compat_ioctl = compat_ptr_ioctl, - .llseek = no_llseek, }; static struct miscdevice mce_chrdev_device = { diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c index e69489d48625..972e6b6b0481 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c @@ -1567,7 +1567,6 @@ static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma) static const struct file_operations pseudo_lock_dev_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .read = NULL, .write = NULL, .open = pseudo_lock_dev_open, diff --git a/drivers/acpi/apei/erst-dbg.c b/drivers/acpi/apei/erst-dbg.c index 8bc71cdc2270..246076341e8c 100644 --- a/drivers/acpi/apei/erst-dbg.c +++ b/drivers/acpi/apei/erst-dbg.c @@ -199,7 +199,6 @@ static const struct file_operations erst_dbg_ops = { .read = erst_dbg_read, .write = erst_dbg_write, .unlocked_ioctl = erst_dbg_ioctl, - .llseek = no_llseek, }; static struct miscdevice erst_dbg_dev = { diff --git a/drivers/auxdisplay/charlcd.c b/drivers/auxdisplay/charlcd.c index bb9463814454..19b619376d48 100644 --- a/drivers/auxdisplay/charlcd.c +++ b/drivers/auxdisplay/charlcd.c @@ -526,7 +526,6 @@ static const struct file_operations charlcd_fops = { .write = charlcd_write, .open = charlcd_open, .release = charlcd_release, - .llseek = no_llseek, }; static struct miscdevice charlcd_dev = { diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 11901f2812ad..223faa9d5ffd 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -2259,14 +2259,12 @@ static const struct file_operations mtip_regs_fops = { .owner = THIS_MODULE, .open = simple_open, .read = mtip_hw_read_registers, - .llseek = no_llseek, }; static const struct file_operations mtip_flags_fops = { .owner = THIS_MODULE, .open = simple_open, .read = mtip_hw_read_flags, - .llseek = no_llseek, }; static void mtip_hw_debugfs_init(struct driver_data *dd) diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 3edb37a41312..499c110465e3 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2835,7 +2835,6 @@ static const struct file_operations pkt_ctl_fops = { .compat_ioctl = pkt_ctl_compat_ioctl, #endif .owner = THIS_MODULE, - .llseek = no_llseek, }; static struct miscdevice pkt_misc = { diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index bca06bfb4bc3..a6c8e5cc6051 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1983,7 +1983,6 @@ static const struct file_operations ublk_ch_fops = { .owner = THIS_MODULE, .open = ublk_ch_open, .release = ublk_ch_release, - .llseek = no_llseek, .read_iter = ublk_ch_read_iter, .write_iter = ublk_ch_write_iter, .uring_cmd = ublk_ch_uring_cmd, diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c index 43e9ac5a3324..aa6af351d02d 100644 --- a/drivers/bluetooth/hci_vhci.c +++ b/drivers/bluetooth/hci_vhci.c @@ -679,7 +679,6 @@ static const struct file_operations vhci_fops = { .poll = vhci_poll, .open = vhci_open, .release = vhci_release, - .llseek = no_llseek, }; static struct miscdevice vhci_miscdev = { diff --git a/drivers/bus/moxtet.c b/drivers/bus/moxtet.c index 8412406c4f1d..6276551d7968 100644 --- a/drivers/bus/moxtet.c +++ b/drivers/bus/moxtet.c @@ -484,7 +484,6 @@ static const struct file_operations input_fops = { .owner = THIS_MODULE, .open = moxtet_debug_open, .read = input_read, - .llseek = no_llseek, }; static ssize_t output_read(struct file *file, char __user *buf, size_t len, @@ -549,7 +548,6 @@ static const struct file_operations output_fops = { .open = moxtet_debug_open, .read = output_read, .write = output_write, - .llseek = no_llseek, }; static int moxtet_register_debugfs(struct moxtet *moxtet) diff --git a/drivers/char/applicom.c b/drivers/char/applicom.c index 69314532f38c..9fed9706d9cd 100644 --- a/drivers/char/applicom.c +++ b/drivers/char/applicom.c @@ -111,7 +111,6 @@ static irqreturn_t ac_interrupt(int, void *); static const struct file_operations ac_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .read = ac_read, .write = ac_write, .unlocked_ioctl = ac_ioctl, diff --git a/drivers/char/ds1620.c b/drivers/char/ds1620.c index a4f4291b4492..44a1cdbd4bfb 100644 --- a/drivers/char/ds1620.c +++ b/drivers/char/ds1620.c @@ -353,7 +353,6 @@ static const struct file_operations ds1620_fops = { .open = ds1620_open, .read = ds1620_read, .unlocked_ioctl = ds1620_unlocked_ioctl, - .llseek = no_llseek, }; static struct miscdevice ds1620_miscdev = { diff --git a/drivers/char/dtlk.c b/drivers/char/dtlk.c index 5a1a73310e97..27f5f9d19531 100644 --- a/drivers/char/dtlk.c +++ b/drivers/char/dtlk.c @@ -107,7 +107,6 @@ static const struct file_operations dtlk_fops = .unlocked_ioctl = dtlk_ioctl, .open = dtlk_open, .release = dtlk_release, - .llseek = no_llseek, }; /* local prototypes */ diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c index 3dadc4accee3..e904e476e49a 100644 --- a/drivers/char/hpet.c +++ b/drivers/char/hpet.c @@ -700,7 +700,6 @@ hpet_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) static const struct file_operations hpet_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .read = hpet_read, .poll = hpet_poll, .unlocked_ioctl = hpet_ioctl, diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c index 9a459257489f..335eea80054e 100644 --- a/drivers/char/ipmi/ipmi_watchdog.c +++ b/drivers/char/ipmi/ipmi_watchdog.c @@ -903,7 +903,6 @@ static const struct file_operations ipmi_wdog_fops = { .open = ipmi_open, .release = ipmi_close, .fasync = ipmi_fasync, - .llseek = no_llseek, }; static struct miscdevice ipmi_wdog_miscdev = { diff --git a/drivers/char/pc8736x_gpio.c b/drivers/char/pc8736x_gpio.c index c39a836ebd15..5f4696813cea 100644 --- a/drivers/char/pc8736x_gpio.c +++ b/drivers/char/pc8736x_gpio.c @@ -235,7 +235,6 @@ static const struct file_operations pc8736x_gpio_fileops = { .open = pc8736x_gpio_open, .write = nsc_gpio_write, .read = nsc_gpio_read, - .llseek = no_llseek, }; static void __init pc8736x_init_shadow(void) diff --git a/drivers/char/ppdev.c b/drivers/char/ppdev.c index eaff98dbaa8c..d1dfbd8d4d42 100644 --- a/drivers/char/ppdev.c +++ b/drivers/char/ppdev.c @@ -786,7 +786,6 @@ static const struct class ppdev_class = { static const struct file_operations pp_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .read = pp_read, .write = pp_write, .poll = pp_poll, diff --git a/drivers/char/scx200_gpio.c b/drivers/char/scx200_gpio.c index 9f701dcba95c..700e6affea6f 100644 --- a/drivers/char/scx200_gpio.c +++ b/drivers/char/scx200_gpio.c @@ -68,7 +68,6 @@ static const struct file_operations scx200_gpio_fileops = { .read = nsc_gpio_read, .open = scx200_gpio_open, .release = scx200_gpio_release, - .llseek = no_llseek, }; static struct cdev scx200_gpio_cdev; /* use 1 cdev for all pins */ diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c index bb5115b1736a..0f8185e541ed 100644 --- a/drivers/char/sonypi.c +++ b/drivers/char/sonypi.c @@ -1054,7 +1054,6 @@ static const struct file_operations sonypi_misc_fops = { .release = sonypi_misc_release, .fasync = sonypi_misc_fasync, .unlocked_ioctl = sonypi_misc_ioctl, - .llseek = no_llseek, }; static struct miscdevice sonypi_misc_device = { diff --git a/drivers/char/tpm/tpm-dev.c b/drivers/char/tpm/tpm-dev.c index e2c0baa69fef..97c94b5e9340 100644 --- a/drivers/char/tpm/tpm-dev.c +++ b/drivers/char/tpm/tpm-dev.c @@ -59,7 +59,6 @@ static int tpm_release(struct inode *inode, struct file *file) const struct file_operations tpm_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .open = tpm_open, .read = tpm_common_read, .write = tpm_common_write, diff --git a/drivers/char/tpm/tpm_vtpm_proxy.c b/drivers/char/tpm/tpm_vtpm_proxy.c index 11c502039faf..8fe4a01eea12 100644 --- a/drivers/char/tpm/tpm_vtpm_proxy.c +++ b/drivers/char/tpm/tpm_vtpm_proxy.c @@ -243,7 +243,6 @@ static int vtpm_proxy_fops_release(struct inode *inode, struct file *filp) static const struct file_operations vtpm_proxy_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .read = vtpm_proxy_fops_read, .write = vtpm_proxy_fops_write, .poll = vtpm_proxy_fops_poll, diff --git a/drivers/char/tpm/tpmrm-dev.c b/drivers/char/tpm/tpmrm-dev.c index eef0fb06ea83..c25df7ea064e 100644 --- a/drivers/char/tpm/tpmrm-dev.c +++ b/drivers/char/tpm/tpmrm-dev.c @@ -46,7 +46,6 @@ static int tpmrm_release(struct inode *inode, struct file *file) const struct file_operations tpmrm_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .open = tpmrm_open, .read = tpm_common_read, .write = tpm_common_write, diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index de7d720d99fa..99a7f2441e70 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -1093,7 +1093,6 @@ static const struct file_operations port_fops = { .poll = port_fops_poll, .release = port_fops_release, .fasync = port_fops_fasync, - .llseek = no_llseek, }; /* diff --git a/drivers/counter/counter-chrdev.c b/drivers/counter/counter-chrdev.c index afc94d0062b1..3ee75e1a78cd 100644 --- a/drivers/counter/counter-chrdev.c +++ b/drivers/counter/counter-chrdev.c @@ -454,7 +454,6 @@ static int counter_chrdev_release(struct inode *inode, struct file *filp) static const struct file_operations counter_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .read = counter_chrdev_read, .poll = counter_chrdev_poll, .unlocked_ioctl = counter_chrdev_ioctl, diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index 518eaa073b2b..b360dca2c69e 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -1911,7 +1911,6 @@ static __poll_t fw_device_op_poll(struct file *file, poll_table * pt) const struct file_operations fw_device_ops = { .owner = THIS_MODULE, - .llseek = no_llseek, .open = fw_device_op_open, .read = fw_device_op_read, .unlocked_ioctl = fw_device_op_ioctl, diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index 69c15135371c..88c5c4ff4bb6 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -2886,7 +2886,6 @@ static ssize_t reset_all_on_write(struct file *filp, const char __user *buf, static const struct file_operations fops_reset_counts = { .owner = THIS_MODULE, .open = simple_open, - .llseek = no_llseek, .write = reset_all_on_write, }; diff --git a/drivers/firmware/arm_scmi/raw_mode.c b/drivers/firmware/arm_scmi/raw_mode.c index 130d13e9cd6b..9e89a6a763da 100644 --- a/drivers/firmware/arm_scmi/raw_mode.c +++ b/drivers/firmware/arm_scmi/raw_mode.c @@ -950,7 +950,6 @@ static const struct file_operations scmi_dbg_raw_mode_reset_fops = { .open = scmi_dbg_raw_mode_open, .release = scmi_dbg_raw_mode_release, .write = scmi_dbg_raw_mode_reset_write, - .llseek = no_llseek, .owner = THIS_MODULE, }; @@ -960,7 +959,6 @@ static const struct file_operations scmi_dbg_raw_mode_message_fops = { .read = scmi_dbg_raw_mode_message_read, .write = scmi_dbg_raw_mode_message_write, .poll = scmi_dbg_raw_mode_message_poll, - .llseek = no_llseek, .owner = THIS_MODULE, }; @@ -977,7 +975,6 @@ static const struct file_operations scmi_dbg_raw_mode_message_async_fops = { .read = scmi_dbg_raw_mode_message_read, .write = scmi_dbg_raw_mode_message_async_write, .poll = scmi_dbg_raw_mode_message_poll, - .llseek = no_llseek, .owner = THIS_MODULE, }; @@ -1001,7 +998,6 @@ static const struct file_operations scmi_dbg_raw_mode_notification_fops = { .release = scmi_dbg_raw_mode_release, .read = scmi_test_dbg_raw_mode_notif_read, .poll = scmi_test_dbg_raw_mode_notif_poll, - .llseek = no_llseek, .owner = THIS_MODULE, }; @@ -1025,7 +1021,6 @@ static const struct file_operations scmi_dbg_raw_mode_errors_fops = { .release = scmi_dbg_raw_mode_release, .read = scmi_test_dbg_raw_mode_errors_read, .poll = scmi_test_dbg_raw_mode_errors_poll, - .llseek = no_llseek, .owner = THIS_MODULE, }; diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c index 97bafb5f7038..0c17bdd388e1 100644 --- a/drivers/firmware/efi/capsule-loader.c +++ b/drivers/firmware/efi/capsule-loader.c @@ -309,7 +309,6 @@ static const struct file_operations efi_capsule_fops = { .open = efi_capsule_open, .write = efi_capsule_write, .release = efi_capsule_release, - .llseek = no_llseek, }; static struct miscdevice efi_capsule_misc = { diff --git a/drivers/firmware/efi/test/efi_test.c b/drivers/firmware/efi/test/efi_test.c index 47d67bb0a516..9e2628728aad 100644 --- a/drivers/firmware/efi/test/efi_test.c +++ b/drivers/firmware/efi/test/efi_test.c @@ -750,7 +750,6 @@ static const struct file_operations efi_test_fops = { .unlocked_ioctl = efi_test_ioctl, .open = efi_test_open, .release = efi_test_close, - .llseek = no_llseek, }; static struct miscdevice efi_test_dev = { diff --git a/drivers/firmware/turris-mox-rwtm.c b/drivers/firmware/turris-mox-rwtm.c index 525ebdc7ded5..f3bc0d427825 100644 --- a/drivers/firmware/turris-mox-rwtm.c +++ b/drivers/firmware/turris-mox-rwtm.c @@ -386,7 +386,6 @@ static const struct file_operations do_sign_fops = { .open = rwtm_debug_open, .read = do_sign_read, .write = do_sign_write, - .llseek = no_llseek, }; static void rwtm_debugfs_release(void *root) diff --git a/drivers/gnss/core.c b/drivers/gnss/core.c index 48f2ee0f78c4..883ef86ad3fc 100644 --- a/drivers/gnss/core.c +++ b/drivers/gnss/core.c @@ -206,7 +206,6 @@ static const struct file_operations gnss_fops = { .read = gnss_read, .write = gnss_write, .poll = gnss_poll, - .llseek = no_llseek, }; static struct class *gnss_class; diff --git a/drivers/gpio/gpio-mockup.c b/drivers/gpio/gpio-mockup.c index 455eecf6380e..d39c6618bade 100644 --- a/drivers/gpio/gpio-mockup.c +++ b/drivers/gpio/gpio-mockup.c @@ -347,7 +347,6 @@ static const struct file_operations gpio_mockup_debugfs_ops = { .open = gpio_mockup_debugfs_open, .read = gpio_mockup_debugfs_read, .write = gpio_mockup_debugfs_write, - .llseek = no_llseek, .release = single_release, }; diff --git a/drivers/gpio/gpio-sloppy-logic-analyzer.c b/drivers/gpio/gpio-sloppy-logic-analyzer.c index aed6d1f6cfc3..07e0d7180579 100644 --- a/drivers/gpio/gpio-sloppy-logic-analyzer.c +++ b/drivers/gpio/gpio-sloppy-logic-analyzer.c @@ -217,7 +217,6 @@ static const struct file_operations fops_trigger = { .owner = THIS_MODULE, .open = trigger_open, .write = trigger_write, - .llseek = no_llseek, .release = single_release, }; diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c index 5aac59de0d76..78c9d9ed3d68 100644 --- a/drivers/gpio/gpiolib-cdev.c +++ b/drivers/gpio/gpiolib-cdev.c @@ -2842,7 +2842,6 @@ static const struct file_operations gpio_fileops = { .poll = lineinfo_watch_poll, .read = lineinfo_watch_read, .owner = THIS_MODULE, - .llseek = no_llseek, .unlocked_ioctl = gpio_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = gpio_ioctl_compat, diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index 07e493d14d0c..ad1dc638c83b 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -103,7 +103,6 @@ bool drm_dev_needs_global_mutex(struct drm_device *dev) * .compat_ioctl = drm_compat_ioctl, // NULL if CONFIG_COMPAT=n * .poll = drm_poll, * .read = drm_read, - * .llseek = no_llseek, * .mmap = drm_gem_mmap, * }; * diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 025a79fe5920..2406cda75b7b 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -3751,7 +3751,6 @@ static int i915_perf_release(struct inode *inode, struct file *file) static const struct file_operations fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .release = i915_perf_release, .poll = i915_perf_poll, .read = i915_perf_read, diff --git a/drivers/gpu/drm/msm/msm_perf.c b/drivers/gpu/drm/msm/msm_perf.c index 3d3da79fec2a..d3c7889aaf26 100644 --- a/drivers/gpu/drm/msm/msm_perf.c +++ b/drivers/gpu/drm/msm/msm_perf.c @@ -192,7 +192,6 @@ static const struct file_operations perf_debugfs_fops = { .owner = THIS_MODULE, .open = perf_open, .read = perf_read, - .llseek = no_llseek, .release = perf_release, }; diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c index ca44fd291c5b..39138e190cb9 100644 --- a/drivers/gpu/drm/msm/msm_rd.c +++ b/drivers/gpu/drm/msm/msm_rd.c @@ -227,7 +227,6 @@ static const struct file_operations rd_debugfs_fops = { .owner = THIS_MODULE, .open = rd_open, .read = rd_read, - .llseek = no_llseek, .release = rd_release, }; diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 0369cc016f6a..eae38a49ee8e 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -1263,7 +1263,6 @@ static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma) static const struct file_operations xe_oa_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .release = xe_oa_release, .poll = xe_oa_poll, .read = xe_oa_read, diff --git a/drivers/hid/uhid.c b/drivers/hid/uhid.c index a54c7995b9be..21a70420151e 100644 --- a/drivers/hid/uhid.c +++ b/drivers/hid/uhid.c @@ -803,7 +803,6 @@ static const struct file_operations uhid_fops = { .read = uhid_char_read, .write = uhid_char_write, .poll = uhid_char_poll, - .llseek = no_llseek, }; static struct miscdevice uhid_misc = { diff --git a/drivers/hwmon/asus_atk0110.c b/drivers/hwmon/asus_atk0110.c index 3751c1e3eddd..1dc7e24fe4c5 100644 --- a/drivers/hwmon/asus_atk0110.c +++ b/drivers/hwmon/asus_atk0110.c @@ -783,7 +783,6 @@ static const struct file_operations atk_debugfs_ggrp_fops = { .read = atk_debugfs_ggrp_read, .open = atk_debugfs_ggrp_open, .release = atk_debugfs_ggrp_release, - .llseek = no_llseek, }; static void atk_debugfs_init(struct atk_data *data) diff --git a/drivers/hwmon/fschmd.c b/drivers/hwmon/fschmd.c index 1811f84d835e..a303959879ef 100644 --- a/drivers/hwmon/fschmd.c +++ b/drivers/hwmon/fschmd.c @@ -948,7 +948,6 @@ static long watchdog_ioctl(struct file *filp, unsigned int cmd, static const struct file_operations watchdog_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .open = watchdog_open, .release = watchdog_release, .write = watchdog_write, diff --git a/drivers/hwmon/w83793.c b/drivers/hwmon/w83793.c index 0acf6bd0227f..67728f60333f 100644 --- a/drivers/hwmon/w83793.c +++ b/drivers/hwmon/w83793.c @@ -1451,7 +1451,6 @@ static long watchdog_ioctl(struct file *filp, unsigned int cmd, static const struct file_operations watchdog_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .open = watchdog_open, .release = watchdog_close, .write = watchdog_write, diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c index 7edd3f1d0d46..aea9ac9c4bd0 100644 --- a/drivers/hwtracing/coresight/coresight-etb10.c +++ b/drivers/hwtracing/coresight/coresight-etb10.c @@ -652,7 +652,6 @@ static const struct file_operations etb_fops = { .open = etb_open, .read = etb_read, .release = etb_release, - .llseek = no_llseek, }; static struct attribute *coresight_etb_mgmt_attrs[] = { diff --git a/drivers/hwtracing/coresight/coresight-tmc-core.c b/drivers/hwtracing/coresight/coresight-tmc-core.c index b54562f392f3..3a482fd2cb22 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-core.c +++ b/drivers/hwtracing/coresight/coresight-tmc-core.c @@ -220,7 +220,6 @@ static const struct file_operations tmc_fops = { .open = tmc_open, .read = tmc_read, .release = tmc_release, - .llseek = no_llseek, }; static enum tmc_mem_intf_width tmc_get_memwidth(u32 devid) diff --git a/drivers/hwtracing/coresight/ultrasoc-smb.c b/drivers/hwtracing/coresight/ultrasoc-smb.c index f9ebf20c91e6..ef7f560f0ffa 100644 --- a/drivers/hwtracing/coresight/ultrasoc-smb.c +++ b/drivers/hwtracing/coresight/ultrasoc-smb.c @@ -163,7 +163,6 @@ static const struct file_operations smb_fops = { .open = smb_open, .read = smb_read, .release = smb_release, - .llseek = no_llseek, }; static ssize_t buf_size_show(struct device *dev, struct device_attribute *attr, diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c index be63d5b8f193..66123d684ac9 100644 --- a/drivers/hwtracing/intel_th/msu.c +++ b/drivers/hwtracing/intel_th/msu.c @@ -1677,7 +1677,6 @@ static const struct file_operations intel_th_msc_fops = { .release = intel_th_msc_release, .read = intel_th_msc_read, .mmap = intel_th_msc_mmap, - .llseek = no_llseek, .owner = THIS_MODULE, }; diff --git a/drivers/hwtracing/stm/core.c b/drivers/hwtracing/stm/core.c index ccf39a80dc4f..cdba4e875b28 100644 --- a/drivers/hwtracing/stm/core.c +++ b/drivers/hwtracing/stm/core.c @@ -839,7 +839,6 @@ static const struct file_operations stm_fops = { .mmap = stm_char_mmap, .unlocked_ioctl = stm_char_ioctl, .compat_ioctl = compat_ptr_ioctl, - .llseek = no_llseek, }; static void stm_device_release(struct device *dev) diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index f4fb212b7f39..61f7c4003d2f 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -637,7 +637,6 @@ static int i2cdev_release(struct inode *inode, struct file *file) static const struct file_operations i2cdev_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .read = i2cdev_read, .write = i2cdev_write, .unlocked_ioctl = i2cdev_ioctl, diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index dc57d07a1f45..5dbb248e9625 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1817,7 +1817,6 @@ static const struct file_operations ucma_fops = { .release = ucma_close, .write = ucma_write, .poll = ucma_poll, - .llseek = no_llseek, }; static struct miscdevice ucma_misc = { diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index f760dfffa188..fd67fc9fe85a 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -1082,7 +1082,6 @@ static const struct file_operations umad_fops = { #endif .open = ib_umad_open, .release = ib_umad_close, - .llseek = no_llseek, }; static int ib_umad_sm_open(struct inode *inode, struct file *filp) @@ -1150,7 +1149,6 @@ static const struct file_operations umad_sm_fops = { .owner = THIS_MODULE, .open = ib_umad_sm_open, .release = ib_umad_sm_close, - .llseek = no_llseek, }; static struct ib_umad_port *get_port(struct ib_device *ibdev, diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index bc099287de9a..94454186ed81 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -353,7 +353,6 @@ const struct file_operations uverbs_event_fops = { .poll = ib_uverbs_comp_event_poll, .release = uverbs_uobject_fd_release, .fasync = ib_uverbs_comp_event_fasync, - .llseek = no_llseek, }; const struct file_operations uverbs_async_event_fops = { @@ -362,7 +361,6 @@ const struct file_operations uverbs_async_event_fops = { .poll = ib_uverbs_async_event_poll, .release = uverbs_async_event_release, .fasync = ib_uverbs_async_event_fasync, - .llseek = no_llseek, }; void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) @@ -991,7 +989,6 @@ static const struct file_operations uverbs_fops = { .write = ib_uverbs_write, .open = ib_uverbs_open, .release = ib_uverbs_close, - .llseek = no_llseek, .unlocked_ioctl = ib_uverbs_ioctl, .compat_ioctl = compat_ptr_ioctl, }; @@ -1002,7 +999,6 @@ static const struct file_operations uverbs_mmap_fops = { .mmap = ib_uverbs_mmap, .open = ib_uverbs_open, .release = ib_uverbs_close, - .llseek = no_llseek, .unlocked_ioctl = ib_uverbs_ioctl, .compat_ioctl = compat_ptr_ioctl, }; diff --git a/drivers/infiniband/hw/hfi1/fault.c b/drivers/infiniband/hw/hfi1/fault.c index 35d2382ee618..ec9ee59fcf0c 100644 --- a/drivers/infiniband/hw/hfi1/fault.c +++ b/drivers/infiniband/hw/hfi1/fault.c @@ -203,7 +203,6 @@ static const struct file_operations __fault_opcodes_fops = { .open = fault_opcodes_open, .read = fault_opcodes_read, .write = fault_opcodes_write, - .llseek = no_llseek }; void hfi1_fault_exit_debugfs(struct hfi1_ibdev *ibd) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 253fea374a72..69999d8d24f3 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -2673,7 +2673,6 @@ static const struct file_operations devx_async_cmd_event_fops = { .read = devx_async_cmd_event_read, .poll = devx_async_cmd_event_poll, .release = uverbs_uobject_fd_release, - .llseek = no_llseek, }; static ssize_t devx_async_event_read(struct file *filp, char __user *buf, @@ -2788,7 +2787,6 @@ static const struct file_operations devx_async_event_fops = { .read = devx_async_event_read, .poll = devx_async_event_poll, .release = uverbs_uobject_fd_release, - .llseek = no_llseek, }; static void devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj, diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index eb4906552ac8..b5cbb57ee5f6 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -1299,7 +1299,6 @@ static const struct file_operations evdev_fops = { .compat_ioctl = evdev_ioctl_compat, #endif .fasync = evdev_fasync, - .llseek = no_llseek, }; /* diff --git a/drivers/input/joydev.c b/drivers/input/joydev.c index 5824bca02e5a..ba2b17288bcd 100644 --- a/drivers/input/joydev.c +++ b/drivers/input/joydev.c @@ -718,7 +718,6 @@ static const struct file_operations joydev_fops = { .compat_ioctl = joydev_compat_ioctl, #endif .fasync = joydev_fasync, - .llseek = no_llseek, }; /* diff --git a/drivers/input/keyboard/applespi.c b/drivers/input/keyboard/applespi.c index cf25177b4830..707c5a8ae736 100644 --- a/drivers/input/keyboard/applespi.c +++ b/drivers/input/keyboard/applespi.c @@ -1007,7 +1007,6 @@ static const struct file_operations applespi_tp_dim_fops = { .owner = THIS_MODULE, .open = applespi_tp_dim_open, .read = applespi_tp_dim_read, - .llseek = no_llseek, }; static void report_finger_data(struct input_dev *input, int slot, diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c index 445856c9127a..2c51ea9d01d7 100644 --- a/drivers/input/misc/uinput.c +++ b/drivers/input/misc/uinput.c @@ -1132,7 +1132,6 @@ static const struct file_operations uinput_fops = { #ifdef CONFIG_COMPAT .compat_ioctl = uinput_compat_ioctl, #endif - .llseek = no_llseek, }; static struct miscdevice uinput_misc = { diff --git a/drivers/input/serio/userio.c b/drivers/input/serio/userio.c index a88e2eee55c3..1ab12b247f98 100644 --- a/drivers/input/serio/userio.c +++ b/drivers/input/serio/userio.c @@ -267,7 +267,6 @@ static const struct file_operations userio_fops = { .read = userio_char_read, .write = userio_char_write, .poll = userio_char_poll, - .llseek = no_llseek, }; static struct miscdevice userio_misc = { diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c index 8c8226f0dffd..e590973ce5cf 100644 --- a/drivers/iommu/iommufd/fault.c +++ b/drivers/iommu/iommufd/fault.c @@ -360,7 +360,6 @@ static const struct file_operations iommufd_fault_fops = { .write = iommufd_fault_fops_write, .poll = iommufd_fault_fops_poll, .release = iommufd_fault_fops_release, - .llseek = no_llseek, }; int iommufd_fault_alloc(struct iommufd_ucmd *ucmd) diff --git a/drivers/isdn/capi/capi.c b/drivers/isdn/capi/capi.c index 3ed257334562..70dee9ad4bae 100644 --- a/drivers/isdn/capi/capi.c +++ b/drivers/isdn/capi/capi.c @@ -1024,7 +1024,6 @@ static int capi_release(struct inode *inode, struct file *file) static const struct file_operations capi_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .read = capi_read, .write = capi_write, .poll = capi_poll, diff --git a/drivers/isdn/mISDN/timerdev.c b/drivers/isdn/mISDN/timerdev.c index 83d6b484d3c6..7cfa8c61dba0 100644 --- a/drivers/isdn/mISDN/timerdev.c +++ b/drivers/isdn/mISDN/timerdev.c @@ -266,7 +266,6 @@ static const struct file_operations mISDN_fops = { .unlocked_ioctl = mISDN_ioctl, .open = mISDN_open, .release = mISDN_close, - .llseek = no_llseek, }; static struct miscdevice mISDNtimer = { diff --git a/drivers/leds/uleds.c b/drivers/leds/uleds.c index 3d361c920030..374a841f18c3 100644 --- a/drivers/leds/uleds.c +++ b/drivers/leds/uleds.c @@ -200,7 +200,6 @@ static const struct file_operations uleds_fops = { .read = uleds_read, .write = uleds_write, .poll = uleds_poll, - .llseek = no_llseek, }; static struct miscdevice uleds_misc = { diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c index b0407c5fadb2..88adee42ba82 100644 --- a/drivers/macintosh/adb.c +++ b/drivers/macintosh/adb.c @@ -842,7 +842,6 @@ static ssize_t adb_write(struct file *file, const char __user *buf, static const struct file_operations adb_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .read = adb_read, .write = adb_write, .open = adb_open, diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c index b2b78a53e532..a01bc5090cdf 100644 --- a/drivers/macintosh/smu.c +++ b/drivers/macintosh/smu.c @@ -1314,7 +1314,6 @@ static int smu_release(struct inode *inode, struct file *file) static const struct file_operations smu_device_fops = { - .llseek = no_llseek, .read = smu_read, .write = smu_write, .poll = smu_fpoll, diff --git a/drivers/media/cec/core/cec-api.c b/drivers/media/cec/core/cec-api.c index c75a4057f00e..c50299246fc4 100644 --- a/drivers/media/cec/core/cec-api.c +++ b/drivers/media/cec/core/cec-api.c @@ -698,5 +698,4 @@ const struct file_operations cec_devnode_fops = { .compat_ioctl = cec_ioctl, .release = cec_release, .poll = cec_poll, - .llseek = no_llseek, }; diff --git a/drivers/media/mc/mc-devnode.c b/drivers/media/mc/mc-devnode.c index 318e267e798e..56444edaf136 100644 --- a/drivers/media/mc/mc-devnode.c +++ b/drivers/media/mc/mc-devnode.c @@ -204,7 +204,6 @@ static const struct file_operations media_devnode_fops = { #endif /* CONFIG_COMPAT */ .release = media_release, .poll = media_poll, - .llseek = no_llseek, }; int __must_check media_devnode_register(struct media_device *mdev, diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c index b8dfd530fab7..f042f3f14afa 100644 --- a/drivers/media/rc/lirc_dev.c +++ b/drivers/media/rc/lirc_dev.c @@ -706,7 +706,6 @@ static const struct file_operations lirc_fops = { .poll = lirc_poll, .open = lirc_open, .release = lirc_close, - .llseek = no_llseek, }; static void lirc_release_device(struct device *ld) diff --git a/drivers/media/usb/uvc/uvc_debugfs.c b/drivers/media/usb/uvc/uvc_debugfs.c index 1a1258d4ffca..14fa41cb8148 100644 --- a/drivers/media/usb/uvc/uvc_debugfs.c +++ b/drivers/media/usb/uvc/uvc_debugfs.c @@ -59,7 +59,6 @@ static int uvc_debugfs_stats_release(struct inode *inode, struct file *file) static const struct file_operations uvc_debugfs_stats_fops = { .owner = THIS_MODULE, .open = uvc_debugfs_stats_open, - .llseek = no_llseek, .read = uvc_debugfs_stats_read, .release = uvc_debugfs_stats_release, }; diff --git a/drivers/media/v4l2-core/v4l2-dev.c b/drivers/media/v4l2-core/v4l2-dev.c index 570ba00e00b3..3d7711cc42bc 100644 --- a/drivers/media/v4l2-core/v4l2-dev.c +++ b/drivers/media/v4l2-core/v4l2-dev.c @@ -483,7 +483,6 @@ static const struct file_operations v4l2_fops = { #endif .release = v4l2_release, .poll = v4l2_poll, - .llseek = no_llseek, }; /** diff --git a/drivers/message/fusion/mptctl.c b/drivers/message/fusion/mptctl.c index 9f3999750c23..4766d8518dc9 100644 --- a/drivers/message/fusion/mptctl.c +++ b/drivers/message/fusion/mptctl.c @@ -2691,7 +2691,6 @@ mptctl_hp_targetinfo(MPT_ADAPTER *ioc, unsigned long arg) static const struct file_operations mptctl_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .fasync = mptctl_fasync, .unlocked_ioctl = mptctl_ioctl, #ifdef CONFIG_COMPAT diff --git a/drivers/misc/lis3lv02d/lis3lv02d.c b/drivers/misc/lis3lv02d/lis3lv02d.c index 1fc635a27568..4233dc4cc7d6 100644 --- a/drivers/misc/lis3lv02d/lis3lv02d.c +++ b/drivers/misc/lis3lv02d/lis3lv02d.c @@ -669,7 +669,6 @@ static int lis3lv02d_misc_fasync(int fd, struct file *file, int on) static const struct file_operations lis3lv02d_misc_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .read = lis3lv02d_misc_read, .open = lis3lv02d_misc_open, .release = lis3lv02d_misc_release, diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c index 40c3fe26f76d..1f5aaf16e300 100644 --- a/drivers/misc/mei/main.c +++ b/drivers/misc/mei/main.c @@ -1176,7 +1176,6 @@ static const struct file_operations mei_fops = { .poll = mei_poll, .fsync = mei_fsync, .fasync = mei_fasync, - .llseek = no_llseek }; /** diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c index 3c2f743c58b0..4954553b7baa 100644 --- a/drivers/misc/ntsync.c +++ b/drivers/misc/ntsync.c @@ -126,7 +126,6 @@ static const struct file_operations ntsync_obj_fops = { .release = ntsync_obj_release, .unlocked_ioctl = ntsync_obj_ioctl, .compat_ioctl = compat_ptr_ioctl, - .llseek = no_llseek, }; static struct ntsync_obj *ntsync_alloc_obj(struct ntsync_device *dev, @@ -233,7 +232,6 @@ static const struct file_operations ntsync_fops = { .release = ntsync_char_release, .unlocked_ioctl = ntsync_char_ioctl, .compat_ioctl = compat_ptr_ioctl, - .llseek = no_llseek, }; static struct miscdevice ntsync_misc = { diff --git a/drivers/misc/phantom.c b/drivers/misc/phantom.c index 30bd7c39c261..701db2c5859b 100644 --- a/drivers/misc/phantom.c +++ b/drivers/misc/phantom.c @@ -279,7 +279,6 @@ static const struct file_operations phantom_file_ops = { .unlocked_ioctl = phantom_ioctl, .compat_ioctl = phantom_compat_ioctl, .poll = phantom_poll, - .llseek = no_llseek, }; static irqreturn_t phantom_isr(int irq, void *data) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index f58bea534004..ef06a4d5d65b 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -2734,7 +2734,6 @@ static const struct file_operations mmc_rpmb_fileops = { .release = mmc_rpmb_chrdev_release, .open = mmc_rpmb_chrdev_open, .owner = THIS_MODULE, - .llseek = no_llseek, .unlocked_ioctl = mmc_rpmb_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = mmc_rpmb_ioctl_compat, diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c index 0d8f04cf03c5..6bb80d7714bc 100644 --- a/drivers/mtd/ubi/cdev.c +++ b/drivers/mtd/ubi/cdev.c @@ -1095,7 +1095,6 @@ const struct file_operations ubi_vol_cdev_operations = { /* UBI character device operations */ const struct file_operations ubi_cdev_operations = { .owner = THIS_MODULE, - .llseek = no_llseek, .unlocked_ioctl = ubi_cdev_ioctl, .compat_ioctl = compat_ptr_ioctl, }; @@ -1105,5 +1104,4 @@ const struct file_operations ubi_ctrl_cdev_operations = { .owner = THIS_MODULE, .unlocked_ioctl = ctrl_cdev_ioctl, .compat_ioctl = compat_ptr_ioctl, - .llseek = no_llseek, }; diff --git a/drivers/mtd/ubi/debug.c b/drivers/mtd/ubi/debug.c index 9ec3b8b6a0aa..d2a53961d8e2 100644 --- a/drivers/mtd/ubi/debug.c +++ b/drivers/mtd/ubi/debug.c @@ -470,7 +470,6 @@ static const struct file_operations dfs_fops = { .read = dfs_file_read, .write = dfs_file_write, .open = simple_open, - .llseek = no_llseek, .owner = THIS_MODULE, }; diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c index a1f91ff8ec56..41e80f78b316 100644 --- a/drivers/net/netdevsim/fib.c +++ b/drivers/net/netdevsim/fib.c @@ -1414,7 +1414,6 @@ static ssize_t nsim_nexthop_bucket_activity_write(struct file *file, static const struct file_operations nsim_nexthop_bucket_activity_fops = { .open = simple_open, .write = nsim_nexthop_bucket_activity_write, - .llseek = no_llseek, .owner = THIS_MODULE, }; diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 77574f7a3bd4..5aa41d5f7765 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -1162,7 +1162,6 @@ static const struct file_operations tap_fops = { .read_iter = tap_read_iter, .write_iter = tap_write_iter, .poll = tap_poll, - .llseek = no_llseek, .unlocked_ioctl = tap_ioctl, .compat_ioctl = compat_ptr_ioctl, }; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 5f77faef0ff1..9a0f6eb32016 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -3543,7 +3543,6 @@ static void tun_chr_show_fdinfo(struct seq_file *m, struct file *file) static const struct file_operations tun_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .read_iter = tun_chr_read_iter, .write_iter = tun_chr_write_iter, .poll = tun_chr_poll, diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c index df53dd1d7e74..da72fd2d541f 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c @@ -1184,7 +1184,6 @@ static ssize_t bus_reset_write(struct file *file, const char __user *user_buf, static const struct file_operations bus_reset_fops = { .open = simple_open, - .llseek = no_llseek, .write = bus_reset_write, }; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index 99a541d442bb..49a6aff42376 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -3768,7 +3768,6 @@ static int iwl_mvm_d3_test_release(struct inode *inode, struct file *file) } const struct file_operations iwl_dbgfs_d3_test_ops = { - .llseek = no_llseek, .open = iwl_mvm_d3_test_open, .read = iwl_mvm_d3_test_read, .release = iwl_mvm_d3_test_release, diff --git a/drivers/platform/chrome/cros_ec_debugfs.c b/drivers/platform/chrome/cros_ec_debugfs.c index 4525ad1b59f4..839154c46e46 100644 --- a/drivers/platform/chrome/cros_ec_debugfs.c +++ b/drivers/platform/chrome/cros_ec_debugfs.c @@ -302,7 +302,6 @@ static const struct file_operations cros_ec_console_log_fops = { .owner = THIS_MODULE, .open = cros_ec_console_log_open, .read = cros_ec_console_log_read, - .llseek = no_llseek, .poll = cros_ec_console_log_poll, .release = cros_ec_console_log_release, }; diff --git a/drivers/platform/chrome/wilco_ec/debugfs.c b/drivers/platform/chrome/wilco_ec/debugfs.c index 983f2fa44ba5..99486086af6a 100644 --- a/drivers/platform/chrome/wilco_ec/debugfs.c +++ b/drivers/platform/chrome/wilco_ec/debugfs.c @@ -156,7 +156,6 @@ static const struct file_operations fops_raw = { .owner = THIS_MODULE, .read = raw_read, .write = raw_write, - .llseek = no_llseek, }; #define CMD_KB_CHROME 0x88 diff --git a/drivers/platform/chrome/wilco_ec/event.c b/drivers/platform/chrome/wilco_ec/event.c index bd1fb53ba028..196e46a1d489 100644 --- a/drivers/platform/chrome/wilco_ec/event.c +++ b/drivers/platform/chrome/wilco_ec/event.c @@ -403,7 +403,6 @@ static const struct file_operations event_fops = { .poll = event_poll, .read = event_read, .release = event_release, - .llseek = no_llseek, .owner = THIS_MODULE, }; diff --git a/drivers/platform/chrome/wilco_ec/telemetry.c b/drivers/platform/chrome/wilco_ec/telemetry.c index 21d4cbbb009a..a87877e4300a 100644 --- a/drivers/platform/chrome/wilco_ec/telemetry.c +++ b/drivers/platform/chrome/wilco_ec/telemetry.c @@ -330,7 +330,6 @@ static const struct file_operations telem_fops = { .write = telem_write, .read = telem_read, .release = telem_release, - .llseek = no_llseek, .owner = THIS_MODULE, }; diff --git a/drivers/platform/surface/surface_aggregator_cdev.c b/drivers/platform/surface/surface_aggregator_cdev.c index 07e065b9159f..165b1416230d 100644 --- a/drivers/platform/surface/surface_aggregator_cdev.c +++ b/drivers/platform/surface/surface_aggregator_cdev.c @@ -670,7 +670,6 @@ static const struct file_operations ssam_controller_fops = { .fasync = ssam_cdev_fasync, .unlocked_ioctl = ssam_cdev_device_ioctl, .compat_ioctl = ssam_cdev_device_ioctl, - .llseek = no_llseek, }; diff --git a/drivers/platform/surface/surface_dtx.c b/drivers/platform/surface/surface_dtx.c index 2de843b7ea70..89ca6b50e812 100644 --- a/drivers/platform/surface/surface_dtx.c +++ b/drivers/platform/surface/surface_dtx.c @@ -555,7 +555,6 @@ static const struct file_operations surface_dtx_fops = { .fasync = surface_dtx_fasync, .unlocked_ioctl = surface_dtx_ioctl, .compat_ioctl = surface_dtx_ioctl, - .llseek = no_llseek, }; diff --git a/drivers/pps/pps.c b/drivers/pps/pps.c index 5d19baae6a38..25d47907db17 100644 --- a/drivers/pps/pps.c +++ b/drivers/pps/pps.c @@ -319,7 +319,6 @@ static int pps_cdev_release(struct inode *inode, struct file *file) static const struct file_operations pps_cdev_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .poll = pps_cdev_poll, .fasync = pps_cdev_fasync, .compat_ioctl = pps_cdev_compat_ioctl, diff --git a/drivers/rtc/dev.c b/drivers/rtc/dev.c index 4aad9bb99868..c4a3ab53dcd4 100644 --- a/drivers/rtc/dev.c +++ b/drivers/rtc/dev.c @@ -523,7 +523,6 @@ static int rtc_dev_release(struct inode *inode, struct file *file) static const struct file_operations rtc_dev_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .read = rtc_dev_read, .poll = rtc_dev_poll, .unlocked_ioctl = rtc_dev_ioctl, diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c index 0013bff0447d..1f58ae8b151e 100644 --- a/drivers/rtc/rtc-m41t80.c +++ b/drivers/rtc/rtc-m41t80.c @@ -850,7 +850,6 @@ static const struct file_operations wdt_fops = { .write = wdt_write, .open = wdt_open, .release = wdt_release, - .llseek = no_llseek, }; static struct miscdevice wdt_dev = { diff --git a/drivers/s390/char/fs3270.c b/drivers/s390/char/fs3270.c index 61515781c5dd..cfe7efd5b5da 100644 --- a/drivers/s390/char/fs3270.c +++ b/drivers/s390/char/fs3270.c @@ -515,7 +515,6 @@ static const struct file_operations fs3270_fops = { .compat_ioctl = fs3270_ioctl, /* ioctl */ .open = fs3270_open, /* open */ .release = fs3270_close, /* release */ - .llseek = no_llseek, }; static void fs3270_create_cb(int minor) diff --git a/drivers/s390/char/sclp_ctl.c b/drivers/s390/char/sclp_ctl.c index 248b5db3eaa8..dd6051602070 100644 --- a/drivers/s390/char/sclp_ctl.c +++ b/drivers/s390/char/sclp_ctl.c @@ -115,7 +115,6 @@ static const struct file_operations sclp_ctl_fops = { .open = nonseekable_open, .unlocked_ioctl = sclp_ctl_ioctl, .compat_ioctl = sclp_ctl_ioctl, - .llseek = no_llseek, }; /* diff --git a/drivers/s390/char/tape_char.c b/drivers/s390/char/tape_char.c index cc8237afeffa..89778d922d9f 100644 --- a/drivers/s390/char/tape_char.c +++ b/drivers/s390/char/tape_char.c @@ -52,7 +52,6 @@ static const struct file_operations tape_fops = #endif .open = tapechar_open, .release = tapechar_release, - .llseek = no_llseek, }; static int tapechar_major = TAPECHAR_MAJOR; diff --git a/drivers/s390/char/uvdevice.c b/drivers/s390/char/uvdevice.c index 42c9f77f8da0..f598edc5f251 100644 --- a/drivers/s390/char/uvdevice.c +++ b/drivers/s390/char/uvdevice.c @@ -448,7 +448,6 @@ static long uvio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) static const struct file_operations uvio_dev_fops = { .owner = THIS_MODULE, .unlocked_ioctl = uvio_ioctl, - .llseek = no_llseek, }; static struct miscdevice uvio_dev_miscdev = { diff --git a/drivers/s390/char/vmcp.c b/drivers/s390/char/vmcp.c index eb0520a9d4af..c6d58335beb4 100644 --- a/drivers/s390/char/vmcp.c +++ b/drivers/s390/char/vmcp.c @@ -242,7 +242,6 @@ static const struct file_operations vmcp_fops = { .write = vmcp_write, .unlocked_ioctl = vmcp_ioctl, .compat_ioctl = vmcp_ioctl, - .llseek = no_llseek, }; static struct miscdevice vmcp_dev = { diff --git a/drivers/s390/char/vmlogrdr.c b/drivers/s390/char/vmlogrdr.c index c09e1e09fb66..bd5cecc44123 100644 --- a/drivers/s390/char/vmlogrdr.c +++ b/drivers/s390/char/vmlogrdr.c @@ -96,7 +96,6 @@ static const struct file_operations vmlogrdr_fops = { .open = vmlogrdr_open, .release = vmlogrdr_release, .read = vmlogrdr_read, - .llseek = no_llseek, }; diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index 0969fa01df58..33cebb91b933 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -165,7 +165,6 @@ static const struct file_operations zcore_reipl_fops = { .write = zcore_reipl_write, .open = zcore_reipl_open, .release = zcore_reipl_release, - .llseek = no_llseek, }; static ssize_t zcore_hsa_read(struct file *filp, char __user *buf, @@ -200,7 +199,6 @@ static const struct file_operations zcore_hsa_fops = { .write = zcore_hsa_write, .read = zcore_hsa_read, .open = nonseekable_open, - .llseek = no_llseek, }; static int __init check_sdias(void) diff --git a/drivers/s390/cio/chsc_sch.c b/drivers/s390/cio/chsc_sch.c index e6c800653f98..1e58ee3cc87d 100644 --- a/drivers/s390/cio/chsc_sch.c +++ b/drivers/s390/cio/chsc_sch.c @@ -924,7 +924,6 @@ static const struct file_operations chsc_fops = { .release = chsc_release, .unlocked_ioctl = chsc_ioctl, .compat_ioctl = chsc_ioctl, - .llseek = no_llseek, }; static struct miscdevice chsc_misc_device = { diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index 53b68f8c32f3..7b59d20bf785 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -1332,7 +1332,6 @@ static ssize_t cio_settle_write(struct file *file, const char __user *buf, static const struct proc_ops cio_settle_proc_ops = { .proc_open = nonseekable_open, .proc_write = cio_settle_write, - .proc_lseek = no_llseek, }; static int __init cio_settle_init(void) diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index c20251e00cf9..3a39e167bdbf 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -776,7 +776,6 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd, static const struct file_operations pkey_fops = { .owner = THIS_MODULE, .open = nonseekable_open, - .llseek = no_llseek, .unlocked_ioctl = pkey_unlocked_ioctl, }; diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index f9a47b54c51a..5020696f1379 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -1908,7 +1908,6 @@ static const struct file_operations zcrypt_fops = { #endif .open = zcrypt_open, .release = zcrypt_release, - .llseek = no_llseek, }; /* diff --git a/drivers/sbus/char/openprom.c b/drivers/sbus/char/openprom.c index cc178874c4a6..8643947fee8e 100644 --- a/drivers/sbus/char/openprom.c +++ b/drivers/sbus/char/openprom.c @@ -687,7 +687,6 @@ static int openprom_release(struct inode * inode, struct file * file) static const struct file_operations openprom_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .unlocked_ioctl = openprom_ioctl, .compat_ioctl = openprom_compat_ioctl, .open = openprom_open, diff --git a/drivers/sbus/char/uctrl.c b/drivers/sbus/char/uctrl.c index 3c88f29f4c47..8bbed7a7afb7 100644 --- a/drivers/sbus/char/uctrl.c +++ b/drivers/sbus/char/uctrl.c @@ -221,7 +221,6 @@ static irqreturn_t uctrl_interrupt(int irq, void *dev_id) static const struct file_operations uctrl_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .unlocked_ioctl = uctrl_ioctl, .open = uctrl_open, }; diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index baf870a03ecf..f86be197fedd 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1424,7 +1424,6 @@ static const struct file_operations sg_fops = { .mmap = sg_mmap, .release = sg_release, .fasync = sg_fasync, - .llseek = no_llseek, }; static const struct class sg_sysfs_class = { diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c index 5539c5d139d4..653f82984216 100644 --- a/drivers/spi/spidev.c +++ b/drivers/spi/spidev.c @@ -685,7 +685,6 @@ static const struct file_operations spidev_fops = { .compat_ioctl = spidev_compat_ioctl, .open = spidev_open, .release = spidev_release, - .llseek = no_llseek, }; /*-------------------------------------------------------------------------*/ diff --git a/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c b/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c index 4b4a4d63e61f..cb149bcdd7d5 100644 --- a/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c +++ b/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c @@ -564,7 +564,6 @@ static const struct file_operations acpi_thermal_rel_fops = { .open = acpi_thermal_rel_open, .release = acpi_thermal_rel_release, .unlocked_ioctl = acpi_thermal_rel_ioctl, - .llseek = no_llseek, }; static struct miscdevice acpi_thermal_rel_misc_device = { diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 6bd28a042dff..9771072da177 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -459,7 +459,6 @@ static void tty_show_fdinfo(struct seq_file *m, struct file *file) } static const struct file_operations tty_fops = { - .llseek = no_llseek, .read_iter = tty_read, .write_iter = tty_write, .splice_read = copy_splice_read, @@ -474,7 +473,6 @@ static const struct file_operations tty_fops = { }; static const struct file_operations console_fops = { - .llseek = no_llseek, .read_iter = tty_read, .write_iter = redirected_tty_write, .splice_read = copy_splice_read, @@ -488,7 +486,6 @@ static const struct file_operations console_fops = { }; static const struct file_operations hung_up_tty_fops = { - .llseek = no_llseek, .read_iter = hung_up_tty_read, .write_iter = hung_up_tty_write, .poll = hung_up_tty_poll, diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 05b52e61a66f..c626bb73ea59 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -722,7 +722,6 @@ static __poll_t ffs_ep0_poll(struct file *file, poll_table *wait) } static const struct file_operations ffs_ep0_operations = { - .llseek = no_llseek, .open = ffs_ep0_open, .write = ffs_ep0_write, @@ -1830,7 +1829,6 @@ static long ffs_epfile_ioctl(struct file *file, unsigned code, } static const struct file_operations ffs_epfile_operations = { - .llseek = no_llseek, .open = ffs_epfile_open, .write_iter = ffs_epfile_write_iter, diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index 03179b1880fd..9c7381661016 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -705,7 +705,6 @@ static const struct file_operations ep_io_operations = { .open = ep_open, .release = ep_release, - .llseek = no_llseek, .unlocked_ioctl = ep_ioctl, .read_iter = ep_read_iter, .write_iter = ep_write_iter, @@ -1939,7 +1938,6 @@ gadget_dev_open (struct inode *inode, struct file *fd) } static const struct file_operations ep0_operations = { - .llseek = no_llseek, .open = gadget_dev_open, .read = ep0_read, diff --git a/drivers/usb/gadget/legacy/raw_gadget.c b/drivers/usb/gadget/legacy/raw_gadget.c index 399fca32a8ac..112fd18d8c99 100644 --- a/drivers/usb/gadget/legacy/raw_gadget.c +++ b/drivers/usb/gadget/legacy/raw_gadget.c @@ -1364,7 +1364,6 @@ static const struct file_operations raw_fops = { .unlocked_ioctl = raw_ioctl, .compat_ioctl = raw_ioctl, .release = raw_release, - .llseek = no_llseek, }; static struct miscdevice raw_misc_device = { diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.c b/drivers/usb/gadget/udc/atmel_usba_udc.c index b76885d78e8a..4928eba19327 100644 --- a/drivers/usb/gadget/udc/atmel_usba_udc.c +++ b/drivers/usb/gadget/udc/atmel_usba_udc.c @@ -187,7 +187,6 @@ static int regs_dbg_release(struct inode *inode, struct file *file) static const struct file_operations queue_dbg_fops = { .owner = THIS_MODULE, .open = queue_dbg_open, - .llseek = no_llseek, .read = queue_dbg_read, .release = queue_dbg_release, }; diff --git a/drivers/usb/misc/ldusb.c b/drivers/usb/misc/ldusb.c index 7cbef74dfc9a..f392d6f84df9 100644 --- a/drivers/usb/misc/ldusb.c +++ b/drivers/usb/misc/ldusb.c @@ -627,7 +627,6 @@ static const struct file_operations ld_usb_fops = { .open = ld_usb_open, .release = ld_usb_release, .poll = ld_usb_poll, - .llseek = no_llseek, }; /* diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c index 4e30de4db1c0..afb71c18415d 100644 --- a/drivers/usb/mon/mon_bin.c +++ b/drivers/usb/mon/mon_bin.c @@ -1289,7 +1289,6 @@ static int mon_bin_mmap(struct file *filp, struct vm_area_struct *vma) static const struct file_operations mon_fops_binary = { .owner = THIS_MODULE, .open = mon_bin_open, - .llseek = no_llseek, .read = mon_bin_read, /* .write = mon_text_write, */ .poll = mon_bin_poll, diff --git a/drivers/usb/mon/mon_stat.c b/drivers/usb/mon/mon_stat.c index 3c23805ab1a4..398e02af6a2b 100644 --- a/drivers/usb/mon/mon_stat.c +++ b/drivers/usb/mon/mon_stat.c @@ -62,7 +62,6 @@ static int mon_stat_release(struct inode *inode, struct file *file) const struct file_operations mon_fops_stat = { .owner = THIS_MODULE, .open = mon_stat_open, - .llseek = no_llseek, .read = mon_stat_read, /* .write = mon_stat_write, */ /* .poll = mon_stat_poll, */ diff --git a/drivers/usb/mon/mon_text.c b/drivers/usb/mon/mon_text.c index 2fe9b95bac1d..68b9b2b41189 100644 --- a/drivers/usb/mon/mon_text.c +++ b/drivers/usb/mon/mon_text.c @@ -685,7 +685,6 @@ static int mon_text_release(struct inode *inode, struct file *file) static const struct file_operations mon_fops_text_t = { .owner = THIS_MODULE, .open = mon_text_open, - .llseek = no_llseek, .read = mon_text_read_t, .release = mon_text_release, }; @@ -693,7 +692,6 @@ static const struct file_operations mon_fops_text_t = { static const struct file_operations mon_fops_text_u = { .owner = THIS_MODULE, .open = mon_text_open, - .llseek = no_llseek, .read = mon_text_read_u, .release = mon_text_release, }; diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c index 9a3e97108ace..0d632ba5d2a3 100644 --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c @@ -723,7 +723,6 @@ static const struct file_operations hisi_acc_vf_resume_fops = { .owner = THIS_MODULE, .write = hisi_acc_vf_resume_write, .release = hisi_acc_vf_release_file, - .llseek = no_llseek, }; static struct hisi_acc_vf_migration_file * @@ -845,7 +844,6 @@ static const struct file_operations hisi_acc_vf_save_fops = { .unlocked_ioctl = hisi_acc_vf_precopy_ioctl, .compat_ioctl = compat_ptr_ioctl, .release = hisi_acc_vf_release_file, - .llseek = no_llseek, }; static struct hisi_acc_vf_migration_file * diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index 61d9b0f9146d..242c23eef452 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -587,7 +587,6 @@ static const struct file_operations mlx5vf_save_fops = { .unlocked_ioctl = mlx5vf_precopy_ioctl, .compat_ioctl = compat_ptr_ioctl, .release = mlx5vf_release_file, - .llseek = no_llseek, }; static int mlx5vf_pci_save_device_inc_data(struct mlx5vf_pci_core_device *mvdev) @@ -1000,7 +999,6 @@ static const struct file_operations mlx5vf_resume_fops = { .owner = THIS_MODULE, .write = mlx5vf_resume_write, .release = mlx5vf_release_file, - .llseek = no_llseek, }; static struct mlx5_vf_migration_file * diff --git a/drivers/vfio/pci/pds/lm.c b/drivers/vfio/pci/pds/lm.c index 6b94cc0bf45b..f2673d395236 100644 --- a/drivers/vfio/pci/pds/lm.c +++ b/drivers/vfio/pci/pds/lm.c @@ -235,7 +235,6 @@ static const struct file_operations pds_vfio_save_fops = { .owner = THIS_MODULE, .read = pds_vfio_save_read, .release = pds_vfio_release_file, - .llseek = no_llseek, }; static int pds_vfio_get_save_file(struct pds_vfio_pci_device *pds_vfio) @@ -334,7 +333,6 @@ static const struct file_operations pds_vfio_restore_fops = { .owner = THIS_MODULE, .write = pds_vfio_restore_write, .release = pds_vfio_release_file, - .llseek = no_llseek, }; static int pds_vfio_get_restore_file(struct pds_vfio_pci_device *pds_vfio) diff --git a/drivers/vfio/pci/qat/main.c b/drivers/vfio/pci/qat/main.c index e36740a282e7..be3644ced17b 100644 --- a/drivers/vfio/pci/qat/main.c +++ b/drivers/vfio/pci/qat/main.c @@ -220,7 +220,6 @@ static const struct file_operations qat_vf_save_fops = { .unlocked_ioctl = qat_vf_precopy_ioctl, .compat_ioctl = compat_ptr_ioctl, .release = qat_vf_release_file, - .llseek = no_llseek, }; static int qat_vf_save_state(struct qat_vf_core_device *qat_vdev, @@ -345,7 +344,6 @@ static const struct file_operations qat_vf_resume_fops = { .owner = THIS_MODULE, .write = qat_vf_resume_write, .release = qat_vf_release_file, - .llseek = no_llseek, }; static struct qat_vf_migration_file * diff --git a/drivers/virt/coco/tdx-guest/tdx-guest.c b/drivers/virt/coco/tdx-guest/tdx-guest.c index 2acba56ad42e..d7db6c824e13 100644 --- a/drivers/virt/coco/tdx-guest/tdx-guest.c +++ b/drivers/virt/coco/tdx-guest/tdx-guest.c @@ -285,7 +285,6 @@ static long tdx_guest_ioctl(struct file *file, unsigned int cmd, static const struct file_operations tdx_guest_fops = { .owner = THIS_MODULE, .unlocked_ioctl = tdx_guest_ioctl, - .llseek = no_llseek, }; static struct miscdevice tdx_misc_dev = { diff --git a/drivers/watchdog/acquirewdt.c b/drivers/watchdog/acquirewdt.c index 53b04abd55b0..08ca18e91124 100644 --- a/drivers/watchdog/acquirewdt.c +++ b/drivers/watchdog/acquirewdt.c @@ -218,7 +218,6 @@ static int acq_close(struct inode *inode, struct file *file) static const struct file_operations acq_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = acq_write, .unlocked_ioctl = acq_ioctl, .compat_ioctl = compat_ptr_ioctl, diff --git a/drivers/watchdog/advantechwdt.c b/drivers/watchdog/advantechwdt.c index 7a0acbc3e4dd..e41cd3ba4e0e 100644 --- a/drivers/watchdog/advantechwdt.c +++ b/drivers/watchdog/advantechwdt.c @@ -217,7 +217,6 @@ static int advwdt_close(struct inode *inode, struct file *file) static const struct file_operations advwdt_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = advwdt_write, .unlocked_ioctl = advwdt_ioctl, .compat_ioctl = compat_ptr_ioctl, diff --git a/drivers/watchdog/alim1535_wdt.c b/drivers/watchdog/alim1535_wdt.c index bfb9a91ca1df..1ecbd1ac5c3a 100644 --- a/drivers/watchdog/alim1535_wdt.c +++ b/drivers/watchdog/alim1535_wdt.c @@ -359,7 +359,6 @@ static int __init ali_find_watchdog(void) static const struct file_operations ali_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = ali_write, .unlocked_ioctl = ali_ioctl, .compat_ioctl = compat_ptr_ioctl, diff --git a/drivers/watchdog/alim7101_wdt.c b/drivers/watchdog/alim7101_wdt.c index 4ff7f5afb7aa..9c7cf939ba3d 100644 --- a/drivers/watchdog/alim7101_wdt.c +++ b/drivers/watchdog/alim7101_wdt.c @@ -289,7 +289,6 @@ static long fop_ioctl(struct file *file, unsigned int cmd, unsigned long arg) static const struct file_operations wdt_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = fop_write, .open = fop_open, .release = fop_close, diff --git a/drivers/watchdog/at91rm9200_wdt.c b/drivers/watchdog/at91rm9200_wdt.c index 558015f08c7a..17382512a609 100644 --- a/drivers/watchdog/at91rm9200_wdt.c +++ b/drivers/watchdog/at91rm9200_wdt.c @@ -210,7 +210,6 @@ static ssize_t at91_wdt_write(struct file *file, const char *data, static const struct file_operations at91wdt_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .unlocked_ioctl = at91_wdt_ioctl, .compat_ioctl = compat_ptr_ioctl, .open = at91_wdt_open, diff --git a/drivers/watchdog/ath79_wdt.c b/drivers/watchdog/ath79_wdt.c index e5cc30622b12..d16b2c583fa4 100644 --- a/drivers/watchdog/ath79_wdt.c +++ b/drivers/watchdog/ath79_wdt.c @@ -231,7 +231,6 @@ static long ath79_wdt_ioctl(struct file *file, unsigned int cmd, static const struct file_operations ath79_wdt_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = ath79_wdt_write, .unlocked_ioctl = ath79_wdt_ioctl, .compat_ioctl = compat_ptr_ioctl, diff --git a/drivers/watchdog/cpu5wdt.c b/drivers/watchdog/cpu5wdt.c index 9f279c0e13a6..f94b84048612 100644 --- a/drivers/watchdog/cpu5wdt.c +++ b/drivers/watchdog/cpu5wdt.c @@ -185,7 +185,6 @@ static ssize_t cpu5wdt_write(struct file *file, const char __user *buf, static const struct file_operations cpu5wdt_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .unlocked_ioctl = cpu5wdt_ioctl, .compat_ioctl = compat_ptr_ioctl, .open = cpu5wdt_open, diff --git a/drivers/watchdog/cpwd.c b/drivers/watchdog/cpwd.c index 901b94d456db..8ee81f018dda 100644 --- a/drivers/watchdog/cpwd.c +++ b/drivers/watchdog/cpwd.c @@ -507,7 +507,6 @@ static const struct file_operations cpwd_fops = { .write = cpwd_write, .read = cpwd_read, .release = cpwd_release, - .llseek = no_llseek, }; static int cpwd_probe(struct platform_device *op) diff --git a/drivers/watchdog/eurotechwdt.c b/drivers/watchdog/eurotechwdt.c index e26609ad4c17..10c647b1226a 100644 --- a/drivers/watchdog/eurotechwdt.c +++ b/drivers/watchdog/eurotechwdt.c @@ -368,7 +368,6 @@ static int eurwdt_notify_sys(struct notifier_block *this, unsigned long code, static const struct file_operations eurwdt_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = eurwdt_write, .unlocked_ioctl = eurwdt_ioctl, .compat_ioctl = compat_ptr_ioctl, diff --git a/drivers/watchdog/gef_wdt.c b/drivers/watchdog/gef_wdt.c index 6a1db1c783fa..d854fcfbfa5b 100644 --- a/drivers/watchdog/gef_wdt.c +++ b/drivers/watchdog/gef_wdt.c @@ -245,7 +245,6 @@ static int gef_wdt_release(struct inode *inode, struct file *file) static const struct file_operations gef_wdt_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = gef_wdt_write, .unlocked_ioctl = gef_wdt_ioctl, .compat_ioctl = compat_ptr_ioctl, diff --git a/drivers/watchdog/geodewdt.c b/drivers/watchdog/geodewdt.c index 5186c37ad451..4ed6d139320b 100644 --- a/drivers/watchdog/geodewdt.c +++ b/drivers/watchdog/geodewdt.c @@ -196,7 +196,6 @@ static long geodewdt_ioctl(struct file *file, unsigned int cmd, static const struct file_operations geodewdt_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = geodewdt_write, .unlocked_ioctl = geodewdt_ioctl, .compat_ioctl = compat_ptr_ioctl, diff --git a/drivers/watchdog/ib700wdt.c b/drivers/watchdog/ib700wdt.c index 39ea97009abd..b041ad90a62c 100644 --- a/drivers/watchdog/ib700wdt.c +++ b/drivers/watchdog/ib700wdt.c @@ -256,7 +256,6 @@ static int ibwdt_close(struct inode *inode, struct file *file) static const struct file_operations ibwdt_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = ibwdt_write, .unlocked_ioctl = ibwdt_ioctl, .compat_ioctl = compat_ptr_ioctl, diff --git a/drivers/watchdog/ibmasr.c b/drivers/watchdog/ibmasr.c index 6955c693b5fd..cf845f865945 100644 --- a/drivers/watchdog/ibmasr.c +++ b/drivers/watchdog/ibmasr.c @@ -340,7 +340,6 @@ static int asr_release(struct inode *inode, struct file *file) static const struct file_operations asr_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = asr_write, .unlocked_ioctl = asr_ioctl, .compat_ioctl = compat_ptr_ioctl, diff --git a/drivers/watchdog/indydog.c b/drivers/watchdog/indydog.c index 9857bb74a723..d3092d261345 100644 --- a/drivers/watchdog/indydog.c +++ b/drivers/watchdog/indydog.c @@ -149,7 +149,6 @@ static int indydog_notify_sys(struct notifier_block *this, static const struct file_operations indydog_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = indydog_write, .unlocked_ioctl = indydog_ioctl, .compat_ioctl = compat_ptr_ioctl, diff --git a/drivers/watchdog/it8712f_wdt.c b/drivers/watchdog/it8712f_wdt.c index 3ce6a58bd81e..b776e6766c9d 100644 --- a/drivers/watchdog/it8712f_wdt.c +++ b/drivers/watchdog/it8712f_wdt.c @@ -341,7 +341,6 @@ static int it8712f_wdt_release(struct inode *inode, struct file *file) static const struct file_operations it8712f_wdt_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = it8712f_wdt_write, .unlocked_ioctl = it8712f_wdt_ioctl, .compat_ioctl = compat_ptr_ioctl, diff --git a/drivers/watchdog/m54xx_wdt.c b/drivers/watchdog/m54xx_wdt.c index 062ea3e6497e..26bd073bd375 100644 --- a/drivers/watchdog/m54xx_wdt.c +++ b/drivers/watchdog/m54xx_wdt.c @@ -179,7 +179,6 @@ static int m54xx_wdt_release(struct inode *inode, struct file *file) static const struct file_operations m54xx_wdt_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = m54xx_wdt_write, .unlocked_ioctl = m54xx_wdt_ioctl, .compat_ioctl = compat_ptr_ioctl, diff --git a/drivers/watchdog/machzwd.c b/drivers/watchdog/machzwd.c index 73f2221f6222..73d641486909 100644 --- a/drivers/watchdog/machzwd.c +++ b/drivers/watchdog/machzwd.c @@ -359,7 +359,6 @@ static int zf_notify_sys(struct notifier_block *this, unsigned long code, static const struct file_operations zf_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = zf_write, .unlocked_ioctl = zf_ioctl, .compat_ioctl = compat_ptr_ioctl, diff --git a/drivers/watchdog/mixcomwd.c b/drivers/watchdog/mixcomwd.c index d387bad377c4..70d9cf84c342 100644 --- a/drivers/watchdog/mixcomwd.c +++ b/drivers/watchdog/mixcomwd.c @@ -224,7 +224,6 @@ static long mixcomwd_ioctl(struct file *file, static const struct file_operations mixcomwd_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = mixcomwd_write, .unlocked_ioctl = mixcomwd_ioctl, .compat_ioctl = compat_ptr_ioctl, diff --git a/drivers/watchdog/mtx-1_wdt.c b/drivers/watchdog/mtx-1_wdt.c index 06756135033d..11f05024a181 100644 --- a/drivers/watchdog/mtx-1_wdt.c +++ b/drivers/watchdog/mtx-1_wdt.c @@ -177,7 +177,6 @@ static ssize_t mtx1_wdt_write(struct file *file, const char *buf, static const struct file_operations mtx1_wdt_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .unlocked_ioctl = mtx1_wdt_ioctl, .compat_ioctl = compat_ptr_ioctl, .open = mtx1_wdt_open, diff --git a/drivers/watchdog/nv_tco.c b/drivers/watchdog/nv_tco.c index ac4a9c16341d..f8eb1f65a59e 100644 --- a/drivers/watchdog/nv_tco.c +++ b/drivers/watchdog/nv_tco.c @@ -264,7 +264,6 @@ static long nv_tco_ioctl(struct file *file, unsigned int cmd, static const struct file_operations nv_tco_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = nv_tco_write, .unlocked_ioctl = nv_tco_ioctl, .compat_ioctl = compat_ptr_ioctl, diff --git a/drivers/watchdog/pc87413_wdt.c b/drivers/watchdog/pc87413_wdt.c index c7f745caf203..fbf835d112b8 100644 --- a/drivers/watchdog/pc87413_wdt.c +++ b/drivers/watchdog/pc87413_wdt.c @@ -470,7 +470,6 @@ static int pc87413_notify_sys(struct notifier_block *this, static const struct file_operations pc87413_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = pc87413_write, .unlocked_ioctl = pc87413_ioctl, .compat_ioctl = compat_ptr_ioctl, diff --git a/drivers/watchdog/pcwd.c b/drivers/watchdog/pcwd.c index a793b03a785d..1a4282235aac 100644 --- a/drivers/watchdog/pcwd.c +++ b/drivers/watchdog/pcwd.c @@ -749,7 +749,6 @@ static int pcwd_temp_close(struct inode *inode, struct file *file) static const struct file_operations pcwd_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = pcwd_write, .unlocked_ioctl = pcwd_ioctl, .compat_ioctl = compat_ptr_ioctl, @@ -765,7 +764,6 @@ static struct miscdevice pcwd_miscdev = { static const struct file_operations pcwd_temp_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .read = pcwd_temp_read, .open = pcwd_temp_open, .release = pcwd_temp_close, diff --git a/drivers/watchdog/pcwd_pci.c b/drivers/watchdog/pcwd_pci.c index 54d86fcb1837..a489b426f2ba 100644 --- a/drivers/watchdog/pcwd_pci.c +++ b/drivers/watchdog/pcwd_pci.c @@ -643,7 +643,6 @@ static int pcipcwd_notify_sys(struct notifier_block *this, unsigned long code, static const struct file_operations pcipcwd_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = pcipcwd_write, .unlocked_ioctl = pcipcwd_ioctl, .compat_ioctl = compat_ptr_ioctl, @@ -659,7 +658,6 @@ static struct miscdevice pcipcwd_miscdev = { static const struct file_operations pcipcwd_temp_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .read = pcipcwd_temp_read, .open = pcipcwd_temp_open, .release = pcipcwd_temp_release, diff --git a/drivers/watchdog/pcwd_usb.c b/drivers/watchdog/pcwd_usb.c index 8202f0a6b093..132699e2f247 100644 --- a/drivers/watchdog/pcwd_usb.c +++ b/drivers/watchdog/pcwd_usb.c @@ -549,7 +549,6 @@ static int usb_pcwd_notify_sys(struct notifier_block *this, unsigned long code, static const struct file_operations usb_pcwd_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = usb_pcwd_write, .unlocked_ioctl = usb_pcwd_ioctl, .compat_ioctl = compat_ptr_ioctl, @@ -565,7 +564,6 @@ static struct miscdevice usb_pcwd_miscdev = { static const struct file_operations usb_pcwd_temperature_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .read = usb_pcwd_temperature_read, .open = usb_pcwd_temperature_open, .release = usb_pcwd_temperature_release, diff --git a/drivers/watchdog/pika_wdt.c b/drivers/watchdog/pika_wdt.c index 782b8c23d99c..393aa4b1bc13 100644 --- a/drivers/watchdog/pika_wdt.c +++ b/drivers/watchdog/pika_wdt.c @@ -209,7 +209,6 @@ static long pikawdt_ioctl(struct file *file, static const struct file_operations pikawdt_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .open = pikawdt_open, .release = pikawdt_release, .write = pikawdt_write, diff --git a/drivers/watchdog/rc32434_wdt.c b/drivers/watchdog/rc32434_wdt.c index 417f9b75679c..efadbb9d7ce7 100644 --- a/drivers/watchdog/rc32434_wdt.c +++ b/drivers/watchdog/rc32434_wdt.c @@ -242,7 +242,6 @@ static long rc32434_wdt_ioctl(struct file *file, unsigned int cmd, static const struct file_operations rc32434_wdt_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = rc32434_wdt_write, .unlocked_ioctl = rc32434_wdt_ioctl, .compat_ioctl = compat_ptr_ioctl, diff --git a/drivers/watchdog/rdc321x_wdt.c b/drivers/watchdog/rdc321x_wdt.c index 6176f4343fc5..80490316a27f 100644 --- a/drivers/watchdog/rdc321x_wdt.c +++ b/drivers/watchdog/rdc321x_wdt.c @@ -197,7 +197,6 @@ static ssize_t rdc321x_wdt_write(struct file *file, const char __user *buf, static const struct file_operations rdc321x_wdt_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .unlocked_ioctl = rdc321x_wdt_ioctl, .compat_ioctl = compat_ptr_ioctl, .open = rdc321x_wdt_open, diff --git a/drivers/watchdog/riowd.c b/drivers/watchdog/riowd.c index b293792a292a..f47d90d01c19 100644 --- a/drivers/watchdog/riowd.c +++ b/drivers/watchdog/riowd.c @@ -160,7 +160,6 @@ static ssize_t riowd_write(struct file *file, const char __user *buf, static const struct file_operations riowd_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .unlocked_ioctl = riowd_ioctl, .compat_ioctl = compat_ptr_ioctl, .open = riowd_open, diff --git a/drivers/watchdog/sa1100_wdt.c b/drivers/watchdog/sa1100_wdt.c index 34a917221e31..6e91ee3fbfb5 100644 --- a/drivers/watchdog/sa1100_wdt.c +++ b/drivers/watchdog/sa1100_wdt.c @@ -164,7 +164,6 @@ static long sa1100dog_ioctl(struct file *file, unsigned int cmd, static const struct file_operations sa1100dog_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = sa1100dog_write, .unlocked_ioctl = sa1100dog_ioctl, .compat_ioctl = compat_ptr_ioctl, diff --git a/drivers/watchdog/sb_wdog.c b/drivers/watchdog/sb_wdog.c index 504be461f992..eaa68b54cf56 100644 --- a/drivers/watchdog/sb_wdog.c +++ b/drivers/watchdog/sb_wdog.c @@ -234,7 +234,6 @@ static int sbwdog_notify_sys(struct notifier_block *this, unsigned long code, static const struct file_operations sbwdog_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = sbwdog_write, .unlocked_ioctl = sbwdog_ioctl, .compat_ioctl = compat_ptr_ioctl, diff --git a/drivers/watchdog/sbc60xxwdt.c b/drivers/watchdog/sbc60xxwdt.c index 7b974802dfc7..e9bf12918ed8 100644 --- a/drivers/watchdog/sbc60xxwdt.c +++ b/drivers/watchdog/sbc60xxwdt.c @@ -275,7 +275,6 @@ static long fop_ioctl(struct file *file, unsigned int cmd, unsigned long arg) static const struct file_operations wdt_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = fop_write, .open = fop_open, .release = fop_close, diff --git a/drivers/watchdog/sbc7240_wdt.c b/drivers/watchdog/sbc7240_wdt.c index d640b26e18a6..21a1f0b32070 100644 --- a/drivers/watchdog/sbc7240_wdt.c +++ b/drivers/watchdog/sbc7240_wdt.c @@ -205,7 +205,6 @@ static long fop_ioctl(struct file *file, unsigned int cmd, unsigned long arg) static const struct file_operations wdt_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = fop_write, .open = fop_open, .release = fop_close, diff --git a/drivers/watchdog/sbc8360.c b/drivers/watchdog/sbc8360.c index 4f8b9912fc51..a9fd1615b4c3 100644 --- a/drivers/watchdog/sbc8360.c +++ b/drivers/watchdog/sbc8360.c @@ -301,7 +301,6 @@ static int sbc8360_notify_sys(struct notifier_block *this, unsigned long code, static const struct file_operations sbc8360_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = sbc8360_write, .open = sbc8360_open, .release = sbc8360_close, diff --git a/drivers/watchdog/sbc_epx_c3.c b/drivers/watchdog/sbc_epx_c3.c index 5e3a9ddb952e..1d291dc0a4a6 100644 --- a/drivers/watchdog/sbc_epx_c3.c +++ b/drivers/watchdog/sbc_epx_c3.c @@ -153,7 +153,6 @@ static int epx_c3_notify_sys(struct notifier_block *this, unsigned long code, static const struct file_operations epx_c3_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = epx_c3_write, .unlocked_ioctl = epx_c3_ioctl, .compat_ioctl = compat_ptr_ioctl, diff --git a/drivers/watchdog/sbc_fitpc2_wdt.c b/drivers/watchdog/sbc_fitpc2_wdt.c index b8eb8d5ca1af..ff9e44825423 100644 --- a/drivers/watchdog/sbc_fitpc2_wdt.c +++ b/drivers/watchdog/sbc_fitpc2_wdt.c @@ -181,7 +181,6 @@ static int fitpc2_wdt_release(struct inode *inode, struct file *file) static const struct file_operations fitpc2_wdt_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = fitpc2_wdt_write, .unlocked_ioctl = fitpc2_wdt_ioctl, .compat_ioctl = compat_ptr_ioctl, diff --git a/drivers/watchdog/sc1200wdt.c b/drivers/watchdog/sc1200wdt.c index f22ebe89fe13..76a58715f665 100644 --- a/drivers/watchdog/sc1200wdt.c +++ b/drivers/watchdog/sc1200wdt.c @@ -304,7 +304,6 @@ static struct notifier_block sc1200wdt_notifier = { static const struct file_operations sc1200wdt_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = sc1200wdt_write, .unlocked_ioctl = sc1200wdt_ioctl, .compat_ioctl = compat_ptr_ioctl, diff --git a/drivers/watchdog/sc520_wdt.c b/drivers/watchdog/sc520_wdt.c index ca65468f4b9c..e849e1af267b 100644 --- a/drivers/watchdog/sc520_wdt.c +++ b/drivers/watchdog/sc520_wdt.c @@ -331,7 +331,6 @@ static long fop_ioctl(struct file *file, unsigned int cmd, unsigned long arg) static const struct file_operations wdt_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = fop_write, .open = fop_open, .release = fop_close, diff --git a/drivers/watchdog/sch311x_wdt.c b/drivers/watchdog/sch311x_wdt.c index 409d49880170..76053158d259 100644 --- a/drivers/watchdog/sch311x_wdt.c +++ b/drivers/watchdog/sch311x_wdt.c @@ -334,7 +334,6 @@ static int sch311x_wdt_close(struct inode *inode, struct file *file) static const struct file_operations sch311x_wdt_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = sch311x_wdt_write, .unlocked_ioctl = sch311x_wdt_ioctl, .compat_ioctl = compat_ptr_ioctl, diff --git a/drivers/watchdog/scx200_wdt.c b/drivers/watchdog/scx200_wdt.c index 7b5e18323f3f..4dd8549e3674 100644 --- a/drivers/watchdog/scx200_wdt.c +++ b/drivers/watchdog/scx200_wdt.c @@ -198,7 +198,6 @@ static long scx200_wdt_ioctl(struct file *file, unsigned int cmd, static const struct file_operations scx200_wdt_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = scx200_wdt_write, .unlocked_ioctl = scx200_wdt_ioctl, .compat_ioctl = compat_ptr_ioctl, diff --git a/drivers/watchdog/smsc37b787_wdt.c b/drivers/watchdog/smsc37b787_wdt.c index 7463df479d11..97ca500ec8a8 100644 --- a/drivers/watchdog/smsc37b787_wdt.c +++ b/drivers/watchdog/smsc37b787_wdt.c @@ -502,7 +502,6 @@ static int wb_smsc_wdt_notify_sys(struct notifier_block *this, static const struct file_operations wb_smsc_wdt_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = wb_smsc_wdt_write, .unlocked_ioctl = wb_smsc_wdt_ioctl, .compat_ioctl = compat_ptr_ioctl, diff --git a/drivers/watchdog/w83877f_wdt.c b/drivers/watchdog/w83877f_wdt.c index f2650863fd02..1937084c182c 100644 --- a/drivers/watchdog/w83877f_wdt.c +++ b/drivers/watchdog/w83877f_wdt.c @@ -299,7 +299,6 @@ static long fop_ioctl(struct file *file, unsigned int cmd, unsigned long arg) static const struct file_operations wdt_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = fop_write, .open = fop_open, .release = fop_close, diff --git a/drivers/watchdog/w83977f_wdt.c b/drivers/watchdog/w83977f_wdt.c index 31bf21ceaf48..3776030fa7c6 100644 --- a/drivers/watchdog/w83977f_wdt.c +++ b/drivers/watchdog/w83977f_wdt.c @@ -443,7 +443,6 @@ static int wdt_notify_sys(struct notifier_block *this, unsigned long code, static const struct file_operations wdt_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = wdt_write, .unlocked_ioctl = wdt_ioctl, .compat_ioctl = compat_ptr_ioctl, diff --git a/drivers/watchdog/wafer5823wdt.c b/drivers/watchdog/wafer5823wdt.c index a8a1ed215e1e..291109349e73 100644 --- a/drivers/watchdog/wafer5823wdt.c +++ b/drivers/watchdog/wafer5823wdt.c @@ -227,7 +227,6 @@ static int wafwdt_notify_sys(struct notifier_block *this, unsigned long code, static const struct file_operations wafwdt_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = wafwdt_write, .unlocked_ioctl = wafwdt_ioctl, .compat_ioctl = compat_ptr_ioctl, diff --git a/drivers/watchdog/wdrtas.c b/drivers/watchdog/wdrtas.c index c00627825de8..d4fe0bc82211 100644 --- a/drivers/watchdog/wdrtas.c +++ b/drivers/watchdog/wdrtas.c @@ -469,7 +469,6 @@ static int wdrtas_reboot(struct notifier_block *this, static const struct file_operations wdrtas_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = wdrtas_write, .unlocked_ioctl = wdrtas_ioctl, .compat_ioctl = compat_ptr_ioctl, @@ -485,7 +484,6 @@ static struct miscdevice wdrtas_miscdev = { static const struct file_operations wdrtas_temp_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .read = wdrtas_temp_read, .open = wdrtas_temp_open, .release = wdrtas_temp_close, diff --git a/drivers/watchdog/wdt.c b/drivers/watchdog/wdt.c index 183876156243..3980d60bacd8 100644 --- a/drivers/watchdog/wdt.c +++ b/drivers/watchdog/wdt.c @@ -520,7 +520,6 @@ static int wdt_notify_sys(struct notifier_block *this, unsigned long code, static const struct file_operations wdt_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = wdt_write, .unlocked_ioctl = wdt_ioctl, .compat_ioctl = compat_ptr_ioctl, @@ -536,7 +535,6 @@ static struct miscdevice wdt_miscdev = { static const struct file_operations wdt_temp_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .read = wdt_temp_read, .open = wdt_temp_open, .release = wdt_temp_release, diff --git a/drivers/watchdog/wdt285.c b/drivers/watchdog/wdt285.c index 5b7be7a62d54..78681d9f7d53 100644 --- a/drivers/watchdog/wdt285.c +++ b/drivers/watchdog/wdt285.c @@ -178,7 +178,6 @@ static long watchdog_ioctl(struct file *file, unsigned int cmd, static const struct file_operations watchdog_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = watchdog_write, .unlocked_ioctl = watchdog_ioctl, .compat_ioctl = compat_ptr_ioctl, diff --git a/drivers/watchdog/wdt977.c b/drivers/watchdog/wdt977.c index c9b8e863f70f..4f449ac4dda4 100644 --- a/drivers/watchdog/wdt977.c +++ b/drivers/watchdog/wdt977.c @@ -419,7 +419,6 @@ static int wdt977_notify_sys(struct notifier_block *this, unsigned long code, static const struct file_operations wdt977_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = wdt977_write, .unlocked_ioctl = wdt977_ioctl, .compat_ioctl = compat_ptr_ioctl, diff --git a/drivers/watchdog/wdt_pci.c b/drivers/watchdog/wdt_pci.c index d5e56b601351..dc5f29560e9b 100644 --- a/drivers/watchdog/wdt_pci.c +++ b/drivers/watchdog/wdt_pci.c @@ -563,7 +563,6 @@ static int wdtpci_notify_sys(struct notifier_block *this, unsigned long code, static const struct file_operations wdtpci_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = wdtpci_write, .unlocked_ioctl = wdtpci_ioctl, .compat_ioctl = compat_ptr_ioctl, @@ -579,7 +578,6 @@ static struct miscdevice wdtpci_miscdev = { static const struct file_operations wdtpci_temp_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .read = wdtpci_temp_read, .open = wdtpci_temp_open, .release = wdtpci_temp_release, diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index 9b7fcc7dbb38..7e4a13e632dc 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -694,7 +694,6 @@ static const struct file_operations evtchn_fops = { .fasync = evtchn_fasync, .open = evtchn_open, .release = evtchn_release, - .llseek = no_llseek, }; static struct miscdevice evtchn_miscdev = { diff --git a/drivers/xen/mcelog.c b/drivers/xen/mcelog.c index e9ac3b8c4167..4f65b641c054 100644 --- a/drivers/xen/mcelog.c +++ b/drivers/xen/mcelog.c @@ -182,7 +182,6 @@ static const struct file_operations xen_mce_chrdev_ops = { .read = xen_mce_chrdev_read, .poll = xen_mce_chrdev_poll, .unlocked_ioctl = xen_mce_chrdev_ioctl, - .llseek = no_llseek, }; static struct miscdevice xen_mce_chrdev_device = { diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index 6f56640092a9..46f8916597e5 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -700,7 +700,6 @@ const struct file_operations xen_xenbus_fops = { .open = xenbus_file_open, .release = xenbus_file_release, .poll = xenbus_file_poll, - .llseek = no_llseek, }; EXPORT_SYMBOL_GPL(xen_xenbus_fops); diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index ef1f74866e23..cbfd88f98472 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -471,7 +471,6 @@ static ssize_t bch2_data_job_read(struct file *file, char __user *buf, static const struct file_operations bcachefs_data_ops = { .release = bch2_data_job_release, .read = bch2_data_job_read, - .llseek = no_llseek, }; static long bch2_ioctl_data(struct bch_fs *c, diff --git a/fs/bcachefs/thread_with_file.c b/fs/bcachefs/thread_with_file.c index fb3442a7c67f..dea73bc1cb51 100644 --- a/fs/bcachefs/thread_with_file.c +++ b/fs/bcachefs/thread_with_file.c @@ -275,7 +275,6 @@ static long thread_with_stdio_ioctl(struct file *file, unsigned int cmd, unsigne } static const struct file_operations thread_with_stdio_fops = { - .llseek = no_llseek, .read = thread_with_stdio_read, .write = thread_with_stdio_write, .poll = thread_with_stdio_poll, @@ -285,7 +284,6 @@ static const struct file_operations thread_with_stdio_fops = { }; static const struct file_operations thread_with_stdout_fops = { - .llseek = no_llseek, .read = thread_with_stdio_read, .poll = thread_with_stdout_poll, .flush = thread_with_stdio_flush, diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index c6f4a9a98b85..67299e8b734e 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -1218,7 +1218,6 @@ static const struct file_operations u32_array_fops = { .open = u32_array_open, .release = u32_array_release, .read = u32_array_read, - .llseek = no_llseek, }; /** diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c index 7112958c2e5b..700a0cbb2f14 100644 --- a/fs/dlm/debug_fs.c +++ b/fs/dlm/debug_fs.c @@ -733,7 +733,6 @@ static ssize_t dlm_rawmsg_write(struct file *fp, const char __user *user_buf, static const struct file_operations dlm_rawmsg_fops = { .open = simple_open, .write = dlm_rawmsg_write, - .llseek = no_llseek, }; void *dlm_create_debug_comms_file(int nodeid, void *data) diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c index 7e9961639802..23c51d62f902 100644 --- a/fs/efivarfs/file.c +++ b/fs/efivarfs/file.c @@ -110,5 +110,4 @@ const struct file_operations efivarfs_file_operations = { .open = simple_open, .read = efivarfs_file_read, .write = efivarfs_file_write, - .llseek = no_llseek, }; diff --git a/fs/fsopen.c b/fs/fsopen.c index ee92ca58429e..6cef3deccded 100644 --- a/fs/fsopen.c +++ b/fs/fsopen.c @@ -78,7 +78,6 @@ static int fscontext_release(struct inode *inode, struct file *file) const struct file_operations fscontext_fops = { .read = fscontext_read, .release = fscontext_release, - .llseek = no_llseek, }; /* diff --git a/fs/fuse/control.c b/fs/fuse/control.c index 97ac994ff78f..2a730d88cc3b 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -183,27 +183,23 @@ static ssize_t fuse_conn_congestion_threshold_write(struct file *file, static const struct file_operations fuse_ctl_abort_ops = { .open = nonseekable_open, .write = fuse_conn_abort_write, - .llseek = no_llseek, }; static const struct file_operations fuse_ctl_waiting_ops = { .open = nonseekable_open, .read = fuse_conn_waiting_read, - .llseek = no_llseek, }; static const struct file_operations fuse_conn_max_background_ops = { .open = nonseekable_open, .read = fuse_conn_max_background_read, .write = fuse_conn_max_background_write, - .llseek = no_llseek, }; static const struct file_operations fuse_conn_congestion_threshold_ops = { .open = nonseekable_open, .read = fuse_conn_congestion_threshold_read, .write = fuse_conn_congestion_threshold_write, - .llseek = no_llseek, }; static struct dentry *fuse_ctl_add_dentry(struct dentry *parent, diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 46ed30a4e0fc..1f64ae6d7a69 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -2456,7 +2456,6 @@ static long fuse_dev_ioctl(struct file *file, unsigned int cmd, const struct file_operations fuse_dev_operations = { .owner = THIS_MODULE, .open = fuse_dev_open, - .llseek = no_llseek, .read_iter = fuse_dev_read, .splice_read = fuse_dev_splice_read, .write_iter = fuse_dev_write, diff --git a/fs/nsfs.c b/fs/nsfs.c index 67ee176b8824..c675fc40ce2d 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -22,7 +22,6 @@ static struct vfsmount *nsfs_mnt; static long ns_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); static const struct file_operations ns_file_operations = { - .llseek = no_llseek, .unlocked_ioctl = ns_ioctl, .compat_ioctl = compat_ptr_ioctl, }; diff --git a/fs/pipe.c b/fs/pipe.c index 4083ba492cb6..12b22c2723b7 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1231,7 +1231,6 @@ static int fifo_open(struct inode *inode, struct file *filp) const struct file_operations pipefifo_fops = { .open = fifo_open, - .llseek = no_llseek, .read_iter = pipe_read, .write_iter = pipe_write, .poll = pipe_poll, diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index d91cec93d968..5cc69beaa62e 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -2807,7 +2807,6 @@ static const struct file_operations dfs_fops = { .read = dfs_file_read, .write = dfs_file_write, .owner = THIS_MODULE, - .llseek = no_llseek, }; /** @@ -2952,7 +2951,6 @@ static const struct file_operations dfs_global_fops = { .read = dfs_global_file_read, .write = dfs_global_file_write, .owner = THIS_MODULE, - .llseek = no_llseek, }; /** diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index c9c65b132c0f..0928a6c8ae1e 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -57,7 +57,6 @@ static const struct file_operations __fops = { \ .release = simple_attr_release, \ .read = debugfs_attr_read, \ .write = (__is_signed) ? debugfs_attr_write_signed : debugfs_attr_write, \ - .llseek = no_llseek, \ } #define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt) \ diff --git a/include/linux/fs.h b/include/linux/fs.h index eae5b67e4a15..e3c603d01337 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3234,7 +3234,6 @@ extern ssize_t iter_file_splice_write(struct pipe_inode_info *, extern void file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); extern loff_t noop_llseek(struct file *file, loff_t offset, int whence); -#define no_llseek NULL extern loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize); extern loff_t generic_file_llseek(struct file *file, loff_t offset, int whence); extern loff_t generic_file_llseek_size(struct file *file, loff_t offset, diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index 112581cf97e7..106735145948 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -283,7 +283,6 @@ static int iter_release(struct inode *inode, struct file *file) const struct file_operations bpf_iter_fops = { .open = iter_open, - .llseek = no_llseek, .read = bpf_seq_read, .release = iter_release, }; diff --git a/kernel/events/core.c b/kernel/events/core.c index 5a8071c45c80..e3589c4287cb 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6821,7 +6821,6 @@ static int perf_fasync(int fd, struct file *filp, int on) } static const struct file_operations perf_fops = { - .llseek = no_llseek, .release = perf_release, .read = perf_read, .poll = perf_poll, diff --git a/kernel/power/user.c b/kernel/power/user.c index 3aa41ba22129..3f9e3efb9f6e 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -447,7 +447,6 @@ static const struct file_operations snapshot_fops = { .release = snapshot_release, .read = snapshot_read, .write = snapshot_write, - .llseek = no_llseek, .unlocked_ioctl = snapshot_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = snapshot_compat_ioctl, diff --git a/kernel/relay.c b/kernel/relay.c index a8e90e98bf2c..a8ae436dc77e 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -1079,7 +1079,6 @@ const struct file_operations relay_file_operations = { .poll = relay_file_poll, .mmap = relay_file_mmap, .read = relay_file_read, - .llseek = no_llseek, .release = relay_file_release, }; EXPORT_SYMBOL_GPL(relay_file_operations); diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c index 4782edcbe7b9..c2f3d0c490d5 100644 --- a/kernel/time/posix-clock.c +++ b/kernel/time/posix-clock.c @@ -168,7 +168,6 @@ static int posix_clock_release(struct inode *inode, struct file *fp) static const struct file_operations posix_clock_file_operations = { .owner = THIS_MODULE, - .llseek = no_llseek, .read = posix_clock_read, .poll = posix_clock_poll, .unlocked_ioctl = posix_clock_ioctl, diff --git a/kernel/trace/rv/rv.c b/kernel/trace/rv/rv.c index df0745a42a3f..dc819aec43e8 100644 --- a/kernel/trace/rv/rv.c +++ b/kernel/trace/rv/rv.c @@ -306,7 +306,6 @@ static ssize_t monitor_enable_write_data(struct file *filp, const char __user *u static const struct file_operations interface_enable_fops = { .open = simple_open, - .llseek = no_llseek, .write = monitor_enable_write_data, .read = monitor_enable_read_data, }; @@ -329,7 +328,6 @@ static ssize_t monitor_desc_read_data(struct file *filp, char __user *user_buf, static const struct file_operations interface_desc_fops = { .open = simple_open, - .llseek = no_llseek, .read = monitor_desc_read_data, }; @@ -674,7 +672,6 @@ static ssize_t monitoring_on_write_data(struct file *filp, const char __user *us static const struct file_operations monitoring_on_fops = { .open = simple_open, - .llseek = no_llseek, .write = monitoring_on_write_data, .read = monitoring_on_read_data, }; diff --git a/kernel/trace/rv/rv_reactors.c b/kernel/trace/rv/rv_reactors.c index 6aae106695b6..7b49cbe388d4 100644 --- a/kernel/trace/rv/rv_reactors.c +++ b/kernel/trace/rv/rv_reactors.c @@ -426,7 +426,6 @@ static ssize_t reacting_on_write_data(struct file *filp, const char __user *user static const struct file_operations reacting_on_fops = { .open = simple_open, - .llseek = no_llseek, .write = reacting_on_write_data, .read = reacting_on_read_data, }; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b4f348b4653f..c01375adc471 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7557,7 +7557,6 @@ static const struct file_operations tracing_pipe_fops = { .read = tracing_read_pipe, .splice_read = tracing_splice_read_pipe, .release = tracing_release_pipe, - .llseek = no_llseek, }; static const struct file_operations tracing_entries_fops = { @@ -7636,7 +7635,6 @@ static const struct file_operations snapshot_raw_fops = { .read = tracing_buffers_read, .release = tracing_buffers_release, .splice_read = tracing_buffers_splice_read, - .llseek = no_llseek, }; #endif /* CONFIG_TRACER_SNAPSHOT */ @@ -8466,7 +8464,6 @@ static const struct file_operations tracing_buffers_fops = { .flush = tracing_buffers_flush, .splice_read = tracing_buffers_splice_read, .unlocked_ioctl = tracing_buffers_ioctl, - .llseek = no_llseek, .mmap = tracing_buffers_mmap, }; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 0580ac9e47b9..3ca89e0279a7 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -4115,7 +4115,6 @@ static ssize_t split_huge_pages_write(struct file *file, const char __user *buf, static const struct file_operations split_huge_pages_fops = { .owner = THIS_MODULE, .write = split_huge_pages_write, - .llseek = no_llseek, }; static int __init split_huge_pages_debugfs(void) diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c index 25b8a67a63a4..85149c774505 100644 --- a/net/mac80211/rc80211_minstrel_ht_debugfs.c +++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c @@ -187,7 +187,6 @@ static const struct file_operations minstrel_ht_stat_fops = { .open = minstrel_ht_stats_open, .read = minstrel_stats_read, .release = minstrel_stats_release, - .llseek = no_llseek, }; static char * @@ -323,7 +322,6 @@ static const struct file_operations minstrel_ht_stat_csv_fops = { .open = minstrel_ht_stats_csv_open, .read = minstrel_stats_read, .release = minstrel_stats_release, - .llseek = no_llseek, }; void diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 13a5126bc36e..7d3e82e4c2fc 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -1394,7 +1394,6 @@ static const struct file_operations rfkill_fops = { .release = rfkill_fop_release, .unlocked_ioctl = rfkill_fop_ioctl, .compat_ioctl = compat_ptr_ioctl, - .llseek = no_llseek, }; #define RFKILL_NAME "rfkill" diff --git a/net/socket.c b/net/socket.c index 7b046dd3e9a7..601ad74930ef 100644 --- a/net/socket.c +++ b/net/socket.c @@ -153,7 +153,6 @@ static void sock_show_fdinfo(struct seq_file *m, struct file *f) static const struct file_operations socket_file_ops = { .owner = THIS_MODULE, - .llseek = no_llseek, .read_iter = sock_read_iter, .write_iter = sock_write_iter, .poll = sock_poll, diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 4f31e73dc34d..1bd3e531b0e0 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1592,7 +1592,6 @@ static int cache_release_procfs(struct inode *inode, struct file *filp) } static const struct proc_ops cache_channel_proc_ops = { - .proc_lseek = no_llseek, .proc_read = cache_read_procfs, .proc_write = cache_write_procfs, .proc_poll = cache_poll_procfs, @@ -1658,7 +1657,6 @@ static const struct proc_ops cache_flush_proc_ops = { .proc_read = read_flush_procfs, .proc_write = write_flush_procfs, .proc_release = release_flush_procfs, - .proc_lseek = no_llseek, }; static void remove_cache_proc_entries(struct cache_detail *cd) @@ -1811,7 +1809,6 @@ static int cache_release_pipefs(struct inode *inode, struct file *filp) const struct file_operations cache_file_operations_pipefs = { .owner = THIS_MODULE, - .llseek = no_llseek, .read = cache_read_pipefs, .write = cache_write_pipefs, .poll = cache_poll_pipefs, @@ -1877,7 +1874,6 @@ const struct file_operations cache_flush_operations_pipefs = { .read = read_flush_pipefs, .write = write_flush_pipefs, .release = release_flush_pipefs, - .llseek = no_llseek, }; int sunrpc_cache_register_pipefs(struct dentry *parent, diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 910a5d850d04..7ce3721c06ca 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -385,7 +385,6 @@ rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) static const struct file_operations rpc_pipe_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .read = rpc_pipe_read, .write = rpc_pipe_write, .poll = rpc_pipe_poll, diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c index b382c696c877..59eefe2fed10 100644 --- a/samples/vfio-mdev/mtty.c +++ b/samples/vfio-mdev/mtty.c @@ -927,7 +927,6 @@ static const struct file_operations mtty_save_fops = { .unlocked_ioctl = mtty_precopy_ioctl, .compat_ioctl = compat_ptr_ioctl, .release = mtty_release_migf, - .llseek = no_llseek, }; static void mtty_save_state(struct mdev_state *mdev_state) @@ -1082,7 +1081,6 @@ static const struct file_operations mtty_resume_fops = { .owner = THIS_MODULE, .write = mtty_resume_write, .release = mtty_release_migf, - .llseek = no_llseek, }; static struct mtty_migration_file * diff --git a/scripts/coccinelle/api/stream_open.cocci b/scripts/coccinelle/api/stream_open.cocci index df00d6619b06..50ab60c81f13 100644 --- a/scripts/coccinelle/api/stream_open.cocci +++ b/scripts/coccinelle/api/stream_open.cocci @@ -131,7 +131,6 @@ identifier llseek_f; identifier fops0.fops; @@ struct file_operations fops = { - .llseek = no_llseek, }; @ has_noop_llseek @ diff --git a/sound/core/control.c b/sound/core/control.c index 4f55f64c42e1..2f790a7b1e90 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -2267,7 +2267,6 @@ static const struct file_operations snd_ctl_f_ops = .read = snd_ctl_read, .open = snd_ctl_open, .release = snd_ctl_release, - .llseek = no_llseek, .poll = snd_ctl_poll, .unlocked_ioctl = snd_ctl_ioctl, .compat_ioctl = snd_ctl_ioctl_compat, diff --git a/sound/core/oss/mixer_oss.c b/sound/core/oss/mixer_oss.c index 33bf9a220ada..668604d0ec9d 100644 --- a/sound/core/oss/mixer_oss.c +++ b/sound/core/oss/mixer_oss.c @@ -412,7 +412,6 @@ static const struct file_operations snd_mixer_oss_f_ops = .owner = THIS_MODULE, .open = snd_mixer_oss_open, .release = snd_mixer_oss_release, - .llseek = no_llseek, .unlocked_ioctl = snd_mixer_oss_ioctl, .compat_ioctl = snd_mixer_oss_ioctl_compat, }; diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index 7386982cf40e..4683b9139c56 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -3106,7 +3106,6 @@ static const struct file_operations snd_pcm_oss_f_reg = .write = snd_pcm_oss_write, .open = snd_pcm_oss_open, .release = snd_pcm_oss_release, - .llseek = no_llseek, .poll = snd_pcm_oss_poll, .unlocked_ioctl = snd_pcm_oss_ioctl, .compat_ioctl = snd_pcm_oss_ioctl_compat, diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 99e39b5359cc..5b9076829ade 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -4115,7 +4115,6 @@ const struct file_operations snd_pcm_f_ops[2] = { .write_iter = snd_pcm_writev, .open = snd_pcm_playback_open, .release = snd_pcm_release, - .llseek = no_llseek, .poll = snd_pcm_poll, .unlocked_ioctl = snd_pcm_ioctl, .compat_ioctl = snd_pcm_ioctl_compat, @@ -4129,7 +4128,6 @@ const struct file_operations snd_pcm_f_ops[2] = { .read_iter = snd_pcm_readv, .open = snd_pcm_capture_open, .release = snd_pcm_release, - .llseek = no_llseek, .poll = snd_pcm_poll, .unlocked_ioctl = snd_pcm_ioctl, .compat_ioctl = snd_pcm_ioctl_compat, diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index 7accf9a1ddf4..03306be5fa02 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -1784,7 +1784,6 @@ static const struct file_operations snd_rawmidi_f_ops = { .write = snd_rawmidi_write, .open = snd_rawmidi_open, .release = snd_rawmidi_release, - .llseek = no_llseek, .poll = snd_rawmidi_poll, .unlocked_ioctl = snd_rawmidi_ioctl, .compat_ioctl = snd_rawmidi_ioctl_compat, diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index 6437193e42bf..3930e2f9082f 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -2722,7 +2722,6 @@ static const struct file_operations snd_seq_f_ops = .write = snd_seq_write, .open = snd_seq_open, .release = snd_seq_release, - .llseek = no_llseek, .poll = snd_seq_poll, .unlocked_ioctl = snd_seq_ioctl, .compat_ioctl = snd_seq_ioctl_compat, diff --git a/sound/core/timer.c b/sound/core/timer.c index 668c40bac318..fbada79380f9 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -2436,7 +2436,6 @@ static const struct file_operations snd_timer_f_ops = .read = snd_timer_user_read, .open = snd_timer_user_open, .release = snd_timer_user_release, - .llseek = no_llseek, .poll = snd_timer_user_poll, .unlocked_ioctl = snd_timer_user_ioctl, .compat_ioctl = snd_timer_user_ioctl_compat, diff --git a/sound/oss/dmasound/dmasound_core.c b/sound/oss/dmasound/dmasound_core.c index 4b1baf4dd50e..dea2d9b18fc9 100644 --- a/sound/oss/dmasound/dmasound_core.c +++ b/sound/oss/dmasound/dmasound_core.c @@ -381,7 +381,6 @@ static long mixer_unlocked_ioctl(struct file *file, u_int cmd, u_long arg) static const struct file_operations mixer_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .unlocked_ioctl = mixer_unlocked_ioctl, .compat_ioctl = compat_ptr_ioctl, .open = mixer_open, @@ -1155,7 +1154,6 @@ static long sq_unlocked_ioctl(struct file *file, u_int cmd, u_long arg) static const struct file_operations sq_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = sq_write, .poll = sq_poll, .unlocked_ioctl = sq_unlocked_ioctl, @@ -1351,7 +1349,6 @@ static ssize_t state_read(struct file *file, char __user *buf, size_t count, static const struct file_operations state_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .read = state_read, .open = state_open, .release = state_release, diff --git a/sound/soc/intel/avs/debugfs.c b/sound/soc/intel/avs/debugfs.c index 3fc2bbb63369..1767ded4d983 100644 --- a/sound/soc/intel/avs/debugfs.c +++ b/sound/soc/intel/avs/debugfs.c @@ -68,7 +68,6 @@ static ssize_t fw_regs_read(struct file *file, char __user *to, size_t count, lo static const struct file_operations fw_regs_fops = { .open = simple_open, .read = fw_regs_read, - .llseek = no_llseek, }; static ssize_t debug_window_read(struct file *file, char __user *to, size_t count, loff_t *ppos) @@ -93,7 +92,6 @@ static ssize_t debug_window_read(struct file *file, char __user *to, size_t coun static const struct file_operations debug_window_fops = { .open = simple_open, .read = debug_window_read, - .llseek = no_llseek, }; static ssize_t probe_points_read(struct file *file, char __user *to, size_t count, loff_t *ppos) @@ -170,7 +168,6 @@ static const struct file_operations probe_points_fops = { .open = simple_open, .read = probe_points_read, .write = probe_points_write, - .llseek = no_llseek, }; static ssize_t probe_points_disconnect_write(struct file *file, const char __user *from, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f416d5e3f9c0..4f81366f8b61 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -6186,7 +6186,6 @@ static const struct file_operations stat_fops_per_vm = { .release = kvm_debugfs_release, .read = simple_attr_read, .write = simple_attr_write, - .llseek = no_llseek, }; static int vm_stat_get(void *_offset, u64 *val) From efbc6bd090f48ccf64f7a8dd5daea775821d57ec Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 27 Sep 2024 11:45:45 -0400 Subject: [PATCH 541/655] Documentation: KVM: fix warning in "make htmldocs" The warning Documentation/virt/kvm/locking.rst:31: ERROR: Unexpected indentation. is caused by incorrectly treating a line as the continuation of a paragraph, rather than as the first line in a bullet list. Fixed: 44d174596260 ("KVM: Use dedicated mutex to protect kvm_usage_count to avoid deadlock") Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/locking.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/virt/kvm/locking.rst b/Documentation/virt/kvm/locking.rst index be3c323888b1..20a9a37d1cdd 100644 --- a/Documentation/virt/kvm/locking.rst +++ b/Documentation/virt/kvm/locking.rst @@ -27,6 +27,7 @@ The acquisition orders for mutexes are as follows: must not take either kvm->slots_lock or kvm->slots_arch_lock. cpus_read_lock() vs kvm_lock: + - Taking cpus_read_lock() outside of kvm_lock is problematic, despite that being the official ordering, as it is quite easy to unknowingly trigger cpus_read_lock() while holding kvm_lock. Use caution when walking vm_list, From da6ef2dffe6056aad3435e6cf7c6471c2a62187c Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 29 Aug 2024 16:34:09 +0800 Subject: [PATCH 542/655] cachefiles: fix dentry leak in cachefiles_open_file() A dentry leak may be caused when a lookup cookie and a cull are concurrent: P1 | P2 ----------------------------------------------------------- cachefiles_lookup_cookie cachefiles_look_up_object lookup_one_positive_unlocked // get dentry cachefiles_cull inode->i_flags |= S_KERNEL_FILE; cachefiles_open_file cachefiles_mark_inode_in_use __cachefiles_mark_inode_in_use can_use = false if (!(inode->i_flags & S_KERNEL_FILE)) can_use = true return false return false // Returns an error but doesn't put dentry After that the following WARNING will be triggered when the backend folder is umounted: ================================================================== BUG: Dentry 000000008ad87947{i=7a,n=Dx_1_1.img} still in use (1) [unmount of ext4 sda] WARNING: CPU: 4 PID: 359261 at fs/dcache.c:1767 umount_check+0x5d/0x70 CPU: 4 PID: 359261 Comm: umount Not tainted 6.6.0-dirty #25 RIP: 0010:umount_check+0x5d/0x70 Call Trace: d_walk+0xda/0x2b0 do_one_tree+0x20/0x40 shrink_dcache_for_umount+0x2c/0x90 generic_shutdown_super+0x20/0x160 kill_block_super+0x1a/0x40 ext4_kill_sb+0x22/0x40 deactivate_locked_super+0x35/0x80 cleanup_mnt+0x104/0x160 ================================================================== Whether cachefiles_open_file() returns true or false, the reference count obtained by lookup_positive_unlocked() in cachefiles_look_up_object() should be released. Therefore release that reference count in cachefiles_look_up_object() to fix the above issue and simplify the code. Fixes: 1f08c925e7a3 ("cachefiles: Implement backing file wrangling") Cc: stable@kernel.org Signed-off-by: Baokun Li Link: https://lore.kernel.org/r/20240829083409.3788142-1-libaokun@huaweicloud.com Acked-by: David Howells Signed-off-by: Christian Brauner --- fs/cachefiles/namei.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index f53977169db4..2b3f9935dbb4 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -595,14 +595,12 @@ static bool cachefiles_open_file(struct cachefiles_object *object, * write and readdir but not lookup or open). */ touch_atime(&file->f_path); - dput(dentry); return true; check_failed: fscache_cookie_lookup_negative(object->cookie); cachefiles_unmark_inode_in_use(object, file); fput(file); - dput(dentry); if (ret == -ESTALE) return cachefiles_create_file(object); return false; @@ -611,7 +609,6 @@ static bool cachefiles_open_file(struct cachefiles_object *object, fput(file); error: cachefiles_do_unmark_inode_in_use(object, d_inode(dentry)); - dput(dentry); return false; } @@ -654,7 +651,9 @@ bool cachefiles_look_up_object(struct cachefiles_object *object) goto new_file; } - if (!cachefiles_open_file(object, dentry)) + ret = cachefiles_open_file(object, dentry); + dput(dentry); + if (!ret) return false; _leave(" = t [%lu]", file_inode(object->file)->i_ino); From 2cf36327ee1e47733aba96092d7bd082a4056ff5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 14 Sep 2024 21:40:02 +0100 Subject: [PATCH 543/655] afs: Fix missing wire-up of afs_retry_request() afs_retry_request() is supposed to be pointed to by the afs_req_ops netfs operations table, but the pointer got lost somewhere. The function is used during writeback to rotate through the authentication keys that were in force when the file was modified locally. Fix this by adding the pointer to the function. Fixes: 1ecb146f7cd8 ("netfs, afs: Use writeback retry to deal with alternate keys") Reported-by: Dr. David Alan Gilbert Signed-off-by: David Howells Link: https://lore.kernel.org/r/1690847.1726346402@warthog.procyon.org.uk cc: Marc Dionne cc: Jeff Layton cc: linux-afs@lists.infradead.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/afs/file.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/afs/file.c b/fs/afs/file.c index 492d857a3fa0..6762eff97517 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -420,6 +420,7 @@ const struct netfs_request_ops afs_req_ops = { .begin_writeback = afs_begin_writeback, .prepare_write = afs_prepare_write, .issue_write = afs_issue_write, + .retry_request = afs_retry_request, }; static void afs_add_open_mmap(struct afs_vnode *vnode) From 8a46067783bdff222d1fb8f8c20e3b7b711e3ce5 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 26 Sep 2024 18:51:46 +0200 Subject: [PATCH 544/655] pidfs: check for valid pid namespace When we access a no-current task's pid namespace we need check that the task hasn't been reaped in the meantime and it's pid namespace isn't accessible anymore. The user namespace is fine because it is only released when the last reference to struct task_struct is put and exit_creds() is called. Link: https://lore.kernel.org/r/20240926-klebt-altgedienten-0415ad4d273c@brauner Fixes: 5b08bd408534 ("pidfs: allow retrieval of namespace file descriptors") CC: stable@vger.kernel.org # v6.11 Signed-off-by: Christian Brauner --- fs/pidfs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/pidfs.c b/fs/pidfs.c index 7ffdc88dfb52..80675b6bf884 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -120,6 +120,7 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct nsproxy *nsp __free(put_nsproxy) = NULL; struct pid *pid = pidfd_pid(file); struct ns_common *ns_common = NULL; + struct pid_namespace *pid_ns; if (arg) return -EINVAL; @@ -202,7 +203,9 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case PIDFD_GET_PID_NAMESPACE: if (IS_ENABLED(CONFIG_PID_NS)) { rcu_read_lock(); - ns_common = to_ns_common( get_pid_ns(task_active_pid_ns(task))); + pid_ns = task_active_pid_ns(task); + if (pid_ns) + ns_common = to_ns_common(get_pid_ns(pid_ns)); rcu_read_unlock(); } break; From f94d54208f25dc93f3bcae9e725a582380a503b1 Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Mon, 23 Sep 2024 16:07:49 +0100 Subject: [PATCH 545/655] afs: Fix possible infinite loop with unresponsive servers A return code of 0 from afs_wait_for_one_fs_probe is an indication that the endpoint state attached to the operation is stale and has been superseded. In that case the iteration needs to be restarted so that the newer probe result state gets used. Failure to do so can result in an tight infinite loop around the iterate_address label, where all addresses are thought to be responsive and have been tried, with nothing to refresh the endpoint state. Fixes: 495f2ae9e355 ("afs: Fix fileserver rotation") Reported-by: Markus Suvanto Link: https://lists.infradead.org/pipermail/linux-afs/2024-July/008628.html cc: linux-afs@lists.infradead.org Signed-off-by: Marc Dionne Signed-off-by: David Howells Link: https://lore.kernel.org/r/20240906134019.131553-1-marc.dionne@auristor.com/ Link: https://lore.kernel.org/r/20240923150756.902363-6-dhowells@redhat.com Signed-off-by: Christian Brauner --- fs/afs/fs_probe.c | 4 ++-- fs/afs/rotate.c | 11 ++++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c index 580de4adaaf6..b516d05b0fef 100644 --- a/fs/afs/fs_probe.c +++ b/fs/afs/fs_probe.c @@ -506,10 +506,10 @@ int afs_wait_for_one_fs_probe(struct afs_server *server, struct afs_endpoint_sta finish_wait(&server->probe_wq, &wait); dont_wait: - if (estate->responsive_set & ~exclude) - return 1; if (test_bit(AFS_ESTATE_SUPERSEDED, &estate->flags)) return 0; + if (estate->responsive_set & ~exclude) + return 1; if (is_intr && signal_pending(current)) return -ERESTARTSYS; if (timo == 0) diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c index ed09d4d4c211..d612983d6f38 100644 --- a/fs/afs/rotate.c +++ b/fs/afs/rotate.c @@ -632,8 +632,10 @@ bool afs_select_fileserver(struct afs_operation *op) wait_for_more_probe_results: error = afs_wait_for_one_fs_probe(op->server, op->estate, op->addr_tried, !(op->flags & AFS_OPERATION_UNINTR)); - if (!error) + if (error == 1) goto iterate_address; + if (!error) + goto restart_from_beginning; /* We've now had a failure to respond on all of a server's addresses - * immediately probe them again and consider retrying the server. @@ -644,10 +646,13 @@ bool afs_select_fileserver(struct afs_operation *op) error = afs_wait_for_one_fs_probe(op->server, op->estate, op->addr_tried, !(op->flags & AFS_OPERATION_UNINTR)); switch (error) { - case 0: + case 1: op->flags &= ~AFS_OPERATION_RETRY_SERVER; - trace_afs_rotate(op, afs_rotate_trace_retry_server, 0); + trace_afs_rotate(op, afs_rotate_trace_retry_server, 1); goto retry_server; + case 0: + trace_afs_rotate(op, afs_rotate_trace_retry_server, 0); + goto restart_from_beginning; case -ERESTARTSYS: afs_op_set_error(op, error); goto failed; From 19dcfb9c1685d45ba96852e021415134865b0a95 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Mon, 23 Sep 2024 16:07:48 +0100 Subject: [PATCH 546/655] afs: Remove unused struct and function prototype The struct afs_address_list and the function prototype afs_put_address_list() are not used anymore and can be removed. Remove them. Signed-off-by: Thorsten Blum Signed-off-by: David Howells Link: https://lore.kernel.org/r/20240911095046.3749-2-thorsten.blum@toblux.com/ Link: https://lore.kernel.org/r/20240923150756.902363-5-dhowells@redhat.com cc: Marc Dionne cc: linux-afs@lists.infradead.org cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/afs/afs_vl.h | 9 --------- 1 file changed, 9 deletions(-) diff --git a/fs/afs/afs_vl.h b/fs/afs/afs_vl.h index 9c65ffb8a523..a06296c8827d 100644 --- a/fs/afs/afs_vl.h +++ b/fs/afs/afs_vl.h @@ -134,13 +134,4 @@ struct afs_uvldbentry__xdr { __be32 spares9; }; -struct afs_address_list { - refcount_t usage; - unsigned int version; - unsigned int nr_addrs; - struct sockaddr_rxrpc addrs[]; -}; - -extern void afs_put_address_list(struct afs_address_list *alist); - #endif /* AFS_VL_H */ From ff98751bae40faed1ba9c6a7287e84430f7dec64 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 23 Sep 2024 16:07:50 +0100 Subject: [PATCH 547/655] afs: Fix the setting of the server responding flag In afs_wait_for_operation(), we set transcribe the call responded flag to the server record that we used after doing the fileserver iteration loop - but it's possible to exit the loop having had a response from the server that we've discarded (e.g. it returned an abort or we started receiving data, but the call didn't complete). This means that op->server might be NULL, but we don't check that before attempting to set the server flag. Fixes: 98f9fda2057b ("afs: Fold the afs_addr_cursor struct in") Signed-off-by: David Howells Link: https://lore.kernel.org/r/20240923150756.902363-7-dhowells@redhat.com cc: Marc Dionne cc: linux-afs@lists.infradead.org Signed-off-by: Christian Brauner --- fs/afs/fs_operation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/afs/fs_operation.c b/fs/afs/fs_operation.c index 3546b087e791..428721bbe4f6 100644 --- a/fs/afs/fs_operation.c +++ b/fs/afs/fs_operation.c @@ -201,7 +201,7 @@ void afs_wait_for_operation(struct afs_operation *op) } } - if (op->call_responded) + if (op->call_responded && op->server) set_bit(AFS_SERVER_FL_RESPONDING, &op->server->flags); if (!afs_op_error(op)) { From 9fffa4e9b3b158f63334e603e610da7d529a0f9a Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 27 Sep 2024 09:08:42 +0100 Subject: [PATCH 548/655] netfs: Advance iterator correctly rather than jumping it In netfs_write_folio(), use iov_iter_advance() to advance the folio as we split bits of it off to subrequests rather than manually jumping the ->iov_offset value around. This becomes more problematic when we use a bounce buffer made out of single-page folios to cover a multipage pagecache folio. Signed-off-by: David Howells Link: https://lore.kernel.org/r/2238548.1727424522@warthog.procyon.org.uk cc: Jeff Layton cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/write_issue.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c index 04e66d587f77..f9761d11791d 100644 --- a/fs/netfs/write_issue.c +++ b/fs/netfs/write_issue.c @@ -307,6 +307,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq, struct netfs_io_stream *stream; struct netfs_group *fgroup; /* TODO: Use this with ceph */ struct netfs_folio *finfo; + size_t iter_off = 0; size_t fsize = folio_size(folio), flen = fsize, foff = 0; loff_t fpos = folio_pos(folio), i_size; bool to_eof = false, streamw = false; @@ -462,7 +463,12 @@ static int netfs_write_folio(struct netfs_io_request *wreq, if (choose_s < 0) break; stream = &wreq->io_streams[choose_s]; - wreq->io_iter.iov_offset = stream->submit_off; + + /* Advance the iterator(s). */ + if (stream->submit_off > iter_off) { + iov_iter_advance(&wreq->io_iter, stream->submit_off - iter_off); + iter_off = stream->submit_off; + } atomic64_set(&wreq->issued_to, fpos + stream->submit_off); stream->submit_extendable_to = fsize - stream->submit_off; @@ -477,8 +483,8 @@ static int netfs_write_folio(struct netfs_io_request *wreq, debug = true; } - wreq->io_iter.iov_offset = 0; - iov_iter_advance(&wreq->io_iter, fsize); + if (fsize > iter_off) + iov_iter_advance(&wreq->io_iter, fsize - iter_off); atomic64_set(&wreq->issued_to, fpos + fsize); if (!debug) From acd5f76fd5292c91628e04da83e8b78c986cfa2b Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Fri, 27 Sep 2024 16:17:41 +0200 Subject: [PATCH 549/655] HID: bpf: fix cfi stubs for hid_bpf_ops With the introduction of commit e42ac1418055 ("bpf: Check unsupported ops from the bpf_struct_ops's cfi_stubs"), a HID-BPF struct_ops containing a .hid_hw_request() or a .hid_hw_output_report() was failing to load as the cfi stubs were not defined. Fix that by defining those simple static functions and restore HID-BPF functionality. This was detected with the HID selftests suddenly failing on Linus' tree. Cc: stable@vger.kernel.org # v6.11+ Fixes: 9286675a2aed ("HID: bpf: add HID-BPF hooks for hid_hw_output_report") Fixes: 8bd0488b5ea5 ("HID: bpf: add HID-BPF hooks for hid_hw_raw_requests") Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/bpf/hid_bpf_struct_ops.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/hid/bpf/hid_bpf_struct_ops.c b/drivers/hid/bpf/hid_bpf_struct_ops.c index cd696c59ba0f..702c22fae136 100644 --- a/drivers/hid/bpf/hid_bpf_struct_ops.c +++ b/drivers/hid/bpf/hid_bpf_struct_ops.c @@ -276,9 +276,23 @@ static int __hid_bpf_rdesc_fixup(struct hid_bpf_ctx *ctx) return 0; } +static int __hid_bpf_hw_request(struct hid_bpf_ctx *ctx, unsigned char reportnum, + enum hid_report_type rtype, enum hid_class_request reqtype, + u64 source) +{ + return 0; +} + +static int __hid_bpf_hw_output_report(struct hid_bpf_ctx *ctx, u64 source) +{ + return 0; +} + static struct hid_bpf_ops __bpf_hid_bpf_ops = { .hid_device_event = __hid_bpf_device_event, .hid_rdesc_fixup = __hid_bpf_rdesc_fixup, + .hid_hw_request = __hid_bpf_hw_request, + .hid_hw_output_report = __hid_bpf_hw_output_report, }; static struct bpf_struct_ops bpf_hid_bpf_ops = { From 4b721fcc094e9eb6dd4702df8d79ab11e120833d Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 24 Sep 2024 13:47:23 +0200 Subject: [PATCH 550/655] selftests: vDSO: align stack for O2-optimized memcpy When switching on -O2, gcc generates SSE2 instructions that assume a 16-byte aligned stack, which the standalone test's start point wasn't aligning. Fix this with the usual alignment sequence. Fixes: ecb8bd70d51 ("selftests: vDSO: build tests with O2 optimization") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202409241558.98e13f6f-oliver.sang@intel.com Signed-off-by: Jason A. Donenfeld Signed-off-by: Shuah Khan --- tools/testing/selftests/vDSO/vdso_standalone_test_x86.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c index 27f6fdf11969..644915862af8 100644 --- a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c +++ b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c @@ -131,6 +131,8 @@ asm ( "_start:\n\t" #ifdef __x86_64__ "mov %rsp,%rdi\n\t" + "and $-16,%rsp\n\t" + "sub $8,%rsp\n\t" "jmp c_main" #else "push %esp\n\t" From 0c33037c825e47f64f426999db7192604e6d3188 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 27 Sep 2024 13:54:23 +0200 Subject: [PATCH 551/655] ovl: fix file leak in ovl_real_fdget_meta() ovl_open_realfile() is wrongly called twice after conversion to new struct fd. Fixes: 88a2f6468d01 ("struct fd: representation change") Reported-by: syzbot+d9efec94dcbfa0de1c07@syzkaller.appspotmail.com Signed-off-by: Amir Goldstein Signed-off-by: Linus Torvalds --- fs/overlayfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 2b7a5a3a7a2f..4504493b20be 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -117,7 +117,7 @@ static int ovl_real_fdget_meta(const struct file *file, struct fd *real, struct file *f = ovl_open_realfile(file, &realpath); if (IS_ERR(f)) return PTR_ERR(f); - real->word = (unsigned long)ovl_open_realfile(file, &realpath) | FDPUT_FPUT; + real->word = (unsigned long)f | FDPUT_FPUT; return 0; } From 1bbcfe620e03aafc542b1c649dea0a9499a4490f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 27 Sep 2024 10:02:39 -1000 Subject: [PATCH 552/655] sched_ext: Relocate check_hotplug_seq() call in scx_ops_enable() check_hotplug_seq() is used to detect CPU hotplug event which occurred while the BPF scheduler is being loaded so that initialization can be retried if CPU hotplug events take place before the CPU hotplug callbacks are online. As such, the best place to call it is in the same cpu_read_lock() section that enables the CPU hotplug ops. Currently, it is called in the next cpus_read_lock() block in scx_ops_enable(). The side effect of this placement is a small window in which hotplug sequence detection can trigger unnecessarily, which isn't critical. Move check_hotplug_seq() invocation to the same cpus_read_lock() block as the hotplug operation enablement to close the window and get the invocation out of the way for planned locking updates. Signed-off-by: Tejun Heo Cc: David Vernet --- kernel/sched/ext.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index d6f6bf6caecc..e8ab7e5ee2dd 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -5050,6 +5050,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link) if (((void (**)(void))ops)[i]) static_branch_enable_cpuslocked(&scx_has_op[i]); + check_hotplug_seq(ops); cpus_read_unlock(); ret = validate_ops(ops); @@ -5098,8 +5099,6 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link) cpus_read_lock(); scx_cgroup_lock(); - check_hotplug_seq(ops); - for (i = SCX_OPI_NORMAL_BEGIN; i < SCX_OPI_NORMAL_END; i++) if (((void (**)(void))ops)[i]) static_branch_enable_cpuslocked(&scx_has_op[i]); From fc1fcebead344360979ea9407029f9c8a99d718f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 27 Sep 2024 10:02:39 -1000 Subject: [PATCH 553/655] sched_ext: Remove SCX_OPS_PREPPING The distinction between SCX_OPS_PREPPING and SCX_OPS_ENABLING is not used anywhere and only adds confusion. Drop SCX_OPS_PREPPING. Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 29 +++++++---------------------- 1 file changed, 7 insertions(+), 22 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index e8ab7e5ee2dd..daeb8446ff32 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -779,7 +779,6 @@ enum scx_tg_flags { }; enum scx_ops_enable_state { - SCX_OPS_PREPPING, SCX_OPS_ENABLING, SCX_OPS_ENABLED, SCX_OPS_DISABLING, @@ -787,7 +786,6 @@ enum scx_ops_enable_state { }; static const char *scx_ops_enable_state_str[] = { - [SCX_OPS_PREPPING] = "prepping", [SCX_OPS_ENABLING] = "enabling", [SCX_OPS_ENABLED] = "enabled", [SCX_OPS_DISABLING] = "disabling", @@ -5016,12 +5014,12 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link) } /* - * Set scx_ops, transition to PREPPING and clear exit info to arm the + * Set scx_ops, transition to ENABLING and clear exit info to arm the * disable path. Failure triggers full disabling from here on. */ scx_ops = *ops; - WARN_ON_ONCE(scx_ops_set_enable_state(SCX_OPS_PREPPING) != + WARN_ON_ONCE(scx_ops_set_enable_state(SCX_OPS_ENABLING) != SCX_OPS_DISABLED); atomic_set(&scx_exit_kind, SCX_EXIT_NONE); @@ -5174,23 +5172,6 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link) */ preempt_disable(); - /* - * From here on, the disable path must assume that tasks have ops - * enabled and need to be recovered. - * - * Transition to ENABLING fails iff the BPF scheduler has already - * triggered scx_bpf_error(). Returning an error code here would lose - * the recorded error information. Exit indicating success so that the - * error is notified through ops.exit() with all the details. - */ - if (!scx_ops_tryset_enable_state(SCX_OPS_ENABLING, SCX_OPS_PREPPING)) { - preempt_enable(); - spin_unlock_irq(&scx_tasks_lock); - WARN_ON_ONCE(atomic_read(&scx_exit_kind) == SCX_EXIT_NONE); - ret = 0; - goto err_disable_unlock_all; - } - /* * We're fully committed and can't fail. The PREPPED -> ENABLED * transitions here are synchronized against sched_ext_free() through @@ -5221,7 +5202,11 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link) cpus_read_unlock(); percpu_up_write(&scx_fork_rwsem); - /* see above ENABLING transition for the explanation on exiting with 0 */ + /* + * Returning an error code here would lose the recorded error + * information. Exit indicating success so that the error is notified + * through ops.exit() with all the details. + */ if (!scx_ops_tryset_enable_state(SCX_OPS_ENABLED, SCX_OPS_ENABLING)) { WARN_ON_ONCE(atomic_read(&scx_exit_kind) == SCX_EXIT_NONE); ret = 0; From 8c2090c504e998c8f34ec870bae71dafcc96a6e0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 27 Sep 2024 10:02:40 -1000 Subject: [PATCH 554/655] sched_ext: Initialize in bypass mode scx_ops_enable() used preempt_disable() around the task iteration loop to switch tasks into SCX to guarantee forward progress of the task which is running scx_ops_enable(). However, in the gap between setting __scx_ops_enabled and preeempt_disable(), an external entity can put tasks including the enabling one into SCX prematurely, which can lead to malfunctions including stalls. The bypass mode can wrap the entire enabling operation and guarantee forward progress no matter what the BPF scheduler does. Use the bypass mode instead to guarantee forward progress while enabling. While at it, release and regrab scx_tasks_lock between the two task iteration locks in scx_ops_enable() for clarity as there is no reason to keep holding the lock between them. Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index daeb8446ff32..00883f3ef647 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -5075,6 +5075,14 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link) queue_delayed_work(system_unbound_wq, &scx_watchdog_work, scx_watchdog_timeout / 2); + /* + * Once __scx_ops_enabled is set, %current can be switched to SCX + * anytime. This can lead to stalls as some BPF schedulers (e.g. + * userspace scheduling) may not function correctly before all tasks are + * switched. Init in bypass mode to guarantee forward progress. + */ + scx_ops_bypass(true); + /* * Lock out forks, cgroup on/offlining and moves before opening the * floodgate so that they don't wander into the operations prematurely. @@ -5134,7 +5142,6 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link) * disabled. */ spin_lock_irq(&scx_tasks_lock); - scx_task_iter_init(&sti); while ((p = scx_task_iter_next_locked(&sti))) { /* @@ -5163,22 +5170,19 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link) spin_lock_irq(&scx_tasks_lock); } scx_task_iter_exit(&sti); + spin_unlock_irq(&scx_tasks_lock); /* - * All tasks are prepped but are still ops-disabled. Ensure that - * %current can't be scheduled out and switch everyone. - * preempt_disable() is necessary because we can't guarantee that - * %current won't be starved if scheduled out while switching. + * All tasks are prepped but the tasks are not enabled. Switch everyone. */ - preempt_disable(); + WRITE_ONCE(scx_switching_all, !(ops->flags & SCX_OPS_SWITCH_PARTIAL)); /* * We're fully committed and can't fail. The PREPPED -> ENABLED * transitions here are synchronized against sched_ext_free() through * scx_tasks_lock. */ - WRITE_ONCE(scx_switching_all, !(ops->flags & SCX_OPS_SWITCH_PARTIAL)); - + spin_lock_irq(&scx_tasks_lock); scx_task_iter_init(&sti); while ((p = scx_task_iter_next_locked(&sti))) { const struct sched_class *old_class = p->sched_class; @@ -5195,12 +5199,12 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link) check_class_changed(task_rq(p), p, old_class, p->prio); } scx_task_iter_exit(&sti); - spin_unlock_irq(&scx_tasks_lock); - preempt_enable(); + scx_cgroup_unlock(); cpus_read_unlock(); percpu_up_write(&scx_fork_rwsem); + scx_ops_bypass(false); /* * Returning an error code here would lose the recorded error @@ -5241,6 +5245,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link) err_disable_unlock_all: scx_cgroup_unlock(); percpu_up_write(&scx_fork_rwsem); + scx_ops_bypass(false); err_disable_unlock_cpus: cpus_read_unlock(); err_disable: From 9753358a6a2b011478e8efdabbb489216252426f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 27 Sep 2024 10:02:40 -1000 Subject: [PATCH 555/655] sched_ext: Fix SCX_TASK_INIT -> SCX_TASK_READY transitions in scx_ops_enable() scx_ops_enable() has two task iteration loops. The first one calls scx_ops_init_task() on every task and the latter switches the eligible ones into SCX. The first loop left the tasks in SCX_TASK_INIT state and then the second loop switched it into READY before switching the task into SCX. The distinction between INIT and READY is only meaningful in the fork path where it's used to tell whether the task finished forking so that we can tell ops.exit_task() accordingly. Leaving task in INIT state between the two loops is incosistent with the fork path and incorrect. The following can be triggered by running a program which keeps toggling a task between SCHED_OTHER and SCHED_SCX while enabling a task: sched_ext: Invalid task state transition 1 -> 3 for fish[1526] WARNING: CPU: 2 PID: 1615 at kernel/sched/ext.c:3393 scx_ops_enable_task+0x1a1/0x200 ... Sched_ext: qmap (enabling+all) RIP: 0010:scx_ops_enable_task+0x1a1/0x200 ... switching_to_scx+0x13/0xa0 __sched_setscheduler+0x850/0xa50 do_sched_setscheduler+0x104/0x1c0 __x64_sys_sched_setscheduler+0x18/0x30 do_syscall_64+0x7b/0x140 entry_SYSCALL_64_after_hwframe+0x76/0x7e Fix it by transitioning to READY in the first loop right after scx_ops_init_task() succeeds. Signed-off-by: Tejun Heo Cc: David Vernet --- kernel/sched/ext.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 00883f3ef647..e83af19de59d 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -5166,6 +5166,8 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link) goto err_disable_unlock_all; } + scx_set_task_state(p, SCX_TASK_READY); + put_task_struct(p); spin_lock_irq(&scx_tasks_lock); } @@ -5178,7 +5180,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link) WRITE_ONCE(scx_switching_all, !(ops->flags & SCX_OPS_SWITCH_PARTIAL)); /* - * We're fully committed and can't fail. The PREPPED -> ENABLED + * We're fully committed and can't fail. The task READY -> ENABLED * transitions here are synchronized against sched_ext_free() through * scx_tasks_lock. */ @@ -5190,7 +5192,6 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link) sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx); - scx_set_task_state(p, SCX_TASK_READY); __setscheduler_prio(p, p->prio); check_class_changing(task_rq(p), p, old_class); From 4269c603cc26df154e0db303a9347e6ec3cc805e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 27 Sep 2024 10:02:40 -1000 Subject: [PATCH 556/655] sched_ext: Enable scx_ops_init_task() separately scx_ops_init_task() and the follow-up scx_ops_enable_task() in the fork path were gated by scx_enabled() test and thus __scx_ops_enabled had to be turned on before the first scx_ops_init_task() loop in scx_ops_enable(). However, if an external entity causes sched_class switch before the loop is complete, tasks which are not initialized could be switched to SCX. The following can be reproduced by running a program which keeps toggling a process between SCHED_OTHER and SCHED_EXT using sched_setscheduler(2). sched_ext: Invalid task state transition 0 -> 3 for fish[1623] WARNING: CPU: 1 PID: 1650 at kernel/sched/ext.c:3392 scx_ops_enable_task+0x1a1/0x200 ... Sched_ext: simple (enabling) RIP: 0010:scx_ops_enable_task+0x1a1/0x200 ... switching_to_scx+0x13/0xa0 __sched_setscheduler+0x850/0xa50 do_sched_setscheduler+0x104/0x1c0 __x64_sys_sched_setscheduler+0x18/0x30 do_syscall_64+0x7b/0x140 entry_SYSCALL_64_after_hwframe+0x76/0x7e Fix it by gating scx_ops_init_task() separately using scx_ops_init_task_enabled. __scx_ops_enabled is now set after all tasks are finished with scx_ops_init_task(). Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index e83af19de59d..7729594882d9 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -853,6 +853,7 @@ DEFINE_STATIC_KEY_FALSE(__scx_ops_enabled); DEFINE_STATIC_PERCPU_RWSEM(scx_fork_rwsem); static atomic_t scx_ops_enable_state_var = ATOMIC_INIT(SCX_OPS_DISABLED); static atomic_t scx_ops_bypass_depth = ATOMIC_INIT(0); +static bool scx_ops_init_task_enabled; static bool scx_switching_all; DEFINE_STATIC_KEY_FALSE(__scx_switched_all); @@ -3565,7 +3566,7 @@ int scx_fork(struct task_struct *p) { percpu_rwsem_assert_held(&scx_fork_rwsem); - if (scx_enabled()) + if (scx_ops_init_task_enabled) return scx_ops_init_task(p, task_group(p), true); else return 0; @@ -3573,7 +3574,7 @@ int scx_fork(struct task_struct *p) void scx_post_fork(struct task_struct *p) { - if (scx_enabled()) { + if (scx_ops_init_task_enabled) { scx_set_task_state(p, SCX_TASK_READY); /* @@ -4453,6 +4454,8 @@ static void scx_ops_disable_workfn(struct kthread_work *work) cpus_read_lock(); scx_cgroup_lock(); + scx_ops_init_task_enabled = false; + spin_lock_irq(&scx_tasks_lock); scx_task_iter_init(&sti); /* @@ -5132,7 +5135,8 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link) if (ret) goto err_disable_unlock_all; - static_branch_enable_cpuslocked(&__scx_ops_enabled); + WARN_ON_ONCE(scx_ops_init_task_enabled); + scx_ops_init_task_enabled = true; /* * Enable ops for every task. Fork is excluded by scx_fork_rwsem @@ -5175,9 +5179,11 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link) spin_unlock_irq(&scx_tasks_lock); /* - * All tasks are prepped but the tasks are not enabled. Switch everyone. + * All tasks are READY. It's safe to turn on scx_enabled() and switch + * all eligible tasks. */ WRITE_ONCE(scx_switching_all, !(ops->flags & SCX_OPS_SWITCH_PARTIAL)); + static_branch_enable_cpuslocked(&__scx_ops_enabled); /* * We're fully committed and can't fail. The task READY -> ENABLED From 568894edbe48f0878f787ed533dc9dbfd09c0fbe Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 27 Sep 2024 10:02:40 -1000 Subject: [PATCH 557/655] sched_ext: Add scx_cgroup_enabled to gate cgroup operations and fix scx_tg_online() If the BPF scheduler does not implement ops.cgroup_init(), scx_tg_online() didn't set SCX_TG_INITED which meant that ops.cgroup_exit(), even if implemented, won't be called from scx_tg_offline(). This is because SCX_HAS_OP(cgroupt_init) is used to test both whether SCX cgroup operations are enabled and ops.cgroup_init() exists. Fix it by introducing a separate bool scx_cgroup_enabled to gate cgroup operations and use SCX_HAS_OP(cgroup_init) only to test whether ops.cgroup_init() exists. Make all cgroup operations consistently use scx_cgroup_enabled to test whether cgroup operations are enabled. scx_cgroup_enabled is added instead of using scx_enabled() to ease planned locking updates. Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 7729594882d9..f9675ced75e8 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -3706,6 +3706,7 @@ bool scx_can_stop_tick(struct rq *rq) #ifdef CONFIG_EXT_GROUP_SCHED DEFINE_STATIC_PERCPU_RWSEM(scx_cgroup_rwsem); +static bool scx_cgroup_enabled; static bool cgroup_warned_missing_weight; static bool cgroup_warned_missing_idle; @@ -3725,8 +3726,7 @@ static void scx_cgroup_warn_missing_weight(struct task_group *tg) static void scx_cgroup_warn_missing_idle(struct task_group *tg) { - if (scx_ops_enable_state() == SCX_OPS_DISABLED || - cgroup_warned_missing_idle) + if (!scx_cgroup_enabled || cgroup_warned_missing_idle) return; if (!tg->idle) @@ -3747,15 +3747,18 @@ int scx_tg_online(struct task_group *tg) scx_cgroup_warn_missing_weight(tg); - if (SCX_HAS_OP(cgroup_init)) { - struct scx_cgroup_init_args args = { .weight = tg->scx_weight }; + if (scx_cgroup_enabled) { + if (SCX_HAS_OP(cgroup_init)) { + struct scx_cgroup_init_args args = + { .weight = tg->scx_weight }; - ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_init, - tg->css.cgroup, &args); - if (!ret) + ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_init, + tg->css.cgroup, &args); + if (ret) + ret = ops_sanitize_err("cgroup_init", ret); + } + if (ret == 0) tg->scx_flags |= SCX_TG_ONLINE | SCX_TG_INITED; - else - ret = ops_sanitize_err("cgroup_init", ret); } else { tg->scx_flags |= SCX_TG_ONLINE; } @@ -3786,7 +3789,7 @@ int scx_cgroup_can_attach(struct cgroup_taskset *tset) /* released in scx_finish/cancel_attach() */ percpu_down_read(&scx_cgroup_rwsem); - if (!scx_enabled()) + if (!scx_cgroup_enabled) return 0; cgroup_taskset_for_each(p, css, tset) { @@ -3829,7 +3832,7 @@ int scx_cgroup_can_attach(struct cgroup_taskset *tset) void scx_move_task(struct task_struct *p) { - if (!scx_enabled()) + if (!scx_cgroup_enabled) return; /* @@ -3865,7 +3868,7 @@ void scx_cgroup_cancel_attach(struct cgroup_taskset *tset) struct cgroup_subsys_state *css; struct task_struct *p; - if (!scx_enabled()) + if (!scx_cgroup_enabled) goto out_unlock; cgroup_taskset_for_each(p, css, tset) { @@ -3882,7 +3885,7 @@ void scx_group_set_weight(struct task_group *tg, unsigned long weight) { percpu_down_read(&scx_cgroup_rwsem); - if (tg->scx_weight != weight) { + if (scx_cgroup_enabled && tg->scx_weight != weight) { if (SCX_HAS_OP(cgroup_set_weight)) SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_set_weight, tg_cgrp(tg), weight); @@ -4054,6 +4057,9 @@ static void scx_cgroup_exit(void) percpu_rwsem_assert_held(&scx_cgroup_rwsem); + WARN_ON_ONCE(!scx_cgroup_enabled); + scx_cgroup_enabled = false; + /* * scx_tg_on/offline() are excluded through scx_cgroup_rwsem. If we walk * cgroups and exit all the inited ones, all online cgroups are exited. @@ -4129,6 +4135,9 @@ static int scx_cgroup_init(void) } rcu_read_unlock(); + WARN_ON_ONCE(scx_cgroup_enabled); + scx_cgroup_enabled = true; + return 0; } From 160216568cddc9d6e7f36133ba41d25459d90de4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 27 Sep 2024 10:02:40 -1000 Subject: [PATCH 558/655] sched_ext: Decouple locks in scx_ops_disable_workfn() The disable path uses three big locks - scx_fork_rwsem, scx_cgroup_rwsem and cpus_read_lock. Currently, the locks are grabbed together which is prone to locking order problems. With the preceding scx_cgroup_enabled change, we can decouple them: - As cgroup disabling no longer requires modifying a static_key which requires cpus_read_lock(), no need to grab cpus_read_lock() before grabbing scx_cgroup_rwsem. - cgroup can now be independently disabled before tasks are moved back to the fair class. Relocate scx_cgroup_exit() invocation before scx_fork_rwsem is grabbed, drop now unnecessary cpus_read_lock() and move static_key operations out of scx_fork_rwsem. This decouples all three locks in the disable path. Signed-off-by: Tejun Heo Reported-and-tested-by: Aboorva Devarajan Link: http://lkml.kernel.org/r/8cd0ec0c4c7c1bc0119e61fbef0bee9d5e24022d.camel@linux.ibm.com --- kernel/sched/ext.c | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index f9675ced75e8..7d0ce7e6ea1f 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -4456,21 +4456,23 @@ static void scx_ops_disable_workfn(struct kthread_work *work) WRITE_ONCE(scx_switching_all, false); /* - * Avoid racing against fork and cgroup changes. See scx_ops_enable() - * for explanation on the locking order. + * Shut down cgroup support before tasks so that the cgroup attach path + * doesn't race against scx_ops_exit_task(). + */ + scx_cgroup_lock(); + scx_cgroup_exit(); + scx_cgroup_unlock(); + + /* + * The BPF scheduler is going away. All tasks including %TASK_DEAD ones + * must be switched out and exited synchronously. */ percpu_down_write(&scx_fork_rwsem); - cpus_read_lock(); - scx_cgroup_lock(); scx_ops_init_task_enabled = false; spin_lock_irq(&scx_tasks_lock); scx_task_iter_init(&sti); - /* - * The BPF scheduler is going away. All tasks including %TASK_DEAD ones - * must be switched out and exited synchronously. - */ while ((p = scx_task_iter_next_locked(&sti))) { const struct sched_class *old_class = p->sched_class; struct sched_enq_and_set_ctx ctx; @@ -4488,23 +4490,18 @@ static void scx_ops_disable_workfn(struct kthread_work *work) } scx_task_iter_exit(&sti); spin_unlock_irq(&scx_tasks_lock); + percpu_up_write(&scx_fork_rwsem); /* no task is on scx, turn off all the switches and flush in-progress calls */ - static_branch_disable_cpuslocked(&__scx_ops_enabled); + static_branch_disable(&__scx_ops_enabled); for (i = SCX_OPI_BEGIN; i < SCX_OPI_END; i++) - static_branch_disable_cpuslocked(&scx_has_op[i]); - static_branch_disable_cpuslocked(&scx_ops_enq_last); - static_branch_disable_cpuslocked(&scx_ops_enq_exiting); - static_branch_disable_cpuslocked(&scx_ops_cpu_preempt); - static_branch_disable_cpuslocked(&scx_builtin_idle_enabled); + static_branch_disable(&scx_has_op[i]); + static_branch_disable(&scx_ops_enq_last); + static_branch_disable(&scx_ops_enq_exiting); + static_branch_disable(&scx_ops_cpu_preempt); + static_branch_disable(&scx_builtin_idle_enabled); synchronize_rcu(); - scx_cgroup_exit(); - - scx_cgroup_unlock(); - cpus_read_unlock(); - percpu_up_write(&scx_fork_rwsem); - if (ei->kind >= SCX_EXIT_ERROR) { pr_err("sched_ext: BPF scheduler \"%s\" disabled (%s)\n", scx_ops.name, ei->reason); From efe231d9debf6db812bebb262407c95b21cdb8a2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 27 Sep 2024 10:02:40 -1000 Subject: [PATCH 559/655] sched_ext: Decouple locks in scx_ops_enable() The enable path uses three big locks - scx_fork_rwsem, scx_cgroup_rwsem and cpus_read_lock. Currently, the locks are grabbed together which is prone to locking order problems. For example, currently, there is a possible deadlock involving scx_fork_rwsem and cpus_read_lock. cpus_read_lock has to nest inside scx_fork_rwsem due to locking order existing in other subsystems. However, there exists a dependency in the other direction during hotplug if hotplug needs to fork a new task, which happens in some cases. This leads to the following deadlock: scx_ops_enable() hotplug percpu_down_write(&cpu_hotplug_lock) percpu_down_write(&scx_fork_rwsem) block on cpu_hotplug_lock kthread_create() waits for kthreadd kthreadd blocks on scx_fork_rwsem Note that this doesn't trigger lockdep because the hotplug side dependency bounces through kthreadd. With the preceding scx_cgroup_enabled change, this can be solved by decoupling cpus_read_lock, which is needed for static_key manipulations, from the other two locks. - Move the first block of static_key manipulations outside of scx_fork_rwsem and scx_cgroup_rwsem. This is now safe with the preceding scx_cgroup_enabled change. - Drop scx_cgroup_rwsem and scx_fork_rwsem between the two task iteration blocks so that __scx_ops_enabled static_key enabling is outside the two rwsems. Signed-off-by: Tejun Heo Reported-and-tested-by: Aboorva Devarajan Link: http://lkml.kernel.org/r/8cd0ec0c4c7c1bc0119e61fbef0bee9d5e24022d.camel@linux.ibm.com --- kernel/sched/ext.c | 67 +++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 40 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 7d0ce7e6ea1f..0c398ecdb938 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -5049,7 +5049,8 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link) ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, init); if (ret) { ret = ops_sanitize_err("init", ret); - goto err_disable_unlock_cpus; + cpus_read_unlock(); + goto err_disable; } } @@ -5092,54 +5093,30 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link) */ scx_ops_bypass(true); - /* - * Lock out forks, cgroup on/offlining and moves before opening the - * floodgate so that they don't wander into the operations prematurely. - * - * We don't need to keep the CPUs stable but static_branch_*() requires - * cpus_read_lock() and scx_cgroup_rwsem must nest inside - * cpu_hotplug_lock because of the following dependency chain: - * - * cpu_hotplug_lock --> cgroup_threadgroup_rwsem --> scx_cgroup_rwsem - * - * So, we need to do cpus_read_lock() before scx_cgroup_lock() and use - * static_branch_*_cpuslocked(). - * - * Note that cpu_hotplug_lock must nest inside scx_fork_rwsem due to the - * following dependency chain: - * - * scx_fork_rwsem --> pernet_ops_rwsem --> cpu_hotplug_lock - */ - percpu_down_write(&scx_fork_rwsem); - cpus_read_lock(); - scx_cgroup_lock(); - for (i = SCX_OPI_NORMAL_BEGIN; i < SCX_OPI_NORMAL_END; i++) if (((void (**)(void))ops)[i]) - static_branch_enable_cpuslocked(&scx_has_op[i]); + static_branch_enable(&scx_has_op[i]); if (ops->flags & SCX_OPS_ENQ_LAST) - static_branch_enable_cpuslocked(&scx_ops_enq_last); + static_branch_enable(&scx_ops_enq_last); if (ops->flags & SCX_OPS_ENQ_EXITING) - static_branch_enable_cpuslocked(&scx_ops_enq_exiting); + static_branch_enable(&scx_ops_enq_exiting); if (scx_ops.cpu_acquire || scx_ops.cpu_release) - static_branch_enable_cpuslocked(&scx_ops_cpu_preempt); + static_branch_enable(&scx_ops_cpu_preempt); if (!ops->update_idle || (ops->flags & SCX_OPS_KEEP_BUILTIN_IDLE)) { reset_idle_masks(); - static_branch_enable_cpuslocked(&scx_builtin_idle_enabled); + static_branch_enable(&scx_builtin_idle_enabled); } else { - static_branch_disable_cpuslocked(&scx_builtin_idle_enabled); + static_branch_disable(&scx_builtin_idle_enabled); } /* - * All cgroups should be initialized before letting in tasks. cgroup - * on/offlining and task migrations are already locked out. + * Lock out forks, cgroup on/offlining and moves before opening the + * floodgate so that they don't wander into the operations prematurely. */ - ret = scx_cgroup_init(); - if (ret) - goto err_disable_unlock_all; + percpu_down_write(&scx_fork_rwsem); WARN_ON_ONCE(scx_ops_init_task_enabled); scx_ops_init_task_enabled = true; @@ -5150,7 +5127,18 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link) * leaving as sched_ext_free() can handle both prepped and enabled * tasks. Prep all tasks first and then enable them with preemption * disabled. + * + * All cgroups should be initialized before scx_ops_init_task() so that + * the BPF scheduler can reliably track each task's cgroup membership + * from scx_ops_init_task(). Lock out cgroup on/offlining and task + * migrations while tasks are being initialized so that + * scx_cgroup_can_attach() never sees uninitialized tasks. */ + scx_cgroup_lock(); + ret = scx_cgroup_init(); + if (ret) + goto err_disable_unlock_all; + spin_lock_irq(&scx_tasks_lock); scx_task_iter_init(&sti); while ((p = scx_task_iter_next_locked(&sti))) { @@ -5183,19 +5171,22 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link) } scx_task_iter_exit(&sti); spin_unlock_irq(&scx_tasks_lock); + scx_cgroup_unlock(); + percpu_up_write(&scx_fork_rwsem); /* * All tasks are READY. It's safe to turn on scx_enabled() and switch * all eligible tasks. */ WRITE_ONCE(scx_switching_all, !(ops->flags & SCX_OPS_SWITCH_PARTIAL)); - static_branch_enable_cpuslocked(&__scx_ops_enabled); + static_branch_enable(&__scx_ops_enabled); /* * We're fully committed and can't fail. The task READY -> ENABLED * transitions here are synchronized against sched_ext_free() through * scx_tasks_lock. */ + percpu_down_write(&scx_fork_rwsem); spin_lock_irq(&scx_tasks_lock); scx_task_iter_init(&sti); while ((p = scx_task_iter_next_locked(&sti))) { @@ -5213,10 +5204,8 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link) } scx_task_iter_exit(&sti); spin_unlock_irq(&scx_tasks_lock); - - scx_cgroup_unlock(); - cpus_read_unlock(); percpu_up_write(&scx_fork_rwsem); + scx_ops_bypass(false); /* @@ -5259,8 +5248,6 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link) scx_cgroup_unlock(); percpu_up_write(&scx_fork_rwsem); scx_ops_bypass(false); -err_disable_unlock_cpus: - cpus_read_unlock(); err_disable: mutex_unlock(&scx_ops_enable_mutex); /* must be fully disabled before returning */ From 95b873693a0841e02b812e693296a884362fdd51 Mon Sep 17 00:00:00 2001 From: Zhang Qiao Date: Thu, 26 Sep 2024 18:39:49 +0800 Subject: [PATCH 560/655] sched_ext: Remove redundant p->nr_cpus_allowed checker select_rq_task() already checked that 'p->nr_cpus_allowed > 1', 'p->nr_cpus_allowed == 1' checker in scx_select_cpu_dfl() is redundant. Signed-off-by: Zhang Qiao Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 0c398ecdb938..3cd7c50a51c5 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -3074,22 +3074,13 @@ static s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, * there is an idle core elsewhere on the system. */ cpu = smp_processor_id(); - if ((wake_flags & SCX_WAKE_SYNC) && p->nr_cpus_allowed > 1 && + if ((wake_flags & SCX_WAKE_SYNC) && !cpumask_empty(idle_masks.cpu) && !(current->flags & PF_EXITING) && cpu_rq(cpu)->scx.local_dsq.nr == 0) { if (cpumask_test_cpu(cpu, p->cpus_ptr)) goto cpu_found; } - if (p->nr_cpus_allowed == 1) { - if (test_and_clear_cpu_idle(prev_cpu)) { - cpu = prev_cpu; - goto cpu_found; - } else { - return prev_cpu; - } - } - /* * If CPU has SMT, any wholly idle CPU is likely a better pick than * partially idle @prev_cpu. From 9cf14f5a2746c19455ce9cb44341b5527b5e19c3 Mon Sep 17 00:00:00 2001 From: Andrey Shumilin Date: Fri, 27 Sep 2024 22:34:24 +0300 Subject: [PATCH 561/655] fbdev: sisfb: Fix strbuf array overflow The values of the variables xres and yres are placed in strbuf. These variables are obtained from strbuf1. The strbuf1 array contains digit characters and a space if the array contains non-digit characters. Then, when executing sprintf(strbuf, "%ux%ux8", xres, yres); more than 16 bytes will be written to strbuf. It is suggested to increase the size of the strbuf array to 24. Found by Linux Verification Center (linuxtesting.org) with SVACE. Signed-off-by: Andrey Shumilin Signed-off-by: Helge Deller --- drivers/video/fbdev/sis/sis_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/sis/sis_main.c b/drivers/video/fbdev/sis/sis_main.c index 009bf1d92644..75033e6be15a 100644 --- a/drivers/video/fbdev/sis/sis_main.c +++ b/drivers/video/fbdev/sis/sis_main.c @@ -183,7 +183,7 @@ static void sisfb_search_mode(char *name, bool quiet) { unsigned int j = 0, xres = 0, yres = 0, depth = 0, rate = 0; int i = 0; - char strbuf[16], strbuf1[20]; + char strbuf[24], strbuf1[20]; char *nameptr = name; /* We don't know the hardware specs yet and there is no ivideo */ From f890c8513f45e06c96ac225db3cdfa34e3be5f45 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 23 Sep 2024 16:40:47 -0400 Subject: [PATCH 562/655] bcachefs: Mark inode errors as autofix Most or all errors will be autofix in the future, we're currently just doing the ones that we know are well tested. Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-errors_format.h | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index f0c14702f9e6..4e4e132d6d23 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -213,19 +213,19 @@ enum bch_fsck_flags { x(inode_checksum_type_invalid, 199, 0) \ x(inode_compression_type_invalid, 200, 0) \ x(inode_subvol_root_but_not_dir, 201, 0) \ - x(inode_i_size_dirty_but_clean, 202, 0) \ - x(inode_i_sectors_dirty_but_clean, 203, 0) \ - x(inode_i_sectors_wrong, 204, 0) \ - x(inode_dir_wrong_nlink, 205, 0) \ - x(inode_dir_multiple_links, 206, 0) \ - x(inode_multiple_links_but_nlink_0, 207, 0) \ - x(inode_wrong_backpointer, 208, 0) \ - x(inode_wrong_nlink, 209, 0) \ - x(inode_unreachable, 210, 0) \ - x(deleted_inode_but_clean, 211, 0) \ - x(deleted_inode_missing, 212, 0) \ - x(deleted_inode_is_dir, 213, 0) \ - x(deleted_inode_not_unlinked, 214, 0) \ + x(inode_i_size_dirty_but_clean, 202, FSCK_AUTOFIX) \ + x(inode_i_sectors_dirty_but_clean, 203, FSCK_AUTOFIX) \ + x(inode_i_sectors_wrong, 204, FSCK_AUTOFIX) \ + x(inode_dir_wrong_nlink, 205, FSCK_AUTOFIX) \ + x(inode_dir_multiple_links, 206, FSCK_AUTOFIX) \ + x(inode_multiple_links_but_nlink_0, 207, FSCK_AUTOFIX) \ + x(inode_wrong_backpointer, 208, FSCK_AUTOFIX) \ + x(inode_wrong_nlink, 209, FSCK_AUTOFIX) \ + x(inode_unreachable, 210, FSCK_AUTOFIX) \ + x(deleted_inode_but_clean, 211, FSCK_AUTOFIX) \ + x(deleted_inode_missing, 212, FSCK_AUTOFIX) \ + x(deleted_inode_is_dir, 213, FSCK_AUTOFIX) \ + x(deleted_inode_not_unlinked, 214, FSCK_AUTOFIX) \ x(extent_overlapping, 215, 0) \ x(key_in_missing_inode, 216, 0) \ x(key_in_wrong_inode_type, 217, 0) \ @@ -255,7 +255,7 @@ enum bch_fsck_flags { x(dir_loop, 241, 0) \ x(hash_table_key_duplicate, 242, 0) \ x(hash_table_key_wrong_offset, 243, 0) \ - x(unlinked_inode_not_on_deleted_list, 244, 0) \ + x(unlinked_inode_not_on_deleted_list, 244, FSCK_AUTOFIX) \ x(reflink_p_front_pad_bad, 245, 0) \ x(journal_entry_dup_same_device, 246, 0) \ x(inode_bi_subvol_missing, 247, 0) \ @@ -282,8 +282,8 @@ enum bch_fsck_flags { x(btree_ptr_v2_written_0, 268, 0) \ x(subvol_snapshot_bad, 269, 0) \ x(subvol_inode_bad, 270, 0) \ - x(alloc_key_stripe_sectors_wrong, 271, 0) \ - x(accounting_mismatch, 272, 0) \ + x(alloc_key_stripe_sectors_wrong, 271, FSCK_AUTOFIX) \ + x(accounting_mismatch, 272, FSCK_AUTOFIX) \ x(accounting_replicas_not_marked, 273, 0) \ x(invalid_btree_id, 274, 0) \ x(alloc_key_io_time_bad, 275, 0) \ From 4a8f8fafbd6ba6f3433c986b00195e0a8dee96bf Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 23 Sep 2024 17:30:59 -0400 Subject: [PATCH 563/655] bcachefs: Add extra padding in bkey_make_mut_noupdate() This fixes a kasan splat in propagate_key_to_snapshot_leaves() - varint_decode_fast() does reads (that it never uses) up to 7 bytes past the end of the integer. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_update.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index 60393e98084d..6a454f2fa005 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -220,7 +220,8 @@ static inline struct bkey_i *__bch2_bkey_make_mut_noupdate(struct btree_trans *t if (type && k.k->type != type) return ERR_PTR(-ENOENT); - mut = bch2_trans_kmalloc_nomemzero(trans, bytes); + /* extra padding for varint_decode_fast... */ + mut = bch2_trans_kmalloc_nomemzero(trans, bytes + 8); if (!IS_ERR(mut)) { bkey_reassemble(mut, k); From 51b7cc7c0f964fed976399a3ab876ae4a308fb1b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 23 Sep 2024 17:33:02 -0400 Subject: [PATCH 564/655] bcachefs: Improve bch2_is_inode_open() warning message Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 9b3470a97546..89129548aebb 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -963,7 +963,7 @@ static int check_inode_dirent_inode(struct btree_trans *trans, struct bkey_s_c i return ret; } -static bool bch2_inode_open(struct bch_fs *c, struct bpos p) +static bool bch2_inode_is_open(struct bch_fs *c, struct bpos p) { subvol_inum inum = { .subvol = snapshot_t(c, p.snapshot)->subvol, @@ -972,7 +972,7 @@ static bool bch2_inode_open(struct bch_fs *c, struct bpos p) /* snapshot tree corruption, can't safely delete */ if (!inum.subvol) { - bch_err_ratelimited(c, "%s(): snapshot %u has no subvol", __func__, p.snapshot); + bch_warn_ratelimited(c, "%s(): snapshot %u has no subvol, unlinked but can't safely delete", __func__, p.snapshot); return true; } @@ -1057,7 +1057,7 @@ static int check_inode(struct btree_trans *trans, } if (u.bi_flags & BCH_INODE_unlinked && - !bch2_inode_open(c, k.k->p) && + !bch2_inode_is_open(c, k.k->p) && (!c->sb.clean || fsck_err(trans, inode_unlinked_but_clean, "filesystem marked clean, but inode %llu unlinked", From 0696a18a8cc3f0941efe64008a997dc4701f9587 Mon Sep 17 00:00:00 2001 From: Piotr Zalewski Date: Sun, 22 Sep 2024 15:18:01 +0000 Subject: [PATCH 565/655] bcachefs: memset bounce buffer portion to 0 after key_sort_fix_overlapping Zero-initialize part of allocated bounce buffer which wasn't touched by subsequent bch2_key_sort_fix_overlapping to mitigate later uinit-value use KMSAN bug[1]. After applying the patch reproducer still triggers stack overflow[2] but it seems unrelated to the uninit-value use warning. After further investigation it was found that stack overflow occurs because KMSAN adds too many function calls[3]. Backtrace of where the stack magic number gets smashed was added as a reply to syzkaller thread[3]. It was confirmed that task's stack magic number gets smashed after the code path where KSMAN detects uninit-value use is executed, so it can be assumed that it doesn't contribute in any way to uninit-value use detection. [1] https://syzkaller.appspot.com/bug?extid=6f655a60d3244d0c6718 [2] https://lore.kernel.org/lkml/66e57e46.050a0220.115905.0002.GAE@google.com [3] https://lore.kernel.org/all/rVaWgPULej8K7HqMPNIu8kVNyXNjjCiTB-QBtItLFBmk0alH6fV2tk4joVPk97Evnuv4ZRDd8HB5uDCkiFG6u81xKdzDj-KrtIMJSlF6Kt8=@proton.me Reported-by: syzbot+6f655a60d3244d0c6718@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=6f655a60d3244d0c6718 Fixes: ec4edd7b9d20 ("bcachefs: Prep work for variable size btree node buffers") Suggested-by: Kent Overstreet Signed-off-by: Piotr Zalewski Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_io.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index cb48a9477514..2a0420605bd7 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1195,6 +1195,10 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, set_btree_bset(b, b->set, &b->data->keys); b->nr = bch2_key_sort_fix_overlapping(c, &sorted->keys, iter); + memset((uint8_t *)(sorted + 1) + b->nr.live_u64s * sizeof(u64), 0, + btree_buf_bytes(b) - + sizeof(struct btree_node) - + b->nr.live_u64s * sizeof(u64)); u64s = le16_to_cpu(sorted->keys.u64s); *sorted = *b->data; From 18c520f408fa8f4b7379a108b1676052e82677aa Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 23 Sep 2024 18:41:46 -0400 Subject: [PATCH 566/655] bcachefs: Fix error path in check_dirent_inode_dirent() fsck_err() jumps to the fsck_err label when bailing out; need to make sure bp_iter was initialized... Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 89129548aebb..3508f8b5f06e 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -1758,6 +1758,7 @@ static int check_dirent_inode_dirent(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct printbuf buf = PRINTBUF; + struct btree_iter bp_iter = { NULL }; int ret = 0; if (inode_points_to_dirent(target, d)) @@ -1770,7 +1771,7 @@ static int check_dirent_inode_dirent(struct btree_trans *trans, prt_printf(&buf, "\n "), bch2_inode_unpacked_to_text(&buf, target), buf.buf))) - goto out_noiter; + goto err; if (!target->bi_dir && !target->bi_dir_offset) { @@ -1779,7 +1780,6 @@ static int check_dirent_inode_dirent(struct btree_trans *trans, return __bch2_fsck_write_inode(trans, target, target_snapshot); } - struct btree_iter bp_iter = { NULL }; struct bkey_s_c_dirent bp_dirent = dirent_get_by_pos(trans, &bp_iter, SPOS(target->bi_dir, target->bi_dir_offset, target_snapshot)); ret = bkey_err(bp_dirent); @@ -1840,7 +1840,6 @@ static int check_dirent_inode_dirent(struct btree_trans *trans, err: fsck_err: bch2_trans_iter_exit(trans, &bp_iter); -out_noiter: printbuf_exit(&buf); bch_err_fn(c, ret); return ret; From c6040447c56496f4929db2d73ee445d898dd8a98 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 23 Sep 2024 18:42:39 -0400 Subject: [PATCH 567/655] bcachefs: Fix srcu warning in check_topology check_topology doesn't need the srcu lock and doesn't use normal btree transactions - we can just drop the srcu lock. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_gc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index b5e0692f03c6..40b08fef3c39 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -513,6 +513,8 @@ int bch2_check_topology(struct bch_fs *c) struct bpos pulled_from_scan = POS_MIN; int ret = 0; + bch2_trans_srcu_unlock(trans); + for (unsigned i = 0; i < btree_id_nr_alive(c) && !ret; i++) { struct btree_root *r = bch2_btree_id_root(c, i); bool reconstructed_root = false; From 40d40c6bea19ff1e40fb3d33b35b354a5b35025f Mon Sep 17 00:00:00 2001 From: Diogo Jahchan Koike Date: Mon, 23 Sep 2024 19:22:14 -0300 Subject: [PATCH 568/655] bcachefs: assign return error when iterating through layout syzbot reported a null ptr deref in __copy_user [0] In __bch2_read_super, when a corrupt backup superblock matches the default opts offset, no error is assigned to ret and the freed superblock gets through, possibly being assigned as the best sb in bch2_fs_open and being later dereferenced, causing a fault. Assign EINVALID to ret when iterating through layout. [0]: https://syzkaller.appspot.com/bug?extid=18a5c5e8a9c856944876 Reported-by: syzbot+18a5c5e8a9c856944876@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=18a5c5e8a9c856944876 Signed-off-by: Diogo Jahchan Koike Signed-off-by: Kent Overstreet --- fs/bcachefs/super-io.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index d86d5dae54c9..b2e914841b5f 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -799,8 +799,10 @@ static int __bch2_read_super(const char *path, struct bch_opts *opts, i < layout.sb_offset + layout.nr_superblocks; i++) { offset = le64_to_cpu(*i); - if (offset == opt_get(*opts, sb)) + if (offset == opt_get(*opts, sb)) { + ret = -BCH_ERR_invalid; continue; + } ret = read_one_super(sb, offset, &err); if (!ret) From 2a1df873463a28fe5a053d6245290f9a907a5a17 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 23 Sep 2024 22:06:04 -0400 Subject: [PATCH 569/655] bcachefs: Add snapshot to bch_inode_unpacked this allows for various cleanups in fsck Signed-off-by: Kent Overstreet --- fs/bcachefs/inode.c | 10 ++++++---- fs/bcachefs/inode.h | 1 + 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 6ac0ff7e074b..1116db239708 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -320,9 +320,11 @@ static noinline int bch2_inode_unpack_slowpath(struct bkey_s_c k, int bch2_inode_unpack(struct bkey_s_c k, struct bch_inode_unpacked *unpacked) { - if (likely(k.k->type == KEY_TYPE_inode_v3)) - return bch2_inode_unpack_v3(k, unpacked); - return bch2_inode_unpack_slowpath(k, unpacked); + unpacked->bi_snapshot = k.k->p.snapshot; + + return likely(k.k->type == KEY_TYPE_inode_v3) + ? bch2_inode_unpack_v3(k, unpacked) + : bch2_inode_unpack_slowpath(k, unpacked); } int bch2_inode_peek_nowarn(struct btree_trans *trans, @@ -557,7 +559,7 @@ static void __bch2_inode_unpacked_to_text(struct printbuf *out, void bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked *inode) { - prt_printf(out, "inum: %llu ", inode->bi_inum); + prt_printf(out, "inum: %llu:%u ", inode->bi_inum, inode->bi_snapshot); __bch2_inode_unpacked_to_text(out, inode); } diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index f1fcb4c58039..695abd707cb6 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -69,6 +69,7 @@ typedef u64 u96; struct bch_inode_unpacked { u64 bi_inum; + u32 bi_snapshot; u64 bi_journal_seq; __le64 bi_hash_seed; u64 bi_size; From 951dd86e7c59a6490d8b6d056d8afd5f0cd49293 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 23 Sep 2024 22:05:14 -0400 Subject: [PATCH 570/655] bcachefs: Fix iterator leak in check_subvol() A couple small error handling fixes Signed-off-by: Kent Overstreet --- fs/bcachefs/subvolume.c | 54 ++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index dbe834cb349f..6845dde1b339 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -92,34 +92,32 @@ static int check_subvol(struct btree_trans *trans, } struct bch_inode_unpacked inode; - struct btree_iter inode_iter = {}; - ret = bch2_inode_peek_nowarn(trans, &inode_iter, &inode, + ret = bch2_inode_find_by_inum_nowarn_trans(trans, (subvol_inum) { k.k->p.offset, le64_to_cpu(subvol.v->inode) }, - 0); - bch2_trans_iter_exit(trans, &inode_iter); - - if (ret && !bch2_err_matches(ret, ENOENT)) - return ret; - - if (fsck_err_on(ret, - trans, subvol_to_missing_root, - "subvolume %llu points to missing subvolume root %llu:%u", - k.k->p.offset, le64_to_cpu(subvol.v->inode), - le32_to_cpu(subvol.v->snapshot))) { - ret = bch2_subvolume_delete(trans, iter->pos.offset); - bch_err_msg(c, ret, "deleting subvolume %llu", iter->pos.offset); - return ret ?: -BCH_ERR_transaction_restart_nested; - } - - if (fsck_err_on(inode.bi_subvol != subvol.k->p.offset, - trans, subvol_root_wrong_bi_subvol, - "subvol root %llu:%u has wrong bi_subvol field: got %u, should be %llu", - inode.bi_inum, inode_iter.k.p.snapshot, - inode.bi_subvol, subvol.k->p.offset)) { - inode.bi_subvol = subvol.k->p.offset; - ret = __bch2_fsck_write_inode(trans, &inode, le32_to_cpu(subvol.v->snapshot)); - if (ret) + &inode); + if (!ret) { + if (fsck_err_on(inode.bi_subvol != subvol.k->p.offset, + trans, subvol_root_wrong_bi_subvol, + "subvol root %llu:%u has wrong bi_subvol field: got %u, should be %llu", + inode.bi_inum, inode.bi_snapshot, + inode.bi_subvol, subvol.k->p.offset)) { + inode.bi_subvol = subvol.k->p.offset; + ret = __bch2_fsck_write_inode(trans, &inode, le32_to_cpu(subvol.v->snapshot)); + if (ret) + goto err; + } + } else if (bch2_err_matches(ret, ENOENT)) { + if (fsck_err(trans, subvol_to_missing_root, + "subvolume %llu points to missing subvolume root %llu:%u", + k.k->p.offset, le64_to_cpu(subvol.v->inode), + le32_to_cpu(subvol.v->snapshot))) { + ret = bch2_subvolume_delete(trans, iter->pos.offset); + bch_err_msg(c, ret, "deleting subvolume %llu", iter->pos.offset); + ret = ret ?: -BCH_ERR_transaction_restart_nested; goto err; + } + } else { + goto err; } if (!BCH_SUBVOLUME_SNAP(subvol.v)) { @@ -137,7 +135,7 @@ static int check_subvol(struct btree_trans *trans, "%s: snapshot tree %u not found", __func__, snapshot_tree); if (ret) - return ret; + goto err; if (fsck_err_on(le32_to_cpu(st.master_subvol) != subvol.k->p.offset, trans, subvol_not_master_and_not_snapshot, @@ -147,7 +145,7 @@ static int check_subvol(struct btree_trans *trans, bch2_bkey_make_mut_typed(trans, iter, &subvol.s_c, 0, subvolume); ret = PTR_ERR_OR_ZERO(s); if (ret) - return ret; + goto err; SET_BCH_SUBVOLUME_SNAP(&s->v, true); } From 3125c95ea69141c4cbbde854674c90531034ec47 Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Tue, 24 Sep 2024 09:42:24 +0800 Subject: [PATCH 571/655] bcachefs: fast exit when darray_make_room failed In downgrade_table_extra, the return value is needed. When it return failed, we should exit immediately. Fixes: 7773df19c35f ("bcachefs: metadata version bucket_stripe_sectors") Signed-off-by: Hongbo Li Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-downgrade.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index c7e4cdd3f6a5..6f0493f79959 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -353,7 +353,9 @@ int bch2_sb_downgrade_update(struct bch_fs *c) for (unsigned i = 0; i < src->nr_errors; i++) dst->errors[i] = cpu_to_le16(src->errors[i]); - downgrade_table_extra(c, &table); + ret = downgrade_table_extra(c, &table); + if (ret) + goto out; if (!dst->recovery_passes[0] && !dst->recovery_passes[1] && From dc5bfdf8eaed76cf527c9477952c535f75e0e499 Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Tue, 24 Sep 2024 09:41:46 +0800 Subject: [PATCH 572/655] bcachefs: fix the memory leak in exception case The pointer clean points the memory allocated by kmemdup, when the return value of bch2_sb_clean_validate_late is not zero. The memory pointed by clean is leaked. So we should free it in this case. Fixes: a37ad1a3aba9 ("bcachefs: sb-clean.c") Signed-off-by: Hongbo Li Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-clean.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/bcachefs/sb-clean.c b/fs/bcachefs/sb-clean.c index 025848a9c4c0..005275281804 100644 --- a/fs/bcachefs/sb-clean.c +++ b/fs/bcachefs/sb-clean.c @@ -167,6 +167,7 @@ struct bch_sb_field_clean *bch2_read_superblock_clean(struct bch_fs *c) ret = bch2_sb_clean_validate_late(c, clean, READ); if (ret) { + kfree(clean); mutex_unlock(&c->sb_lock); return ERR_PTR(ret); } From b29c30ab48e0395a22ecf0b94443d16a8f493fb6 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 24 Sep 2024 19:31:22 -0400 Subject: [PATCH 573/655] bcachefs: Fix incorrect IS_ERR_OR_NULL usage Returning a positive integer instead of an error code causes error paths to become very confused. Closes: syzbot+c0360e8367d6d8d04a66@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_node_scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c index b28c649c6838..1e694fedc5da 100644 --- a/fs/bcachefs/btree_node_scan.c +++ b/fs/bcachefs/btree_node_scan.c @@ -275,7 +275,7 @@ static int read_btree_nodes(struct find_btree_nodes *f) w->ca = ca; t = kthread_run(read_btree_nodes_worker, w, "read_btree_nodes/%s", ca->name); - ret = IS_ERR_OR_NULL(t); + ret = PTR_ERR_OR_ZERO(t); if (ret) { percpu_ref_put(&ca->io_ref); closure_put(&cl); From 22a507d68eb8dc9d05b7e91e9a7c9b2566d48c81 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 23 Sep 2024 22:29:05 -0400 Subject: [PATCH 574/655] bcachefs: kill inode_walker_entry.seen_this_pos dead code Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 3508f8b5f06e..0b5a009a0c2e 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -626,7 +626,6 @@ static int ref_visible2(struct bch_fs *c, struct inode_walker_entry { struct bch_inode_unpacked inode; u32 snapshot; - bool seen_this_pos; u64 count; }; @@ -740,9 +739,6 @@ static struct inode_walker_entry *walk_inode(struct btree_trans *trans, int ret = get_inodes_all_snapshots(trans, w, k.k->p.inode); if (ret) return ERR_PTR(ret); - } else if (bkey_cmp(w->last_pos, k.k->p)) { - darray_for_each(w->inodes, i) - i->seen_this_pos = false; } w->last_pos = k.k->p; @@ -1634,8 +1630,6 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, if (bkey_extent_is_allocation(k.k)) i->count += k.k->size; } - - i->seen_this_pos = true; } if (k.k->type != KEY_TYPE_whiteout) { From 3672bda8f5edd8ed23c745965500ceb53286d48d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 23 Sep 2024 22:32:58 -0400 Subject: [PATCH 575/655] bcachefs: fix transaction restart handling in check_extents(), check_dirents() Dealing with outside state within a btree transaction is always tricky. check_extents() and check_dirents() have to accumulate counters for i_sectors and i_nlink (for subdirectories). There were two bugs: - transaction commit may return a restart; therefore we have to commit before accumulating to those counters - get_inode_all_snapshots() may return a transaction restart, before updating w->last_pos; then, on the restart, check_i_sectors()/check_subdir_count() would see inodes that were not for w->last_pos Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 94 +++++++++++++++++++++++++++------------------- 1 file changed, 55 insertions(+), 39 deletions(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 0b5a009a0c2e..8fe67abae36e 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -631,6 +631,7 @@ struct inode_walker_entry { struct inode_walker { bool first_this_inode; + bool have_inodes; bool recalculate_sums; struct bpos last_pos; @@ -668,6 +669,12 @@ static int get_inodes_all_snapshots(struct btree_trans *trans, struct bkey_s_c k; int ret; + /* + * We no longer have inodes for w->last_pos; clear this to avoid + * screwing up check_i_sectors/check_subdir_count if we take a + * transaction restart here: + */ + w->have_inodes = false; w->recalculate_sums = false; w->inodes.nr = 0; @@ -685,6 +692,7 @@ static int get_inodes_all_snapshots(struct btree_trans *trans, return ret; w->first_this_inode = true; + w->have_inodes = true; return 0; } @@ -1551,10 +1559,10 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k, struct inode_walker *inode, struct snapshots_seen *s, - struct extent_ends *extent_ends) + struct extent_ends *extent_ends, + struct disk_reservation *res) { struct bch_fs *c = trans->c; - struct inode_walker_entry *i; struct printbuf buf = PRINTBUF; int ret = 0; @@ -1564,7 +1572,7 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, goto out; } - if (inode->last_pos.inode != k.k->p.inode) { + if (inode->last_pos.inode != k.k->p.inode && inode->have_inodes) { ret = check_i_sectors(trans, inode); if (ret) goto err; @@ -1574,12 +1582,12 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, if (ret) goto err; - i = walk_inode(trans, inode, k); - ret = PTR_ERR_OR_ZERO(i); + struct inode_walker_entry *extent_i = walk_inode(trans, inode, k); + ret = PTR_ERR_OR_ZERO(extent_i); if (ret) goto err; - ret = check_key_has_inode(trans, iter, inode, i, k); + ret = check_key_has_inode(trans, iter, inode, extent_i, k); if (ret) goto err; @@ -1588,24 +1596,19 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, &inode->recalculate_sums); if (ret) goto err; - } - /* - * Check inodes in reverse order, from oldest snapshots to newest, - * starting from the inode that matches this extent's snapshot. If we - * didn't have one, iterate over all inodes: - */ - if (!i) - i = &darray_last(inode->inodes); + /* + * Check inodes in reverse order, from oldest snapshots to + * newest, starting from the inode that matches this extent's + * snapshot. If we didn't have one, iterate over all inodes: + */ + for (struct inode_walker_entry *i = extent_i ?: &darray_last(inode->inodes); + inode->inodes.data && i >= inode->inodes.data; + --i) { + if (i->snapshot > k.k->p.snapshot || + !key_visible_in_snapshot(c, s, i->snapshot, k.k->p.snapshot)) + continue; - for (; - inode->inodes.data && i >= inode->inodes.data; - --i) { - if (i->snapshot > k.k->p.snapshot || - !key_visible_in_snapshot(c, s, i->snapshot, k.k->p.snapshot)) - continue; - - if (k.k->type != KEY_TYPE_whiteout) { if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_size_dirty) && k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 && !bkey_extent_is_reservation(k), @@ -1625,10 +1628,24 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, goto err; iter->k.type = KEY_TYPE_whiteout; + break; } + } + } - if (bkey_extent_is_allocation(k.k)) - i->count += k.k->size; + ret = bch2_trans_commit(trans, res, NULL, BCH_TRANS_COMMIT_no_enospc); + if (ret) + goto err; + + if (bkey_extent_is_allocation(k.k)) { + for (struct inode_walker_entry *i = extent_i ?: &darray_last(inode->inodes); + inode->inodes.data && i >= inode->inodes.data; + --i) { + if (i->snapshot > k.k->p.snapshot || + !key_visible_in_snapshot(c, s, i->snapshot, k.k->p.snapshot)) + continue; + + i->count += k.k->size; } } @@ -1660,13 +1677,11 @@ int bch2_check_extents(struct bch_fs *c) extent_ends_init(&extent_ends); int ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, BTREE_ID_extents, + for_each_btree_key(trans, iter, BTREE_ID_extents, POS(BCACHEFS_ROOT_INO, 0), - BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, - &res, NULL, - BCH_TRANS_COMMIT_no_enospc, ({ + BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, ({ bch2_disk_reservation_put(c, &res); - check_extent(trans, &iter, k, &w, &s, &extent_ends) ?: + check_extent(trans, &iter, k, &w, &s, &extent_ends, &res) ?: check_extent_overbig(trans, &iter, k); })) ?: check_i_sectors_notnested(trans, &w)); @@ -2068,7 +2083,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, if (k.k->type == KEY_TYPE_whiteout) goto out; - if (dir->last_pos.inode != k.k->p.inode) { + if (dir->last_pos.inode != k.k->p.inode && dir->have_inodes) { ret = check_subdir_count(trans, dir); if (ret) goto err; @@ -2130,11 +2145,15 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, if (ret) goto err; } - - if (d.v->d_type == DT_DIR) - for_each_visible_inode(c, s, dir, d.k->p.snapshot, i) - i->count++; } + + ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); + if (ret) + goto err; + + if (d.v->d_type == DT_DIR) + for_each_visible_inode(c, s, dir, d.k->p.snapshot, i) + i->count++; out: err: fsck_err: @@ -2157,12 +2176,9 @@ int bch2_check_dirents(struct bch_fs *c) snapshots_seen_init(&s); int ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, BTREE_ID_dirents, + for_each_btree_key(trans, iter, BTREE_ID_dirents, POS(BCACHEFS_ROOT_INO, 0), - BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, - k, - NULL, NULL, - BCH_TRANS_COMMIT_no_enospc, + BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s)) ?: check_subdir_count_notnested(trans, &dir)); From 1e0272ef4774eed8314e8d10a8856c979deeaf35 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 24 Sep 2024 22:53:56 -0400 Subject: [PATCH 576/655] bcachefs: bch_accounting_mode Minor refactoring - replace multiple bool arguments with an enum; prep work for fixing a bug in accounting read. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_trans_commit.c | 4 ++-- fs/bcachefs/disk_accounting.c | 10 +++++++--- fs/bcachefs/disk_accounting.h | 25 +++++++++++++++++-------- 3 files changed, 26 insertions(+), 13 deletions(-) diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 91884da4e30a..4a131e3b6225 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -712,7 +712,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, a->k.version = journal_pos_to_bversion(&trans->journal_res, (u64 *) entry - (u64 *) trans->journal_entries); BUG_ON(bversion_zero(a->k.version)); - ret = bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), false, false); + ret = bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), BCH_ACCOUNTING_normal); if (ret) goto revert_fs_usage; } @@ -798,7 +798,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, struct bkey_s_accounting a = bkey_i_to_s_accounting(entry2->start); bch2_accounting_neg(a); - bch2_accounting_mem_mod_locked(trans, a.c, false, false); + bch2_accounting_mem_mod_locked(trans, a.c, BCH_ACCOUNTING_normal); bch2_accounting_neg(a); } percpu_up_read(&c->mark_lock); diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index e972e2bca546..e9a7f1dab450 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -319,7 +319,8 @@ static int __bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accoun return -BCH_ERR_ENOMEM_disk_accounting; } -int bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accounting a, bool gc) +int bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accounting a, + enum bch_accounting_mode mode) { struct bch_replicas_padded r; @@ -566,7 +567,9 @@ int bch2_gc_accounting_done(struct bch_fs *c) struct { __BKEY_PADDED(k, BCH_ACCOUNTING_MAX_COUNTERS); } k_i; accounting_key_init(&k_i.k, &acc_k, src_v, nr); - bch2_accounting_mem_mod_locked(trans, bkey_i_to_s_c_accounting(&k_i.k), false, false); + bch2_accounting_mem_mod_locked(trans, + bkey_i_to_s_c_accounting(&k_i.k), + BCH_ACCOUNTING_normal); preempt_disable(); struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage); @@ -595,7 +598,8 @@ static int accounting_read_key(struct btree_trans *trans, struct bkey_s_c k) return 0; percpu_down_read(&c->mark_lock); - int ret = bch2_accounting_mem_mod_locked(trans, bkey_s_c_to_accounting(k), false, true); + int ret = bch2_accounting_mem_mod_locked(trans, bkey_s_c_to_accounting(k), + BCH_ACCOUNTING_read); percpu_up_read(&c->mark_lock); if (bch2_accounting_key_is_zero(bkey_s_c_to_accounting(k)) && diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h index f29fd0dd9581..243bfb0975ea 100644 --- a/fs/bcachefs/disk_accounting.h +++ b/fs/bcachefs/disk_accounting.h @@ -103,23 +103,35 @@ static inline int accounting_pos_cmp(const void *_l, const void *_r) return bpos_cmp(*l, *r); } -int bch2_accounting_mem_insert(struct bch_fs *, struct bkey_s_c_accounting, bool); +enum bch_accounting_mode { + BCH_ACCOUNTING_normal, + BCH_ACCOUNTING_gc, + BCH_ACCOUNTING_read, +}; + +int bch2_accounting_mem_insert(struct bch_fs *, struct bkey_s_c_accounting, enum bch_accounting_mode); void bch2_accounting_mem_gc(struct bch_fs *); /* * Update in memory counters so they match the btree update we're doing; called * from transaction commit path */ -static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc, bool read) +static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, + struct bkey_s_c_accounting a, + enum bch_accounting_mode mode) { struct bch_fs *c = trans->c; + struct bch_accounting_mem *acc = &c->accounting; struct disk_accounting_pos acc_k; bpos_to_disk_accounting_pos(&acc_k, a.k->p); + bool gc = mode == BCH_ACCOUNTING_gc; + + EBUG_ON(gc && !acc->gc_running); if (acc_k.type == BCH_DISK_ACCOUNTING_inum) return 0; - if (!gc && !read) { + if (mode == BCH_ACCOUNTING_normal) { switch (acc_k.type) { case BCH_DISK_ACCOUNTING_persistent_reserved: trans->fs_usage_delta.reserved += acc_k.persistent_reserved.nr_replicas * a.v->d[0]; @@ -140,14 +152,11 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, stru } } - struct bch_accounting_mem *acc = &c->accounting; unsigned idx; - EBUG_ON(gc && !acc->gc_running); - while ((idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), accounting_pos_cmp, &a.k->p)) >= acc->k.nr) { - int ret = bch2_accounting_mem_insert(c, a, gc); + int ret = bch2_accounting_mem_insert(c, a, mode); if (ret) return ret; } @@ -164,7 +173,7 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, stru static inline int bch2_accounting_mem_add(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc) { percpu_down_read(&trans->c->mark_lock); - int ret = bch2_accounting_mem_mod_locked(trans, a, gc, false); + int ret = bch2_accounting_mem_mod_locked(trans, a, gc ? BCH_ACCOUNTING_gc : BCH_ACCOUNTING_normal); percpu_up_read(&trans->c->mark_lock); return ret; } From 9104fc1928704ee5708ec7f43ab8dfbdc66e2ce8 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 25 Sep 2024 16:46:06 -0400 Subject: [PATCH 577/655] bcachefs: Fix accounting read + device removal accounting read was checking if accounting replicas entries were marked in the superblock prior to applying accounting from the journal, which meant that a recently removed device could spuriously trigger a "not marked in superblocked" error (when journal entries zero out the offending counter). Signed-off-by: Kent Overstreet --- fs/bcachefs/disk_accounting.c | 47 +++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 18 deletions(-) diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index e9a7f1dab450..6e5e8b6f7182 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -324,7 +324,8 @@ int bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accounting a, { struct bch_replicas_padded r; - if (accounting_to_replicas(&r.e, a.k->p) && + if (mode != BCH_ACCOUNTING_read && + accounting_to_replicas(&r.e, a.k->p) && !bch2_replicas_marked_locked(c, &r.e)) return -BCH_ERR_btree_insert_need_mark_replicas; @@ -592,7 +593,6 @@ int bch2_gc_accounting_done(struct bch_fs *c) static int accounting_read_key(struct btree_trans *trans, struct bkey_s_c k) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; if (k.k->type != KEY_TYPE_accounting) return 0; @@ -601,22 +601,6 @@ static int accounting_read_key(struct btree_trans *trans, struct bkey_s_c k) int ret = bch2_accounting_mem_mod_locked(trans, bkey_s_c_to_accounting(k), BCH_ACCOUNTING_read); percpu_up_read(&c->mark_lock); - - if (bch2_accounting_key_is_zero(bkey_s_c_to_accounting(k)) && - ret == -BCH_ERR_btree_insert_need_mark_replicas) - ret = 0; - - struct disk_accounting_pos acc; - bpos_to_disk_accounting_pos(&acc, k.k->p); - - if (fsck_err_on(ret == -BCH_ERR_btree_insert_need_mark_replicas, - trans, accounting_replicas_not_marked, - "accounting not marked in superblock replicas\n %s", - (bch2_accounting_key_to_text(&buf, &acc), - buf.buf))) - ret = bch2_accounting_update_sb_one(c, k.k->p); -fsck_err: - printbuf_exit(&buf); return ret; } @@ -628,6 +612,7 @@ int bch2_accounting_read(struct bch_fs *c) { struct bch_accounting_mem *acc = &c->accounting; struct btree_trans *trans = bch2_trans_get(c); + struct printbuf buf = PRINTBUF; int ret = for_each_btree_key(trans, iter, BTREE_ID_accounting, POS_MIN, @@ -678,6 +663,30 @@ int bch2_accounting_read(struct bch_fs *c) keys->gap = keys->nr = dst - keys->data; percpu_down_read(&c->mark_lock); + for (unsigned i = 0; i < acc->k.nr; i++) { + u64 v[BCH_ACCOUNTING_MAX_COUNTERS]; + bch2_accounting_mem_read_counters(acc, i, v, ARRAY_SIZE(v), false); + + if (bch2_is_zero(v, sizeof(v[0]) * acc->k.data[i].nr_counters)) + continue; + + struct bch_replicas_padded r; + + if (!accounting_to_replicas(&r.e, acc->k.data[i].pos)) + continue; + + struct disk_accounting_pos k; + bpos_to_disk_accounting_pos(&k, acc->k.data[i].pos); + + if (fsck_err_on(!bch2_replicas_marked_locked(c, &r.e), + trans, accounting_replicas_not_marked, + "accounting not marked in superblock replicas\n %s", + (printbuf_reset(&buf), + bch2_accounting_key_to_text(&buf, &k), + buf.buf))) + ret = bch2_accounting_update_sb_one(c, acc->k.data[i].pos); + } + preempt_disable(); struct bch_fs_usage_base *usage = this_cpu_ptr(c->usage); @@ -713,8 +722,10 @@ int bch2_accounting_read(struct bch_fs *c) } } preempt_enable(); +fsck_err: percpu_up_read(&c->mark_lock); err: + printbuf_exit(&buf); bch2_trans_put(trans); bch_err_fn(c, ret); return ret; From 49fd90b2cc332b8607a616d99d4bb792f18208b9 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 25 Sep 2024 18:17:31 -0400 Subject: [PATCH 578/655] bcachefs: Fix unlocked access to c->disk_sb.sb in bch2_replicas_entry_validate() Signed-off-by: Kent Overstreet --- fs/bcachefs/journal_io.c | 2 +- fs/bcachefs/replicas.c | 18 ++++++++++++++---- fs/bcachefs/replicas.h | 2 +- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 30460bce04be..954f6a96e0f4 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -605,7 +605,7 @@ static int journal_entry_data_usage_validate(struct bch_fs *c, goto out; } - if (journal_entry_err_on(bch2_replicas_entry_validate(&u->r, c->disk_sb.sb, &err), + if (journal_entry_err_on(bch2_replicas_entry_validate(&u->r, c, &err), c, version, jset, entry, journal_entry_data_usage_bad_size, "invalid journal entry usage: %s", err.buf)) { diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c index 998c0bd06802..bcb3276747e0 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -66,9 +66,9 @@ void bch2_replicas_entry_to_text(struct printbuf *out, prt_printf(out, "]"); } -int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *r, - struct bch_sb *sb, - struct printbuf *err) +static int bch2_replicas_entry_validate_locked(struct bch_replicas_entry_v1 *r, + struct bch_sb *sb, + struct printbuf *err) { if (!r->nr_devs) { prt_printf(err, "no devices in entry "); @@ -94,6 +94,16 @@ int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *r, return -BCH_ERR_invalid_replicas_entry; } +int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *r, + struct bch_fs *c, + struct printbuf *err) +{ + mutex_lock(&c->sb_lock); + int ret = bch2_replicas_entry_validate_locked(r, c->disk_sb.sb, err); + mutex_unlock(&c->sb_lock); + return ret; +} + void bch2_cpu_replicas_to_text(struct printbuf *out, struct bch_replicas_cpu *r) { @@ -676,7 +686,7 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r, struct bch_replicas_entry_v1 *e = cpu_replicas_entry(cpu_r, i); - int ret = bch2_replicas_entry_validate(e, sb, err); + int ret = bch2_replicas_entry_validate_locked(e, sb, err); if (ret) return ret; diff --git a/fs/bcachefs/replicas.h b/fs/bcachefs/replicas.h index 622482559c3d..5aba2c1ce133 100644 --- a/fs/bcachefs/replicas.h +++ b/fs/bcachefs/replicas.h @@ -10,7 +10,7 @@ void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *); void bch2_replicas_entry_to_text(struct printbuf *, struct bch_replicas_entry_v1 *); int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *, - struct bch_sb *, struct printbuf *); + struct bch_fs *, struct printbuf *); void bch2_cpu_replicas_to_text(struct printbuf *, struct bch_replicas_cpu *); static inline struct bch_replicas_entry_v1 * From 431312b59cf54f9cc99352d0c3d80ed30e9b7df5 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 25 Sep 2024 18:17:52 -0400 Subject: [PATCH 579/655] bcachefs: Fix disk accounting attempting to mark invalid replicas entry This fixes the following bug, where a disk accounting key has an invalid replicas entry, and we attempt to add it to the superblock: bcachefs (3c0860e8-07ca-4276-8954-11c1774be868): starting version 1.12: rebalance_work_acct_fix opts=metadata_replicas=2,data_replicas=2,foreground_target=ssd,background_target=hdd,nopromote_whole_extents,verbose,fsck,fix_errors=yes bcachefs (3c0860e8-07ca-4276-8954-11c1774be868): recovering from clean shutdown, journal seq 15211644 bcachefs (3c0860e8-07ca-4276-8954-11c1774be868): accounting_read... accounting not marked in superblock replicas replicas cached: 1/1 [0], fixing bcachefs (3c0860e8-07ca-4276-8954-11c1774be868): sb invalid before write: Invalid superblock section replicas_v0: invalid device 0 in entry cached: 1/1 [0] replicas_v0 (size 88): user: 2 [3 5] user: 2 [1 4] cached: 1 [2] btree: 2 [1 2] user: 2 [2 5] cached: 1 [0] cached: 1 [4] journal: 2 [1 5] user: 2 [1 2] user: 2 [2 3] user: 2 [3 4] user: 2 [4 5] cached: 1 [1] cached: 1 [3] cached: 1 [5] journal: 2 [1 2] journal: 2 [2 5] btree: 2 [2 5] user: 2 [1 3] user: 2 [1 5] user: 2 [2 4] bcachefs (3c0860e8-07ca-4276-8954-11c1774be868): inconsistency detected - emergency read only at journal seq 15211644 accounting not marked in superblock replicas replicas user: 1/1 [3], fixing bcachefs (3c0860e8-07ca-4276-8954-11c1774be868): sb invalid before write: Invalid superblock section replicas_v0: invalid device 0 in entry cached: 1/1 [0] replicas_v0 (size 96): user: 2 [3 5] user: 2 [1 3] cached: 1 [2] btree: 2 [1 2] user: 2 [2 4] cached: 1 [0] cached: 1 [4] journal: 2 [1 5] user: 1 [3] user: 2 [1 5] user: 2 [3 4] user: 2 [4 5] cached: 1 [1] cached: 1 [3] cached: 1 [5] journal: 2 [1 2] journal: 2 [2 5] btree: 2 [2 5] user: 2 [1 2] user: 2 [1 4] user: 2 [2 3] user: 2 [2 5] accounting not marked in superblock replicas replicas user: 1/2 [3 7], fixing bcachefs (3c0860e8-07ca-4276-8954-11c1774be868): sb invalid before write: Invalid superblock section replicas_v0: invalid device 7 in entry user: 1/2 [3 7] replicas_v0 (size 96): user: 2 [3 7] user: 2 [1 3] cached: 1 [2] btree: 2 [1 2] user: 2 [2 4] cached: 1 [0] cached: 1 [4] journal: 2 [1 5] user: 1 [3] user: 2 [1 5] user: 2 [3 4] user: 2 [4 5] cached: 1 [1] cached: 1 [3] cached: 1 [5] journal: 2 [1 2] journal: 2 [2 5] btree: 2 [2 5] user: 2 [1 2] user: 2 [1 4] user: 2 [2 3] user: 2 [2 5] user: 2 [3 5] done bcachefs (3c0860e8-07ca-4276-8954-11c1774be868): alloc_read... done bcachefs (3c0860e8-07ca-4276-8954-11c1774be868): stripes_read... done bcachefs (3c0860e8-07ca-4276-8954-11c1774be868): snapshots_read... done Signed-off-by: Kent Overstreet --- fs/bcachefs/disk_accounting.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index 6e5e8b6f7182..669214127c16 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -671,10 +671,16 @@ int bch2_accounting_read(struct bch_fs *c) continue; struct bch_replicas_padded r; - if (!accounting_to_replicas(&r.e, acc->k.data[i].pos)) continue; + /* + * If the replicas entry is invalid it'll get cleaned up by + * check_allocations: + */ + if (bch2_replicas_entry_validate(&r.e, c, &buf)) + continue; + struct disk_accounting_pos k; bpos_to_disk_accounting_pos(&k, acc->k.data[i].pos); @@ -683,8 +689,17 @@ int bch2_accounting_read(struct bch_fs *c) "accounting not marked in superblock replicas\n %s", (printbuf_reset(&buf), bch2_accounting_key_to_text(&buf, &k), - buf.buf))) - ret = bch2_accounting_update_sb_one(c, acc->k.data[i].pos); + buf.buf))) { + /* + * We're not RW yet and still single threaded, dropping + * and retaking lock is ok: + */ + percpu_up_read(&c->mark_lock); + ret = bch2_mark_replicas(c, &r.e); + if (ret) + goto fsck_err; + percpu_down_read(&c->mark_lock); + } } preempt_disable(); From 7c980a43e936e32741a62bf5a047c5f5ad572ec8 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 26 Sep 2024 16:50:29 -0400 Subject: [PATCH 580/655] bcachefs: Move transaction commit path validation to as late as possible In order to check for accounting keys with version=0, we need to run validation after they've been assigned version numbers. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_trans_commit.c | 68 ++++++++++++++++---------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 4a131e3b6225..145d82aed4ec 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -735,6 +735,40 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, goto fatal_err; } + trans_for_each_update(trans, i) { + enum bch_validate_flags invalid_flags = 0; + + if (!(flags & BCH_TRANS_COMMIT_no_journal_res)) + invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit; + + ret = bch2_bkey_validate(c, bkey_i_to_s_c(i->k), + i->bkey_type, invalid_flags); + if (unlikely(ret)){ + bch2_trans_inconsistent(trans, "invalid bkey on insert from %s -> %ps\n", + trans->fn, (void *) i->ip_allocated); + goto fatal_err; + } + btree_insert_entry_checks(trans, i); + } + + for (struct jset_entry *i = trans->journal_entries; + i != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s); + i = vstruct_next(i)) { + enum bch_validate_flags invalid_flags = 0; + + if (!(flags & BCH_TRANS_COMMIT_no_journal_res)) + invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit; + + ret = bch2_journal_entry_validate(c, NULL, i, + bcachefs_metadata_version_current, + CPU_BIG_ENDIAN, invalid_flags); + if (unlikely(ret)) { + bch2_trans_inconsistent(trans, "invalid journal entry on insert from %s\n", + trans->fn); + goto fatal_err; + } + } + if (likely(!(flags & BCH_TRANS_COMMIT_no_journal_res))) { struct journal *j = &c->journal; struct jset_entry *entry; @@ -1019,40 +1053,6 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) if (ret) goto out_reset; - trans_for_each_update(trans, i) { - enum bch_validate_flags invalid_flags = 0; - - if (!(flags & BCH_TRANS_COMMIT_no_journal_res)) - invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit; - - ret = bch2_bkey_validate(c, bkey_i_to_s_c(i->k), - i->bkey_type, invalid_flags); - if (unlikely(ret)){ - bch2_trans_inconsistent(trans, "invalid bkey on insert from %s -> %ps\n", - trans->fn, (void *) i->ip_allocated); - return ret; - } - btree_insert_entry_checks(trans, i); - } - - for (struct jset_entry *i = trans->journal_entries; - i != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s); - i = vstruct_next(i)) { - enum bch_validate_flags invalid_flags = 0; - - if (!(flags & BCH_TRANS_COMMIT_no_journal_res)) - invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit; - - ret = bch2_journal_entry_validate(c, NULL, i, - bcachefs_metadata_version_current, - CPU_BIG_ENDIAN, invalid_flags); - if (unlikely(ret)) { - bch2_trans_inconsistent(trans, "invalid journal entry on insert from %s\n", - trans->fn); - return ret; - } - } - if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags))) { ret = do_bch2_trans_commit_to_journal_replay(trans); goto out_reset; From 5612daafb76420c6793dc48ce6d0c20f36cc7981 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 26 Sep 2024 16:51:19 -0400 Subject: [PATCH 581/655] bcachefs: Fix fsck warnings from bkey validation __bch2_fsck_err() warns if the current task has a btree_trans object and it wasn't passed in, because if it has to prompt for user input it has to be able to unlock it. But plumbing the btree_trans through bkey_validate(), as well as transaction restarts, is problematic - so instead make bkey fsck errors FSCK_AUTOFIX, which doesn't need to warn. Signed-off-by: Kent Overstreet --- fs/bcachefs/error.c | 14 +++++++++++++- fs/bcachefs/error.h | 2 +- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index 95afa7bf2020..3a16b535b6c3 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -239,7 +239,19 @@ int __bch2_fsck_err(struct bch_fs *c, if (!c) c = trans->c; - WARN_ON(!trans && bch2_current_has_btree_trans(c)); + /* + * Ugly: if there's a transaction in the current task it has to be + * passed in to unlock if we prompt for user input. + * + * But, plumbing a transaction and transaction restarts into + * bkey_validate() is problematic. + * + * So: + * - make all bkey errors AUTOFIX, they're simple anyways (we just + * delete the key) + * - and we don't need to warn if we're not prompting + */ + WARN_ON(!(flags & FSCK_AUTOFIX) && !trans && bch2_current_has_btree_trans(c)); if ((flags & FSCK_CAN_FIX) && test_bit(err, c->sb.errors_silent)) diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h index 2f1b86978f36..21ee7211b03e 100644 --- a/fs/bcachefs/error.h +++ b/fs/bcachefs/error.h @@ -184,7 +184,7 @@ do { \ ret = -BCH_ERR_fsck_delete_bkey; \ goto fsck_err; \ } \ - int _ret = __bch2_bkey_fsck_err(c, k, FSCK_CAN_FIX, \ + int _ret = __bch2_bkey_fsck_err(c, k, FSCK_CAN_FIX|FSCK_AUTOFIX,\ BCH_FSCK_ERR_##_err_type, \ _err_msg, ##__VA_ARGS__); \ if (_ret != -BCH_ERR_fsck_fix && \ From 8d65b15f8d93638cfa9dae20a4274d5059c3b9d2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 26 Sep 2024 15:30:17 -0400 Subject: [PATCH 582/655] bcachefs: Fix BCH_SB_ERRS() so we can reorder BCH_SB_ERRS() has a field for the actual enum val so that we can reorder to reorganize, but the way BCH_SB_ERR_MAX was defined didn't allow for this. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 2 +- fs/bcachefs/sb-downgrade.c | 5 ++--- fs/bcachefs/sb-errors.c | 6 +++--- fs/bcachefs/sb-errors.h | 2 ++ fs/bcachefs/sb-errors_format.h | 2 +- 5 files changed, 9 insertions(+), 8 deletions(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index c711d4c27a03..e6dd4812c020 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -776,7 +776,7 @@ struct bch_fs { unsigned nsec_per_time_unit; u64 features; u64 compat; - unsigned long errors_silent[BITS_TO_LONGS(BCH_SB_ERR_MAX)]; + unsigned long errors_silent[BITS_TO_LONGS(BCH_FSCK_ERR_MAX)]; u64 btrees_lost_data; } sb; diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index 6f0493f79959..5102059a0f1d 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -312,8 +312,7 @@ static void bch2_sb_downgrade_to_text(struct printbuf *out, struct bch_sb *sb, if (!first) prt_char(out, ','); first = false; - unsigned e = le16_to_cpu(i->errors[j]); - prt_str(out, e < BCH_SB_ERR_MAX ? bch2_sb_error_strs[e] : "(unknown)"); + bch2_sb_error_id_to_text(out, le16_to_cpu(i->errors[j])); } prt_newline(out); } @@ -401,7 +400,7 @@ void bch2_sb_set_downgrade(struct bch_fs *c, unsigned new_minor, unsigned old_mi for (unsigned j = 0; j < le16_to_cpu(i->nr_errors); j++) { unsigned e = le16_to_cpu(i->errors[j]); - if (e < BCH_SB_ERR_MAX) + if (e < BCH_FSCK_ERR_MAX) __set_bit(e, c->sb.errors_silent); if (e < sizeof(ext->errors_silent) * 8) __set_bit_le64(e, ext->errors_silent); diff --git a/fs/bcachefs/sb-errors.c b/fs/bcachefs/sb-errors.c index c1270d790e43..013a96883b4e 100644 --- a/fs/bcachefs/sb-errors.c +++ b/fs/bcachefs/sb-errors.c @@ -7,12 +7,12 @@ const char * const bch2_sb_error_strs[] = { #define x(t, n, ...) [n] = #t, BCH_SB_ERRS() - NULL +#undef x }; -static void bch2_sb_error_id_to_text(struct printbuf *out, enum bch_sb_error_id id) +void bch2_sb_error_id_to_text(struct printbuf *out, enum bch_sb_error_id id) { - if (id < BCH_SB_ERR_MAX) + if (id < BCH_FSCK_ERR_MAX) prt_str(out, bch2_sb_error_strs[id]); else prt_printf(out, "(unknown error %u)", id); diff --git a/fs/bcachefs/sb-errors.h b/fs/bcachefs/sb-errors.h index 8889001e7db4..b2357b8e6107 100644 --- a/fs/bcachefs/sb-errors.h +++ b/fs/bcachefs/sb-errors.h @@ -6,6 +6,8 @@ extern const char * const bch2_sb_error_strs[]; +void bch2_sb_error_id_to_text(struct printbuf *, enum bch_sb_error_id); + extern const struct bch_sb_field_ops bch_sb_field_ops_errors; void bch2_sb_error_count(struct bch_fs *, enum bch_sb_error_id); diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 4e4e132d6d23..49d8609c4a08 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -292,12 +292,12 @@ enum bch_fsck_flags { x(accounting_key_replicas_nr_devs_0, 278, FSCK_AUTOFIX) \ x(accounting_key_replicas_nr_required_bad, 279, FSCK_AUTOFIX) \ x(accounting_key_replicas_devs_unsorted, 280, FSCK_AUTOFIX) \ + x(MAX, 281, 0) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, BCH_SB_ERRS() #undef x - BCH_SB_ERR_MAX }; struct bch_sb_field_errors { From fd65378db9998a6deafdc4910ee1b01b377d6fee Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 26 Sep 2024 15:19:17 -0400 Subject: [PATCH 583/655] bcachefs: Don't delete unlinked inodes before logged op resume Previously, check_inode() would delete unlinked inodes if they weren't on the deleted list - this code dating from before there was a deleted list. But, if we crash during a logged op (truncate or finsert/fcollapse) of an unlinked file, logged op resume will get confused if the inode has already been deleted - instead, just add it to the deleted list if it needs to be there; delete_dead_inodes runs after logged op resume. Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 49 ++++++++++++++++++----------- fs/bcachefs/recovery_passes_types.h | 2 +- fs/bcachefs/sb-errors_format.h | 3 +- fs/bcachefs/super-io.c | 3 +- 4 files changed, 36 insertions(+), 21 deletions(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 8fe67abae36e..10e2930b62da 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -1049,26 +1049,39 @@ static int check_inode(struct btree_trans *trans, } if (u.bi_flags & BCH_INODE_unlinked) { - ret = check_inode_deleted_list(trans, k.k->p); - if (ret < 0) - return ret; + if (!test_bit(BCH_FS_started, &c->flags)) { + /* + * If we're not in online fsck, don't delete unlinked + * inodes, just make sure they're on the deleted list. + * + * They might be referred to by a logged operation - + * i.e. we might have crashed in the middle of a + * truncate on an unlinked but open file - so we want to + * let the delete_dead_inodes kill it after resuming + * logged ops. + */ + ret = check_inode_deleted_list(trans, k.k->p); + if (ret < 0) + return ret; - fsck_err_on(!ret, - trans, unlinked_inode_not_on_deleted_list, - "inode %llu:%u unlinked, but not on deleted list", - u.bi_inum, k.k->p.snapshot); - ret = 0; - } + fsck_err_on(!ret, + trans, unlinked_inode_not_on_deleted_list, + "inode %llu:%u unlinked, but not on deleted list", + u.bi_inum, k.k->p.snapshot); - if (u.bi_flags & BCH_INODE_unlinked && - !bch2_inode_is_open(c, k.k->p) && - (!c->sb.clean || - fsck_err(trans, inode_unlinked_but_clean, - "filesystem marked clean, but inode %llu unlinked", - u.bi_inum))) { - ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot); - bch_err_msg(c, ret, "in fsck deleting inode"); - return ret; + ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_deleted_inodes, k.k->p, 1); + if (ret) + goto err; + } else { + if (fsck_err_on(bch2_inode_is_open(c, k.k->p), + trans, inode_unlinked_and_not_open, + "inode %llu%u unlinked and not open", + u.bi_inum, u.bi_snapshot)) { + ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot); + bch_err_msg(c, ret, "in fsck deleting inode"); + return ret; + } + } } if (u.bi_flags & BCH_INODE_i_size_dirty && diff --git a/fs/bcachefs/recovery_passes_types.h b/fs/bcachefs/recovery_passes_types.h index 8c7dee5983d2..50406ce0e4ef 100644 --- a/fs/bcachefs/recovery_passes_types.h +++ b/fs/bcachefs/recovery_passes_types.h @@ -50,7 +50,7 @@ x(check_directory_structure, 30, PASS_ONLINE|PASS_FSCK) \ x(check_nlinks, 31, PASS_FSCK) \ x(resume_logged_ops, 23, PASS_ALWAYS) \ - x(delete_dead_inodes, 32, PASS_FSCK|PASS_UNCLEAN) \ + x(delete_dead_inodes, 32, PASS_ALWAYS) \ x(fix_reflink_p, 33, 0) \ x(set_fs_needs_rebalance, 34, 0) \ diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 49d8609c4a08..3fca1d836318 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -210,6 +210,7 @@ enum bch_fsck_flags { x(inode_snapshot_mismatch, 196, 0) \ x(inode_unlinked_but_clean, 197, 0) \ x(inode_unlinked_but_nlink_nonzero, 198, 0) \ + x(inode_unlinked_and_not_open, 281, 0) \ x(inode_checksum_type_invalid, 199, 0) \ x(inode_compression_type_invalid, 200, 0) \ x(inode_subvol_root_but_not_dir, 201, 0) \ @@ -292,7 +293,7 @@ enum bch_fsck_flags { x(accounting_key_replicas_nr_devs_0, 278, FSCK_AUTOFIX) \ x(accounting_key_replicas_nr_required_bad, 279, FSCK_AUTOFIX) \ x(accounting_key_replicas_devs_unsorted, 280, FSCK_AUTOFIX) \ - x(MAX, 281, 0) + x(MAX, 282, 0) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index b2e914841b5f..ce7410d72089 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -1190,7 +1190,8 @@ static void bch2_sb_ext_to_text(struct printbuf *out, struct bch_sb *sb, le_bitvector_to_cpu(errors_silent, (void *) e->errors_silent, sizeof(e->errors_silent) * 8); prt_printf(out, "Errors to silently fix:\t"); - prt_bitflags_vector(out, bch2_sb_error_strs, errors_silent, sizeof(e->errors_silent) * 8); + prt_bitflags_vector(out, bch2_sb_error_strs, errors_silent, + min(BCH_FSCK_ERR_MAX, sizeof(e->errors_silent) * 8)); prt_newline(out); kfree(errors_silent); From cf49f8a8c277f9f2b78e2a56189a741a508a9820 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 26 Sep 2024 15:49:17 -0400 Subject: [PATCH 584/655] bcachefs: rename version -> bversion give bversions a more distinct name, to aid in grepping Signed-off-by: Kent Overstreet --- fs/bcachefs/backpointers.c | 2 +- fs/bcachefs/bcachefs_format.h | 6 +++--- fs/bcachefs/bkey.h | 4 ++-- fs/bcachefs/bkey_methods.c | 2 +- fs/bcachefs/bkey_methods.h | 2 +- fs/bcachefs/btree_gc.c | 6 +++--- fs/bcachefs/btree_io.c | 2 +- fs/bcachefs/btree_trans_commit.c | 8 ++++---- fs/bcachefs/data_update.c | 2 +- fs/bcachefs/disk_accounting.c | 6 +++--- fs/bcachefs/disk_accounting.h | 4 ++-- fs/bcachefs/disk_accounting_types.h | 2 +- fs/bcachefs/io_read.c | 4 ++-- fs/bcachefs/io_write.c | 4 ++-- fs/bcachefs/recovery.c | 2 +- fs/bcachefs/reflink.c | 2 +- fs/bcachefs/tests.c | 2 +- 17 files changed, 30 insertions(+), 30 deletions(-) diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index e11989a57ca0..47455a85c909 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -501,7 +501,7 @@ static int check_extent_checksum(struct btree_trans *trans, prt_printf(&buf, "\n %s ", bch2_btree_id_str(o_btree)); bch2_bkey_val_to_text(&buf, c, extent2); - struct nonce nonce = extent_nonce(extent.k->version, p.crc); + struct nonce nonce = extent_nonce(extent.k->bversion, p.crc); struct bch_csum csum = bch2_checksum(c, p.crc.csum_type, nonce, data_buf, bytes); if (fsck_err_on(bch2_crc_cmp(csum, p.crc.csum), trans, dup_backpointer_to_bad_csum_extent, diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 8c4addddd07e..203ee627cab5 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -217,7 +217,7 @@ struct bkey { #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ __u8 pad[1]; - struct bversion version; + struct bversion bversion; __u32 size; /* extent size, in sectors */ struct bpos p; #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ @@ -328,8 +328,8 @@ enum bch_bkey_fields { bkey_format_field(OFFSET, p.offset), \ bkey_format_field(SNAPSHOT, p.snapshot), \ bkey_format_field(SIZE, size), \ - bkey_format_field(VERSION_HI, version.hi), \ - bkey_format_field(VERSION_LO, version.lo), \ + bkey_format_field(VERSION_HI, bversion.hi), \ + bkey_format_field(VERSION_LO, bversion.lo), \ }, \ }) diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h index e34cb2bf329c..6e5092d4c62e 100644 --- a/fs/bcachefs/bkey.h +++ b/fs/bcachefs/bkey.h @@ -554,8 +554,8 @@ static inline void bch2_bkey_pack_test(void) {} x(BKEY_FIELD_OFFSET, p.offset) \ x(BKEY_FIELD_SNAPSHOT, p.snapshot) \ x(BKEY_FIELD_SIZE, size) \ - x(BKEY_FIELD_VERSION_HI, version.hi) \ - x(BKEY_FIELD_VERSION_LO, version.lo) + x(BKEY_FIELD_VERSION_HI, bversion.hi) \ + x(BKEY_FIELD_VERSION_LO, bversion.lo) struct bkey_format_state { u64 field_min[BKEY_NR_FIELDS]; diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index 88d8958281e8..e7ac227ba7e8 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -289,7 +289,7 @@ void bch2_bkey_to_text(struct printbuf *out, const struct bkey *k) bch2_bpos_to_text(out, k->p); - prt_printf(out, " len %u ver %llu", k->size, k->version.lo); + prt_printf(out, " len %u ver %llu", k->size, k->bversion.lo); } else { prt_printf(out, "(null)"); } diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h index 3df3dd2723a1..018fb72e32d3 100644 --- a/fs/bcachefs/bkey_methods.h +++ b/fs/bcachefs/bkey_methods.h @@ -70,7 +70,7 @@ bool bch2_bkey_normalize(struct bch_fs *, struct bkey_s); static inline bool bch2_bkey_maybe_mergable(const struct bkey *l, const struct bkey *r) { return l->type == r->type && - !bversion_cmp(l->version, r->version) && + !bversion_cmp(l->bversion, r->bversion) && bpos_eq(l->p, bkey_start_pos(r)); } diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 40b08fef3c39..660d2fa02da2 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -601,15 +601,15 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id, if (initial) { BUG_ON(bch2_journal_seq_verify && - k.k->version.lo > atomic64_read(&c->journal.seq)); + k.k->bversion.lo > atomic64_read(&c->journal.seq)); if (fsck_err_on(btree_id != BTREE_ID_accounting && - k.k->version.lo > atomic64_read(&c->key_version), + k.k->bversion.lo > atomic64_read(&c->key_version), trans, bkey_version_in_future, "key version number higher than recorded %llu\n %s", atomic64_read(&c->key_version), (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) - atomic64_set(&c->key_version, k.k->version.lo); + atomic64_set(&c->key_version, k.k->bversion.lo); } if (mustfix_fsck_err_on(level && !bch2_dev_btree_bitmap_marked(c, k), diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 2a0420605bd7..1c1448b52207 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1223,7 +1223,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, ret = bch2_bkey_val_validate(c, u.s_c, READ); if (ret == -BCH_ERR_fsck_delete_bkey || (bch2_inject_invalid_keys && - !bversion_cmp(u.k->version, MAX_VERSION))) { + !bversion_cmp(u.k->bversion, MAX_VERSION))) { btree_keys_account_key_drop(&b->nr, 0, k); i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 145d82aed4ec..0989e2da6e56 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -684,10 +684,10 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, !(flags & BCH_TRANS_COMMIT_no_journal_res)) { if (bch2_journal_seq_verify) trans_for_each_update(trans, i) - i->k->k.version.lo = trans->journal_res.seq; + i->k->k.bversion.lo = trans->journal_res.seq; else if (bch2_inject_invalid_keys) trans_for_each_update(trans, i) - i->k->k.version = MAX_VERSION; + i->k->k.bversion = MAX_VERSION; } h = trans->hooks; @@ -709,9 +709,9 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, if (jset_entry_is_key(entry) && entry->start->k.type == KEY_TYPE_accounting) { struct bkey_i_accounting *a = bkey_i_to_accounting(entry->start); - a->k.version = journal_pos_to_bversion(&trans->journal_res, + a->k.bversion = journal_pos_to_bversion(&trans->journal_res, (u64 *) entry - (u64 *) trans->journal_entries); - BUG_ON(bversion_zero(a->k.version)); + BUG_ON(bversion_zero(a->k.bversion)); ret = bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), BCH_ACCOUNTING_normal); if (ret) goto revert_fs_usage; diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 757b9884ef55..462b1a2fe1ad 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -639,7 +639,7 @@ int bch2_data_update_init(struct btree_trans *trans, bch2_write_op_init(&m->op, c, io_opts); m->op.pos = bkey_start_pos(k.k); - m->op.version = k.k->version; + m->op.version = k.k->bversion; m->op.target = data_opts.target; m->op.write_point = wp; m->op.nr_replicas = 0; diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index 669214127c16..9ac45a607b93 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -291,7 +291,7 @@ static int __bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accoun struct accounting_mem_entry n = { .pos = a.k->p, - .version = a.k->version, + .bversion = a.k->bversion, .nr_counters = bch2_accounting_counters(a.k), .v[0] = __alloc_percpu_gfp(n.nr_counters * sizeof(u64), sizeof(u64), GFP_KERNEL), @@ -636,7 +636,7 @@ int bch2_accounting_read(struct bch_fs *c) accounting_pos_cmp, &k.k->p); bool applied = idx < acc->k.nr && - bversion_cmp(acc->k.data[idx].version, k.k->version) >= 0; + bversion_cmp(acc->k.data[idx].bversion, k.k->bversion) >= 0; if (applied) continue; @@ -644,7 +644,7 @@ int bch2_accounting_read(struct bch_fs *c) if (i + 1 < &darray_top(*keys) && i[1].k->k.type == KEY_TYPE_accounting && !journal_key_cmp(i, i + 1)) { - BUG_ON(bversion_cmp(i[0].k->k.version, i[1].k->k.version) >= 0); + BUG_ON(bversion_cmp(i[0].k->k.bversion, i[1].k->k.bversion) >= 0); i[1].journal_seq = i[0].journal_seq; diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h index 243bfb0975ea..4ea6c8a092bc 100644 --- a/fs/bcachefs/disk_accounting.h +++ b/fs/bcachefs/disk_accounting.h @@ -36,8 +36,8 @@ static inline void bch2_accounting_accumulate(struct bkey_i_accounting *dst, for (unsigned i = 0; i < bch2_accounting_counters(&dst->k); i++) dst->v.d[i] += src.v->d[i]; - if (bversion_cmp(dst->k.version, src.k->version) < 0) - dst->k.version = src.k->version; + if (bversion_cmp(dst->k.bversion, src.k->bversion) < 0) + dst->k.bversion = src.k->bversion; } static inline void fs_usage_data_type_to_base(struct bch_fs_usage_base *fs_usage, diff --git a/fs/bcachefs/disk_accounting_types.h b/fs/bcachefs/disk_accounting_types.h index 1687a45177a7..b1982131b206 100644 --- a/fs/bcachefs/disk_accounting_types.h +++ b/fs/bcachefs/disk_accounting_types.h @@ -6,7 +6,7 @@ struct accounting_mem_entry { struct bpos pos; - struct bversion version; + struct bversion bversion; unsigned nr_counters; u64 __percpu *v[2]; }; diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index b2f50e74bb76..e4fc17c548fd 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -517,7 +517,7 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans, if ((ret = bkey_err(k))) goto out; - if (bversion_cmp(k.k->version, rbio->version) || + if (bversion_cmp(k.k->bversion, rbio->version) || !bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset)) goto out; @@ -1031,7 +1031,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, rbio->read_pos = read_pos; rbio->data_btree = data_btree; rbio->data_pos = data_pos; - rbio->version = k.k->version; + rbio->version = k.k->bversion; rbio->promote = promote; INIT_WORK(&rbio->work, NULL); diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index d3b5be7fd9bf..b5fe9e0dc155 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -697,7 +697,7 @@ static void init_append_extent(struct bch_write_op *op, e = bkey_extent_init(op->insert_keys.top); e->k.p = op->pos; e->k.size = crc.uncompressed_size; - e->k.version = version; + e->k.bversion = version; if (crc.csum_type || crc.compression_type || @@ -1544,7 +1544,7 @@ static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len) id = bkey_inline_data_init(op->insert_keys.top); id->k.p = op->pos; - id->k.version = op->version; + id->k.bversion = op->version; id->k.size = sectors; iter = bio->bi_iter; diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index d1699aecc9fb..c84295a07232 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -151,7 +151,7 @@ static int bch2_journal_replay_accounting_key(struct btree_trans *trans, struct bkey_s_c old = bch2_btree_path_peek_slot(btree_iter_path(trans, &iter), &u); /* Has this delta already been applied to the btree? */ - if (bversion_cmp(old.k->version, k->k->k.version) >= 0) { + if (bversion_cmp(old.k->bversion, k->k->k.bversion) >= 0) { ret = 0; goto out; } diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index e59c0abb4772..f457925fa362 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -367,7 +367,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, r_v->k.type = bkey_type_to_indirect(&orig->k); r_v->k.p = reflink_iter.pos; bch2_key_resize(&r_v->k, orig->k.size); - r_v->k.version = orig->k.version; + r_v->k.bversion = orig->k.bversion; set_bkey_val_bytes(&r_v->k, sizeof(__le64) + bkey_val_bytes(&orig->k)); diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c index 01b768c9b767..b2f209743afe 100644 --- a/fs/bcachefs/tests.c +++ b/fs/bcachefs/tests.c @@ -394,7 +394,7 @@ static int insert_test_extent(struct bch_fs *c, k.k_i.k.p.offset = end; k.k_i.k.p.snapshot = U32_MAX; k.k_i.k.size = end - start; - k.k_i.k.version.lo = test_version++; + k.k_i.k.bversion.lo = test_version++; ret = bch2_btree_insert(c, BTREE_ID_extents, &k.k_i, NULL, 0, 0); bch_err_fn(c, ret); From f8911ad88de3acea7a67451f59649bb54da0741b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 26 Sep 2024 15:58:02 -0400 Subject: [PATCH 585/655] bcachefs: Check for accounting keys with bversion=0 Signed-off-by: Kent Overstreet --- fs/bcachefs/bkey.h | 4 ++-- fs/bcachefs/disk_accounting.c | 4 ++++ fs/bcachefs/sb-errors_format.h | 3 ++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h index 6e5092d4c62e..41df24a53d97 100644 --- a/fs/bcachefs/bkey.h +++ b/fs/bcachefs/bkey.h @@ -214,9 +214,9 @@ static __always_inline int bversion_cmp(struct bversion l, struct bversion r) #define ZERO_VERSION ((struct bversion) { .hi = 0, .lo = 0 }) #define MAX_VERSION ((struct bversion) { .hi = ~0, .lo = ~0ULL }) -static __always_inline int bversion_zero(struct bversion v) +static __always_inline bool bversion_zero(struct bversion v) { - return !bversion_cmp(v, ZERO_VERSION); + return bversion_cmp(v, ZERO_VERSION) == 0; } #ifdef CONFIG_BCACHEFS_DEBUG diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index 9ac45a607b93..59897b347c62 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -134,6 +134,10 @@ int bch2_accounting_validate(struct bch_fs *c, struct bkey_s_c k, void *end = &acc_k + 1; int ret = 0; + bkey_fsck_err_on(bversion_zero(k.k->bversion), + c, accounting_key_version_0, + "accounting key with version=0"); + switch (acc_k.type) { case BCH_DISK_ACCOUNTING_nr_inodes: end = field_end(acc_k, nr_inodes); diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 3fca1d836318..6955bb4ea4c5 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -293,7 +293,8 @@ enum bch_fsck_flags { x(accounting_key_replicas_nr_devs_0, 278, FSCK_AUTOFIX) \ x(accounting_key_replicas_nr_required_bad, 279, FSCK_AUTOFIX) \ x(accounting_key_replicas_devs_unsorted, 280, FSCK_AUTOFIX) \ - x(MAX, 282, 0) + x(accounting_key_version_0, 282, FSCK_AUTOFIX) \ + x(MAX, 283, 0) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, From a3581ca35d2b7e854e071dec2df7de7152aaa5c3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 26 Sep 2024 15:59:29 -0400 Subject: [PATCH 586/655] bcachefs: Fix BCH_TRANS_COMMIT_skip_accounting_apply This was added to avoid double-counting accounting keys in journal replay. But applied incorrectly (easily done since it applies to the transaction commit, not a particular update), it leads to skipping in-mem accounting for real accounting updates, and failure to give them a version number - which leads to journal replay becoming very confused the next time around. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_trans_commit.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 0989e2da6e56..1a74a1a252ee 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -700,27 +700,31 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, struct jset_entry *entry = trans->journal_entries; - if (likely(!(flags & BCH_TRANS_COMMIT_skip_accounting_apply))) { - percpu_down_read(&c->mark_lock); + percpu_down_read(&c->mark_lock); - for (entry = trans->journal_entries; - entry != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s); - entry = vstruct_next(entry)) - if (jset_entry_is_key(entry) && entry->start->k.type == KEY_TYPE_accounting) { - struct bkey_i_accounting *a = bkey_i_to_accounting(entry->start); + for (entry = trans->journal_entries; + entry != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s); + entry = vstruct_next(entry)) + if (entry->type == BCH_JSET_ENTRY_write_buffer_keys && + entry->start->k.type == KEY_TYPE_accounting) { + BUG_ON(!trans->journal_res.ref); - a->k.bversion = journal_pos_to_bversion(&trans->journal_res, - (u64 *) entry - (u64 *) trans->journal_entries); - BUG_ON(bversion_zero(a->k.bversion)); + struct bkey_i_accounting *a = bkey_i_to_accounting(entry->start); + + a->k.bversion = journal_pos_to_bversion(&trans->journal_res, + (u64 *) entry - (u64 *) trans->journal_entries); + BUG_ON(bversion_zero(a->k.bversion)); + + if (likely(!(flags & BCH_TRANS_COMMIT_skip_accounting_apply))) { ret = bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), BCH_ACCOUNTING_normal); if (ret) goto revert_fs_usage; } - percpu_up_read(&c->mark_lock); + } + percpu_up_read(&c->mark_lock); - /* XXX: we only want to run this if deltas are nonzero */ - bch2_trans_account_disk_usage_change(trans); - } + /* XXX: we only want to run this if deltas are nonzero */ + bch2_trans_account_disk_usage_change(trans); trans_for_each_update(trans, i) if (btree_node_type_has_atomic_triggers(i->bkey_type)) { From 9773547b16b1e1b27f002733623cd0e8e6d0f69c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 27 Sep 2024 21:05:59 -0400 Subject: [PATCH 587/655] bcachefs: Convert disk accounting BUG_ON() to WARN_ON() We had a bug where disk accounting keys didn't always have their version field set in journal replay; change the BUG_ON() to a WARN(), and exclude this case since it's now checked for elsewhere (in the bkey validate function). Signed-off-by: Kent Overstreet --- fs/bcachefs/disk_accounting.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index 59897b347c62..9f3133e3e7e5 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -648,7 +648,7 @@ int bch2_accounting_read(struct bch_fs *c) if (i + 1 < &darray_top(*keys) && i[1].k->k.type == KEY_TYPE_accounting && !journal_key_cmp(i, i + 1)) { - BUG_ON(bversion_cmp(i[0].k->k.bversion, i[1].k->k.bversion) >= 0); + WARN_ON(bversion_cmp(i[0].k->k.bversion, i[1].k->k.bversion) >= 0); i[1].journal_seq = i[0].journal_seq; From 1c0ee43b2c9057473e551e2464f24f717accabf6 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 26 Sep 2024 16:19:58 -0400 Subject: [PATCH 588/655] bcachefs: BCH_FS_clean_recovery Add a filesystem flag to indicate whether we did a clean recovery - using c->sb.clean after we've got rw is incorrect, since c->sb is updated whenever we write the superblock. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 1 + fs/bcachefs/fsck.c | 6 ++++-- fs/bcachefs/inode.c | 2 +- fs/bcachefs/recovery.c | 2 ++ 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index e6dd4812c020..f4151ee51b03 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -594,6 +594,7 @@ struct bch_dev { #define BCH_FS_FLAGS() \ x(new_fs) \ x(started) \ + x(clean_recovery) \ x(btree_running) \ x(accounting_replay_done) \ x(may_go_rw) \ diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 10e2930b62da..9c10eeeb0bcb 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -1084,8 +1084,9 @@ static int check_inode(struct btree_trans *trans, } } + /* i_size_dirty is vestigal, since we now have logged ops for truncate * */ if (u.bi_flags & BCH_INODE_i_size_dirty && - (!c->sb.clean || + (!test_bit(BCH_FS_clean_recovery, &c->flags) || fsck_err(trans, inode_i_size_dirty_but_clean, "filesystem marked clean, but inode %llu has i_size dirty", u.bi_inum))) { @@ -1114,8 +1115,9 @@ static int check_inode(struct btree_trans *trans, do_update = true; } + /* i_sectors_dirty is vestigal, i_sectors is always updated transactionally */ if (u.bi_flags & BCH_INODE_i_sectors_dirty && - (!c->sb.clean || + (!test_bit(BCH_FS_clean_recovery, &c->flags) || fsck_err(trans, inode_i_sectors_dirty_but_clean, "filesystem marked clean, but inode %llu has i_sectors dirty", u.bi_inum))) { diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 1116db239708..753c208896c3 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -1113,7 +1113,7 @@ static int may_delete_deleted_inode(struct btree_trans *trans, pos.offset, pos.snapshot)) goto delete; - if (c->sb.clean && + if (test_bit(BCH_FS_clean_recovery, &c->flags) && !fsck_err(trans, deleted_inode_but_clean, "filesystem marked as clean but have deleted inode %llu:%u", pos.offset, pos.snapshot)) { diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index c84295a07232..6db72d3bad7d 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -717,6 +717,8 @@ int bch2_fs_recovery(struct bch_fs *c) if (c->opts.fsck) set_bit(BCH_FS_fsck_running, &c->flags); + if (c->sb.clean) + set_bit(BCH_FS_clean_recovery, &c->flags); ret = bch2_blacklist_table_initialize(c); if (ret) { From d50d7a5fa4df3190b6b6c6d6551b631fda4a4ed2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 26 Sep 2024 16:23:30 -0400 Subject: [PATCH 589/655] bcachefs: Check for logged ops when clean If we shut down successfully, there shouldn't be any logged ops to resume. Signed-off-by: Kent Overstreet --- fs/bcachefs/logged_ops.c | 13 +++++++++++-- fs/bcachefs/sb-errors_format.h | 3 ++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/logged_ops.c b/fs/bcachefs/logged_ops.c index f49fdca1d07d..6f4a4e1083c9 100644 --- a/fs/bcachefs/logged_ops.c +++ b/fs/bcachefs/logged_ops.c @@ -37,6 +37,14 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter, const struct bch_logged_op_fn *fn = logged_op_fn(k.k->type); struct bkey_buf sk; u32 restart_count = trans->restart_count; + struct printbuf buf = PRINTBUF; + int ret = 0; + + fsck_err_on(test_bit(BCH_FS_clean_recovery, &c->flags), + trans, logged_op_but_clean, + "filesystem marked as clean but have logged op\n%s", + (bch2_bkey_val_to_text(&buf, c, k), + buf.buf)); if (!fn) return 0; @@ -47,8 +55,9 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter, fn->resume(trans, sk.k); bch2_bkey_buf_exit(&sk, c); - - return trans_was_restarted(trans, restart_count); +fsck_err: + printbuf_exit(&buf); + return ret ?: trans_was_restarted(trans, restart_count); } int bch2_resume_logged_ops(struct bch_fs *c) diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 6955bb4ea4c5..c15c52ebf699 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -294,7 +294,8 @@ enum bch_fsck_flags { x(accounting_key_replicas_nr_required_bad, 279, FSCK_AUTOFIX) \ x(accounting_key_replicas_devs_unsorted, 280, FSCK_AUTOFIX) \ x(accounting_key_version_0, 282, FSCK_AUTOFIX) \ - x(MAX, 283, 0) + x(logged_op_but_clean, 283, FSCK_AUTOFIX) \ + x(MAX, 284, 0) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, From e057a290ef715d2765560778625e1660b7352994 Mon Sep 17 00:00:00 2001 From: Alan Huang Date: Tue, 27 Aug 2024 23:14:48 +0800 Subject: [PATCH 590/655] bcachefs: Fix lost wake up If the reader acquires the read lock and then the writer enters the slow path, while the reader proceeds to the unlock path, the following scenario can occur without the change: writer: pcpu_read_count(lock) return 1 (so __do_six_trylock will return 0) reader: this_cpu_dec(*lock->readers) reader: smp_mb() reader: state = atomic_read(&lock->state) (there is no waiting flag set) writer: six_set_bitmask() then the writer will sleep forever. Signed-off-by: Alan Huang Signed-off-by: Kent Overstreet --- fs/bcachefs/six.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/six.c b/fs/bcachefs/six.c index 3a494c5d1247..b3583c50ef04 100644 --- a/fs/bcachefs/six.c +++ b/fs/bcachefs/six.c @@ -169,11 +169,17 @@ static int __do_six_trylock(struct six_lock *lock, enum six_lock_type type, ret = -1 - SIX_LOCK_write; } } else if (type == SIX_LOCK_write && lock->readers) { - if (try) { + if (try) atomic_add(SIX_LOCK_HELD_write, &lock->state); - smp_mb__after_atomic(); - } + /* + * Make sure atomic_add happens before pcpu_read_count and + * six_set_bitmask in slow path happens before pcpu_read_count. + * + * Paired with the smp_mb() in read lock fast path (per-cpu mode) + * and the one before atomic_read in read unlock path. + */ + smp_mb(); ret = !pcpu_read_count(lock); if (try && !ret) { From a6508079b1b6b231d16c438c384d718d3508573c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 23 Sep 2024 22:22:00 -0400 Subject: [PATCH 591/655] bcachefs: dirent_points_to_inode() now warns on mismatch if an inode backpointer points to a dirent that doesn't point back, that's an error we should warn about. Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 84 ++++++++++++++++++++++++++++++---------------- 1 file changed, 56 insertions(+), 28 deletions(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 9c10eeeb0bcb..0e397d0ef3dd 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -21,6 +21,49 @@ #include #include /* struct qstr */ +static bool inode_points_to_dirent(struct bch_inode_unpacked *inode, + struct bkey_s_c_dirent d) +{ + return inode->bi_dir == d.k->p.inode && + inode->bi_dir_offset == d.k->p.offset; +} + +static bool dirent_points_to_inode_nowarn(struct bkey_s_c_dirent d, + struct bch_inode_unpacked *inode) +{ + if (d.v->d_type == DT_SUBVOL + ? le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol + : le64_to_cpu(d.v->d_inum) == inode->bi_inum) + return 0; + return -BCH_ERR_ENOENT_dirent_doesnt_match_inode; +} + +static void dirent_inode_mismatch_msg(struct printbuf *out, + struct bch_fs *c, + struct bkey_s_c_dirent dirent, + struct bch_inode_unpacked *inode) +{ + prt_str(out, "inode points to dirent that does not point back:"); + prt_newline(out); + bch2_bkey_val_to_text(out, c, dirent.s_c); + prt_newline(out); + bch2_inode_unpacked_to_text(out, inode); +} + +static int dirent_points_to_inode(struct bch_fs *c, + struct bkey_s_c_dirent dirent, + struct bch_inode_unpacked *inode) +{ + int ret = dirent_points_to_inode_nowarn(dirent, inode); + if (ret) { + struct printbuf buf = PRINTBUF; + dirent_inode_mismatch_msg(&buf, c, dirent, inode); + bch_warn(c, "%s", buf.buf); + printbuf_exit(&buf); + } + return ret; +} + /* * XXX: this is handling transaction restarts without returning * -BCH_ERR_transaction_restart_nested, this is not how we do things anymore: @@ -371,7 +414,8 @@ static int reattach_subvol(struct btree_trans *trans, struct bkey_s_c_subvolume return ret; ret = remove_backpointer(trans, &inode); - bch_err_msg(c, ret, "removing dirent"); + if (!bch2_err_matches(ret, ENOENT)) + bch_err_msg(c, ret, "removing dirent"); if (ret) return ret; @@ -900,21 +944,6 @@ static struct bkey_s_c_dirent inode_get_dirent(struct btree_trans *trans, return dirent_get_by_pos(trans, iter, SPOS(inode->bi_dir, inode->bi_dir_offset, *snapshot)); } -static bool inode_points_to_dirent(struct bch_inode_unpacked *inode, - struct bkey_s_c_dirent d) -{ - return inode->bi_dir == d.k->p.inode && - inode->bi_dir_offset == d.k->p.offset; -} - -static bool dirent_points_to_inode(struct bkey_s_c_dirent d, - struct bch_inode_unpacked *inode) -{ - return d.v->d_type == DT_SUBVOL - ? le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol - : le64_to_cpu(d.v->d_inum) == inode->bi_inum; -} - static int check_inode_deleted_list(struct btree_trans *trans, struct bpos p) { struct btree_iter iter; @@ -924,13 +953,14 @@ static int check_inode_deleted_list(struct btree_trans *trans, struct bpos p) return ret; } -static int check_inode_dirent_inode(struct btree_trans *trans, struct bkey_s_c inode_k, +static int check_inode_dirent_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode, - u32 inode_snapshot, bool *write_inode) + bool *write_inode) { struct bch_fs *c = trans->c; struct printbuf buf = PRINTBUF; + u32 inode_snapshot = inode->bi_snapshot; struct btree_iter dirent_iter = {}; struct bkey_s_c_dirent d = inode_get_dirent(trans, &dirent_iter, inode, &inode_snapshot); int ret = bkey_err(d); @@ -940,13 +970,13 @@ static int check_inode_dirent_inode(struct btree_trans *trans, struct bkey_s_c i if (fsck_err_on(ret, trans, inode_points_to_missing_dirent, "inode points to missing dirent\n%s", - (bch2_bkey_val_to_text(&buf, c, inode_k), buf.buf)) || - fsck_err_on(!ret && !dirent_points_to_inode(d, inode), + (bch2_inode_unpacked_to_text(&buf, inode), buf.buf)) || + fsck_err_on(!ret && dirent_points_to_inode_nowarn(d, inode), trans, inode_points_to_wrong_dirent, - "inode points to dirent that does not point back:\n%s", - (bch2_bkey_val_to_text(&buf, c, inode_k), - prt_newline(&buf), - bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) { + "%s", + (printbuf_reset(&buf), + dirent_inode_mismatch_msg(&buf, c, d, inode), + buf.buf))) { /* * We just clear the backpointer fields for now. If we find a * dirent that points to this inode in check_dirents(), we'll @@ -1145,7 +1175,7 @@ static int check_inode(struct btree_trans *trans, } if (u.bi_dir || u.bi_dir_offset) { - ret = check_inode_dirent_inode(trans, k, &u, k.k->p.snapshot, &do_update); + ret = check_inode_dirent_inode(trans, &u, &do_update); if (ret) goto err; } @@ -2474,10 +2504,8 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino if (ret && !bch2_err_matches(ret, ENOENT)) break; - if (!ret && !dirent_points_to_inode(d, &inode)) { + if (!ret && (ret = dirent_points_to_inode(c, d, &inode))) bch2_trans_iter_exit(trans, &dirent_iter); - ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode; - } if (bch2_err_matches(ret, ENOENT)) { ret = 0; From 0b0f0ad93c0833ed3b5457eb308274f340535988 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 23 Sep 2024 22:27:13 -0400 Subject: [PATCH 592/655] bcachefs: remove_backpointer() now checks if dirent points to inode Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 0e397d0ef3dd..8f52911558a7 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -389,14 +389,17 @@ static int reattach_inode(struct btree_trans *trans, static int remove_backpointer(struct btree_trans *trans, struct bch_inode_unpacked *inode) { - struct btree_iter iter; - struct bkey_s_c_dirent d; - int ret; + if (!inode->bi_dir) + return 0; - d = bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_dirents, - POS(inode->bi_dir, inode->bi_dir_offset), 0, + struct bch_fs *c = trans->c; + struct btree_iter iter; + struct bkey_s_c_dirent d = + bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_dirents, + SPOS(inode->bi_dir, inode->bi_dir_offset, inode->bi_snapshot), 0, dirent); - ret = bkey_err(d) ?: + int ret = bkey_err(d) ?: + dirent_points_to_inode(c, d, inode) ?: __remove_dirent(trans, d.k->p); bch2_trans_iter_exit(trans, &iter); return ret; From 3a5895e3ac2bb4b252a4e816575eeec6ac3deeec Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 23 Sep 2024 22:32:47 -0400 Subject: [PATCH 593/655] bcachefs: check_subvol_path() now prints subvol root inode Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 32 +++++++++++++------------------- fs/bcachefs/sb-errors_format.h | 2 +- 2 files changed, 14 insertions(+), 20 deletions(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 8f52911558a7..0d8b782b63fb 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -2371,22 +2371,6 @@ static bool darray_u32_has(darray_u32 *d, u32 v) return false; } -/* - * We've checked that inode backpointers point to valid dirents; here, it's - * sufficient to check that the subvolume root has a dirent: - */ -static int subvol_has_dirent(struct btree_trans *trans, struct bkey_s_c_subvolume s) -{ - struct bch_inode_unpacked inode; - int ret = bch2_inode_find_by_inum_trans(trans, - (subvol_inum) { s.k->p.offset, le64_to_cpu(s.v->inode) }, - &inode); - if (ret) - return ret; - - return inode.bi_dir != 0; -} - static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k) { struct bch_fs *c = trans->c; @@ -2405,14 +2389,24 @@ static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k); - ret = subvol_has_dirent(trans, s); - if (ret < 0) + struct bch_inode_unpacked subvol_root; + ret = bch2_inode_find_by_inum_trans(trans, + (subvol_inum) { s.k->p.offset, le64_to_cpu(s.v->inode) }, + &subvol_root); + if (ret) break; - if (fsck_err_on(!ret, + /* + * We've checked that inode backpointers point to valid dirents; + * here, it's sufficient to check that the subvolume root has a + * dirent: + */ + if (fsck_err_on(!subvol_root.bi_dir, trans, subvol_unreachable, "unreachable subvolume %s", (bch2_bkey_val_to_text(&buf, c, s.s_c), + prt_newline(&buf), + bch2_inode_unpacked_to_text(&buf, &subvol_root), buf.buf))) { ret = reattach_subvol(trans, s); break; diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index c15c52ebf699..ed5dca5e1161 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -271,7 +271,7 @@ enum bch_fsck_flags { x(subvol_children_not_set, 256, 0) \ x(subvol_children_bad, 257, 0) \ x(subvol_loop, 258, 0) \ - x(subvol_unreachable, 259, 0) \ + x(subvol_unreachable, 259, FSCK_AUTOFIX) \ x(btree_node_bkey_bad_u64s, 260, 0) \ x(btree_node_topology_empty_interior_node, 261, 0) \ x(btree_ptr_v2_min_key_bad, 262, 0) \ From 716bf84ef39218a56fadaa413f70da008ad85888 Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Wed, 11 Sep 2024 09:09:18 +0800 Subject: [PATCH 594/655] coccinelle: Add rules to find str_true_false() replacements After str_true_false() has been introduced in the tree, we can add rules for finding places where str_true_false() can be used. A simple test can find over 10 locations. Signed-off-by: Hongbo Li Signed-off-by: Julia Lawall --- scripts/coccinelle/api/string_choices.cocci | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/scripts/coccinelle/api/string_choices.cocci b/scripts/coccinelle/api/string_choices.cocci index 5e729f187f22..6942ad7c4224 100644 --- a/scripts/coccinelle/api/string_choices.cocci +++ b/scripts/coccinelle/api/string_choices.cocci @@ -85,3 +85,22 @@ e << str_down_up_r.E; @@ coccilib.report.print_report(p[0], "opportunity for str_down_up(%s)" % e) + +@str_true_false depends on patch@ +expression E; +@@ +- ((E) ? "true" : "false") ++ str_true_false(E) + +@str_true_false_r depends on !patch exists@ +expression E; +position P; +@@ +* ((E@P) ? "true" : "false") + +@script:python depends on report@ +p << str_true_false_r.P; +e << str_true_false_r.E; +@@ + +coccilib.report.print_report(p[0], "opportunity for str_true_false(%s)" % e) From 8a0236bab4d6f0761f29eae4d55d1ba32c1ecd47 Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Wed, 11 Sep 2024 09:09:19 +0800 Subject: [PATCH 595/655] coccinelle: Add rules to find str_false_true() replacements As done with str_true_false(), add checks for str_false_true() opportunities. A simple test can find over 9 cases currently exist in the tree. Signed-off-by: Hongbo Li Signed-off-by: Julia Lawall --- scripts/coccinelle/api/string_choices.cocci | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/scripts/coccinelle/api/string_choices.cocci b/scripts/coccinelle/api/string_choices.cocci index 6942ad7c4224..c3c5bc94fab0 100644 --- a/scripts/coccinelle/api/string_choices.cocci +++ b/scripts/coccinelle/api/string_choices.cocci @@ -104,3 +104,22 @@ e << str_true_false_r.E; @@ coccilib.report.print_report(p[0], "opportunity for str_true_false(%s)" % e) + +@str_false_true depends on patch@ +expression E; +@@ +- ((E) ? "false" : "true") ++ str_false_true(E) + +@str_false_true_r depends on !patch exists@ +expression E; +position P; +@@ +* ((E@P) ? "false" : "true") + +@script:python depends on report@ +p << str_false_true_r.P; +e << str_false_true_r.E; +@@ + +coccilib.report.print_report(p[0], "opportunity for str_false_true(%s)" % e) From d4c7544002db32dedbb162b2b89e655ac437ab08 Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Wed, 11 Sep 2024 09:09:20 +0800 Subject: [PATCH 596/655] coccinelle: Add rules to find str_hi{gh}_lo{w}() replacements As other rules done, we add rules for str_hi{gh}_lo{w}() to check the relative opportunities. Signed-off-by: Hongbo Li Signed-off-by: Julia Lawall --- scripts/coccinelle/api/string_choices.cocci | 42 +++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/scripts/coccinelle/api/string_choices.cocci b/scripts/coccinelle/api/string_choices.cocci index c3c5bc94fab0..7c631fd8abe2 100644 --- a/scripts/coccinelle/api/string_choices.cocci +++ b/scripts/coccinelle/api/string_choices.cocci @@ -123,3 +123,45 @@ e << str_false_true_r.E; @@ coccilib.report.print_report(p[0], "opportunity for str_false_true(%s)" % e) + +@str_hi_lo depends on patch@ +expression E; +@@ +( +- ((E) ? "hi" : "lo") ++ str_hi_lo(E) +) + +@str_hi_lo_r depends on !patch exists@ +expression E; +position P; +@@ +( +* ((E@P) ? "hi" : "lo") +) + +@script:python depends on report@ +p << str_hi_lo_r.P; +e << str_hi_lo_r.E; +@@ + +coccilib.report.print_report(p[0], "opportunity for str_hi_lo(%s)" % e) + +@str_high_low depends on patch@ +expression E; +@@ +- ((E) ? "high" : "low") ++ str_high_low(E) + +@str_high_low_r depends on !patch exists@ +expression E; +position P; +@@ +* ((E@P) ? "high" : "low") + +@script:python depends on report@ +p << str_high_low_r.P; +e << str_high_low_r.E; +@@ + +coccilib.report.print_report(p[0], "opportunity for str_high_low(%s)" % e) From 5b7ca4507d64de729011c145d8ee9a2731f9502f Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Wed, 11 Sep 2024 09:09:21 +0800 Subject: [PATCH 597/655] coccinelle: Add rules to find str_lo{w}_hi{gh}() replacements As other rules done, we add rules for str_lo{w}_hi{gh}() to check the relative opportunities. Signed-off-by: Hongbo Li Signed-off-by: Julia Lawall --- scripts/coccinelle/api/string_choices.cocci | 38 +++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/scripts/coccinelle/api/string_choices.cocci b/scripts/coccinelle/api/string_choices.cocci index 7c631fd8abe2..e91d4eb30161 100644 --- a/scripts/coccinelle/api/string_choices.cocci +++ b/scripts/coccinelle/api/string_choices.cocci @@ -165,3 +165,41 @@ e << str_high_low_r.E; @@ coccilib.report.print_report(p[0], "opportunity for str_high_low(%s)" % e) + +@str_lo_hi depends on patch@ +expression E; +@@ +- ((E) ? "lo" : "hi") ++ str_lo_hi(E) + +@str_lo_hi_r depends on !patch exists@ +expression E; +position P; +@@ +* ((E@P) ? "lo" : "hi") + +@script:python depends on report@ +p << str_lo_hi_r.P; +e << str_lo_hi_r.E; +@@ + +coccilib.report.print_report(p[0], "opportunity for str_lo_hi(%s)" % e) + +@str_low_high depends on patch@ +expression E; +@@ +- ((E) ? "low" : "high") ++ str_low_high(E) + +@str_low_high_r depends on !patch exists@ +expression E; +position P; +@@ +* ((E@P) ? "low" : "high") + +@script:python depends on report@ +p << str_low_high_r.P; +e << str_low_high_r.E; +@@ + +coccilib.report.print_report(p[0], "opportunity for str_low_high(%s)" % e) From dd2275d349c2f02ceb6cd37f89b8b9920c602488 Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Wed, 11 Sep 2024 09:09:22 +0800 Subject: [PATCH 598/655] coccinelle: Add rules to find str_enable{d}_disable{d}() replacements As other rules done, we add rules for str_enable{d}_ disable{d}() to check the relative opportunities. Signed-off-by: Hongbo Li Signed-off-by: Julia Lawall --- scripts/coccinelle/api/string_choices.cocci | 38 +++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/scripts/coccinelle/api/string_choices.cocci b/scripts/coccinelle/api/string_choices.cocci index e91d4eb30161..bb6b851ee2aa 100644 --- a/scripts/coccinelle/api/string_choices.cocci +++ b/scripts/coccinelle/api/string_choices.cocci @@ -203,3 +203,41 @@ e << str_low_high_r.E; @@ coccilib.report.print_report(p[0], "opportunity for str_low_high(%s)" % e) + +@str_enable_disable depends on patch@ +expression E; +@@ +- ((E) ? "enable" : "disable") ++ str_enable_disable(E) + +@str_enable_disable_r depends on !patch exists@ +expression E; +position P; +@@ +* ((E@P) ? "enable" : "disable") + +@script:python depends on report@ +p << str_enable_disable_r.P; +e << str_enable_disable_r.E; +@@ + +coccilib.report.print_report(p[0], "opportunity for str_enable_disable(%s)" % e) + +@str_enabled_disabled depends on patch@ +expression E; +@@ +- ((E) ? "enabled" : "disabled") ++ str_enabled_disabled(E) + +@str_enabled_disabled_r depends on !patch exists@ +expression E; +position P; +@@ +* ((E@P) ? "enabled" : "disabled") + +@script:python depends on report@ +p << str_enabled_disabled_r.P; +e << str_enabled_disabled_r.E; +@@ + +coccilib.report.print_report(p[0], "opportunity for str_enabled_disabled(%s)" % e) From ba4b514a6f4ac420d872b8f16245e3ffa05c10a5 Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Wed, 11 Sep 2024 09:09:23 +0800 Subject: [PATCH 599/655] coccinelle: Add rules to find str_read_write() replacements As other rules done, we add rules for str_read_write() to check the relative opportunities. Signed-off-by: Hongbo Li Signed-off-by: Julia Lawall --- scripts/coccinelle/api/string_choices.cocci | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/scripts/coccinelle/api/string_choices.cocci b/scripts/coccinelle/api/string_choices.cocci index bb6b851ee2aa..29c507d86ffd 100644 --- a/scripts/coccinelle/api/string_choices.cocci +++ b/scripts/coccinelle/api/string_choices.cocci @@ -241,3 +241,22 @@ e << str_enabled_disabled_r.E; @@ coccilib.report.print_report(p[0], "opportunity for str_enabled_disabled(%s)" % e) + +@str_read_write depends on patch@ +expression E; +@@ +- ((E) ? "read" : "write") ++ str_read_write(E) + +@str_read_write_r depends on !patch exists@ +expression E; +position P; +@@ +* ((E@P) ? "read" : "write") + +@script:python depends on report@ +p << str_read_write_r.P; +e << str_read_write_r.E; +@@ + +coccilib.report.print_report(p[0], "opportunity for str_read_write(%s)" % e) From c81ca023c30691dcae543bd770e7a3a4c63263ff Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Wed, 11 Sep 2024 09:09:24 +0800 Subject: [PATCH 600/655] coccinelle: Add rules to find str_write_read() replacements As other rules done, we add rules for str_write_read() to check the relative opportunities. Signed-off-by: Hongbo Li Signed-off-by: Julia Lawall --- scripts/coccinelle/api/string_choices.cocci | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/scripts/coccinelle/api/string_choices.cocci b/scripts/coccinelle/api/string_choices.cocci index 29c507d86ffd..64f04da2e9f4 100644 --- a/scripts/coccinelle/api/string_choices.cocci +++ b/scripts/coccinelle/api/string_choices.cocci @@ -260,3 +260,22 @@ e << str_read_write_r.E; @@ coccilib.report.print_report(p[0], "opportunity for str_read_write(%s)" % e) + +@str_write_read depends on patch@ +expression E; +@@ +- ((E) ? "write" : "read") ++ str_write_read(E) + +@str_write_read_r depends on !patch exists@ +expression E; +position P; +@@ +* ((E@P) ? "write" : "read") + +@script:python depends on report@ +p << str_write_read_r.P; +e << str_write_read_r.E; +@@ + +coccilib.report.print_report(p[0], "opportunity for str_write_read(%s)" % e) From 9b5b4810559d3716aef9fdc8d555e4db1a031749 Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Wed, 11 Sep 2024 09:09:25 +0800 Subject: [PATCH 601/655] coccinelle: Add rules to find str_on_off() replacements As other rules done, we add rules for str_on_off() to check the relative opportunities. Signed-off-by: Hongbo Li Signed-off-by: Julia Lawall --- scripts/coccinelle/api/string_choices.cocci | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/scripts/coccinelle/api/string_choices.cocci b/scripts/coccinelle/api/string_choices.cocci index 64f04da2e9f4..34ff8f48965c 100644 --- a/scripts/coccinelle/api/string_choices.cocci +++ b/scripts/coccinelle/api/string_choices.cocci @@ -279,3 +279,22 @@ e << str_write_read_r.E; @@ coccilib.report.print_report(p[0], "opportunity for str_write_read(%s)" % e) + +@str_on_off depends on patch@ +expression E; +@@ +- ((E) ? "on" : "off") ++ str_on_off(E) + +@str_on_off_r depends on !patch exists@ +expression E; +position P; +@@ +* ((E@P) ? "on" : "off") + +@script:python depends on report@ +p << str_on_off_r.P; +e << str_on_off_r.E; +@@ + +coccilib.report.print_report(p[0], "opportunity for str_on_off(%s)" % e) From 253244cdf16a755039f9078b0a785176712f2584 Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Wed, 11 Sep 2024 09:09:26 +0800 Subject: [PATCH 602/655] coccinelle: Add rules to find str_yes_no() replacements As other rules done, we add rules for str_yes_no() to check the relative opportunities. Signed-off-by: Hongbo Li Signed-off-by: Julia Lawall --- scripts/coccinelle/api/string_choices.cocci | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/scripts/coccinelle/api/string_choices.cocci b/scripts/coccinelle/api/string_choices.cocci index 34ff8f48965c..96dc7090395d 100644 --- a/scripts/coccinelle/api/string_choices.cocci +++ b/scripts/coccinelle/api/string_choices.cocci @@ -298,3 +298,22 @@ e << str_on_off_r.E; @@ coccilib.report.print_report(p[0], "opportunity for str_on_off(%s)" % e) + +@str_yes_no depends on patch@ +expression E; +@@ +- ((E) ? "yes" : "no") ++ str_yes_no(E) + +@str_yes_no_r depends on !patch exists@ +expression E; +position P; +@@ +* ((E@P) ? "yes" : "no") + +@script:python depends on report@ +p << str_yes_no_r.P; +e << str_yes_no_r.E; +@@ + +coccilib.report.print_report(p[0], "opportunity for str_yes_no(%s)" % e) From f584e3752ca7bb1f8849a85816b3c974f1aa67ec Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Wed, 11 Sep 2024 09:09:27 +0800 Subject: [PATCH 603/655] coccinelle: Remove unnecessary parentheses for only one possible change. The parentheses are only needed if there is a disjunction, ie a set of possible changes. If there is only one pattern, we can remove these parentheses. Just like the format: - x + y not: ( - x + y ) Signed-off-by: Hongbo Li Signed-off-by: Julia Lawall --- scripts/coccinelle/api/string_choices.cocci | 8 -------- 1 file changed, 8 deletions(-) diff --git a/scripts/coccinelle/api/string_choices.cocci b/scripts/coccinelle/api/string_choices.cocci index 96dc7090395d..95e9a3b31f86 100644 --- a/scripts/coccinelle/api/string_choices.cocci +++ b/scripts/coccinelle/api/string_choices.cocci @@ -43,18 +43,14 @@ coccilib.report.print_report(p[0], "opportunity for str_plural(%s)" % e) @str_up_down depends on patch@ expression E; @@ -( - ((E) ? "up" : "down") + str_up_down(E) -) @str_up_down_r depends on !patch exists@ expression E; position P; @@ -( * ((E@P) ? "up" : "down") -) @script:python depends on report@ p << str_up_down_r.P; @@ -66,18 +62,14 @@ coccilib.report.print_report(p[0], "opportunity for str_up_down(%s)" % e) @str_down_up depends on patch@ expression E; @@ -( - ((E) ? "down" : "up") + str_down_up(E) -) @str_down_up_r depends on !patch exists@ expression E; position P; @@ -( * ((E@P) ? "down" : "up") -) @script:python depends on report@ p << str_down_up_r.P; From 4003ba664bd16f5a969cc883295a9eb5a5aef19e Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 28 Sep 2024 21:26:22 +0200 Subject: [PATCH 604/655] Reduce Coccinelle choices in string_choices.cocci The isomorphism neg_if_exp negates the test of a ?: conditional, making it unnecessary to have an explicit case for a negated test with the branches inverted. At the same time, we can disable neg_if_exp in cases where a different API function may be more suitable for a negated test. Finally, in the non-patch cases, E matches an expression with parentheses around it, so there is no need to mention () explicitly in the pattern. The () are still needed in the patch cases, because we want to drop them, if they are present. Signed-off-by: Julia Lawall --- scripts/coccinelle/api/string_choices.cocci | 91 ++++++++++----------- 1 file changed, 41 insertions(+), 50 deletions(-) diff --git a/scripts/coccinelle/api/string_choices.cocci b/scripts/coccinelle/api/string_choices.cocci index 95e9a3b31f86..375045086912 100644 --- a/scripts/coccinelle/api/string_choices.cocci +++ b/scripts/coccinelle/api/string_choices.cocci @@ -14,23 +14,18 @@ expression E; - ((E == 1) ? "" : "s") + str_plural(E) | -- ((E != 1) ? "s" : "") -+ str_plural(E) -| - ((E > 1) ? "s" : "") + str_plural(E) ) -@str_plural_r depends on !patch exists@ +@str_plural_r depends on !patch@ expression E; position P; @@ ( -* ((E@P == 1) ? "" : "s") +* (E@P == 1) ? "" : "s" | -* ((E@P != 1) ? "s" : "") -| -* ((E@P > 1) ? "s" : "") +* (E@P > 1) ? "s" : "" ) @script:python depends on report@ @@ -40,17 +35,17 @@ e << str_plural_r.E; coccilib.report.print_report(p[0], "opportunity for str_plural(%s)" % e) -@str_up_down depends on patch@ +@str_up_down depends on patch disable neg_if_exp@ expression E; @@ - ((E) ? "up" : "down") + str_up_down(E) -@str_up_down_r depends on !patch exists@ +@str_up_down_r depends on !patch disable neg_if_exp@ expression E; position P; @@ -* ((E@P) ? "up" : "down") +* E@P ? "up" : "down" @script:python depends on report@ p << str_up_down_r.P; @@ -59,17 +54,17 @@ e << str_up_down_r.E; coccilib.report.print_report(p[0], "opportunity for str_up_down(%s)" % e) -@str_down_up depends on patch@ +@str_down_up depends on patch disable neg_if_exp@ expression E; @@ - ((E) ? "down" : "up") + str_down_up(E) -@str_down_up_r depends on !patch exists@ +@str_down_up_r depends on !patch disable neg_if_exp@ expression E; position P; @@ -* ((E@P) ? "down" : "up") +* E@P ? "down" : "up" @script:python depends on report@ p << str_down_up_r.P; @@ -78,17 +73,17 @@ e << str_down_up_r.E; coccilib.report.print_report(p[0], "opportunity for str_down_up(%s)" % e) -@str_true_false depends on patch@ +@str_true_false depends on patch disable neg_if_exp@ expression E; @@ - ((E) ? "true" : "false") + str_true_false(E) -@str_true_false_r depends on !patch exists@ +@str_true_false_r depends on !patch disable neg_if_exp@ expression E; position P; @@ -* ((E@P) ? "true" : "false") +* E@P ? "true" : "false" @script:python depends on report@ p << str_true_false_r.P; @@ -97,17 +92,17 @@ e << str_true_false_r.E; coccilib.report.print_report(p[0], "opportunity for str_true_false(%s)" % e) -@str_false_true depends on patch@ +@str_false_true depends on patch disable neg_if_exp@ expression E; @@ - ((E) ? "false" : "true") + str_false_true(E) -@str_false_true_r depends on !patch exists@ +@str_false_true_r depends on !patch disable neg_if_exp@ expression E; position P; @@ -* ((E@P) ? "false" : "true") +* E@P ? "false" : "true" @script:python depends on report@ p << str_false_true_r.P; @@ -116,21 +111,17 @@ e << str_false_true_r.E; coccilib.report.print_report(p[0], "opportunity for str_false_true(%s)" % e) -@str_hi_lo depends on patch@ +@str_hi_lo depends on patch disable neg_if_exp@ expression E; @@ -( - ((E) ? "hi" : "lo") + str_hi_lo(E) -) -@str_hi_lo_r depends on !patch exists@ +@str_hi_lo_r depends on !patch disable neg_if_exp@ expression E; position P; @@ -( -* ((E@P) ? "hi" : "lo") -) +* E@P ? "hi" : "lo" @script:python depends on report@ p << str_hi_lo_r.P; @@ -139,17 +130,17 @@ e << str_hi_lo_r.E; coccilib.report.print_report(p[0], "opportunity for str_hi_lo(%s)" % e) -@str_high_low depends on patch@ +@str_high_low depends on patch disable neg_if_exp@ expression E; @@ - ((E) ? "high" : "low") + str_high_low(E) -@str_high_low_r depends on !patch exists@ +@str_high_low_r depends on !patch disable neg_if_exp@ expression E; position P; @@ -* ((E@P) ? "high" : "low") +* E@P ? "high" : "low" @script:python depends on report@ p << str_high_low_r.P; @@ -158,17 +149,17 @@ e << str_high_low_r.E; coccilib.report.print_report(p[0], "opportunity for str_high_low(%s)" % e) -@str_lo_hi depends on patch@ +@str_lo_hi depends on patch disable neg_if_exp@ expression E; @@ - ((E) ? "lo" : "hi") + str_lo_hi(E) -@str_lo_hi_r depends on !patch exists@ +@str_lo_hi_r depends on !patch disable neg_if_exp@ expression E; position P; @@ -* ((E@P) ? "lo" : "hi") +* E@P ? "lo" : "hi" @script:python depends on report@ p << str_lo_hi_r.P; @@ -177,17 +168,17 @@ e << str_lo_hi_r.E; coccilib.report.print_report(p[0], "opportunity for str_lo_hi(%s)" % e) -@str_low_high depends on patch@ +@str_low_high depends on patch disable neg_if_exp@ expression E; @@ - ((E) ? "low" : "high") + str_low_high(E) -@str_low_high_r depends on !patch exists@ +@str_low_high_r depends on !patch disable neg_if_exp@ expression E; position P; @@ -* ((E@P) ? "low" : "high") +* E@P ? "low" : "high" @script:python depends on report@ p << str_low_high_r.P; @@ -202,11 +193,11 @@ expression E; - ((E) ? "enable" : "disable") + str_enable_disable(E) -@str_enable_disable_r depends on !patch exists@ +@str_enable_disable_r depends on !patch@ expression E; position P; @@ -* ((E@P) ? "enable" : "disable") +* E@P ? "enable" : "disable" @script:python depends on report@ p << str_enable_disable_r.P; @@ -221,11 +212,11 @@ expression E; - ((E) ? "enabled" : "disabled") + str_enabled_disabled(E) -@str_enabled_disabled_r depends on !patch exists@ +@str_enabled_disabled_r depends on !patch@ expression E; position P; @@ -* ((E@P) ? "enabled" : "disabled") +* E@P ? "enabled" : "disabled" @script:python depends on report@ p << str_enabled_disabled_r.P; @@ -234,17 +225,17 @@ e << str_enabled_disabled_r.E; coccilib.report.print_report(p[0], "opportunity for str_enabled_disabled(%s)" % e) -@str_read_write depends on patch@ +@str_read_write depends on patch disable neg_if_exp@ expression E; @@ - ((E) ? "read" : "write") + str_read_write(E) -@str_read_write_r depends on !patch exists@ +@str_read_write_r depends on !patch disable neg_if_exp@ expression E; position P; @@ -* ((E@P) ? "read" : "write") +* E@P ? "read" : "write" @script:python depends on report@ p << str_read_write_r.P; @@ -253,17 +244,17 @@ e << str_read_write_r.E; coccilib.report.print_report(p[0], "opportunity for str_read_write(%s)" % e) -@str_write_read depends on patch@ +@str_write_read depends on patch disable neg_if_exp@ expression E; @@ - ((E) ? "write" : "read") + str_write_read(E) -@str_write_read_r depends on !patch exists@ +@str_write_read_r depends on !patch disable neg_if_exp@ expression E; position P; @@ -* ((E@P) ? "write" : "read") +* E@P ? "write" : "read" @script:python depends on report@ p << str_write_read_r.P; @@ -278,11 +269,11 @@ expression E; - ((E) ? "on" : "off") + str_on_off(E) -@str_on_off_r depends on !patch exists@ +@str_on_off_r depends on !patch@ expression E; position P; @@ -* ((E@P) ? "on" : "off") +* E@P ? "on" : "off" @script:python depends on report@ p << str_on_off_r.P; @@ -297,11 +288,11 @@ expression E; - ((E) ? "yes" : "no") + str_yes_no(E) -@str_yes_no_r depends on !patch exists@ +@str_yes_no_r depends on !patch@ expression E; position P; @@ -* ((E@P) ? "yes" : "no") +* E@P ? "yes" : "no" @script:python depends on report@ p << str_yes_no_r.P; From 3f749befb0998472470d850b11b430477c0718cc Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 29 Sep 2024 14:47:33 -0700 Subject: [PATCH 605/655] x86: kvm: fix build error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cpu_emergency_register_virt_callback() function is used unconditionally by the x86 kvm code, but it is declared (and defined) conditionally: #if IS_ENABLED(CONFIG_KVM_INTEL) || IS_ENABLED(CONFIG_KVM_AMD) void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback); ... leading to a build error when neither KVM_INTEL nor KVM_AMD support is enabled: arch/x86/kvm/x86.c: In function ‘kvm_arch_enable_virtualization’: arch/x86/kvm/x86.c:12517:9: error: implicit declaration of function ‘cpu_emergency_register_virt_callback’ [-Wimplicit-function-declaration] 12517 | cpu_emergency_register_virt_callback(kvm_x86_ops.emergency_disable_virtualization_cpu); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arch/x86/kvm/x86.c: In function ‘kvm_arch_disable_virtualization’: arch/x86/kvm/x86.c:12522:9: error: implicit declaration of function ‘cpu_emergency_unregister_virt_callback’ [-Wimplicit-function-declaration] 12522 | cpu_emergency_unregister_virt_callback(kvm_x86_ops.emergency_disable_virtualization_cpu); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fix the build by defining empty helper functions the same way the old cpu_emergency_disable_virtualization() function was dealt with for the same situation. Maybe we could instead have made the call sites conditional, since the callers (kvm_arch_{en,dis}able_virtualization()) have an empty weak fallback. I'll leave that to the kvm people to argue about, this at least gets the build going for that particular config. Fixes: 590b09b1d88e ("KVM: x86: Register "emergency disable" callbacks when virt is enabled") Cc: Paolo Bonzini Cc: Sean Christopherson Cc: Kai Huang Cc: Chao Gao Cc: Farrah Chen Signed-off-by: Linus Torvalds --- arch/x86/include/asm/reboot.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h index d0ef2a678d66..c02183d3cdd7 100644 --- a/arch/x86/include/asm/reboot.h +++ b/arch/x86/include/asm/reboot.h @@ -31,6 +31,8 @@ void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback); void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback); void cpu_emergency_disable_virtualization(void); #else +static inline void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback) {} +static inline void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback) {} static inline void cpu_emergency_disable_virtualization(void) {} #endif /* CONFIG_KVM_INTEL || CONFIG_KVM_AMD */ From 9852d85ec9d492ebef56dc5f229416c925758edc Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 29 Sep 2024 15:06:19 -0700 Subject: [PATCH 606/655] Linux 6.12-rc1 --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 265dd990a9b6..187a4ce2728e 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 -PATCHLEVEL = 11 +PATCHLEVEL = 12 SUBLEVEL = 0 -EXTRAVERSION = +EXTRAVERSION = -rc1 NAME = Baby Opossum Posse # *DOCUMENTATION* From 34820304cc2cd1804ee1f8f3504ec77813d29c8e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 29 Sep 2024 18:20:47 +0200 Subject: [PATCH 607/655] uprobes: fix kernel info leak via "[uprobes]" vma xol_add_vma() maps the uninitialized page allocated by __create_xol_area() into userspace. On some architectures (x86) this memory is readable even without VM_READ, VM_EXEC results in the same pgprot_t as VM_EXEC|VM_READ, although this doesn't really matter, debugger can read this memory anyway. Link: https://lore.kernel.org/all/20240929162047.GA12611@redhat.com/ Reported-by: Will Deacon Fixes: d4b3b6384f98 ("uprobes/core: Allocate XOL slots for uprobes use") Cc: stable@vger.kernel.org Acked-by: Masami Hiramatsu (Google) Signed-off-by: Oleg Nesterov Signed-off-by: Masami Hiramatsu (Google) --- kernel/events/uprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 2ec796e2f055..4b52cb2ae6d6 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1545,7 +1545,7 @@ static struct xol_area *__create_xol_area(unsigned long vaddr) if (!area->bitmap) goto free_area; - area->page = alloc_page(GFP_HIGHUSER); + area->page = alloc_page(GFP_HIGHUSER | __GFP_ZERO); if (!area->page) goto free_bitmap; From 2007d28ec0095c6db0a24fd8bb8fe280c65446cd Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 29 Sep 2024 17:39:02 -0700 Subject: [PATCH 608/655] bcachefs: rename version -> bversion for big endian builds Builds on big endian systems fail as follows. fs/bcachefs/bkey.h: In function 'bch2_bkey_format_add_key': fs/bcachefs/bkey.h:557:41: error: 'const struct bkey' has no member named 'bversion' The original commit only renamed the variable for little endian builds. Rename it for big endian builds as well to fix the problem. Fixes: cf49f8a8c277 ("bcachefs: rename version -> bversion") Cc: Kent Overstreet Signed-off-by: Guenter Roeck Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 203ee627cab5..84832c2d4df9 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -223,7 +223,7 @@ struct bkey { #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ struct bpos p; __u32 size; /* extent size, in sectors */ - struct bversion version; + struct bversion bversion; __u8 pad[1]; #endif From 28e8c5c095ec28edeedab5e976e62e0419a89fc1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 30 Sep 2024 11:14:41 +0100 Subject: [PATCH 609/655] netfs: Add folio_queue API documentation Add API documentation for folio_queue. Signed-off-by: David Howells Link: https://lore.kernel.org/r/2912369.1727691281@warthog.procyon.org.uk cc: Jeff Layton cc: netfs@lists.linux.dev cc: linux-doc@vger.kernel.org cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org Signed-off-by: Christian Brauner --- Documentation/core-api/folio_queue.rst | 212 +++++++++++++++++++++++++ include/linux/folio_queue.h | 168 ++++++++++++++++++++ 2 files changed, 380 insertions(+) create mode 100644 Documentation/core-api/folio_queue.rst diff --git a/Documentation/core-api/folio_queue.rst b/Documentation/core-api/folio_queue.rst new file mode 100644 index 000000000000..1fe7a9bc4b8d --- /dev/null +++ b/Documentation/core-api/folio_queue.rst @@ -0,0 +1,212 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +=========== +Folio Queue +=========== + +:Author: David Howells + +.. Contents: + + * Overview + * Initialisation + * Adding and removing folios + * Querying information about a folio + * Querying information about a folio_queue + * Folio queue iteration + * Folio marks + * Lockless simultaneous production/consumption issues + + +Overview +======== + +The folio_queue struct forms a single segment in a segmented list of folios +that can be used to form an I/O buffer. As such, the list can be iterated over +using the ITER_FOLIOQ iov_iter type. + +The publicly accessible members of the structure are:: + + struct folio_queue { + struct folio_queue *next; + struct folio_queue *prev; + ... + }; + +A pair of pointers are provided, ``next`` and ``prev``, that point to the +segments on either side of the segment being accessed. Whilst this is a +doubly-linked list, it is intentionally not a circular list; the outward +sibling pointers in terminal segments should be NULL. + +Each segment in the list also stores: + + * an ordered sequence of folio pointers, + * the size of each folio and + * three 1-bit marks per folio, + +but hese should not be accessed directly as the underlying data structure may +change, but rather the access functions outlined below should be used. + +The facility can be made accessible by:: + + #include + +and to use the iterator:: + + #include + + +Initialisation +============== + +A segment should be initialised by calling:: + + void folioq_init(struct folio_queue *folioq); + +with a pointer to the segment to be initialised. Note that this will not +necessarily initialise all the folio pointers, so care must be taken to check +the number of folios added. + + +Adding and removing folios +========================== + +Folios can be set in the next unused slot in a segment struct by calling one +of:: + + unsigned int folioq_append(struct folio_queue *folioq, + struct folio *folio); + + unsigned int folioq_append_mark(struct folio_queue *folioq, + struct folio *folio); + +Both functions update the stored folio count, store the folio and note its +size. The second function also sets the first mark for the folio added. Both +functions return the number of the slot used. [!] Note that no attempt is made +to check that the capacity wasn't overrun and the list will not be extended +automatically. + +A folio can be excised by calling:: + + void folioq_clear(struct folio_queue *folioq, unsigned int slot); + +This clears the slot in the array and also clears all the marks for that folio, +but doesn't change the folio count - so future accesses of that slot must check +if the slot is occupied. + + +Querying information about a folio +================================== + +Information about the folio in a particular slot may be queried by the +following function:: + + struct folio *folioq_folio(const struct folio_queue *folioq, + unsigned int slot); + +If a folio has not yet been set in that slot, this may yield an undefined +pointer. The size of the folio in a slot may be queried with either of:: + + unsigned int folioq_folio_order(const struct folio_queue *folioq, + unsigned int slot); + + size_t folioq_folio_size(const struct folio_queue *folioq, + unsigned int slot); + +The first function returns the size as an order and the second as a number of +bytes. + + +Querying information about a folio_queue +======================================== + +Information may be retrieved about a particular segment with the following +functions:: + + unsigned int folioq_nr_slots(const struct folio_queue *folioq); + + unsigned int folioq_count(struct folio_queue *folioq); + + bool folioq_full(struct folio_queue *folioq); + +The first function returns the maximum capacity of a segment. It must not be +assumed that this won't vary between segments. The second returns the number +of folios added to a segments and the third is a shorthand to indicate if the +segment has been filled to capacity. + +Not that the count and fullness are not affected by clearing folios from the +segment. These are more about indicating how many slots in the array have been +initialised, and it assumed that slots won't get reused, but rather the segment +will get discarded as the queue is consumed. + + +Folio marks +=========== + +Folios within a queue can also have marks assigned to them. These marks can be +used to note information such as if a folio needs folio_put() calling upon it. +There are three marks available to be set for each folio. + +The marks can be set by:: + + void folioq_mark(struct folio_queue *folioq, unsigned int slot); + void folioq_mark2(struct folio_queue *folioq, unsigned int slot); + void folioq_mark3(struct folio_queue *folioq, unsigned int slot); + +Cleared by:: + + void folioq_unmark(struct folio_queue *folioq, unsigned int slot); + void folioq_unmark2(struct folio_queue *folioq, unsigned int slot); + void folioq_unmark3(struct folio_queue *folioq, unsigned int slot); + +And the marks can be queried by:: + + bool folioq_is_marked(const struct folio_queue *folioq, unsigned int slot); + bool folioq_is_marked2(const struct folio_queue *folioq, unsigned int slot); + bool folioq_is_marked3(const struct folio_queue *folioq, unsigned int slot); + +The marks can be used for any purpose and are not interpreted by this API. + + +Folio queue iteration +===================== + +A list of segments may be iterated over using the I/O iterator facility using +an ``iov_iter`` iterator of ``ITER_FOLIOQ`` type. The iterator may be +initialised with:: + + void iov_iter_folio_queue(struct iov_iter *i, unsigned int direction, + const struct folio_queue *folioq, + unsigned int first_slot, unsigned int offset, + size_t count); + +This may be told to start at a particular segment, slot and offset within a +queue. The iov iterator functions will follow the next pointers when advancing +and prev pointers when reverting when needed. + + +Lockless simultaneous production/consumption issues +=================================================== + +If properly managed, the list can be extended by the producer at the head end +and shortened by the consumer at the tail end simultaneously without the need +to take locks. The ITER_FOLIOQ iterator inserts appropriate barriers to aid +with this. + +Care must be taken when simultaneously producing and consuming a list. If the +last segment is reached and the folios it refers to are entirely consumed by +the IOV iterators, an iov_iter struct will be left pointing to the last segment +with a slot number equal to the capacity of that segment. The iterator will +try to continue on from this if there's another segment available when it is +used again, but care must be taken lest the segment got removed and freed by +the consumer before the iterator was advanced. + +It is recommended that the queue always contain at least one segment, even if +that segment has never been filled or is entirely spent. This prevents the +head and tail pointers from collapsing. + + +API Function Reference +====================== + +.. kernel-doc:: include/linux/folio_queue.h diff --git a/include/linux/folio_queue.h b/include/linux/folio_queue.h index 955680c3bb5f..af871405ae55 100644 --- a/include/linux/folio_queue.h +++ b/include/linux/folio_queue.h @@ -3,6 +3,12 @@ * * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) + * + * See: + * + * Documentation/core-api/folio_queue.rst + * + * for a description of the API. */ #ifndef _LINUX_FOLIO_QUEUE_H @@ -33,6 +39,13 @@ struct folio_queue { #endif }; +/** + * folioq_init - Initialise a folio queue segment + * @folioq: The segment to initialise + * + * Initialise a folio queue segment. Note that the folio pointers are + * left uninitialised. + */ static inline void folioq_init(struct folio_queue *folioq) { folio_batch_init(&folioq->vec); @@ -43,62 +56,155 @@ static inline void folioq_init(struct folio_queue *folioq) folioq->marks3 = 0; } +/** + * folioq_nr_slots: Query the capacity of a folio queue segment + * @folioq: The segment to query + * + * Query the number of folios that a particular folio queue segment might hold. + * [!] NOTE: This must not be assumed to be the same for every segment! + */ static inline unsigned int folioq_nr_slots(const struct folio_queue *folioq) { return PAGEVEC_SIZE; } +/** + * folioq_count: Query the occupancy of a folio queue segment + * @folioq: The segment to query + * + * Query the number of folios that have been added to a folio queue segment. + * Note that this is not decreased as folios are removed from a segment. + */ static inline unsigned int folioq_count(struct folio_queue *folioq) { return folio_batch_count(&folioq->vec); } +/** + * folioq_count: Query if a folio queue segment is full + * @folioq: The segment to query + * + * Query if a folio queue segment is fully occupied. Note that this does not + * change if folios are removed from a segment. + */ static inline bool folioq_full(struct folio_queue *folioq) { //return !folio_batch_space(&folioq->vec); return folioq_count(folioq) >= folioq_nr_slots(folioq); } +/** + * folioq_is_marked: Check first folio mark in a folio queue segment + * @folioq: The segment to query + * @slot: The slot number of the folio to query + * + * Determine if the first mark is set for the folio in the specified slot in a + * folio queue segment. + */ static inline bool folioq_is_marked(const struct folio_queue *folioq, unsigned int slot) { return test_bit(slot, &folioq->marks); } +/** + * folioq_mark: Set the first mark on a folio in a folio queue segment + * @folioq: The segment to modify + * @slot: The slot number of the folio to modify + * + * Set the first mark for the folio in the specified slot in a folio queue + * segment. + */ static inline void folioq_mark(struct folio_queue *folioq, unsigned int slot) { set_bit(slot, &folioq->marks); } +/** + * folioq_unmark: Clear the first mark on a folio in a folio queue segment + * @folioq: The segment to modify + * @slot: The slot number of the folio to modify + * + * Clear the first mark for the folio in the specified slot in a folio queue + * segment. + */ static inline void folioq_unmark(struct folio_queue *folioq, unsigned int slot) { clear_bit(slot, &folioq->marks); } +/** + * folioq_is_marked2: Check second folio mark in a folio queue segment + * @folioq: The segment to query + * @slot: The slot number of the folio to query + * + * Determine if the second mark is set for the folio in the specified slot in a + * folio queue segment. + */ static inline bool folioq_is_marked2(const struct folio_queue *folioq, unsigned int slot) { return test_bit(slot, &folioq->marks2); } +/** + * folioq_mark2: Set the second mark on a folio in a folio queue segment + * @folioq: The segment to modify + * @slot: The slot number of the folio to modify + * + * Set the second mark for the folio in the specified slot in a folio queue + * segment. + */ static inline void folioq_mark2(struct folio_queue *folioq, unsigned int slot) { set_bit(slot, &folioq->marks2); } +/** + * folioq_unmark2: Clear the second mark on a folio in a folio queue segment + * @folioq: The segment to modify + * @slot: The slot number of the folio to modify + * + * Clear the second mark for the folio in the specified slot in a folio queue + * segment. + */ static inline void folioq_unmark2(struct folio_queue *folioq, unsigned int slot) { clear_bit(slot, &folioq->marks2); } +/** + * folioq_is_marked3: Check third folio mark in a folio queue segment + * @folioq: The segment to query + * @slot: The slot number of the folio to query + * + * Determine if the third mark is set for the folio in the specified slot in a + * folio queue segment. + */ static inline bool folioq_is_marked3(const struct folio_queue *folioq, unsigned int slot) { return test_bit(slot, &folioq->marks3); } +/** + * folioq_mark3: Set the third mark on a folio in a folio queue segment + * @folioq: The segment to modify + * @slot: The slot number of the folio to modify + * + * Set the third mark for the folio in the specified slot in a folio queue + * segment. + */ static inline void folioq_mark3(struct folio_queue *folioq, unsigned int slot) { set_bit(slot, &folioq->marks3); } +/** + * folioq_unmark3: Clear the third mark on a folio in a folio queue segment + * @folioq: The segment to modify + * @slot: The slot number of the folio to modify + * + * Clear the third mark for the folio in the specified slot in a folio queue + * segment. + */ static inline void folioq_unmark3(struct folio_queue *folioq, unsigned int slot) { clear_bit(slot, &folioq->marks3); @@ -111,6 +217,19 @@ static inline unsigned int __folio_order(struct folio *folio) return folio->_flags_1 & 0xff; } +/** + * folioq_append: Add a folio to a folio queue segment + * @folioq: The segment to add to + * @folio: The folio to add + * + * Add a folio to the tail of the sequence in a folio queue segment, increasing + * the occupancy count and returning the slot number for the folio just added. + * The folio size is extracted and stored in the queue and the marks are left + * unmodified. + * + * Note that it's left up to the caller to check that the segment capacity will + * not be exceeded and to extend the queue. + */ static inline unsigned int folioq_append(struct folio_queue *folioq, struct folio *folio) { unsigned int slot = folioq->vec.nr++; @@ -120,6 +239,19 @@ static inline unsigned int folioq_append(struct folio_queue *folioq, struct foli return slot; } +/** + * folioq_append_mark: Add a folio to a folio queue segment + * @folioq: The segment to add to + * @folio: The folio to add + * + * Add a folio to the tail of the sequence in a folio queue segment, increasing + * the occupancy count and returning the slot number for the folio just added. + * The folio size is extracted and stored in the queue, the first mark is set + * and and the second and third marks are left unmodified. + * + * Note that it's left up to the caller to check that the segment capacity will + * not be exceeded and to extend the queue. + */ static inline unsigned int folioq_append_mark(struct folio_queue *folioq, struct folio *folio) { unsigned int slot = folioq->vec.nr++; @@ -130,21 +262,57 @@ static inline unsigned int folioq_append_mark(struct folio_queue *folioq, struct return slot; } +/** + * folioq_folio: Get a folio from a folio queue segment + * @folioq: The segment to access + * @slot: The folio slot to access + * + * Retrieve the folio in the specified slot from a folio queue segment. Note + * that no bounds check is made and if the slot hasn't been added into yet, the + * pointer will be undefined. If the slot has been cleared, NULL will be + * returned. + */ static inline struct folio *folioq_folio(const struct folio_queue *folioq, unsigned int slot) { return folioq->vec.folios[slot]; } +/** + * folioq_folio_order: Get the order of a folio from a folio queue segment + * @folioq: The segment to access + * @slot: The folio slot to access + * + * Retrieve the order of the folio in the specified slot from a folio queue + * segment. Note that no bounds check is made and if the slot hasn't been + * added into yet, the order returned will be 0. + */ static inline unsigned int folioq_folio_order(const struct folio_queue *folioq, unsigned int slot) { return folioq->orders[slot]; } +/** + * folioq_folio_size: Get the size of a folio from a folio queue segment + * @folioq: The segment to access + * @slot: The folio slot to access + * + * Retrieve the size of the folio in the specified slot from a folio queue + * segment. Note that no bounds check is made and if the slot hasn't been + * added into yet, the size returned will be PAGE_SIZE. + */ static inline size_t folioq_folio_size(const struct folio_queue *folioq, unsigned int slot) { return PAGE_SIZE << folioq_folio_order(folioq, slot); } +/** + * folioq_clear: Clear a folio from a folio queue segment + * @folioq: The segment to clear + * @slot: The folio slot to clear + * + * Clear a folio from a sequence in a folio queue segment and clear its marks. + * The occupancy count is left unchanged. + */ static inline void folioq_clear(struct folio_queue *folioq, unsigned int slot) { folioq->vec.folios[slot] = NULL; From f801850bc263d7fa0a4e6d9a36cddf4966c79c14 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 30 Sep 2024 12:59:16 +0100 Subject: [PATCH 610/655] netfs: Fix the netfs_folio tracepoint to handle NULL mapping Fix the netfs_folio tracepoint to handle folios that have a NULL mapping pointer. In such a case, just substitute a zero inode number. Fixes: c38f4e96e605 ("netfs: Provide func to copy data to pagecache for buffered write") Signed-off-by: David Howells Link: https://lore.kernel.org/r/2917423.1727697556@warthog.procyon.org.uk cc: Jeff Layton cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- include/trace/events/netfs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 76bd42a96815..1d7c52821e55 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -448,7 +448,8 @@ TRACE_EVENT(netfs_folio, ), TP_fast_assign( - __entry->ino = folio->mapping->host->i_ino; + struct address_space *__m = READ_ONCE(folio->mapping); + __entry->ino = __m ? __m->host->i_ino : 0; __entry->why = why; __entry->index = folio_index(folio); __entry->nr = folio_nr_pages(folio); From 6c24a03a61a245fe34d47582898331fa034b6ccd Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 13 Sep 2024 23:35:49 +0300 Subject: [PATCH 611/655] net: dsa: improve shutdown sequence Alexander Sverdlin presents 2 problems during shutdown with the lan9303 driver. One is specific to lan9303 and the other just happens to reproduce there. The first problem is that lan9303 is unique among DSA drivers in that it calls dev_get_drvdata() at "arbitrary runtime" (not probe, not shutdown, not remove): phy_state_machine() -> ... -> dsa_user_phy_read() -> ds->ops->phy_read() -> lan9303_phy_read() -> chip->ops->phy_read() -> lan9303_mdio_phy_read() -> dev_get_drvdata() But we never stop the phy_state_machine(), so it may continue to run after dsa_switch_shutdown(). Our common pattern in all DSA drivers is to set drvdata to NULL to suppress the remove() method that may come afterwards. But in this case it will result in an NPD. The second problem is that the way in which we set dp->conduit->dsa_ptr = NULL; is concurrent with receive packet processing. dsa_switch_rcv() checks once whether dev->dsa_ptr is NULL, but afterwards, rather than continuing to use that non-NULL value, dev->dsa_ptr is dereferenced again and again without NULL checks: dsa_conduit_find_user() and many other places. In between dereferences, there is no locking to ensure that what was valid once continues to be valid. Both problems have the common aspect that closing the conduit interface solves them. In the first case, dev_close(conduit) triggers the NETDEV_GOING_DOWN event in dsa_user_netdevice_event() which closes user ports as well. dsa_port_disable_rt() calls phylink_stop(), which synchronously stops the phylink state machine, and ds->ops->phy_read() will thus no longer call into the driver after this point. In the second case, dev_close(conduit) should do this, as per Documentation/networking/driver.rst: | Quiescence | ---------- | | After the ndo_stop routine has been called, the hardware must | not receive or transmit any data. All in flight packets must | be aborted. If necessary, poll or wait for completion of | any reset commands. So it should be sufficient to ensure that later, when we zeroize conduit->dsa_ptr, there will be no concurrent dsa_switch_rcv() call on this conduit. The addition of the netif_device_detach() function is to ensure that ioctls, rtnetlinks and ethtool requests on the user ports no longer propagate down to the driver - we're no longer prepared to handle them. The race condition actually did not exist when commit 0650bf52b31f ("net: dsa: be compatible with masters which unregister on shutdown") first introduced dsa_switch_shutdown(). It was created later, when we stopped unregistering the user interfaces from a bad spot, and we just replaced that sequence with a racy zeroization of conduit->dsa_ptr (one which doesn't ensure that the interfaces aren't up). Reported-by: Alexander Sverdlin Closes: https://lore.kernel.org/netdev/2d2e3bba17203c14a5ffdabc174e3b6bbb9ad438.camel@siemens.com/ Closes: https://lore.kernel.org/netdev/c1bf4de54e829111e0e4a70e7bd1cf523c9550ff.camel@siemens.com/ Fixes: ee534378f005 ("net: dsa: fix panic when DSA master device unbinds on shutdown") Reviewed-by: Alexander Sverdlin Tested-by: Alexander Sverdlin Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20240913203549.3081071-1-vladimir.oltean@nxp.com Signed-off-by: Paolo Abeni --- net/dsa/dsa.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 668c729946ea..1664547deffd 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -1577,6 +1577,7 @@ EXPORT_SYMBOL_GPL(dsa_unregister_switch); void dsa_switch_shutdown(struct dsa_switch *ds) { struct net_device *conduit, *user_dev; + LIST_HEAD(close_list); struct dsa_port *dp; mutex_lock(&dsa2_mutex); @@ -1586,10 +1587,16 @@ void dsa_switch_shutdown(struct dsa_switch *ds) rtnl_lock(); + dsa_switch_for_each_cpu_port(dp, ds) + list_add(&dp->conduit->close_list, &close_list); + + dev_close_many(&close_list, true); + dsa_switch_for_each_user_port(dp, ds) { conduit = dsa_port_to_conduit(dp); user_dev = dp->user; + netif_device_detach(user_dev); netdev_upper_dev_unlink(conduit, user_dev); } From e8d4d34df715133c319fabcf63fdec684be75ff8 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 23 Sep 2024 23:22:41 +0200 Subject: [PATCH 612/655] net: Add netif_get_gro_max_size helper for GRO Add a small netif_get_gro_max_size() helper which returns the maximum IPv4 or IPv6 GRO size of the netdevice. We later add a netif_get_gso_max_size() equivalent as well for GSO, so that these helpers can be used consistently instead of open-coded checks. Signed-off-by: Daniel Borkmann Cc: Eric Dumazet Cc: Paolo Abeni Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20240923212242.15669-1-daniel@iogearbox.net Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 9 +++++++++ net/core/gro.c | 9 ++------- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e87b5e488325..d571451638de 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -5029,6 +5029,15 @@ void netif_set_tso_max_segs(struct net_device *dev, unsigned int segs); void netif_inherit_tso_max(struct net_device *to, const struct net_device *from); +static inline unsigned int +netif_get_gro_max_size(const struct net_device *dev, const struct sk_buff *skb) +{ + /* pairs with WRITE_ONCE() in netif_set_gro(_ipv4)_max_size() */ + return skb->protocol == htons(ETH_P_IPV6) ? + READ_ONCE(dev->gro_max_size) : + READ_ONCE(dev->gro_ipv4_max_size); +} + static inline bool netif_is_macsec(const struct net_device *dev) { return dev->priv_flags & IFF_MACSEC; diff --git a/net/core/gro.c b/net/core/gro.c index 802b4a062400..d1f44084e978 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -98,7 +98,6 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) unsigned int headlen = skb_headlen(skb); unsigned int len = skb_gro_len(skb); unsigned int delta_truesize; - unsigned int gro_max_size; unsigned int new_truesize; struct sk_buff *lp; int segs; @@ -112,12 +111,8 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) if (p->pp_recycle != skb->pp_recycle) return -ETOOMANYREFS; - /* pairs with WRITE_ONCE() in netif_set_gro(_ipv4)_max_size() */ - gro_max_size = p->protocol == htons(ETH_P_IPV6) ? - READ_ONCE(p->dev->gro_max_size) : - READ_ONCE(p->dev->gro_ipv4_max_size); - - if (unlikely(p->len + len >= gro_max_size || NAPI_GRO_CB(skb)->flush)) + if (unlikely(p->len + len >= netif_get_gro_max_size(p->dev, p) || + NAPI_GRO_CB(skb)->flush)) return -E2BIG; if (unlikely(p->len + len >= GRO_LEGACY_MAX_SIZE)) { From e609c959a939660c7519895f853dfa5624c6827a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 23 Sep 2024 23:22:42 +0200 Subject: [PATCH 613/655] net: Fix gso_features_check to check for both dev->gso_{ipv4_,}max_size Commit 24ab059d2ebd ("net: check dev->gso_max_size in gso_features_check()") added a dev->gso_max_size test to gso_features_check() in order to fall back to GSO when needed. This was added as it was noticed that some drivers could misbehave if TSO packets get too big. However, the check doesn't respect dev->gso_ipv4_max_size limit. For instance, a device could be configured with BIG TCP for IPv4, but not IPv6. Therefore, add a netif_get_gso_max_size() equivalent to netif_get_gro_max_size() and use the helper to respect both limits before falling back to GSO engine. Fixes: 24ab059d2ebd ("net: check dev->gso_max_size in gso_features_check()") Signed-off-by: Daniel Borkmann Cc: Eric Dumazet Cc: Paolo Abeni Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20240923212242.15669-2-daniel@iogearbox.net Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 9 +++++++++ net/core/dev.c | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d571451638de..4d20c776a4ff 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -5038,6 +5038,15 @@ netif_get_gro_max_size(const struct net_device *dev, const struct sk_buff *skb) READ_ONCE(dev->gro_ipv4_max_size); } +static inline unsigned int +netif_get_gso_max_size(const struct net_device *dev, const struct sk_buff *skb) +{ + /* pairs with WRITE_ONCE() in netif_set_gso(_ipv4)_max_size() */ + return skb->protocol == htons(ETH_P_IPV6) ? + READ_ONCE(dev->gso_max_size) : + READ_ONCE(dev->gso_ipv4_max_size); +} + static inline bool netif_is_macsec(const struct net_device *dev) { return dev->priv_flags & IFF_MACSEC; diff --git a/net/core/dev.c b/net/core/dev.c index cd479f5f22f6..74cf78a6b512 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3512,7 +3512,7 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb, if (gso_segs > READ_ONCE(dev->gso_max_segs)) return features & ~NETIF_F_GSO_MASK; - if (unlikely(skb->len >= READ_ONCE(dev->gso_max_size))) + if (unlikely(skb->len >= netif_get_gso_max_size(dev, skb))) return features & ~NETIF_F_GSO_MASK; if (!skb_shinfo(skb)->gso_type) { From 45c0de18ff2dc9af01236380404bbd6a46502c69 Mon Sep 17 00:00:00 2001 From: Aleksander Jan Bajkowski Date: Mon, 23 Sep 2024 23:49:49 +0200 Subject: [PATCH 614/655] net: ethernet: lantiq_etop: fix memory disclosure When applying padding, the buffer is not zeroed, which results in memory disclosure. The mentioned data is observed on the wire. This patch uses skb_put_padto() to pad Ethernet frames properly. The mentioned function zeroes the expanded buffer. In case the packet cannot be padded it is silently dropped. Statistics are also not incremented. This driver does not support statistics in the old 32-bit format or the new 64-bit format. These will be added in the future. In its current form, the patch should be easily backported to stable versions. Ethernet MACs on Amazon-SE and Danube cannot do padding of the packets in hardware, so software padding must be applied. Fixes: 504d4721ee8e ("MIPS: Lantiq: Add ethernet driver") Signed-off-by: Aleksander Jan Bajkowski Reviewed-by: Jacob Keller Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20240923214949.231511-2-olek2@wp.pl Signed-off-by: Paolo Abeni --- drivers/net/ethernet/lantiq_etop.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c index 3c289bfe0a09..7179271f63b6 100644 --- a/drivers/net/ethernet/lantiq_etop.c +++ b/drivers/net/ethernet/lantiq_etop.c @@ -481,7 +481,9 @@ ltq_etop_tx(struct sk_buff *skb, struct net_device *dev) unsigned long flags; u32 byte_offset; - len = skb->len < ETH_ZLEN ? ETH_ZLEN : skb->len; + if (skb_put_padto(skb, ETH_ZLEN)) + return NETDEV_TX_OK; + len = skb->len; if ((desc->ctl & (LTQ_DMA_OWN | LTQ_DMA_C)) || ch->skb[ch->dma.desc]) { netdev_err(dev, "tx ring full\n"); From 93ef6ee5c20e9330477930ec6347672c9e0cf5a6 Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Tue, 24 Sep 2024 10:28:57 +0800 Subject: [PATCH 615/655] net: pcs: xpcs: fix the wrong register that was written back The value is read from the register TXGBE_RX_GEN_CTL3, and it should be written back to TXGBE_RX_GEN_CTL3 when it changes some fields. Cc: stable@vger.kernel.org Fixes: f629acc6f210 ("net: pcs: xpcs: support to switch mode for Wangxun NICs") Signed-off-by: Jiawen Wu Reported-by: Russell King (Oracle) Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20240924022857.865422-1-jiawenwu@trustnetic.com Signed-off-by: Paolo Abeni --- drivers/net/pcs/pcs-xpcs-wx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/pcs/pcs-xpcs-wx.c b/drivers/net/pcs/pcs-xpcs-wx.c index 19c75886f070..5f5cd3596cb8 100644 --- a/drivers/net/pcs/pcs-xpcs-wx.c +++ b/drivers/net/pcs/pcs-xpcs-wx.c @@ -109,7 +109,7 @@ static void txgbe_pma_config_1g(struct dw_xpcs *xpcs) txgbe_write_pma(xpcs, TXGBE_DFE_TAP_CTL0, 0); val = txgbe_read_pma(xpcs, TXGBE_RX_GEN_CTL3); val = u16_replace_bits(val, 0x4, TXGBE_RX_GEN_CTL3_LOS_TRSHLD0); - txgbe_write_pma(xpcs, TXGBE_RX_EQ_ATTN_CTL, val); + txgbe_write_pma(xpcs, TXGBE_RX_GEN_CTL3, val); txgbe_write_pma(xpcs, TXGBE_MPLLA_CTL0, 0x20); txgbe_write_pma(xpcs, TXGBE_MPLLA_CTL3, 0x46); From a1477dc87dc4996dcf65a4893d4e2c3a6b593002 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cs=C3=B3k=C3=A1s=2C=20Bence?= Date: Tue, 24 Sep 2024 11:37:04 +0200 Subject: [PATCH 616/655] net: fec: Restart PPS after link state change MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On link state change, the controller gets reset, causing PPS to drop out. Re-enable PPS if it was enabled before the controller reset. Fixes: 6605b730c061 ("FEC: Add time stamping code and a PTP hardware clock") Signed-off-by: Csókás, Bence Link: https://patch.msgid.link/20240924093705.2897329-1-csokas.bence@prolan.hu Signed-off-by: Paolo Abeni --- drivers/net/ethernet/freescale/fec.h | 6 +++++ drivers/net/ethernet/freescale/fec_main.c | 11 ++++++++- drivers/net/ethernet/freescale/fec_ptp.c | 30 +++++++++++++++++++++++ 3 files changed, 46 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index a19cb2a786fd..0552317a2554 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -691,10 +691,16 @@ struct fec_enet_private { /* XDP BPF Program */ struct bpf_prog *xdp_prog; + struct { + int pps_enable; + } ptp_saved_state; + u64 ethtool_stats[]; }; void fec_ptp_init(struct platform_device *pdev, int irq_idx); +void fec_ptp_restore_state(struct fec_enet_private *fep); +void fec_ptp_save_state(struct fec_enet_private *fep); void fec_ptp_stop(struct platform_device *pdev); void fec_ptp_start_cyclecounter(struct net_device *ndev); int fec_ptp_set(struct net_device *ndev, struct kernel_hwtstamp_config *config, diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index acbb627d51bf..31ebf6a4f973 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1077,6 +1077,8 @@ fec_restart(struct net_device *ndev) u32 rcntl = OPT_FRAME_SIZE | 0x04; u32 ecntl = FEC_ECR_ETHEREN; + fec_ptp_save_state(fep); + /* Whack a reset. We should wait for this. * For i.MX6SX SOC, enet use AXI bus, we use disable MAC * instead of reset MAC itself. @@ -1244,8 +1246,10 @@ fec_restart(struct net_device *ndev) writel(ecntl, fep->hwp + FEC_ECNTRL); fec_enet_active_rxring(ndev); - if (fep->bufdesc_ex) + if (fep->bufdesc_ex) { fec_ptp_start_cyclecounter(ndev); + fec_ptp_restore_state(fep); + } /* Enable interrupts we wish to service */ if (fep->link) @@ -1336,6 +1340,8 @@ fec_stop(struct net_device *ndev) netdev_err(ndev, "Graceful transmit stop did not complete!\n"); } + fec_ptp_save_state(fep); + /* Whack a reset. We should wait for this. * For i.MX6SX SOC, enet use AXI bus, we use disable MAC * instead of reset MAC itself. @@ -1366,6 +1372,9 @@ fec_stop(struct net_device *ndev) val = readl(fep->hwp + FEC_ECNTRL); val |= FEC_ECR_EN1588; writel(val, fep->hwp + FEC_ECNTRL); + + fec_ptp_start_cyclecounter(ndev); + fec_ptp_restore_state(fep); } } diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c index 4cffda363a14..df1ef023493b 100644 --- a/drivers/net/ethernet/freescale/fec_ptp.c +++ b/drivers/net/ethernet/freescale/fec_ptp.c @@ -764,6 +764,36 @@ void fec_ptp_init(struct platform_device *pdev, int irq_idx) schedule_delayed_work(&fep->time_keep, HZ); } +void fec_ptp_save_state(struct fec_enet_private *fep) +{ + unsigned long flags; + + spin_lock_irqsave(&fep->tmreg_lock, flags); + + fep->ptp_saved_state.pps_enable = fep->pps_enable; + + spin_unlock_irqrestore(&fep->tmreg_lock, flags); +} + +/* Restore PTP functionality after a reset */ +void fec_ptp_restore_state(struct fec_enet_private *fep) +{ + unsigned long flags; + + spin_lock_irqsave(&fep->tmreg_lock, flags); + + /* Reset turned it off, so adjust our status flag */ + fep->pps_enable = 0; + + spin_unlock_irqrestore(&fep->tmreg_lock, flags); + + /* Restart PPS if needed */ + if (fep->ptp_saved_state.pps_enable) { + /* Re-enable PPS */ + fec_ptp_enable_pps(fep, 1); + } +} + void fec_ptp_stop(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); From d9335d0232d2da605585eea1518ac6733518f938 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cs=C3=B3k=C3=A1s=2C=20Bence?= Date: Tue, 24 Sep 2024 11:37:06 +0200 Subject: [PATCH 617/655] net: fec: Reload PTP registers after link-state change MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On link-state change, the controller gets reset, which clears all PTP registers, including PHC time, calibrated clock correction values etc. For correct IEEE 1588 operation we need to restore these after the reset. Fixes: 6605b730c061 ("FEC: Add time stamping code and a PTP hardware clock") Signed-off-by: Csókás, Bence Reviewed-by: Wei Fang Link: https://patch.msgid.link/20240924093705.2897329-2-csokas.bence@prolan.hu Signed-off-by: Paolo Abeni --- drivers/net/ethernet/freescale/fec.h | 3 +++ drivers/net/ethernet/freescale/fec_ptp.c | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index 0552317a2554..1cca0425d493 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -693,6 +693,9 @@ struct fec_enet_private { struct { int pps_enable; + u64 ns_sys, ns_phc; + u32 at_corr; + u8 at_inc_corr; } ptp_saved_state; u64 ethtool_stats[]; diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c index df1ef023493b..a4eb6edb850a 100644 --- a/drivers/net/ethernet/freescale/fec_ptp.c +++ b/drivers/net/ethernet/freescale/fec_ptp.c @@ -767,24 +767,44 @@ void fec_ptp_init(struct platform_device *pdev, int irq_idx) void fec_ptp_save_state(struct fec_enet_private *fep) { unsigned long flags; + u32 atime_inc_corr; spin_lock_irqsave(&fep->tmreg_lock, flags); fep->ptp_saved_state.pps_enable = fep->pps_enable; + fep->ptp_saved_state.ns_phc = timecounter_read(&fep->tc); + fep->ptp_saved_state.ns_sys = ktime_get_ns(); + + fep->ptp_saved_state.at_corr = readl(fep->hwp + FEC_ATIME_CORR); + atime_inc_corr = readl(fep->hwp + FEC_ATIME_INC) & FEC_T_INC_CORR_MASK; + fep->ptp_saved_state.at_inc_corr = (u8)(atime_inc_corr >> FEC_T_INC_CORR_OFFSET); + spin_unlock_irqrestore(&fep->tmreg_lock, flags); } /* Restore PTP functionality after a reset */ void fec_ptp_restore_state(struct fec_enet_private *fep) { + u32 atime_inc = readl(fep->hwp + FEC_ATIME_INC) & FEC_T_INC_MASK; unsigned long flags; + u32 counter; + u64 ns; spin_lock_irqsave(&fep->tmreg_lock, flags); /* Reset turned it off, so adjust our status flag */ fep->pps_enable = 0; + writel(fep->ptp_saved_state.at_corr, fep->hwp + FEC_ATIME_CORR); + atime_inc |= ((u32)fep->ptp_saved_state.at_inc_corr) << FEC_T_INC_CORR_OFFSET; + writel(atime_inc, fep->hwp + FEC_ATIME_INC); + + ns = ktime_get_ns() - fep->ptp_saved_state.ns_sys + fep->ptp_saved_state.ns_phc; + counter = ns & fep->cc.mask; + writel(counter, fep->hwp + FEC_ATIME); + timecounter_init(&fep->tc, &fep->cc, ns); + spin_unlock_irqrestore(&fep->tmreg_lock, flags); /* Restart PPS if needed */ From 1910bd470a0acea01b88722be61f0dfa29089730 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 24 Sep 2024 11:59:09 +0200 Subject: [PATCH 618/655] net: microchip: Make FDMA config symbol invisible There is no need to ask the user about enabling Microchip FDMA functionality, as all drivers that use it select the FDMA symbol. Hence make the symbol invisible, unless when compile-testing. Fixes: 30e48a75df9c6ead ("net: microchip: add FDMA library") Signed-off-by: Geert Uytterhoeven Reviewed-by: Daniel Machon Link: https://patch.msgid.link/8e2bcd8899c417a962b7ee3f75b29f35b25d7933.1727171879.git.geert+renesas@glider.be Signed-off-by: Paolo Abeni --- drivers/net/ethernet/microchip/fdma/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/microchip/fdma/Kconfig b/drivers/net/ethernet/microchip/fdma/Kconfig index 59159ad6701a..ec228c061351 100644 --- a/drivers/net/ethernet/microchip/fdma/Kconfig +++ b/drivers/net/ethernet/microchip/fdma/Kconfig @@ -6,7 +6,7 @@ if NET_VENDOR_MICROCHIP config FDMA - bool "FDMA API" + bool "FDMA API" if COMPILE_TEST help Provides the basic FDMA functionality for multiple Microchip switchcores. From e9d591b16c0ed8489aedc86cac237145815d14dc Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Tue, 24 Sep 2024 15:28:48 +0300 Subject: [PATCH 619/655] net: ethernet: ti: cpsw_ale: Fix warning on some platforms The number of register fields cannot be assumed to be ALE_FIELDS_MAX as some platforms can have lesser fields. Solve this by embedding the actual number of fields available in platform data and use that instead of ALE_FIELDS_MAX. Gets rid of the below warning on BeagleBone Black [ 1.007735] WARNING: CPU: 0 PID: 33 at drivers/base/regmap/regmap.c:1208 regmap_field_init+0x88/0x9c [ 1.007802] invalid empty mask defined [ 1.007812] Modules linked in: [ 1.007842] CPU: 0 UID: 0 PID: 33 Comm: kworker/u4:3 Not tainted 6.11.0-01459-g508403ab7b74-dirty #840 [ 1.007867] Hardware name: Generic AM33XX (Flattened Device Tree) [ 1.007890] Workqueue: events_unbound deferred_probe_work_func [ 1.007935] Call trace: [ 1.007957] unwind_backtrace from show_stack+0x10/0x14 [ 1.007999] show_stack from dump_stack_lvl+0x50/0x64 [ 1.008033] dump_stack_lvl from __warn+0x70/0x124 [ 1.008077] __warn from warn_slowpath_fmt+0x194/0x1a8 [ 1.008113] warn_slowpath_fmt from regmap_field_init+0x88/0x9c [ 1.008154] regmap_field_init from devm_regmap_field_alloc+0x48/0x64 [ 1.008193] devm_regmap_field_alloc from cpsw_ale_create+0xfc/0x320 [ 1.008251] cpsw_ale_create from cpsw_init_common+0x214/0x354 [ 1.008286] cpsw_init_common from cpsw_probe+0x4ac/0xb88 Reported-by: Geert Uytterhoeven Closes: https://lore.kernel.org/netdev/CAMuHMdUf-tKRDzkz2_m8qdFTFutefddU0NTratVrEjRTzA3yQQ@mail.gmail.com/ Fixes: 11cbcfeaa79e ("net: ethernet: ti: cpsw_ale: use regfields for number of Entries and Policers") Signed-off-by: Roger Quadros Tested-by: Geert Uytterhoeven Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240924-am65-cpsw-multi-rx-fix-v1-1-0ca3fa9a1398@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/cpsw_ale.c | 12 +++++++++++- drivers/net/ethernet/ti/cpsw_ale.h | 1 + 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c index 0d5d8917c70b..8d02d2b21429 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.c +++ b/drivers/net/ethernet/ti/cpsw_ale.c @@ -96,6 +96,7 @@ enum { * @features: features supported by ALE * @tbl_entries: number of ALE entries * @reg_fields: pointer to array of register field configuration + * @num_fields: number of fields in the reg_fields array * @nu_switch_ale: NU Switch ALE * @vlan_entry_tbl: ALE vlan entry fields description tbl */ @@ -104,6 +105,7 @@ struct cpsw_ale_dev_id { u32 features; u32 tbl_entries; const struct reg_field *reg_fields; + int num_fields; bool nu_switch_ale; const struct ale_entry_fld *vlan_entry_tbl; }; @@ -1400,6 +1402,7 @@ static const struct cpsw_ale_dev_id cpsw_ale_id_match[] = { .dev_id = "cpsw", .tbl_entries = 1024, .reg_fields = ale_fields_cpsw, + .num_fields = ARRAY_SIZE(ale_fields_cpsw), .vlan_entry_tbl = vlan_entry_cpsw, }, { @@ -1407,12 +1410,14 @@ static const struct cpsw_ale_dev_id cpsw_ale_id_match[] = { .dev_id = "66ak2h-xgbe", .tbl_entries = 2048, .reg_fields = ale_fields_cpsw, + .num_fields = ARRAY_SIZE(ale_fields_cpsw), .vlan_entry_tbl = vlan_entry_cpsw, }, { .dev_id = "66ak2el", .features = CPSW_ALE_F_STATUS_REG, .reg_fields = ale_fields_cpsw_nu, + .num_fields = ARRAY_SIZE(ale_fields_cpsw_nu), .nu_switch_ale = true, .vlan_entry_tbl = vlan_entry_nu, }, @@ -1421,6 +1426,7 @@ static const struct cpsw_ale_dev_id cpsw_ale_id_match[] = { .features = CPSW_ALE_F_STATUS_REG, .tbl_entries = 64, .reg_fields = ale_fields_cpsw_nu, + .num_fields = ARRAY_SIZE(ale_fields_cpsw_nu), .nu_switch_ale = true, .vlan_entry_tbl = vlan_entry_nu, }, @@ -1429,6 +1435,7 @@ static const struct cpsw_ale_dev_id cpsw_ale_id_match[] = { .features = CPSW_ALE_F_STATUS_REG | CPSW_ALE_F_HW_AUTOAGING, .tbl_entries = 64, .reg_fields = ale_fields_cpsw_nu, + .num_fields = ARRAY_SIZE(ale_fields_cpsw_nu), .nu_switch_ale = true, .vlan_entry_tbl = vlan_entry_nu, }, @@ -1436,12 +1443,14 @@ static const struct cpsw_ale_dev_id cpsw_ale_id_match[] = { .dev_id = "j721e-cpswxg", .features = CPSW_ALE_F_STATUS_REG | CPSW_ALE_F_HW_AUTOAGING, .reg_fields = ale_fields_cpsw_nu, + .num_fields = ARRAY_SIZE(ale_fields_cpsw_nu), .vlan_entry_tbl = vlan_entry_k3_cpswxg, }, { .dev_id = "am64-cpswxg", .features = CPSW_ALE_F_STATUS_REG | CPSW_ALE_F_HW_AUTOAGING, .reg_fields = ale_fields_cpsw_nu, + .num_fields = ARRAY_SIZE(ale_fields_cpsw_nu), .vlan_entry_tbl = vlan_entry_k3_cpswxg, .tbl_entries = 512, }, @@ -1477,7 +1486,7 @@ static int cpsw_ale_regfield_init(struct cpsw_ale *ale) struct regmap *regmap = ale->regmap; int i; - for (i = 0; i < ALE_FIELDS_MAX; i++) { + for (i = 0; i < ale->params.num_fields; i++) { ale->fields[i] = devm_regmap_field_alloc(dev, regmap, reg_fields[i]); if (IS_ERR(ale->fields[i])) { @@ -1503,6 +1512,7 @@ struct cpsw_ale *cpsw_ale_create(struct cpsw_ale_params *params) params->ale_entries = ale_dev_id->tbl_entries; params->nu_switch_ale = ale_dev_id->nu_switch_ale; params->reg_fields = ale_dev_id->reg_fields; + params->num_fields = ale_dev_id->num_fields; ale = devm_kzalloc(params->dev, sizeof(*ale), GFP_KERNEL); if (!ale) diff --git a/drivers/net/ethernet/ti/cpsw_ale.h b/drivers/net/ethernet/ti/cpsw_ale.h index 1e4e9a3dd234..87b7d1b3a34a 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.h +++ b/drivers/net/ethernet/ti/cpsw_ale.h @@ -24,6 +24,7 @@ struct cpsw_ale_params { */ bool nu_switch_ale; const struct reg_field *reg_fields; + int num_fields; const char *dev_id; unsigned long bus_freq; }; From c20029db28399ecc50e556964eaba75c43b1e2f1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 24 Sep 2024 15:02:56 +0000 Subject: [PATCH 620/655] net: avoid potential underflow in qdisc_pkt_len_init() with UFO After commit 7c6d2ecbda83 ("net: be more gentle about silly gso requests coming from user") virtio_net_hdr_to_skb() had sanity check to detect malicious attempts from user space to cook a bad GSO packet. Then commit cf9acc90c80ec ("net: virtio_net_hdr_to_skb: count transport header in UFO") while fixing one issue, allowed user space to cook a GSO packet with the following characteristic : IPv4 SKB_GSO_UDP, gso_size=3, skb->len = 28. When this packet arrives in qdisc_pkt_len_init(), we end up with hdr_len = 28 (IPv4 header + UDP header), matching skb->len Then the following sets gso_segs to 0 : gso_segs = DIV_ROUND_UP(skb->len - hdr_len, shinfo->gso_size); Then later we set qdisc_skb_cb(skb)->pkt_len to back to zero :/ qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len; This leads to the following crash in fq_codel [1] qdisc_pkt_len_init() is best effort, we only want an estimation of the bytes sent on the wire, not crashing the kernel. This patch is fixing this particular issue, a following one adds more sanity checks for another potential bug. [1] [ 70.724101] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 70.724561] #PF: supervisor read access in kernel mode [ 70.724561] #PF: error_code(0x0000) - not-present page [ 70.724561] PGD 10ac61067 P4D 10ac61067 PUD 107ee2067 PMD 0 [ 70.724561] Oops: Oops: 0000 [#1] SMP NOPTI [ 70.724561] CPU: 11 UID: 0 PID: 2163 Comm: b358537762 Not tainted 6.11.0-virtme #991 [ 70.724561] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 [ 70.724561] RIP: 0010:fq_codel_enqueue (net/sched/sch_fq_codel.c:120 net/sched/sch_fq_codel.c:168 net/sched/sch_fq_codel.c:230) sch_fq_codel [ 70.724561] Code: 24 08 49 c1 e1 06 44 89 7c 24 18 45 31 ed 45 31 c0 31 ff 89 44 24 14 4c 03 8b 90 01 00 00 eb 04 39 ca 73 37 4d 8b 39 83 c7 01 <49> 8b 17 49 89 11 41 8b 57 28 45 8b 5f 34 49 c7 07 00 00 00 00 49 All code ======== 0: 24 08 and $0x8,%al 2: 49 c1 e1 06 shl $0x6,%r9 6: 44 89 7c 24 18 mov %r15d,0x18(%rsp) b: 45 31 ed xor %r13d,%r13d e: 45 31 c0 xor %r8d,%r8d 11: 31 ff xor %edi,%edi 13: 89 44 24 14 mov %eax,0x14(%rsp) 17: 4c 03 8b 90 01 00 00 add 0x190(%rbx),%r9 1e: eb 04 jmp 0x24 20: 39 ca cmp %ecx,%edx 22: 73 37 jae 0x5b 24: 4d 8b 39 mov (%r9),%r15 27: 83 c7 01 add $0x1,%edi 2a:* 49 8b 17 mov (%r15),%rdx <-- trapping instruction 2d: 49 89 11 mov %rdx,(%r9) 30: 41 8b 57 28 mov 0x28(%r15),%edx 34: 45 8b 5f 34 mov 0x34(%r15),%r11d 38: 49 c7 07 00 00 00 00 movq $0x0,(%r15) 3f: 49 rex.WB Code starting with the faulting instruction =========================================== 0: 49 8b 17 mov (%r15),%rdx 3: 49 89 11 mov %rdx,(%r9) 6: 41 8b 57 28 mov 0x28(%r15),%edx a: 45 8b 5f 34 mov 0x34(%r15),%r11d e: 49 c7 07 00 00 00 00 movq $0x0,(%r15) 15: 49 rex.WB [ 70.724561] RSP: 0018:ffff95ae85e6fb90 EFLAGS: 00000202 [ 70.724561] RAX: 0000000002000000 RBX: ffff95ae841de000 RCX: 0000000000000000 [ 70.724561] RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000001 [ 70.724561] RBP: ffff95ae85e6fbf8 R08: 0000000000000000 R09: ffff95b710a30000 [ 70.724561] R10: 0000000000000000 R11: bdf289445ce31881 R12: ffff95ae85e6fc58 [ 70.724561] R13: 0000000000000000 R14: 0000000000000040 R15: 0000000000000000 [ 70.724561] FS: 000000002c5c1380(0000) GS:ffff95bd7fcc0000(0000) knlGS:0000000000000000 [ 70.724561] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 70.724561] CR2: 0000000000000000 CR3: 000000010c568000 CR4: 00000000000006f0 [ 70.724561] Call Trace: [ 70.724561] [ 70.724561] ? __die (arch/x86/kernel/dumpstack.c:421 arch/x86/kernel/dumpstack.c:434) [ 70.724561] ? page_fault_oops (arch/x86/mm/fault.c:715) [ 70.724561] ? exc_page_fault (./arch/x86/include/asm/irqflags.h:26 ./arch/x86/include/asm/irqflags.h:87 ./arch/x86/include/asm/irqflags.h:147 arch/x86/mm/fault.c:1489 arch/x86/mm/fault.c:1539) [ 70.724561] ? asm_exc_page_fault (./arch/x86/include/asm/idtentry.h:623) [ 70.724561] ? fq_codel_enqueue (net/sched/sch_fq_codel.c:120 net/sched/sch_fq_codel.c:168 net/sched/sch_fq_codel.c:230) sch_fq_codel [ 70.724561] dev_qdisc_enqueue (net/core/dev.c:3784) [ 70.724561] __dev_queue_xmit (net/core/dev.c:3880 (discriminator 2) net/core/dev.c:4390 (discriminator 2)) [ 70.724561] ? irqentry_enter (kernel/entry/common.c:237) [ 70.724561] ? sysvec_apic_timer_interrupt (./arch/x86/include/asm/hardirq.h:74 (discriminator 2) arch/x86/kernel/apic/apic.c:1043 (discriminator 2) arch/x86/kernel/apic/apic.c:1043 (discriminator 2)) [ 70.724561] ? trace_hardirqs_on (kernel/trace/trace_preemptirq.c:58 (discriminator 4)) [ 70.724561] ? asm_sysvec_apic_timer_interrupt (./arch/x86/include/asm/idtentry.h:702) [ 70.724561] ? virtio_net_hdr_to_skb.constprop.0 (./include/linux/virtio_net.h:129 (discriminator 1)) [ 70.724561] packet_sendmsg (net/packet/af_packet.c:3145 (discriminator 1) net/packet/af_packet.c:3177 (discriminator 1)) [ 70.724561] ? _raw_spin_lock_bh (./arch/x86/include/asm/atomic.h:107 (discriminator 4) ./include/linux/atomic/atomic-arch-fallback.h:2170 (discriminator 4) ./include/linux/atomic/atomic-instrumented.h:1302 (discriminator 4) ./include/asm-generic/qspinlock.h:111 (discriminator 4) ./include/linux/spinlock.h:187 (discriminator 4) ./include/linux/spinlock_api_smp.h:127 (discriminator 4) kernel/locking/spinlock.c:178 (discriminator 4)) [ 70.724561] ? netdev_name_node_lookup_rcu (net/core/dev.c:325 (discriminator 1)) [ 70.724561] __sys_sendto (net/socket.c:730 (discriminator 1) net/socket.c:745 (discriminator 1) net/socket.c:2210 (discriminator 1)) [ 70.724561] ? __sys_setsockopt (./include/linux/file.h:34 net/socket.c:2355) [ 70.724561] __x64_sys_sendto (net/socket.c:2222 (discriminator 1) net/socket.c:2218 (discriminator 1) net/socket.c:2218 (discriminator 1)) [ 70.724561] do_syscall_64 (arch/x86/entry/common.c:52 (discriminator 1) arch/x86/entry/common.c:83 (discriminator 1)) [ 70.724561] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) [ 70.724561] RIP: 0033:0x41ae09 Fixes: cf9acc90c80ec ("net: virtio_net_hdr_to_skb: count transport header in UFO") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Jonathan Davies Reviewed-by: Willem de Bruijn Reviewed-by: Jonathan Davies Reviewed-by: David Ahern Signed-off-by: Paolo Abeni --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 74cf78a6b512..7d3d34a3bdf5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3758,7 +3758,7 @@ static void qdisc_pkt_len_init(struct sk_buff *skb) sizeof(_tcphdr), &_tcphdr); if (likely(th)) hdr_len += __tcp_hdrlen(th); - } else { + } else if (shinfo->gso_type & SKB_GSO_UDP_L4) { struct udphdr _udphdr; if (skb_header_pointer(skb, hdr_len, From ab9a9a9e9647392a19e7a885b08000e89c86b535 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 24 Sep 2024 15:02:57 +0000 Subject: [PATCH 621/655] net: add more sanity checks to qdisc_pkt_len_init() One path takes care of SKB_GSO_DODGY, assuming skb->len is bigger than hdr_len. virtio_net_hdr_to_skb() does not fully dissect TCP headers, it only make sure it is at least 20 bytes. It is possible for an user to provide a malicious 'GSO' packet, total length of 80 bytes. - 20 bytes of IPv4 header - 60 bytes TCP header - a small gso_size like 8 virtio_net_hdr_to_skb() would declare this packet as a normal GSO packet, because it would see 40 bytes of payload, bigger than gso_size. We need to make detect this case to not underflow qdisc_skb_cb(skb)->pkt_len. Fixes: 1def9238d4aa ("net_sched: more precise pkt_len computation") Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Reviewed-by: David Ahern Signed-off-by: Paolo Abeni --- net/core/dev.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 7d3d34a3bdf5..ea5fbcd133ae 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3766,10 +3766,14 @@ static void qdisc_pkt_len_init(struct sk_buff *skb) hdr_len += sizeof(struct udphdr); } - if (shinfo->gso_type & SKB_GSO_DODGY) - gso_segs = DIV_ROUND_UP(skb->len - hdr_len, - shinfo->gso_size); + if (unlikely(shinfo->gso_type & SKB_GSO_DODGY)) { + int payload = skb->len - hdr_len; + /* Malicious packet. */ + if (payload <= 0) + return; + gso_segs = DIV_ROUND_UP(payload, shinfo->gso_size); + } qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len; } } From 0d24852bd71ec85ca0016b6d6fc997e6a3381552 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 30 Sep 2024 11:55:00 -0700 Subject: [PATCH 622/655] iov_iter: fix advancing slot in iter_folioq_get_pages() iter_folioq_get_pages() decides to advance to the next folioq slot when it has reached the end of the current folio. However, it is checking offset, which is the beginning of the current part, instead of iov_offset, which is adjusted to the end of the current part, so it doesn't advance the slot when it's supposed to. As a result, on the next iteration, we'll use the same folio with an out-of-bounds offset and return an unrelated page. This manifested as various crashes and other failures in 9pfs in drgn's VM testing setup and BPF CI. Fixes: db0aa2e9566f ("mm: Define struct folio_queue and ITER_FOLIOQ to handle a sequence of folios") Link: https://lore.kernel.org/linux-fsdevel/20240923183432.1876750-1-chantr4@gmail.com/ Tested-by: Manu Bretelle Signed-off-by: Omar Sandoval Link: https://lore.kernel.org/r/cbaf141ba6c0e2e209717d02746584072844841a.1727722269.git.osandov@fb.com Tested-by: Eduard Zingerman Tested-by: Leon Romanovsky Tested-by: Joey Gouly Acked-by: David Howells Signed-off-by: Christian Brauner --- lib/iov_iter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 97003155bfac..1abb32c0da50 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1033,7 +1033,7 @@ static ssize_t iter_folioq_get_pages(struct iov_iter *iter, if (maxpages == 0 || extracted >= maxsize) break; - if (offset >= fsize) { + if (iov_offset >= fsize) { iov_offset = 0; slot++; if (slot == folioq_nr_slots(folioq) && folioq->next) { From 4c1b56671b68ffcbe6b78308bfdda6bcce6491ae Mon Sep 17 00:00:00 2001 From: Shenwei Wang Date: Tue, 24 Sep 2024 15:54:24 -0500 Subject: [PATCH 623/655] net: stmmac: dwmac4: extend timeout for VLAN Tag register busy bit check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Increase the timeout for checking the busy bit of the VLAN Tag register from 10µs to 500ms. This change is necessary to accommodate scenarios where Energy Efficient Ethernet (EEE) is enabled. Overnight testing revealed that when EEE is active, the busy bit can remain set for up to approximately 300ms. The new 500ms timeout provides a safety margin. Fixes: ed64639bc1e0 ("net: stmmac: Add support for VLAN Rx filtering") Reviewed-by: Andrew Lunn Signed-off-by: Shenwei Wang Link: https://patch.msgid.link/20240924205424.573913-1-shenwei.wang@nxp.com Signed-off-by: Paolo Abeni --- .../net/ethernet/stmicro/stmmac/dwmac4_core.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index a1858f083eef..e65a65666cc1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "stmmac.h" #include "stmmac_pcs.h" #include "dwmac4.h" @@ -471,7 +472,7 @@ static int dwmac4_write_vlan_filter(struct net_device *dev, u8 index, u32 data) { void __iomem *ioaddr = (void __iomem *)dev->base_addr; - int i, timeout = 10; + int ret; u32 val; if (index >= hw->num_vlan) @@ -487,16 +488,15 @@ static int dwmac4_write_vlan_filter(struct net_device *dev, writel(val, ioaddr + GMAC_VLAN_TAG); - for (i = 0; i < timeout; i++) { - val = readl(ioaddr + GMAC_VLAN_TAG); - if (!(val & GMAC_VLAN_TAG_CTRL_OB)) - return 0; - udelay(1); + ret = readl_poll_timeout(ioaddr + GMAC_VLAN_TAG, val, + !(val & GMAC_VLAN_TAG_CTRL_OB), + 1000, 500000); + if (ret) { + netdev_err(dev, "Timeout accessing MAC_VLAN_Tag_Filter\n"); + return -EBUSY; } - netdev_err(dev, "Timeout accessing MAC_VLAN_Tag_Filter\n"); - - return -EBUSY; + return 0; } static int dwmac4_add_hw_vlan_rx_fltr(struct net_device *dev, From a3f9a74d210bf5b80046a840d3e9949b5fe0a67c Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 1 Oct 2024 03:54:05 -0700 Subject: [PATCH 624/655] Revert "Input: Add driver for PixArt PS/2 touchpad" This reverts commit 740ff03d7238214a318cdcfd96dec51832b053d2 because current PixArt detection is too greedy and claims devices that are not PixArt. Reported-by: Benjamin Tissoires Closes: https://bugzilla.redhat.com/show_bug.cgi?id=2314756 Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/Kconfig | 12 -- drivers/input/mouse/Makefile | 1 - drivers/input/mouse/pixart_ps2.c | 300 ----------------------------- drivers/input/mouse/pixart_ps2.h | 36 ---- drivers/input/mouse/psmouse-base.c | 17 -- drivers/input/mouse/psmouse.h | 3 +- 6 files changed, 1 insertion(+), 368 deletions(-) delete mode 100644 drivers/input/mouse/pixart_ps2.c delete mode 100644 drivers/input/mouse/pixart_ps2.h diff --git a/drivers/input/mouse/Kconfig b/drivers/input/mouse/Kconfig index 8a27a20d04b0..833b643f0616 100644 --- a/drivers/input/mouse/Kconfig +++ b/drivers/input/mouse/Kconfig @@ -69,18 +69,6 @@ config MOUSE_PS2_LOGIPS2PP If unsure, say Y. -config MOUSE_PS2_PIXART - bool "PixArt PS/2 touchpad protocol extension" if EXPERT - default y - depends on MOUSE_PS2 - help - This driver supports the PixArt PS/2 touchpad found in some - laptops. - Say Y here if you have a PixArt PS/2 TouchPad connected to - your system. - - If unsure, say Y. - config MOUSE_PS2_SYNAPTICS bool "Synaptics PS/2 mouse protocol extension" if EXPERT default y diff --git a/drivers/input/mouse/Makefile b/drivers/input/mouse/Makefile index 563029551529..a1336d5bee6f 100644 --- a/drivers/input/mouse/Makefile +++ b/drivers/input/mouse/Makefile @@ -32,7 +32,6 @@ psmouse-$(CONFIG_MOUSE_PS2_ELANTECH) += elantech.o psmouse-$(CONFIG_MOUSE_PS2_OLPC) += hgpk.o psmouse-$(CONFIG_MOUSE_PS2_LOGIPS2PP) += logips2pp.o psmouse-$(CONFIG_MOUSE_PS2_LIFEBOOK) += lifebook.o -psmouse-$(CONFIG_MOUSE_PS2_PIXART) += pixart_ps2.o psmouse-$(CONFIG_MOUSE_PS2_SENTELIC) += sentelic.o psmouse-$(CONFIG_MOUSE_PS2_TRACKPOINT) += trackpoint.o psmouse-$(CONFIG_MOUSE_PS2_TOUCHKIT) += touchkit_ps2.o diff --git a/drivers/input/mouse/pixart_ps2.c b/drivers/input/mouse/pixart_ps2.c deleted file mode 100644 index 1993fc760d7b..000000000000 --- a/drivers/input/mouse/pixart_ps2.c +++ /dev/null @@ -1,300 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Pixart Touchpad Controller 1336U PS2 driver - * - * Author: Jon Xie - * Jay Lee - * Further cleanup and restructuring by: - * Binbin Zhou - * - * Copyright (C) 2021-2024 Pixart Imaging. - * Copyright (C) 2024 Loongson Technology Corporation Limited. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "pixart_ps2.h" - -static int pixart_read_tp_mode(struct ps2dev *ps2dev, u8 *mode) -{ - int error; - u8 param[1] = { 0 }; - - error = ps2_command(ps2dev, param, PIXART_CMD_REPORT_FORMAT); - if (error) - return error; - - *mode = param[0] == 1 ? PIXART_MODE_ABS : PIXART_MODE_REL; - - return 0; -} - -static int pixart_read_tp_type(struct ps2dev *ps2dev, u8 *type) -{ - int error; - u8 param[3] = { 0 }; - - param[0] = 0x0a; - error = ps2_command(ps2dev, param, PSMOUSE_CMD_SETRATE); - if (error) - return error; - - param[0] = 0x0; - error = ps2_command(ps2dev, param, PSMOUSE_CMD_SETRES); - if (error) - return error; - - error = ps2_command(ps2dev, param, PSMOUSE_CMD_SETRES); - if (error) - return error; - - error = ps2_command(ps2dev, param, PSMOUSE_CMD_SETRES); - if (error) - return error; - - param[0] = 0x03; - error = ps2_command(ps2dev, param, PSMOUSE_CMD_SETRES); - if (error) - return error; - - error = ps2_command(ps2dev, param, PSMOUSE_CMD_GETINFO); - if (error) - return error; - - *type = param[0] == 0x0e ? PIXART_TYPE_TOUCHPAD : PIXART_TYPE_CLICKPAD; - - return 0; -} - -static void pixart_reset(struct psmouse *psmouse) -{ - ps2_command(&psmouse->ps2dev, NULL, PSMOUSE_CMD_RESET_DIS); - - /* according to PixArt, 100ms is required for the upcoming reset */ - msleep(100); - psmouse_reset(psmouse); -} - -static void pixart_process_packet(struct psmouse *psmouse) -{ - struct pixart_data *priv = psmouse->private; - struct input_dev *dev = psmouse->dev; - const u8 *pkt = psmouse->packet; - unsigned int contact_cnt = FIELD_GET(CONTACT_CNT_MASK, pkt[0]); - unsigned int i, id, abs_x, abs_y; - bool tip; - - for (i = 0; i < contact_cnt; i++) { - const u8 *p = &pkt[i * 3]; - - id = FIELD_GET(SLOT_ID_MASK, p[3]); - abs_y = FIELD_GET(ABS_Y_MASK, p[3]) << 8 | p[1]; - abs_x = FIELD_GET(ABS_X_MASK, p[3]) << 8 | p[2]; - - if (i == PIXART_MAX_FINGERS - 1) - tip = pkt[14] & BIT(1); - else - tip = pkt[3 * contact_cnt + 1] & BIT(2 * i + 1); - - input_mt_slot(dev, id); - if (input_mt_report_slot_state(dev, MT_TOOL_FINGER, tip)) { - input_report_abs(dev, ABS_MT_POSITION_Y, abs_y); - input_report_abs(dev, ABS_MT_POSITION_X, abs_x); - } - } - - input_mt_sync_frame(dev); - - if (priv->type == PIXART_TYPE_CLICKPAD) { - input_report_key(dev, BTN_LEFT, pkt[0] & 0x03); - } else { - input_report_key(dev, BTN_LEFT, pkt[0] & BIT(0)); - input_report_key(dev, BTN_RIGHT, pkt[0] & BIT(1)); - } - - input_sync(dev); -} - -static psmouse_ret_t pixart_protocol_handler(struct psmouse *psmouse) -{ - u8 *pkt = psmouse->packet; - u8 contact_cnt; - - if ((pkt[0] & 0x8c) != 0x80) - return PSMOUSE_BAD_DATA; - - contact_cnt = FIELD_GET(CONTACT_CNT_MASK, pkt[0]); - if (contact_cnt > PIXART_MAX_FINGERS) - return PSMOUSE_BAD_DATA; - - if (contact_cnt == PIXART_MAX_FINGERS && - psmouse->pktcnt < psmouse->pktsize) { - return PSMOUSE_GOOD_DATA; - } - - if (contact_cnt == 0 && psmouse->pktcnt < 5) - return PSMOUSE_GOOD_DATA; - - if (psmouse->pktcnt < 3 * contact_cnt + 2) - return PSMOUSE_GOOD_DATA; - - pixart_process_packet(psmouse); - - return PSMOUSE_FULL_PACKET; -} - -static void pixart_disconnect(struct psmouse *psmouse) -{ - pixart_reset(psmouse); - kfree(psmouse->private); - psmouse->private = NULL; -} - -static int pixart_reconnect(struct psmouse *psmouse) -{ - struct ps2dev *ps2dev = &psmouse->ps2dev; - u8 mode; - int error; - - pixart_reset(psmouse); - - error = pixart_read_tp_mode(ps2dev, &mode); - if (error) - return error; - - if (mode != PIXART_MODE_ABS) - return -EIO; - - error = ps2_command(ps2dev, NULL, PIXART_CMD_SWITCH_PROTO); - if (error) - return error; - - return 0; -} - -static int pixart_set_input_params(struct input_dev *dev, - struct pixart_data *priv) -{ - /* No relative support */ - __clear_bit(EV_REL, dev->evbit); - __clear_bit(REL_X, dev->relbit); - __clear_bit(REL_Y, dev->relbit); - __clear_bit(BTN_MIDDLE, dev->keybit); - - /* Buttons */ - __set_bit(EV_KEY, dev->evbit); - __set_bit(BTN_LEFT, dev->keybit); - if (priv->type == PIXART_TYPE_CLICKPAD) - __set_bit(INPUT_PROP_BUTTONPAD, dev->propbit); - else - __set_bit(BTN_RIGHT, dev->keybit); - - /* Absolute position */ - input_set_abs_params(dev, ABS_X, 0, PIXART_PAD_WIDTH, 0, 0); - input_set_abs_params(dev, ABS_Y, 0, PIXART_PAD_HEIGHT, 0, 0); - - input_set_abs_params(dev, ABS_MT_POSITION_X, - 0, PIXART_PAD_WIDTH, 0, 0); - input_set_abs_params(dev, ABS_MT_POSITION_Y, - 0, PIXART_PAD_HEIGHT, 0, 0); - - return input_mt_init_slots(dev, PIXART_MAX_FINGERS, INPUT_MT_POINTER); -} - -static int pixart_query_hardware(struct ps2dev *ps2dev, u8 *mode, u8 *type) -{ - int error; - - error = pixart_read_tp_type(ps2dev, type); - if (error) - return error; - - error = pixart_read_tp_mode(ps2dev, mode); - if (error) - return error; - - return 0; -} - -int pixart_detect(struct psmouse *psmouse, bool set_properties) -{ - u8 type; - int error; - - pixart_reset(psmouse); - - error = pixart_read_tp_type(&psmouse->ps2dev, &type); - if (error) - return error; - - if (set_properties) { - psmouse->vendor = "PixArt"; - psmouse->name = (type == PIXART_TYPE_TOUCHPAD) ? - "touchpad" : "clickpad"; - } - - return 0; -} - -int pixart_init(struct psmouse *psmouse) -{ - int error; - struct pixart_data *priv; - - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; - - psmouse->private = priv; - pixart_reset(psmouse); - - error = pixart_query_hardware(&psmouse->ps2dev, - &priv->mode, &priv->type); - if (error) { - psmouse_err(psmouse, "init: Unable to query PixArt touchpad hardware.\n"); - goto err_exit; - } - - /* Relative mode follows standard PS/2 mouse protocol */ - if (priv->mode != PIXART_MODE_ABS) { - error = -EIO; - goto err_exit; - } - - /* Set absolute mode */ - error = ps2_command(&psmouse->ps2dev, NULL, PIXART_CMD_SWITCH_PROTO); - if (error) { - psmouse_err(psmouse, "init: Unable to initialize PixArt absolute mode.\n"); - goto err_exit; - } - - error = pixart_set_input_params(psmouse->dev, priv); - if (error) { - psmouse_err(psmouse, "init: Unable to set input params.\n"); - goto err_exit; - } - - psmouse->pktsize = 15; - psmouse->protocol_handler = pixart_protocol_handler; - psmouse->disconnect = pixart_disconnect; - psmouse->reconnect = pixart_reconnect; - psmouse->cleanup = pixart_reset; - /* resync is not supported yet */ - psmouse->resync_time = 0; - - return 0; - -err_exit: - pixart_reset(psmouse); - kfree(priv); - psmouse->private = NULL; - return error; -} diff --git a/drivers/input/mouse/pixart_ps2.h b/drivers/input/mouse/pixart_ps2.h deleted file mode 100644 index 47a1d040f2d1..000000000000 --- a/drivers/input/mouse/pixart_ps2.h +++ /dev/null @@ -1,36 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _PIXART_PS2_H -#define _PIXART_PS2_H - -#include "psmouse.h" - -#define PIXART_PAD_WIDTH 1023 -#define PIXART_PAD_HEIGHT 579 -#define PIXART_MAX_FINGERS 4 - -#define PIXART_CMD_REPORT_FORMAT 0x01d8 -#define PIXART_CMD_SWITCH_PROTO 0x00de - -#define PIXART_MODE_REL 0 -#define PIXART_MODE_ABS 1 - -#define PIXART_TYPE_CLICKPAD 0 -#define PIXART_TYPE_TOUCHPAD 1 - -#define CONTACT_CNT_MASK GENMASK(6, 4) - -#define SLOT_ID_MASK GENMASK(2, 0) -#define ABS_Y_MASK GENMASK(5, 4) -#define ABS_X_MASK GENMASK(7, 6) - -struct pixart_data { - u8 mode; - u8 type; - int x_max; - int y_max; -}; - -int pixart_detect(struct psmouse *psmouse, bool set_properties); -int pixart_init(struct psmouse *psmouse); - -#endif /* _PIXART_PS2_H */ diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c index 5a4defe9cf32..a2c9f7144864 100644 --- a/drivers/input/mouse/psmouse-base.c +++ b/drivers/input/mouse/psmouse-base.c @@ -36,7 +36,6 @@ #include "focaltech.h" #include "vmmouse.h" #include "byd.h" -#include "pixart_ps2.h" #define DRIVER_DESC "PS/2 mouse driver" @@ -906,15 +905,6 @@ static const struct psmouse_protocol psmouse_protocols[] = { .detect = byd_detect, .init = byd_init, }, -#endif -#ifdef CONFIG_MOUSE_PS2_PIXART - { - .type = PSMOUSE_PIXART, - .name = "PixArtPS/2", - .alias = "pixart", - .detect = pixart_detect, - .init = pixart_init, - }, #endif { .type = PSMOUSE_AUTO, @@ -1182,13 +1172,6 @@ static int psmouse_extensions(struct psmouse *psmouse, return ret; } - /* Try PixArt touchpad */ - if (max_proto > PSMOUSE_IMEX && - psmouse_try_protocol(psmouse, PSMOUSE_PIXART, &max_proto, - set_properties, true)) { - return PSMOUSE_PIXART; - } - if (max_proto > PSMOUSE_IMEX) { if (psmouse_try_protocol(psmouse, PSMOUSE_GENPS, &max_proto, set_properties, true)) diff --git a/drivers/input/mouse/psmouse.h b/drivers/input/mouse/psmouse.h index 23f7fa7243cb..4d8acfe0d82a 100644 --- a/drivers/input/mouse/psmouse.h +++ b/drivers/input/mouse/psmouse.h @@ -69,7 +69,6 @@ enum psmouse_type { PSMOUSE_BYD, PSMOUSE_SYNAPTICS_SMBUS, PSMOUSE_ELANTECH_SMBUS, - PSMOUSE_PIXART, PSMOUSE_AUTO /* This one should always be last */ }; @@ -95,7 +94,7 @@ struct psmouse { const char *vendor; const char *name; const struct psmouse_protocol *protocol; - unsigned char packet[16]; + unsigned char packet[8]; unsigned char badbyte; unsigned char pktcnt; unsigned char pktsize; From c4a14f6d9d17ad1e41a36182dd3b8a5fd91efbd7 Mon Sep 17 00:00:00 2001 From: Anton Danilov Date: Wed, 25 Sep 2024 02:51:59 +0300 Subject: [PATCH 625/655] ipv4: ip_gre: Fix drops of small packets in ipgre_xmit Regression Description: Depending on the options specified for the GRE tunnel device, small packets may be dropped. This occurs because the pskb_network_may_pull function fails due to the packet's insufficient length. For example, if only the okey option is specified for the tunnel device, original (before encapsulation) packets smaller than 28 bytes (including the IPv4 header) will be dropped. This happens because the required length is calculated relative to the network header, not the skb->head. Here is how the required length is computed and checked: * The pull_len variable is set to 28 bytes, consisting of: * IPv4 header: 20 bytes * GRE header with Key field: 8 bytes * The pskb_network_may_pull function adds the network offset, shifting the checkable space further to the beginning of the network header and extending it to the beginning of the packet. As a result, the end of the checkable space occurs beyond the actual end of the packet. Instead of ensuring that 28 bytes are present in skb->head, the function is requesting these 28 bytes starting from the network header. For small packets, this requested length exceeds the actual packet size, causing the check to fail and the packets to be dropped. This issue affects both locally originated and forwarded packets in DMVPN-like setups. How to reproduce (for local originated packets): ip link add dev gre1 type gre ikey 1.9.8.4 okey 1.9.8.4 \ local remote 0.0.0.0 ip link set mtu 1400 dev gre1 ip link set up dev gre1 ip address add 192.168.13.1/24 dev gre1 ip neighbor add 192.168.13.2 lladdr dev gre1 ping -s 1374 -c 10 192.168.13.2 tcpdump -vni gre1 tcpdump -vni 'ip proto 47' ip -s -s -d link show dev gre1 Solution: Use the pskb_may_pull function instead the pskb_network_may_pull. Fixes: 80d875cfc9d3 ("ipv4: ip_gre: Avoid skb_pull() failure in ipgre_xmit()") Signed-off-by: Anton Danilov Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20240924235158.106062-1-littlesmilingcloud@gmail.com Signed-off-by: Paolo Abeni --- net/ipv4/ip_gre.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 5f6fd382af38..f1f31ebfc793 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -662,11 +662,11 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb, if (skb_cow_head(skb, 0)) goto free_skb; - tnl_params = (const struct iphdr *)skb->data; - - if (!pskb_network_may_pull(skb, pull_len)) + if (!pskb_may_pull(skb, pull_len)) goto free_skb; + tnl_params = (const struct iphdr *)skb->data; + /* ip_tunnel_xmit() needs skb->data pointing to gre header. */ skb_pull(skb, pull_len); skb_reset_mac_header(skb); From f6023535b52f5a066fa52fcfd0dc51c7f7894ce6 Mon Sep 17 00:00:00 2001 From: Chang Yu Date: Mon, 30 Sep 2024 23:31:52 -0700 Subject: [PATCH 626/655] netfs: Fix a KMSAN uninit-value error in netfs_clear_buffer Use folioq_count instead of folioq_nr_slots to fix a KMSAN uninit-value error in netfs_clear_buffer Signed-off-by: Chang Yu Link: https://lore.kernel.org/r/ZvuXWC2bYpvQsWgS@gmail.com Fixes: cd0277ed0c18 ("netfs: Use new folio_queue data type and iterator instead of xarray iter") Acked-by: David Howells Reported-by: syzbot+921873345a95f4dae7e9@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=921873345a95f4dae7e9 Signed-off-by: Christian Brauner --- fs/netfs/misc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c index 63280791de3b..78fe5796b2b2 100644 --- a/fs/netfs/misc.c +++ b/fs/netfs/misc.c @@ -102,7 +102,7 @@ void netfs_clear_buffer(struct netfs_io_request *rreq) while ((p = rreq->buffer)) { rreq->buffer = p->next; - for (int slot = 0; slot < folioq_nr_slots(p); slot++) { + for (int slot = 0; slot < folioq_count(p); slot++) { struct folio *folio = folioq_folio(p, slot); if (!folio) continue; From fb5cc65f973661241e4a2b7390b429aa7b330c69 Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Tue, 1 Oct 2024 07:46:44 -0700 Subject: [PATCH 627/655] Input: adp5589-keys - fix NULL pointer dereference We register a devm action to call adp5589_clear_config() and then pass the i2c client as argument so that we can call i2c_get_clientdata() in order to get our device object. However, i2c_set_clientdata() is only being set at the end of the probe function which means that we'll get a NULL pointer dereference in case the probe function fails early. Fixes: 30df385e35a4 ("Input: adp5589-keys - use devm_add_action_or_reset() for register clear") Signed-off-by: Nuno Sa Link: https://lore.kernel.org/r/20241001-b4-dev-adp5589-fw-conversion-v1-1-fca0149dfc47@analog.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/adp5589-keys.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/input/keyboard/adp5589-keys.c b/drivers/input/keyboard/adp5589-keys.c index 8996e00cd63a..68a29d67be57 100644 --- a/drivers/input/keyboard/adp5589-keys.c +++ b/drivers/input/keyboard/adp5589-keys.c @@ -936,10 +936,9 @@ static int adp5589_keypad_add(struct adp5589_kpad *kpad, unsigned int revid) static void adp5589_clear_config(void *data) { - struct i2c_client *client = data; - struct adp5589_kpad *kpad = i2c_get_clientdata(client); + struct adp5589_kpad *kpad = data; - adp5589_write(client, kpad->var->reg(ADP5589_GENERAL_CFG), 0); + adp5589_write(kpad->client, kpad->var->reg(ADP5589_GENERAL_CFG), 0); } static int adp5589_probe(struct i2c_client *client) @@ -983,7 +982,7 @@ static int adp5589_probe(struct i2c_client *client) } error = devm_add_action_or_reset(&client->dev, adp5589_clear_config, - client); + kpad); if (error) return error; @@ -1010,8 +1009,6 @@ static int adp5589_probe(struct i2c_client *client) if (error) return error; - i2c_set_clientdata(client, kpad); - dev_info(&client->dev, "Rev.%d keypad, irq %d\n", revid, client->irq); return 0; } From c684771630e64bc39bddffeb65dd8a6612a6b249 Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Tue, 1 Oct 2024 07:47:23 -0700 Subject: [PATCH 628/655] Input: adp5589-keys - fix adp5589_gpio_get_value() The adp5589 seems to have the same behavior as similar devices as explained in commit 910a9f5636f5 ("Input: adp5588-keys - get value from data out when dir is out"). Basically, when the gpio is set as output we need to get the value from ADP5589_GPO_DATA_OUT_A register instead of ADP5589_GPI_STATUS_A. Fixes: 9d2e173644bb ("Input: ADP5589 - new driver for I2C Keypad Decoder and I/O Expander") Signed-off-by: Nuno Sa Link: https://lore.kernel.org/r/20241001-b4-dev-adp5589-fw-conversion-v1-2-fca0149dfc47@analog.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/adp5589-keys.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/input/keyboard/adp5589-keys.c b/drivers/input/keyboard/adp5589-keys.c index 68a29d67be57..922d3ab998f3 100644 --- a/drivers/input/keyboard/adp5589-keys.c +++ b/drivers/input/keyboard/adp5589-keys.c @@ -391,10 +391,17 @@ static int adp5589_gpio_get_value(struct gpio_chip *chip, unsigned off) struct adp5589_kpad *kpad = gpiochip_get_data(chip); unsigned int bank = kpad->var->bank(kpad->gpiomap[off]); unsigned int bit = kpad->var->bit(kpad->gpiomap[off]); + int val; - return !!(adp5589_read(kpad->client, - kpad->var->reg(ADP5589_GPI_STATUS_A) + bank) & - bit); + mutex_lock(&kpad->gpio_lock); + if (kpad->dir[bank] & bit) + val = kpad->dat_out[bank]; + else + val = adp5589_read(kpad->client, + kpad->var->reg(ADP5589_GPI_STATUS_A) + bank); + mutex_unlock(&kpad->gpio_lock); + + return !!(val & bit); } static void adp5589_gpio_set_value(struct gpio_chip *chip, From f5c82730bedbc4a424cb94d2653bcb8be9dbd2ec Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 1 Oct 2024 17:01:40 +0200 Subject: [PATCH 629/655] folio_queue: fix documentation s/folioq_count/folioq_full/ Reported-by: Stephen Rothwell Link: https://lore.kernel.org/r/20241001134729.3f65ae78@canb.auug.org.au Signed-off-by: Christian Brauner --- include/linux/folio_queue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/folio_queue.h b/include/linux/folio_queue.h index af871405ae55..3abe614ef5f0 100644 --- a/include/linux/folio_queue.h +++ b/include/linux/folio_queue.h @@ -81,7 +81,7 @@ static inline unsigned int folioq_count(struct folio_queue *folioq) } /** - * folioq_count: Query if a folio queue segment is full + * folioq_full: Query if a folio queue segment is full * @folioq: The segment to query * * Query if a folio queue segment is fully occupied. Note that this does not From 59d39b9259e4d15b6e4c6da758ab318a76a10ca4 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 1 Oct 2024 17:04:32 +0200 Subject: [PATCH 630/655] Documentation: add missing folio_queue entry Add missing folio_queue entry. Reported-by: Stephen Rothwell Link: https://lore.kernel.org/r/20241001133920.6e28637b@canb.auug.org.au Signed-off-by: Christian Brauner --- Documentation/core-api/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst index a331d2c814f5..6a875743dd4b 100644 --- a/Documentation/core-api/index.rst +++ b/Documentation/core-api/index.rst @@ -37,6 +37,7 @@ Library functionality that is used throughout the kernel. kref cleanup assoc_array + folio_queue xarray maple_tree idr From 04afb0a3c30aeb5fbe890a92debbfc0cc4044b6f Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Fri, 27 Sep 2024 16:36:42 +0200 Subject: [PATCH 631/655] ksmbd: Use struct_size() to improve get_file_alternate_info() Use struct_size() to calculate the output buffer length. Signed-off-by: Thorsten Blum Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 7460089c186f..f28346fad894 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -4883,7 +4883,7 @@ static void get_file_alternate_info(struct ksmbd_work *work, spin_unlock(&dentry->d_lock); file_info->FileNameLength = cpu_to_le32(conv_len); rsp->OutputBufferLength = - cpu_to_le32(sizeof(struct smb2_file_alt_name_info) + conv_len); + cpu_to_le32(struct_size(file_info, FileName, conv_len)); } static int get_file_stream_info(struct ksmbd_work *work, From 0801c1374ab1552bd7376370987142ff77912527 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Wed, 25 Sep 2024 11:03:13 +0200 Subject: [PATCH 632/655] ksmbd: Annotate struct copychunk_ioctl_req with __counted_by_le() Add the __counted_by_le compiler attribute to the flexible array member Chunks to improve access bounds-checking via CONFIG_UBSAN_BOUNDS and CONFIG_FORTIFY_SOURCE. Change the data type of the flexible array member Chunks from __u8[] to struct srv_copychunk[] for ChunkCount to match the number of elements in the Chunks array. (With __u8[], each srv_copychunk would occupy 24 array entries and the __counted_by compiler attribute wouldn't be applicable.) Use struct_size() to calculate the size of the copychunk_ioctl_req. Read Chunks[0] after checking that ChunkCount is not 0. Signed-off-by: Thorsten Blum Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 5 ++--- fs/smb/server/smb2pdu.h | 14 +++++++------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index f28346fad894..797b0f24097b 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -7562,7 +7562,6 @@ static int fsctl_copychunk(struct ksmbd_work *work, ci_rsp->TotalBytesWritten = cpu_to_le32(ksmbd_server_side_copy_max_total_size()); - chunks = (struct srv_copychunk *)&ci_req->Chunks[0]; chunk_count = le32_to_cpu(ci_req->ChunkCount); if (chunk_count == 0) goto out; @@ -7570,12 +7569,12 @@ static int fsctl_copychunk(struct ksmbd_work *work, /* verify the SRV_COPYCHUNK_COPY packet */ if (chunk_count > ksmbd_server_side_copy_max_chunk_count() || - input_count < offsetof(struct copychunk_ioctl_req, Chunks) + - chunk_count * sizeof(struct srv_copychunk)) { + input_count < struct_size(ci_req, Chunks, chunk_count)) { rsp->hdr.Status = STATUS_INVALID_PARAMETER; return -EINVAL; } + chunks = &ci_req->Chunks[0]; for (i = 0; i < chunk_count; i++) { if (le32_to_cpu(chunks[i].Length) == 0 || le32_to_cpu(chunks[i].Length) > ksmbd_server_side_copy_max_chunk_size()) diff --git a/fs/smb/server/smb2pdu.h b/fs/smb/server/smb2pdu.h index 73aff20e22d0..649dacf7e8c4 100644 --- a/fs/smb/server/smb2pdu.h +++ b/fs/smb/server/smb2pdu.h @@ -190,13 +190,6 @@ struct resume_key_ioctl_rsp { __u8 Context[4]; /* ignored, Windows sets to 4 bytes of zero */ } __packed; -struct copychunk_ioctl_req { - __le64 ResumeKey[3]; - __le32 ChunkCount; - __le32 Reserved; - __u8 Chunks[]; /* array of srv_copychunk */ -} __packed; - struct srv_copychunk { __le64 SourceOffset; __le64 TargetOffset; @@ -204,6 +197,13 @@ struct srv_copychunk { __le32 Reserved; } __packed; +struct copychunk_ioctl_req { + __le64 ResumeKey[3]; + __le32 ChunkCount; + __le32 Reserved; + struct srv_copychunk Chunks[] __counted_by_le(ChunkCount); +} __packed; + struct copychunk_ioctl_rsp { __le32 ChunksWritten; __le32 ChunkBytesWritten; From 9c383396362a4d1db99ed5240f4708d443361ef3 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Sat, 28 Sep 2024 17:00:30 +0200 Subject: [PATCH 633/655] ksmbd: Use struct_size() to improve smb_direct_rdma_xmit() Use struct_size() to calculate the number of bytes to allocate for a new message. Signed-off-by: Thorsten Blum Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 44c87e300c16..17c76713c6d0 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -1405,8 +1405,8 @@ static int smb_direct_rdma_xmit(struct smb_direct_transport *t, /* build rdma_rw_ctx for each descriptor */ desc_buf = buf; for (i = 0; i < desc_num; i++) { - msg = kzalloc(offsetof(struct smb_direct_rdma_rw_msg, sg_list) + - sizeof(struct scatterlist) * SG_CHUNK_SIZE, GFP_KERNEL); + msg = kzalloc(struct_size(msg, sg_list, SG_CHUNK_SIZE), + GFP_KERNEL); if (!msg) { ret = -ENOMEM; goto out; From 134d988208602ccae792e91475c05911c962798e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 5 Dec 2023 21:53:22 -0500 Subject: [PATCH 634/655] parisc: get rid of private asm/unaligned.h Declarations local to arch/*/kernel/*.c are better off *not* in a public header - arch/parisc/kernel/unaligned.h is just fine for those bits. With that done parisc asm/unaligned.h is reduced to include of asm-generic/unaligned.h and can be removed - unaligned.h is in mandatory-y in include/asm-generic/Kbuild. Acked-by: Helge Deller Signed-off-by: Al Viro --- arch/parisc/include/asm/unaligned.h | 11 ----------- arch/parisc/kernel/traps.c | 2 ++ arch/parisc/kernel/unaligned.c | 1 + arch/parisc/kernel/unaligned.h | 3 +++ 4 files changed, 6 insertions(+), 11 deletions(-) delete mode 100644 arch/parisc/include/asm/unaligned.h create mode 100644 arch/parisc/kernel/unaligned.h diff --git a/arch/parisc/include/asm/unaligned.h b/arch/parisc/include/asm/unaligned.h deleted file mode 100644 index c0621295100d..000000000000 --- a/arch/parisc/include/asm/unaligned.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_PARISC_UNALIGNED_H -#define _ASM_PARISC_UNALIGNED_H - -#include - -struct pt_regs; -void handle_unaligned(struct pt_regs *regs); -int check_unaligned(struct pt_regs *regs); - -#endif /* _ASM_PARISC_UNALIGNED_H */ diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 294b0e026c9a..a111d7362d56 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -47,6 +47,8 @@ #include #include +#include "unaligned.h" + #if defined(CONFIG_LIGHTWEIGHT_SPINLOCK_CHECK) #include #endif diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index 3e79e40e361d..99897107d800 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -15,6 +15,7 @@ #include #include #include +#include "unaligned.h" /* #define DEBUG_UNALIGNED 1 */ diff --git a/arch/parisc/kernel/unaligned.h b/arch/parisc/kernel/unaligned.h new file mode 100644 index 000000000000..c1aa4b12e284 --- /dev/null +++ b/arch/parisc/kernel/unaligned.h @@ -0,0 +1,3 @@ +struct pt_regs; +void handle_unaligned(struct pt_regs *regs); +int check_unaligned(struct pt_regs *regs); From 462763212dd71c41f092b48eaa352bc1f5ed5d66 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 2 Oct 2024 15:56:18 +0200 Subject: [PATCH 635/655] Revert: "dm-verity: restart or panic on an I/O error" This reverts commit e6a3531dd542cb127c8de32ab1e54a48ae19962b. The problem that the commit e6a3531dd542cb127c8de32ab1e54a48ae19962b fixes was reported as a security bug, but Google engineers working on Android and ChromeOS didn't want to change the default behavior, they want to get -EIO rather than restarting the system, so I am reverting that commit. Note also that calling machine_restart from the I/O handling code is potentially unsafe (the reboot notifiers may wait for the bio that triggered the restart), but Android uses the reboot notifiers to store the reboot reason into the PMU microcontroller, so machine_restart must be used. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Fixes: e6a3531dd542 ("dm-verity: restart or panic on an I/O error") Suggested-by: Sami Tolvanen Suggested-by: Will Drewry --- drivers/md/dm-verity-target.c | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 36e4ddfe2d15..24ba9a10444c 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -273,10 +273,8 @@ static int verity_handle_err(struct dm_verity *v, enum verity_block_type type, if (v->mode == DM_VERITY_MODE_LOGGING) return 0; - if (v->mode == DM_VERITY_MODE_RESTART) { - pr_emerg("dm-verity device corrupted\n"); - emergency_restart(); - } + if (v->mode == DM_VERITY_MODE_RESTART) + kernel_restart("dm-verity device corrupted"); if (v->mode == DM_VERITY_MODE_PANIC) panic("dm-verity device corrupted"); @@ -599,23 +597,6 @@ static void verity_finish_io(struct dm_verity_io *io, blk_status_t status) if (!static_branch_unlikely(&use_bh_wq_enabled) || !io->in_bh) verity_fec_finish_io(io); - if (unlikely(status != BLK_STS_OK) && - unlikely(!(bio->bi_opf & REQ_RAHEAD)) && - !verity_is_system_shutting_down()) { - if (v->mode == DM_VERITY_MODE_RESTART || - v->mode == DM_VERITY_MODE_PANIC) - DMERR_LIMIT("%s has error: %s", v->data_dev->name, - blk_status_to_str(status)); - - if (v->mode == DM_VERITY_MODE_RESTART) { - pr_emerg("dm-verity device corrupted\n"); - emergency_restart(); - } - - if (v->mode == DM_VERITY_MODE_PANIC) - panic("dm-verity device corrupted"); - } - bio_endio(bio); } From f811b83879fb6717cdb288e34253cf26d135b019 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 2 Oct 2024 16:03:41 +0200 Subject: [PATCH 636/655] dm-verity: introduce the options restart_on_error and panic_on_error This patch introduces the options restart_on_error and panic_on_error on dm-verity. Previously, restarting on error was handled by the patch e6a3531dd542cb127c8de32ab1e54a48ae19962b, but Google engineers wanted to have a special option for it. Signed-off-by: Mikulas Patocka Suggested-by: Sami Tolvanen Suggested-by: Will Drewry --- drivers/md/dm-verity-target.c | 83 ++++++++++++++++++++++++++++++++++- drivers/md/dm-verity.h | 1 + 2 files changed, 83 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 24ba9a10444c..7d4d90b4395a 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -36,11 +36,13 @@ #define DM_VERITY_OPT_LOGGING "ignore_corruption" #define DM_VERITY_OPT_RESTART "restart_on_corruption" #define DM_VERITY_OPT_PANIC "panic_on_corruption" +#define DM_VERITY_OPT_ERROR_RESTART "restart_on_error" +#define DM_VERITY_OPT_ERROR_PANIC "panic_on_error" #define DM_VERITY_OPT_IGN_ZEROES "ignore_zero_blocks" #define DM_VERITY_OPT_AT_MOST_ONCE "check_at_most_once" #define DM_VERITY_OPT_TASKLET_VERIFY "try_verify_in_tasklet" -#define DM_VERITY_OPTS_MAX (4 + DM_VERITY_OPTS_FEC + \ +#define DM_VERITY_OPTS_MAX (5 + DM_VERITY_OPTS_FEC + \ DM_VERITY_ROOT_HASH_VERIFICATION_OPTS) static unsigned int dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE; @@ -583,6 +585,11 @@ static inline bool verity_is_system_shutting_down(void) || system_state == SYSTEM_RESTART; } +static void restart_io_error(struct work_struct *w) +{ + kernel_restart("dm-verity device has I/O error"); +} + /* * End one "io" structure with a given error. */ @@ -597,6 +604,23 @@ static void verity_finish_io(struct dm_verity_io *io, blk_status_t status) if (!static_branch_unlikely(&use_bh_wq_enabled) || !io->in_bh) verity_fec_finish_io(io); + if (unlikely(status != BLK_STS_OK) && + unlikely(!(bio->bi_opf & REQ_RAHEAD)) && + !verity_is_system_shutting_down()) { + if (v->error_mode == DM_VERITY_MODE_PANIC) { + panic("dm-verity device has I/O error"); + } + if (v->error_mode == DM_VERITY_MODE_RESTART) { + static DECLARE_WORK(restart_work, restart_io_error); + queue_work(v->verify_wq, &restart_work); + /* + * We deliberately don't call bio_endio here, because + * the machine will be restarted anyway. + */ + return; + } + } + bio_endio(bio); } @@ -805,6 +829,8 @@ static void verity_status(struct dm_target *ti, status_type_t type, DMEMIT("%02x", v->salt[x]); if (v->mode != DM_VERITY_MODE_EIO) args++; + if (v->error_mode != DM_VERITY_MODE_EIO) + args++; if (verity_fec_is_enabled(v)) args += DM_VERITY_OPTS_FEC; if (v->zero_digest) @@ -834,6 +860,19 @@ static void verity_status(struct dm_target *ti, status_type_t type, BUG(); } } + if (v->error_mode != DM_VERITY_MODE_EIO) { + DMEMIT(" "); + switch (v->error_mode) { + case DM_VERITY_MODE_RESTART: + DMEMIT(DM_VERITY_OPT_ERROR_RESTART); + break; + case DM_VERITY_MODE_PANIC: + DMEMIT(DM_VERITY_OPT_ERROR_PANIC); + break; + default: + BUG(); + } + } if (v->zero_digest) DMEMIT(" " DM_VERITY_OPT_IGN_ZEROES); if (v->validated_blocks) @@ -886,6 +925,19 @@ static void verity_status(struct dm_target *ti, status_type_t type, DMEMIT("invalid"); } } + if (v->error_mode != DM_VERITY_MODE_EIO) { + DMEMIT(",verity_error_mode="); + switch (v->error_mode) { + case DM_VERITY_MODE_RESTART: + DMEMIT(DM_VERITY_OPT_ERROR_RESTART); + break; + case DM_VERITY_MODE_PANIC: + DMEMIT(DM_VERITY_OPT_ERROR_PANIC); + break; + default: + DMEMIT("invalid"); + } + } DMEMIT(";"); break; } @@ -1088,6 +1140,25 @@ static int verity_parse_verity_mode(struct dm_verity *v, const char *arg_name) return 0; } +static inline bool verity_is_verity_error_mode(const char *arg_name) +{ + return (!strcasecmp(arg_name, DM_VERITY_OPT_ERROR_RESTART) || + !strcasecmp(arg_name, DM_VERITY_OPT_ERROR_PANIC)); +} + +static int verity_parse_verity_error_mode(struct dm_verity *v, const char *arg_name) +{ + if (v->error_mode) + return -EINVAL; + + if (!strcasecmp(arg_name, DM_VERITY_OPT_ERROR_RESTART)) + v->error_mode = DM_VERITY_MODE_RESTART; + else if (!strcasecmp(arg_name, DM_VERITY_OPT_ERROR_PANIC)) + v->error_mode = DM_VERITY_MODE_PANIC; + + return 0; +} + static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v, struct dm_verity_sig_opts *verify_args, bool only_modifier_opts) @@ -1122,6 +1193,16 @@ static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v, } continue; + } else if (verity_is_verity_error_mode(arg_name)) { + if (only_modifier_opts) + continue; + r = verity_parse_verity_error_mode(v, arg_name); + if (r) { + ti->error = "Conflicting error handling parameters"; + return r; + } + continue; + } else if (!strcasecmp(arg_name, DM_VERITY_OPT_IGN_ZEROES)) { if (only_modifier_opts) continue; diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index 754e70bb5fe0..6b75159bf835 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -64,6 +64,7 @@ struct dm_verity { unsigned int digest_size; /* digest size for the current hash algorithm */ unsigned int hash_reqsize; /* the size of temporary space for crypto */ enum verity_mode mode; /* mode for handling verification errors */ + enum verity_mode error_mode;/* mode for handling I/O errors */ unsigned int corrupted_errs;/* Number of errors for corrupted blocks */ struct workqueue_struct *verify_wq; From 1ca4169c391c370e0f3a92938df2862900575096 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 2 Oct 2024 15:45:50 +0100 Subject: [PATCH 637/655] netfs: Fix missing wakeup after issuing writes After dividing up a proposed write into subrequests, netfslib sets NETFS_RREQ_ALL_QUEUED to indicate to the collector that it can move on to the final cleanup once it has emptied the subrequest queues. Now, whilst the collector will normally end up running at least once after this bit is set just because it takes a while to process all the write subrequests before the collector runs out of subrequests, there exists the possibility that the issuing thread will be forced to sleep and the collector thread will clean up all the subrequests before ALL_QUEUED gets set. In such a case, the collector thread will not get triggered again and will never clear NETFS_RREQ_IN_PROGRESS thus leaving a request uncompleted and causing a potential futute hang. Fix this by scheduling the write collector if all the subrequest queues are empty (and thus no writes pending issuance). Note that we'd do this ideally before queuing the subrequest, but in the case of buffered writeback, at least, we can't find out that we've run out of folios until after we've called writeback_iter() and it has returned NULL - at which point we might not actually have any subrequests still under construction. Fixes: 288ace2f57c9 ("netfs: New writeback implementation") Signed-off-by: David Howells Link: https://lore.kernel.org/r/3317784.1727880350@warthog.procyon.org.uk cc: Jeff Layton cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/write_issue.c | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c index 6293f547e4c3..bf6d507578e5 100644 --- a/fs/netfs/write_issue.c +++ b/fs/netfs/write_issue.c @@ -508,6 +508,30 @@ static int netfs_write_folio(struct netfs_io_request *wreq, return 0; } +/* + * End the issuing of writes, letting the collector know we're done. + */ +static void netfs_end_issue_write(struct netfs_io_request *wreq) +{ + bool needs_poke = true; + + smp_wmb(); /* Write subreq lists before ALL_QUEUED. */ + set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags); + + for (int s = 0; s < NR_IO_STREAMS; s++) { + struct netfs_io_stream *stream = &wreq->io_streams[s]; + + if (!stream->active) + continue; + if (!list_empty(&stream->subrequests)) + needs_poke = false; + netfs_issue_write(wreq, stream); + } + + if (needs_poke) + netfs_wake_write_collector(wreq, false); +} + /* * Write some of the pending data back to the server */ @@ -559,10 +583,7 @@ int netfs_writepages(struct address_space *mapping, break; } while ((folio = writeback_iter(mapping, wbc, folio, &error))); - for (int s = 0; s < NR_IO_STREAMS; s++) - netfs_issue_write(wreq, &wreq->io_streams[s]); - smp_wmb(); /* Write lists before ALL_QUEUED. */ - set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags); + netfs_end_issue_write(wreq); mutex_unlock(&ictx->wb_lock); @@ -650,10 +671,7 @@ int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_contr if (writethrough_cache) netfs_write_folio(wreq, wbc, writethrough_cache); - netfs_issue_write(wreq, &wreq->io_streams[0]); - netfs_issue_write(wreq, &wreq->io_streams[1]); - smp_wmb(); /* Write lists before ALL_QUEUED. */ - set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags); + netfs_end_issue_write(wreq); mutex_unlock(&ictx->wb_lock); @@ -699,13 +717,7 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t break; } - netfs_issue_write(wreq, upload); - - smp_wmb(); /* Write lists before ALL_QUEUED. */ - set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags); - if (list_empty(&upload->subrequests)) - netfs_wake_write_collector(wreq, false); - + netfs_end_issue_write(wreq); _leave(" = %d", error); return error; } From 00429083f404efe230fee577aa3dfbf2dea9b1f1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 5 Dec 2023 21:53:22 -0500 Subject: [PATCH 638/655] arc: get rid of private asm/unaligned.h Declarations local to arch/*/kernel/*.c are better off *not* in a public header - arch/arc/kernel/unaligned.h is just fine for those bits. Unlike the parisc case, here we have an extra twist - asm/mmu.h has an implicit dependency on struct pt_regs, and in some users that used to be satisfied by include of asm/ptrace.h from asm/unaligned.h (note that asm/mmu.h itself did _not_ pull asm/unaligned.h - it relied upon the users having pulled asm/unaligned.h before asm/mmu.h got there). Seeing that asm/mmu.h only wants struct pt_regs * arguments in an extern, just pre-declare it there - less brittle that way. With that done _all_ asm/unaligned.h instances are reduced to include of asm-generic/unaligned.h and can be removed - unaligned.h is in mandatory-y in include/asm-generic/Kbuild. What's more, we can move asm-generic/unaligned.h to linux/unaligned.h and switch includes of to ; that's better off as an auto-generated commit, though, to be done by Linus at -rc1 time next cycle. Acked-by: Vineet Gupta Signed-off-by: Al Viro --- arch/arc/include/asm/mmu.h | 1 + arch/arc/include/asm/unaligned.h | 27 --------------------------- arch/arc/kernel/traps.c | 1 + arch/arc/kernel/unaligned.c | 1 + arch/arc/kernel/unaligned.h | 16 ++++++++++++++++ 5 files changed, 19 insertions(+), 27 deletions(-) delete mode 100644 arch/arc/include/asm/unaligned.h create mode 100644 arch/arc/kernel/unaligned.h diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h index 9febf5bc3de6..4ae2db59d494 100644 --- a/arch/arc/include/asm/mmu.h +++ b/arch/arc/include/asm/mmu.h @@ -14,6 +14,7 @@ typedef struct { unsigned long asid[NR_CPUS]; /* 8 bit MMU PID + Generation cycle */ } mm_context_t; +struct pt_regs; extern void do_tlb_overlap_fault(unsigned long, unsigned long, struct pt_regs *); #endif diff --git a/arch/arc/include/asm/unaligned.h b/arch/arc/include/asm/unaligned.h deleted file mode 100644 index cf5a02382e0e..000000000000 --- a/arch/arc/include/asm/unaligned.h +++ /dev/null @@ -1,27 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - */ - -#ifndef _ASM_ARC_UNALIGNED_H -#define _ASM_ARC_UNALIGNED_H - -/* ARC700 can't handle unaligned Data accesses. */ - -#include -#include - -#ifdef CONFIG_ARC_EMUL_UNALIGNED -int misaligned_fixup(unsigned long address, struct pt_regs *regs, - struct callee_regs *cregs); -#else -static inline int -misaligned_fixup(unsigned long address, struct pt_regs *regs, - struct callee_regs *cregs) -{ - /* Not fixed */ - return 1; -} -#endif - -#endif /* _ASM_ARC_UNALIGNED_H */ diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c index a19751e824fb..41af02081549 100644 --- a/arch/arc/kernel/traps.c +++ b/arch/arc/kernel/traps.c @@ -20,6 +20,7 @@ #include #include #include +#include "unaligned.h" void die(const char *str, struct pt_regs *regs, unsigned long address) { diff --git a/arch/arc/kernel/unaligned.c b/arch/arc/kernel/unaligned.c index 99a9b92ed98d..d2f5ceaaed1b 100644 --- a/arch/arc/kernel/unaligned.c +++ b/arch/arc/kernel/unaligned.c @@ -12,6 +12,7 @@ #include #include #include +#include "unaligned.h" #ifdef CONFIG_CPU_BIG_ENDIAN #define BE 1 diff --git a/arch/arc/kernel/unaligned.h b/arch/arc/kernel/unaligned.h new file mode 100644 index 000000000000..5244453bb85f --- /dev/null +++ b/arch/arc/kernel/unaligned.h @@ -0,0 +1,16 @@ +struct pt_regs; +struct callee_regs; + +#ifdef CONFIG_ARC_EMUL_UNALIGNED +int misaligned_fixup(unsigned long address, struct pt_regs *regs, + struct callee_regs *cregs); +#else +static inline int +misaligned_fixup(unsigned long address, struct pt_regs *regs, + struct callee_regs *cregs) +{ + /* Not fixed */ + return 1; +} +#endif + From 5f60d5f6bbc12e782fac78110b0ee62698f3b576 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 1 Oct 2024 15:35:57 -0400 Subject: [PATCH 639/655] move asm/unaligned.h to linux/unaligned.h asm/unaligned.h is always an include of asm-generic/unaligned.h; might as well move that thing to linux/unaligned.h and include that - there's nothing arch-specific in that header. auto-generated by the following: for i in `git grep -l -w asm/unaligned.h`; do sed -i -e "s/asm\/unaligned.h/linux\/unaligned.h/" $i done for i in `git grep -l -w asm-generic/unaligned.h`; do sed -i -e "s/asm-generic\/unaligned.h/linux\/unaligned.h/" $i done git mv include/asm-generic/unaligned.h include/linux/unaligned.h git mv tools/include/asm-generic/unaligned.h tools/include/linux/unaligned.h sed -i -e "/unaligned.h/d" include/asm-generic/Kbuild sed -i -e "s/__ASM_GENERIC/__LINUX/" include/linux/unaligned.h tools/include/linux/unaligned.h --- Documentation/arch/arm/mem_alignment.rst | 2 +- Documentation/core-api/unaligned-memory-access.rst | 2 +- .../translations/zh_CN/core-api/unaligned-memory-access.rst | 2 +- arch/alpha/kernel/traps.c | 2 +- arch/arc/include/asm/io.h | 2 +- arch/arc/kernel/traps.c | 2 +- arch/arc/kernel/unwind.c | 2 +- arch/arm/crypto/aes-ce-glue.c | 2 +- arch/arm/crypto/crc32-ce-glue.c | 2 +- arch/arm/crypto/ghash-ce-glue.c | 2 +- arch/arm/crypto/poly1305-glue.c | 2 +- arch/arm/crypto/sha2-ce-glue.c | 2 +- arch/arm/include/asm/uaccess.h | 2 +- arch/arm/mm/alignment.c | 2 +- arch/arm64/crypto/aes-ce-ccm-glue.c | 2 +- arch/arm64/crypto/aes-ce-glue.c | 2 +- arch/arm64/crypto/ghash-ce-glue.c | 2 +- arch/arm64/crypto/poly1305-glue.c | 2 +- arch/arm64/crypto/sha1-ce-glue.c | 2 +- arch/arm64/crypto/sha2-ce-glue.c | 2 +- arch/arm64/crypto/sha3-ce-glue.c | 2 +- arch/arm64/crypto/sha512-ce-glue.c | 2 +- arch/arm64/crypto/sm3-ce-glue.c | 2 +- arch/arm64/crypto/sm3-neon-glue.c | 2 +- arch/loongarch/crypto/crc32-loongarch.c | 2 +- arch/microblaze/include/asm/flat.h | 2 +- arch/mips/boot/compressed/decompress.c | 2 +- arch/mips/crypto/crc32-mips.c | 2 +- arch/mips/crypto/poly1305-glue.c | 2 +- arch/nios2/kernel/misaligned.c | 2 +- arch/parisc/boot/compressed/misc.c | 2 +- arch/parisc/kernel/traps.c | 2 +- arch/parisc/kernel/unaligned.c | 2 +- arch/powerpc/crypto/aes-gcm-p10-glue.c | 2 +- arch/powerpc/crypto/poly1305-p10-glue.c | 2 +- arch/powerpc/platforms/pseries/papr_scm.c | 2 +- arch/sh/include/asm/flat.h | 2 +- arch/sh/kernel/dwarf.c | 2 +- arch/sh/kernel/module.c | 2 +- arch/sparc/crypto/crc32c_glue.c | 2 +- arch/um/drivers/virt-pci.c | 2 +- arch/um/include/asm/uaccess.h | 2 +- arch/x86/crypto/camellia_glue.c | 2 +- arch/x86/crypto/ghash-clmulni-intel_glue.c | 2 +- arch/x86/lib/insn.c | 2 +- arch/xtensa/include/asm/flat.h | 2 +- block/partitions/ldm.h | 2 +- block/partitions/msdos.c | 2 +- block/t10-pi.c | 2 +- crypto/aes_generic.c | 2 +- crypto/blake2b_generic.c | 2 +- crypto/blowfish_generic.c | 2 +- crypto/camellia_generic.c | 2 +- crypto/cast5_generic.c | 2 +- crypto/cast6_generic.c | 2 +- crypto/chacha_generic.c | 2 +- crypto/crc32_generic.c | 2 +- crypto/crc32c_generic.c | 2 +- crypto/crc64_rocksoft_generic.c | 2 +- crypto/ecc.c | 2 +- crypto/michael_mic.c | 2 +- crypto/nhpoly1305.c | 2 +- crypto/poly1305_generic.c | 2 +- crypto/polyval-generic.c | 2 +- crypto/serpent_generic.c | 2 +- crypto/sha256_generic.c | 2 +- crypto/sha3_generic.c | 2 +- crypto/sha512_generic.c | 2 +- crypto/sm3.c | 2 +- crypto/sm3_generic.c | 2 +- crypto/sm4.c | 2 +- crypto/sm4_generic.c | 2 +- crypto/twofish_generic.c | 2 +- crypto/vmac.c | 2 +- crypto/xxhash_generic.c | 2 +- drivers/acpi/apei/apei-base.c | 2 +- drivers/acpi/apei/einj-core.c | 2 +- drivers/acpi/battery.c | 2 +- drivers/acpi/cppc_acpi.c | 2 +- drivers/ata/libata-core.c | 2 +- drivers/ata/libata-sata.c | 2 +- drivers/ata/libata-scsi.c | 2 +- drivers/auxdisplay/ht16k33.c | 2 +- drivers/base/regmap/regmap.c | 2 +- drivers/block/aoe/aoecmd.c | 2 +- drivers/block/aoe/aoenet.c | 2 +- drivers/block/drbd/drbd_nl.c | 2 +- drivers/block/pktcdvd.c | 2 +- drivers/bluetooth/ath3k.c | 2 +- drivers/bluetooth/btbcm.c | 2 +- drivers/bluetooth/btintel.c | 2 +- drivers/bluetooth/btintel_pcie.c | 2 +- drivers/bluetooth/btmtk.c | 2 +- drivers/bluetooth/btmtksdio.c | 2 +- drivers/bluetooth/btmtkuart.c | 2 +- drivers/bluetooth/btnxpuart.c | 2 +- drivers/bluetooth/btrsi.c | 2 +- drivers/bluetooth/btrtl.c | 2 +- drivers/bluetooth/btusb.c | 2 +- drivers/bluetooth/h4_recv.h | 2 +- drivers/bluetooth/hci_bcm4377.c | 2 +- drivers/bluetooth/hci_bcsp.c | 2 +- drivers/bluetooth/hci_h4.c | 2 +- drivers/bluetooth/hci_nokia.c | 2 +- drivers/bluetooth/hci_qca.c | 2 +- drivers/bluetooth/hci_vhci.c | 2 +- drivers/char/tpm/tpm2-sessions.c | 2 +- drivers/char/tpm/tpm2-space.c | 2 +- drivers/clk/clk-si5341.c | 2 +- drivers/comedi/drivers/usbduxsigma.c | 2 +- drivers/counter/104-quad-8.c | 2 +- drivers/counter/i8254.c | 2 +- drivers/cpufreq/cppc_cpufreq.c | 2 +- drivers/crypto/allwinner/sun4i-ss/sun4i-ss-hash.c | 2 +- drivers/crypto/caam/caamalg.c | 2 +- drivers/crypto/caam/caamalg_qi.c | 2 +- drivers/crypto/caam/caamalg_qi2.c | 2 +- drivers/crypto/inside-secure/safexcel_cipher.c | 2 +- drivers/crypto/rockchip/rk3288_crypto_ahash.c | 2 +- drivers/crypto/stm32/stm32-crc32.c | 2 +- drivers/cxl/core/mbox.c | 2 +- drivers/cxl/core/trace.h | 2 +- drivers/cxl/pci.c | 2 +- drivers/cxl/pmem.c | 2 +- drivers/cxl/security.c | 2 +- drivers/firewire/net.c | 2 +- drivers/firmware/arm_scmi/common.h | 2 +- drivers/firmware/arm_scmi/protocols.h | 2 +- drivers/firmware/dmi_scan.c | 2 +- drivers/firmware/efi/fdtparams.c | 2 +- drivers/firmware/efi/libstub/riscv-stub.c | 2 +- drivers/firmware/efi/libstub/riscv.c | 2 +- drivers/firmware/efi/libstub/zboot.c | 2 +- drivers/fpga/microchip-spi.c | 2 +- drivers/fsi/fsi-occ.c | 2 +- drivers/gpu/drm/amd/amdgpu/atom.c | 2 +- drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c | 2 +- drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.c | 2 +- drivers/gpu/drm/bridge/samsung-dsim.c | 2 +- drivers/gpu/drm/bridge/sil-sii8620.c | 2 +- drivers/gpu/drm/bridge/tc358775.c | 2 +- drivers/gpu/drm/bridge/ti-sn65dsi86.c | 2 +- drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 2 +- drivers/gpu/drm/nouveau/include/nvif/os.h | 2 +- drivers/gpu/drm/radeon/atom.c | 2 +- drivers/gpu/drm/udl/udl_transfer.c | 2 +- drivers/greybus/es2.c | 2 +- drivers/greybus/gb-beagleplay.c | 2 +- drivers/hid/hid-alps.c | 2 +- drivers/hid/hid-core.c | 2 +- drivers/hid/hid-generic.c | 2 +- drivers/hid/hid-goodix-spi.c | 2 +- drivers/hid/hid-google-hammer.c | 2 +- drivers/hid/hid-kye.c | 2 +- drivers/hid/hid-letsketch.c | 2 +- drivers/hid/hid-logitech-dj.c | 2 +- drivers/hid/hid-logitech-hidpp.c | 2 +- drivers/hid/hid-nintendo.c | 2 +- drivers/hid/hid-playstation.c | 2 +- drivers/hid/hid-sony.c | 2 +- drivers/hid/hid-uclogic-params.c | 2 +- drivers/hid/hid-uclogic-rdesc.c | 2 +- drivers/hid/i2c-hid/i2c-hid-core.c | 2 +- drivers/hid/surface-hid/surface_hid.c | 2 +- drivers/hid/surface-hid/surface_hid_core.c | 2 +- drivers/hid/surface-hid/surface_kbd.c | 2 +- drivers/hid/usbhid/hid-core.c | 2 +- drivers/hid/wacom.h | 2 +- drivers/hwmon/adt7310.c | 2 +- drivers/hwmon/aquacomputer_d5next.c | 2 +- drivers/hwmon/asus-ec-sensors.c | 2 +- drivers/hwmon/asus_rog_ryujin.c | 2 +- drivers/hwmon/dell-smm-hwmon.c | 2 +- drivers/hwmon/gigabyte_waterforce.c | 2 +- drivers/hwmon/nzxt-kraken2.c | 2 +- drivers/hwmon/nzxt-kraken3.c | 2 +- drivers/hwmon/nzxt-smart2.c | 2 +- drivers/hwmon/occ/common.c | 2 +- drivers/hwmon/occ/p8_i2c.c | 2 +- drivers/i2c/busses/i2c-nvidia-gpu.c | 2 +- drivers/iio/accel/adxl355_core.c | 2 +- drivers/iio/accel/adxl367.c | 2 +- drivers/iio/accel/adxl380.c | 2 +- drivers/iio/accel/bma400_core.c | 2 +- drivers/iio/accel/bmi088-accel-core.c | 2 +- drivers/iio/accel/dmard09.c | 2 +- drivers/iio/accel/sca3300.c | 2 +- drivers/iio/adc/ad4130.c | 2 +- drivers/iio/adc/ad_sigma_delta.c | 2 +- drivers/iio/adc/axp20x_adc.c | 2 +- drivers/iio/adc/intel_mrfld_adc.c | 2 +- drivers/iio/adc/ltc2497.c | 2 +- drivers/iio/adc/max11100.c | 2 +- drivers/iio/adc/max11410.c | 2 +- drivers/iio/adc/mcp3422.c | 2 +- drivers/iio/adc/mcp3911.c | 2 +- drivers/iio/adc/mt6360-adc.c | 2 +- drivers/iio/adc/pac1921.c | 2 +- drivers/iio/adc/pac1934.c | 2 +- drivers/iio/adc/qcom-spmi-rradc.c | 2 +- drivers/iio/adc/ti-ads124s08.c | 2 +- drivers/iio/adc/ti-ads1298.c | 2 +- drivers/iio/adc/ti-ads131e08.c | 2 +- drivers/iio/adc/ti-tsc2046.c | 2 +- drivers/iio/addac/ad74115.c | 2 +- drivers/iio/addac/ad74413r.c | 2 +- drivers/iio/amplifiers/ada4250.c | 2 +- drivers/iio/cdc/ad7746.c | 2 +- drivers/iio/chemical/bme680_core.c | 2 +- drivers/iio/chemical/pms7003.c | 2 +- drivers/iio/chemical/scd30_i2c.c | 2 +- drivers/iio/chemical/scd30_serial.c | 2 +- drivers/iio/chemical/scd4x.c | 2 +- drivers/iio/chemical/sps30_i2c.c | 2 +- drivers/iio/common/st_sensors/st_sensors_core.c | 2 +- drivers/iio/dac/ad3552r.c | 2 +- drivers/iio/dac/ad5064.c | 2 +- drivers/iio/dac/ad5446.c | 2 +- drivers/iio/dac/ad5449.c | 2 +- drivers/iio/dac/ad5593r.c | 2 +- drivers/iio/dac/ad5624r_spi.c | 2 +- drivers/iio/dac/ad5766.c | 2 +- drivers/iio/dac/ad7293.c | 2 +- drivers/iio/dac/ltc2632.c | 2 +- drivers/iio/dac/mcp4821.c | 2 +- drivers/iio/frequency/adf4377.c | 2 +- drivers/iio/frequency/admv1013.c | 2 +- drivers/iio/frequency/admv1014.c | 2 +- drivers/iio/frequency/admv4420.c | 2 +- drivers/iio/frequency/adrf6780.c | 2 +- drivers/iio/gyro/adis16130.c | 2 +- drivers/iio/health/afe4403.c | 2 +- drivers/iio/humidity/ens210.c | 2 +- drivers/iio/humidity/hdc3020.c | 2 +- drivers/iio/imu/adis.c | 2 +- drivers/iio/imu/bmi323/bmi323_core.c | 2 +- drivers/iio/light/apds9306.c | 2 +- drivers/iio/light/gp2ap020a00f.c | 2 +- drivers/iio/light/ltr390.c | 2 +- drivers/iio/light/ltrf216a.c | 2 +- drivers/iio/light/si1133.c | 2 +- drivers/iio/light/tsl2591.c | 2 +- drivers/iio/light/zopt2201.c | 2 +- drivers/iio/magnetometer/rm3100-core.c | 2 +- drivers/iio/magnetometer/yamaha-yas530.c | 2 +- drivers/iio/pressure/bmp280-core.c | 2 +- drivers/iio/pressure/dlhl60d.c | 2 +- drivers/iio/pressure/hp206c.c | 2 +- drivers/iio/pressure/hsc030pa.c | 2 +- drivers/iio/pressure/mprls0025pa.c | 2 +- drivers/iio/pressure/ms5611_i2c.c | 2 +- drivers/iio/pressure/ms5611_spi.c | 2 +- drivers/iio/pressure/sdp500.c | 2 +- drivers/iio/pressure/st_pressure_core.c | 2 +- drivers/iio/pressure/zpa2326.c | 2 +- drivers/iio/proximity/aw96103.c | 2 +- drivers/iio/proximity/cros_ec_mkbp_proximity.c | 2 +- drivers/iio/proximity/hx9023s.c | 2 +- drivers/iio/proximity/irsd200.c | 2 +- drivers/iio/temperature/ltc2983.c | 2 +- drivers/iio/temperature/max31856.c | 2 +- drivers/iio/temperature/max31865.c | 2 +- drivers/input/joystick/adafruit-seesaw.c | 2 +- drivers/input/joystick/adc-joystick.c | 2 +- drivers/input/joystick/iforce/iforce-main.c | 2 +- drivers/input/joystick/iforce/iforce-packets.c | 2 +- drivers/input/joystick/spaceball.c | 2 +- drivers/input/keyboard/applespi.c | 2 +- drivers/input/keyboard/cros_ec_keyb.c | 2 +- drivers/input/misc/ims-pcu.c | 2 +- drivers/input/misc/iqs7222.c | 2 +- drivers/input/mouse/cyapa_gen3.c | 2 +- drivers/input/mouse/cyapa_gen5.c | 2 +- drivers/input/mouse/cyapa_gen6.c | 2 +- drivers/input/mouse/elan_i2c_core.c | 2 +- drivers/input/mouse/elan_i2c_i2c.c | 2 +- drivers/input/mouse/elantech.c | 2 +- drivers/input/rmi4/rmi_f01.c | 2 +- drivers/input/rmi4/rmi_f34.c | 2 +- drivers/input/rmi4/rmi_f34v7.c | 2 +- drivers/input/tablet/aiptek.c | 2 +- drivers/input/tablet/kbtab.c | 2 +- drivers/input/touchscreen/ads7846.c | 2 +- drivers/input/touchscreen/atmel_mxt_ts.c | 2 +- drivers/input/touchscreen/chipone_icn8505.c | 2 +- drivers/input/touchscreen/cy8ctma140.c | 2 +- drivers/input/touchscreen/cyttsp5.c | 2 +- drivers/input/touchscreen/edt-ft5x06.c | 2 +- drivers/input/touchscreen/eeti_ts.c | 2 +- drivers/input/touchscreen/elants_i2c.c | 2 +- drivers/input/touchscreen/exc3000.c | 2 +- drivers/input/touchscreen/goodix.c | 2 +- drivers/input/touchscreen/goodix_berlin_core.c | 2 +- drivers/input/touchscreen/goodix_berlin_spi.c | 2 +- drivers/input/touchscreen/hideep.c | 2 +- drivers/input/touchscreen/hycon-hy46xx.c | 2 +- drivers/input/touchscreen/hynitron_cstxxx.c | 2 +- drivers/input/touchscreen/ili210x.c | 2 +- drivers/input/touchscreen/ilitek_ts_i2c.c | 2 +- drivers/input/touchscreen/iqs5xx.c | 2 +- drivers/input/touchscreen/iqs7211.c | 2 +- drivers/input/touchscreen/melfas_mip4.c | 2 +- drivers/input/touchscreen/novatek-nvt-ts.c | 2 +- drivers/input/touchscreen/pixcir_i2c_ts.c | 2 +- drivers/input/touchscreen/raydium_i2c_ts.c | 2 +- drivers/input/touchscreen/s6sy761.c | 2 +- drivers/input/touchscreen/silead.c | 2 +- drivers/input/touchscreen/sis_i2c.c | 2 +- drivers/input/touchscreen/surface3_spi.c | 2 +- drivers/input/touchscreen/wacom_i2c.c | 2 +- drivers/input/touchscreen/wdt87xx_i2c.c | 2 +- drivers/input/touchscreen/zet6223.c | 2 +- drivers/input/touchscreen/zforce_ts.c | 2 +- drivers/isdn/hardware/mISDN/avmfritz.c | 2 +- drivers/leds/rgb/leds-mt6370-rgb.c | 2 +- drivers/macintosh/adb-iop.c | 2 +- drivers/md/dm-crypt.c | 2 +- drivers/md/dm-vdo/murmurhash3.c | 2 +- drivers/md/dm-vdo/numeric.h | 2 +- drivers/media/dvb-frontends/mxl5xx.c | 2 +- drivers/media/i2c/ccs/ccs-reg-access.c | 2 +- drivers/media/i2c/hi556.c | 2 +- drivers/media/i2c/hi846.c | 2 +- drivers/media/i2c/hi847.c | 2 +- drivers/media/i2c/imx208.c | 2 +- drivers/media/i2c/imx258.c | 2 +- drivers/media/i2c/imx290.c | 2 +- drivers/media/i2c/imx319.c | 2 +- drivers/media/i2c/imx334.c | 2 +- drivers/media/i2c/imx335.c | 2 +- drivers/media/i2c/imx355.c | 2 +- drivers/media/i2c/imx412.c | 2 +- drivers/media/i2c/ir-kbd-i2c.c | 2 +- drivers/media/i2c/og01a1b.c | 2 +- drivers/media/i2c/ov01a10.c | 2 +- drivers/media/i2c/ov08x40.c | 2 +- drivers/media/i2c/ov2740.c | 2 +- drivers/media/i2c/ov5670.c | 2 +- drivers/media/i2c/ov5675.c | 2 +- drivers/media/i2c/ov8856.c | 2 +- drivers/media/i2c/ov8858.c | 2 +- drivers/media/i2c/ov9282.c | 2 +- drivers/media/i2c/ov9734.c | 2 +- drivers/media/i2c/thp7312.c | 2 +- drivers/media/i2c/vgxy61.c | 2 +- drivers/media/pci/bt8xx/bttv-cards.c | 2 +- drivers/media/platform/chips-media/coda/coda-jpeg.c | 2 +- drivers/media/platform/renesas/rcar_jpu.c | 2 +- drivers/media/platform/verisilicon/hantro_g1_mpeg2_dec.c | 2 +- drivers/media/platform/verisilicon/hantro_h1_jpeg_enc.c | 2 +- .../media/platform/verisilicon/rockchip_vpu2_hw_jpeg_enc.c | 2 +- .../media/platform/verisilicon/rockchip_vpu2_hw_mpeg2_dec.c | 2 +- drivers/media/radio/radio-raremono.c | 2 +- drivers/media/radio/si470x/radio-si470x.h | 2 +- drivers/media/rc/ir_toy.c | 2 +- drivers/media/rc/redrat3.c | 2 +- drivers/media/tuners/xc2028.c | 2 +- drivers/media/tuners/xc4000.c | 2 +- drivers/media/usb/dvb-usb/m920x.c | 2 +- drivers/media/usb/uvc/uvc_driver.c | 2 +- drivers/media/usb/uvc/uvc_video.c | 2 +- drivers/media/v4l2-core/v4l2-cci.c | 2 +- drivers/media/v4l2-core/v4l2-jpeg.c | 2 +- drivers/memstick/host/rtsx_usb_ms.c | 2 +- drivers/mfd/gateworks-gsc.c | 2 +- drivers/mfd/iqs62x.c | 2 +- drivers/mfd/ntxec.c | 2 +- drivers/mfd/rave-sp.c | 2 +- drivers/mfd/si476x-cmd.c | 2 +- drivers/misc/altera-stapl/altera.c | 2 +- drivers/misc/bcm-vk/bcm_vk_sg.c | 2 +- drivers/misc/cardreader/rtsx_pcr.c | 2 +- drivers/misc/lattice-ecp3-config.c | 2 +- drivers/misc/mei/platform-vsc.c | 2 +- drivers/misc/mei/vsc-fw-loader.c | 2 +- drivers/mmc/host/atmel-mci.c | 2 +- drivers/mmc/host/mmc_spi.c | 2 +- drivers/mmc/host/mvsdio.c | 2 +- drivers/mmc/host/rtsx_pci_sdmmc.c | 2 +- drivers/mmc/host/rtsx_usb_sdmmc.c | 2 +- drivers/mtd/nand/raw/intel-nand-controller.c | 2 +- drivers/mtd/nand/raw/marvell_nand.c | 2 +- drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 2 +- drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c | 2 +- drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c | 2 +- drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c | 2 +- drivers/net/can/usb/etas_es58x/es581_4.c | 2 +- drivers/net/can/usb/etas_es58x/es58x_core.c | 2 +- drivers/net/can/usb/etas_es58x/es58x_fd.c | 2 +- drivers/net/can/usb/f81604.c | 2 +- drivers/net/can/usb/mcba_usb.c | 2 +- drivers/net/can/usb/peak_usb/pcan_usb.c | 2 +- drivers/net/dsa/b53/b53_spi.c | 2 +- drivers/net/dsa/microchip/ksz_spi.c | 2 +- drivers/net/ethernet/adi/adin1110.c | 2 +- drivers/net/ethernet/broadcom/asp2/bcmasp_ethtool.c | 2 +- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 2 +- drivers/net/ethernet/dec/tulip/de2104x.c | 2 +- drivers/net/ethernet/dec/tulip/eeprom.c | 2 +- drivers/net/ethernet/dec/tulip/tulip.h | 2 +- drivers/net/ethernet/dec/tulip/tulip_core.c | 2 +- drivers/net/ethernet/freescale/enetc/enetc_pf.c | 2 +- drivers/net/ethernet/intel/e100.c | 2 +- drivers/net/ethernet/intel/ice/ice_fw_update.c | 2 +- drivers/net/ethernet/mediatek/mtk_wed_mcu.c | 2 +- drivers/net/ethernet/meta/fbnic/fbnic_devlink.c | 2 +- drivers/net/ethernet/netronome/nfp/crypto/ipsec.c | 2 +- drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c | 2 +- drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c | 2 +- drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c | 2 +- drivers/net/ethernet/netronome/nfp/nfpcore/nfp_hwinfo.c | 2 +- drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c | 2 +- drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c | 2 +- drivers/net/ethernet/packetengines/hamachi.c | 2 +- drivers/net/ethernet/packetengines/yellowfin.c | 2 +- drivers/net/ethernet/realtek/r8169_main.c | 2 +- drivers/net/ethernet/smsc/smsc9420.c | 2 +- drivers/net/ieee802154/cc2520.c | 2 +- drivers/net/mctp/mctp-i3c.c | 2 +- drivers/net/phy/air_en8811h.c | 2 +- drivers/net/phy/aquantia/aquantia_firmware.c | 2 +- drivers/net/phy/bcm-phy-ptp.c | 2 +- drivers/net/phy/mscc/mscc_ptp.c | 2 +- drivers/net/ppp/ppp_async.c | 2 +- drivers/net/ppp/ppp_deflate.c | 2 +- drivers/net/ppp/ppp_generic.c | 2 +- drivers/net/ppp/ppp_mppe.c | 2 +- drivers/net/ppp/ppp_synctty.c | 2 +- drivers/net/slip/slhc.c | 2 +- drivers/net/usb/net1080.c | 2 +- drivers/net/usb/sierra_net.c | 2 +- drivers/net/wireless/ath/ath5k/base.c | 2 +- drivers/net/wireless/ath/ath5k/mac80211-ops.c | 2 +- drivers/net/wireless/ath/ath5k/pcu.c | 2 +- drivers/net/wireless/ath/ath5k/phy.c | 2 +- drivers/net/wireless/ath/ath5k/reset.c | 2 +- drivers/net/wireless/ath/ath6kl/htc_mbox.c | 2 +- drivers/net/wireless/ath/ath9k/ar9003_eeprom.c | 2 +- drivers/net/wireless/ath/ath9k/debug.c | 2 +- drivers/net/wireless/ath/ath9k/eeprom_4k.c | 2 +- drivers/net/wireless/ath/ath9k/eeprom_9287.c | 2 +- drivers/net/wireless/ath/ath9k/eeprom_def.c | 2 +- drivers/net/wireless/ath/ath9k/hif_usb.c | 2 +- drivers/net/wireless/ath/ath9k/hw.c | 2 +- drivers/net/wireless/ath/carl9170/mac.c | 2 +- drivers/net/wireless/ath/hw.c | 2 +- drivers/net/wireless/ath/key.c | 2 +- drivers/net/wireless/broadcom/b43/main.c | 2 +- drivers/net/wireless/broadcom/b43legacy/main.c | 2 +- drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.h | 2 +- drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c | 2 +- drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 2 +- drivers/net/wireless/broadcom/brcm80211/brcmfmac/xtlv.c | 2 +- drivers/net/wireless/intel/iwlegacy/3945.c | 2 +- drivers/net/wireless/intel/iwlegacy/4965.c | 2 +- drivers/net/wireless/intel/iwlwifi/dvm/led.c | 2 +- drivers/net/wireless/intel/iwlwifi/dvm/rx.c | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/rx.c | 2 +- drivers/net/wireless/marvell/libertas/cfg.c | 2 +- drivers/net/wireless/marvell/libertas/cmdresp.c | 2 +- drivers/net/wireless/marvell/mwifiex/cmdevt.c | 2 +- drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c | 2 +- drivers/net/wireless/mediatek/mt76/mt76x02_eeprom.c | 2 +- drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c | 2 +- drivers/net/wireless/mediatek/mt7601u/dma.h | 2 +- drivers/net/wireless/mediatek/mt7601u/eeprom.c | 2 +- drivers/net/wireless/purelifi/plfxlc/usb.c | 2 +- drivers/net/wireless/zydas/zd1211rw/zd_usb.c | 2 +- drivers/nfc/nfcmrvl/fw_dnld.c | 2 +- drivers/nfc/nxp-nci/firmware.c | 2 +- drivers/nfc/nxp-nci/i2c.c | 2 +- drivers/nfc/pn544/i2c.c | 2 +- drivers/nvme/common/auth.c | 2 +- drivers/nvme/host/auth.c | 2 +- drivers/nvme/host/core.c | 2 +- drivers/nvme/host/hwmon.c | 2 +- drivers/nvme/host/pr.c | 2 +- drivers/nvme/host/rdma.c | 2 +- drivers/nvme/host/trace.c | 2 +- drivers/nvme/target/admin-cmd.c | 2 +- drivers/nvme/target/auth.c | 2 +- drivers/nvme/target/rdma.c | 2 +- drivers/nvme/target/trace.c | 2 +- drivers/pci/vpd.c | 2 +- drivers/pcmcia/cistpl.c | 2 +- drivers/peci/controller/peci-aspeed.c | 2 +- drivers/peci/request.c | 2 +- drivers/platform/arm64/acer-aspire1-ec.c | 2 +- drivers/platform/chrome/cros_ec_proto.c | 2 +- drivers/platform/chrome/cros_ec_proto_test.c | 2 +- drivers/platform/chrome/wilco_ec/properties.c | 2 +- drivers/platform/cznic/turris-omnia-mcu-gpio.c | 2 +- drivers/platform/cznic/turris-omnia-mcu.h | 2 +- drivers/platform/surface/aggregator/ssh_msgb.h | 2 +- drivers/platform/surface/aggregator/ssh_packet_layer.c | 2 +- drivers/platform/surface/aggregator/ssh_parser.c | 2 +- drivers/platform/surface/aggregator/ssh_request_layer.c | 2 +- drivers/platform/surface/aggregator/trace.h | 2 +- drivers/platform/surface/surface3_power.c | 2 +- drivers/platform/surface/surface_acpi_notify.c | 2 +- drivers/platform/surface/surface_aggregator_tabletsw.c | 2 +- drivers/platform/surface/surface_platform_profile.c | 2 +- drivers/platform/x86/asus-tf103c-dock.c | 2 +- drivers/platform/x86/dell/dell-wmi-ddv.c | 2 +- drivers/platform/x86/msi-wmi-platform.c | 2 +- drivers/platform/x86/quickstart.c | 2 +- drivers/power/supply/axp288_fuel_gauge.c | 2 +- drivers/power/supply/bq27xxx_battery_i2c.c | 2 +- drivers/power/supply/cros_peripheral_charger.c | 2 +- drivers/power/supply/max1720x_battery.c | 2 +- drivers/power/supply/rk817_charger.c | 2 +- drivers/power/supply/surface_battery.c | 2 +- drivers/power/supply/surface_charger.c | 2 +- drivers/ptp/ptp_clockmatrix.c | 2 +- drivers/ptp/ptp_fc3.c | 2 +- drivers/rtc/rtc-max31335.c | 2 +- drivers/rtc/rtc-pm8xxx.c | 2 +- drivers/scsi/aacraid/aachba.c | 2 +- drivers/scsi/csiostor/csio_lnode.c | 2 +- drivers/scsi/csiostor/csio_scsi.c | 2 +- drivers/scsi/cxlflash/lunmgt.c | 2 +- drivers/scsi/cxlflash/main.c | 2 +- drivers/scsi/cxlflash/superpipe.c | 2 +- drivers/scsi/cxlflash/vlun.c | 2 +- drivers/scsi/device_handler/scsi_dh_alua.c | 2 +- drivers/scsi/hpsa.c | 2 +- drivers/scsi/ipr.h | 2 +- drivers/scsi/libfc/fc_disc.c | 2 +- drivers/scsi/libfc/fc_elsct.c | 2 +- drivers/scsi/libfc/fc_encode.h | 2 +- drivers/scsi/libfc/fc_lport.c | 2 +- drivers/scsi/libfc/fc_rport.c | 2 +- drivers/scsi/libiscsi.c | 2 +- drivers/scsi/libsas/sas_expander.c | 2 +- drivers/scsi/lpfc/lpfc_nvme.c | 2 +- drivers/scsi/lpfc/lpfc_nvmet.c | 2 +- drivers/scsi/lpfc/lpfc_scsi.c | 2 +- drivers/scsi/megaraid/megaraid_sas_base.c | 2 +- drivers/scsi/mpi3mr/mpi3mr.h | 2 +- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 2 +- drivers/scsi/mpt3sas/mpt3sas_warpdrive.c | 2 +- drivers/scsi/mvsas/mv_sas.h | 2 +- drivers/scsi/myrb.c | 2 +- drivers/scsi/myrs.c | 2 +- drivers/scsi/qla2xxx/qla_dsd.h | 2 +- drivers/scsi/qla2xxx/qla_target.c | 2 +- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 2 +- drivers/scsi/scsi.c | 2 +- drivers/scsi/scsi_common.c | 2 +- drivers/scsi/scsi_debug.c | 2 +- drivers/scsi/scsi_error.c | 2 +- drivers/scsi/scsi_lib.c | 2 +- drivers/scsi/scsi_proto_test.c | 2 +- drivers/scsi/scsi_scan.c | 2 +- drivers/scsi/scsi_trace.c | 2 +- drivers/scsi/scsicam.c | 2 +- drivers/scsi/sd.c | 2 +- drivers/scsi/sd_zbc.c | 2 +- drivers/scsi/ses.c | 2 +- drivers/scsi/smartpqi/smartpqi_init.c | 2 +- drivers/scsi/smartpqi/smartpqi_sas_transport.c | 2 +- drivers/scsi/smartpqi/smartpqi_sis.c | 2 +- drivers/scsi/sr.c | 2 +- drivers/scsi/st.c | 2 +- drivers/soc/qcom/socinfo.c | 2 +- drivers/spi/spi-airoha-snfi.c | 2 +- drivers/spi/spi-dln2.c | 2 +- drivers/spi/spi-npcm-pspi.c | 2 +- drivers/spi/spi-orion.c | 2 +- drivers/spi/spi-rpc-if.c | 2 +- drivers/spi/spi-sh-msiof.c | 2 +- drivers/spi/spi-uniphier.c | 2 +- drivers/spi/spi-xcomm.c | 2 +- drivers/staging/media/av7110/av7110.c | 2 +- drivers/staging/rtl8192e/rtl819x_BAProc.c | 2 +- drivers/staging/rtl8723bs/core/rtw_ap.c | 2 +- drivers/staging/rtl8723bs/core/rtw_ieee80211.c | 2 +- drivers/staging/rtl8723bs/core/rtw_mlme_ext.c | 2 +- drivers/staging/rtl8723bs/core/rtw_recv.c | 2 +- drivers/staging/rtl8723bs/os_dep/recv_linux.c | 2 +- drivers/target/iscsi/cxgbit/cxgbit_target.c | 2 +- drivers/target/iscsi/iscsi_target.c | 2 +- drivers/target/iscsi/iscsi_target_tmr.c | 2 +- drivers/target/sbp/sbp_target.c | 2 +- drivers/target/target_core_alua.c | 2 +- drivers/target/target_core_device.c | 2 +- drivers/target/target_core_fabric_lib.c | 2 +- drivers/target/target_core_file.c | 2 +- drivers/target/target_core_iblock.c | 2 +- drivers/target/target_core_pr.c | 2 +- drivers/target/target_core_pscsi.c | 2 +- drivers/target/target_core_sbc.c | 2 +- drivers/target/target_core_spc.c | 2 +- drivers/target/target_core_transport.c | 2 +- drivers/target/target_core_xcopy.c | 2 +- drivers/target/tcm_fc/tfc_cmd.c | 2 +- drivers/target/tcm_fc/tfc_conf.c | 2 +- drivers/target/tcm_fc/tfc_io.c | 2 +- drivers/target/tcm_fc/tfc_sess.c | 2 +- drivers/thermal/qcom/qcom-spmi-adc-tm5.c | 2 +- drivers/tty/serial/max3100.c | 2 +- drivers/tty/vt/vc_screen.c | 2 +- drivers/ufs/core/ufs-mcq.c | 2 +- drivers/ufs/core/ufs-sysfs.c | 2 +- drivers/ufs/core/ufshcd.c | 2 +- drivers/ufs/host/ufs-exynos.c | 2 +- drivers/usb/atm/cxacru.c | 2 +- drivers/usb/atm/ueagle-atm.c | 2 +- drivers/usb/class/cdc-acm.c | 2 +- drivers/usb/class/cdc-wdm.c | 2 +- drivers/usb/core/hcd.c | 2 +- drivers/usb/fotg210/fotg210-hcd.c | 2 +- drivers/usb/gadget/composite.c | 2 +- drivers/usb/gadget/function/f_fs.c | 2 +- drivers/usb/gadget/function/f_mass_storage.c | 2 +- drivers/usb/gadget/function/f_printer.c | 2 +- drivers/usb/gadget/function/f_tcm.c | 2 +- drivers/usb/gadget/function/rndis.c | 2 +- drivers/usb/gadget/function/storage_common.h | 2 +- drivers/usb/gadget/function/uvc_video.c | 2 +- drivers/usb/gadget/legacy/tcm_usb_gadget.c | 2 +- drivers/usb/gadget/u_os_desc.h | 2 +- drivers/usb/gadget/udc/bdc/bdc.h | 2 +- drivers/usb/gadget/udc/bdc/bdc_ep.c | 2 +- drivers/usb/gadget/udc/bdc/bdc_udc.c | 2 +- drivers/usb/gadget/udc/cdns2/cdns2-ep0.c | 2 +- drivers/usb/gadget/udc/dummy_hcd.c | 2 +- drivers/usb/gadget/udc/fsl_udc_core.c | 2 +- drivers/usb/gadget/udc/goku_udc.c | 2 +- drivers/usb/gadget/udc/mv_udc_core.c | 2 +- drivers/usb/gadget/udc/net2272.c | 2 +- drivers/usb/gadget/udc/net2280.c | 2 +- drivers/usb/gadget/udc/omap_udc.c | 2 +- drivers/usb/gadget/udc/pxa25x_udc.c | 2 +- drivers/usb/gadget/udc/snps_udc_core.c | 2 +- drivers/usb/host/ehci-hcd.c | 2 +- drivers/usb/host/isp1362-hcd.c | 2 +- drivers/usb/host/ohci-da8xx.c | 2 +- drivers/usb/host/ohci-hcd.c | 2 +- drivers/usb/host/oxu210hp-hcd.c | 2 +- drivers/usb/host/sl811-hcd.c | 2 +- drivers/usb/host/xhci-hub.c | 2 +- drivers/usb/host/xhci-pci-renesas.c | 2 +- drivers/usb/isp1760/isp1760-hcd.c | 2 +- drivers/usb/misc/usb-ljca.c | 2 +- drivers/usb/musb/musb_virthub.c | 2 +- drivers/usb/phy/phy-fsl-usb.c | 2 +- drivers/usb/serial/aircable.c | 2 +- drivers/usb/serial/ch341.c | 2 +- drivers/usb/serial/cypress_m8.c | 2 +- drivers/usb/serial/kl5kusb105.c | 2 +- drivers/usb/serial/mct_u232.c | 2 +- drivers/usb/serial/mxuport.c | 2 +- drivers/usb/serial/pl2303.c | 2 +- drivers/usb/serial/quatech2.c | 2 +- drivers/usb/typec/ucsi/ucsi.h | 2 +- drivers/usb/typec/ucsi/ucsi_ccg.c | 2 +- drivers/usb/typec/ucsi/ucsi_stm32g0.c | 2 +- drivers/vhost/scsi.c | 2 +- drivers/video/fbdev/aty/mach64_accel.c | 2 +- drivers/video/fbdev/c2p_iplan2.c | 2 +- drivers/video/fbdev/c2p_planar.c | 2 +- drivers/video/fbdev/matrox/matroxfb_base.h | 2 +- drivers/video/fbdev/metronomefb.c | 2 +- drivers/video/fbdev/udlfb.c | 2 +- drivers/watchdog/ziirave_wdt.c | 2 +- fs/adfs/map.c | 2 +- fs/bcachefs/bset.c | 2 +- fs/bcachefs/inode.c | 2 +- fs/bcachefs/siphash.c | 2 +- fs/bcachefs/varint.c | 2 +- fs/binfmt_flat.c | 2 +- fs/btrfs/accessors.c | 2 +- fs/btrfs/accessors.h | 2 +- fs/btrfs/disk-io.c | 2 +- fs/btrfs/inode.c | 2 +- fs/btrfs/uuid-tree.c | 2 +- fs/ceph/export.c | 2 +- fs/ceph/super.h | 2 +- fs/crypto/keyring.c | 2 +- fs/ecryptfs/crypto.c | 2 +- fs/ecryptfs/inode.c | 2 +- fs/ecryptfs/mmap.c | 2 +- fs/erofs/zmap.c | 2 +- fs/exfat/cache.c | 2 +- fs/exfat/fatent.c | 2 +- fs/exfat/nls.c | 2 +- fs/f2fs/dir.c | 2 +- fs/f2fs/recovery.c | 2 +- fs/fat/inode.c | 2 +- fs/hfsplus/wrapper.c | 2 +- fs/hpfs/hpfs_fn.h | 2 +- fs/isofs/isofs.h | 2 +- fs/lockd/mon.c | 2 +- fs/nls/nls_ucs2_utils.c | 2 +- fs/ntfs3/lib/decompress_common.h | 2 +- fs/orangefs/orangefs-kernel.h | 2 +- fs/reiserfs/inode.c | 2 +- fs/reiserfs/reiserfs.h | 2 +- fs/smb/client/cifspdu.h | 2 +- fs/smb/client/compress/lz77.c | 2 +- fs/smb/server/unicode.c | 2 +- fs/xfs/xfs_linux.h | 2 +- include/asm-generic/Kbuild | 1 - include/asm-generic/uaccess.h | 2 +- include/crypto/chacha.h | 2 +- include/crypto/internal/ecc.h | 2 +- include/crypto/internal/poly1305.h | 2 +- include/crypto/sha1_base.h | 2 +- include/crypto/sha256_base.h | 2 +- include/crypto/sha512_base.h | 2 +- include/crypto/sm3_base.h | 2 +- include/crypto/utils.h | 2 +- include/linux/ceph/decode.h | 2 +- include/linux/ceph/libceph.h | 2 +- include/linux/etherdevice.h | 2 +- include/linux/ieee80211.h | 2 +- include/linux/mtd/map.h | 2 +- include/linux/ptp_classify.h | 2 +- include/linux/sunrpc/xdr.h | 2 +- include/linux/tpm.h | 2 +- include/{asm-generic => linux}/unaligned.h | 6 +++--- include/net/bluetooth/l2cap.h | 2 +- include/net/calipso.h | 2 +- include/net/cipso_ipv4.h | 2 +- include/net/ieee80211_radiotap.h | 2 +- include/net/mac80211.h | 2 +- include/net/mac802154.h | 2 +- include/net/netfilter/nf_tables.h | 2 +- include/rdma/ib_hdrs.h | 2 +- include/rdma/iba.h | 2 +- include/scsi/scsi_transport_fc.h | 2 +- include/target/target_core_backend.h | 2 +- kernel/bpf/core.c | 2 +- kernel/debug/gdbstub.c | 2 +- lib/842/842.h | 2 +- lib/crypto/aes.c | 2 +- lib/crypto/blake2s-generic.c | 2 +- lib/crypto/chacha.c | 2 +- lib/crypto/chacha20poly1305-selftest.c | 2 +- lib/crypto/chacha20poly1305.c | 2 +- lib/crypto/curve25519-fiat32.c | 2 +- lib/crypto/curve25519-hacl64.c | 2 +- lib/crypto/des.c | 2 +- lib/crypto/memneq.c | 2 +- lib/crypto/poly1305-donna32.c | 2 +- lib/crypto/poly1305-donna64.c | 2 +- lib/crypto/poly1305.c | 2 +- lib/crypto/sha1.c | 2 +- lib/crypto/sha256.c | 2 +- lib/crypto/utils.c | 2 +- lib/decompress_unlz4.c | 2 +- lib/decompress_unlzo.c | 2 +- lib/hexdump.c | 2 +- lib/lz4/lz4_compress.c | 2 +- lib/lz4/lz4_decompress.c | 2 +- lib/lz4/lz4defs.h | 2 +- lib/lzo/lzo1x_compress.c | 2 +- lib/lzo/lzo1x_decompress_safe.c | 2 +- lib/pldmfw/pldmfw.c | 2 +- lib/random32.c | 2 +- lib/siphash.c | 2 +- lib/string.c | 2 +- lib/vsprintf.c | 2 +- lib/xxhash.c | 2 +- lib/xz/xz_private.h | 2 +- lib/zstd/common/mem.h | 2 +- net/802/garp.c | 2 +- net/802/mrp.c | 2 +- net/batman-adv/distributed-arp-table.c | 2 +- net/bluetooth/bnep/core.c | 2 +- net/bluetooth/coredump.c | 2 +- net/bluetooth/eir.h | 2 +- net/bluetooth/hci_core.c | 2 +- net/bluetooth/hci_event.c | 2 +- net/bluetooth/hci_sock.c | 2 +- net/bluetooth/mgmt.c | 2 +- net/bluetooth/mgmt_util.c | 2 +- net/bluetooth/rfcomm/core.c | 2 +- net/bridge/br_fdb.c | 2 +- net/bridge/br_stp_bpdu.c | 2 +- net/caif/cfrfml.c | 2 +- net/core/drop_monitor.c | 2 +- net/core/filter.c | 2 +- net/core/net-traces.c | 2 +- net/core/netpoll.c | 2 +- net/core/sock.c | 2 +- net/core/tso.c | 2 +- net/dccp/ccids/ccid3.c | 2 +- net/dccp/options.c | 2 +- net/ipv4/cipso_ipv4.c | 2 +- net/ipv4/ip_options.c | 2 +- net/ipv4/tcp_input.c | 2 +- net/ipv6/addrconf.c | 2 +- net/ipv6/calipso.c | 2 +- net/mac80211/key.c | 2 +- net/mac80211/mesh.c | 2 +- net/mac80211/mesh_hwmp.c | 2 +- net/mac80211/michael.c | 2 +- net/mac80211/mlme.c | 2 +- net/mac80211/ocb.c | 2 +- net/mac80211/rx.c | 2 +- net/mac80211/status.c | 2 +- net/mac80211/tkip.c | 2 +- net/mac80211/tx.c | 2 +- net/mac80211/wep.c | 2 +- net/mac80211/wpa.c | 2 +- net/mac802154/rx.c | 2 +- net/mac802154/tx.c | 2 +- net/mptcp/crypto.c | 2 +- net/netfilter/ipvs/ip_vs_ftp.c | 2 +- net/netfilter/ipvs/ip_vs_sync.c | 2 +- net/netfilter/nf_conntrack_proto_tcp.c | 2 +- net/netfilter/nf_synproxy_core.c | 2 +- net/netfilter/nft_byteorder.c | 2 +- net/netfilter/nft_exthdr.c | 2 +- net/phonet/af_phonet.c | 2 +- net/sched/em_cmp.c | 2 +- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 2 +- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 2 +- net/tls/trace.h | 2 +- net/wireless/radiotap.c | 2 +- net/xfrm/xfrm_user.c | 2 +- security/apparmor/policy_unpack.c | 2 +- security/keys/trusted-keys/trusted_tpm2.c | 2 +- sound/i2c/cs8427.c | 2 +- sound/pci/hda/hda_eld.c | 2 +- sound/pci/hda/tas2781_hda_i2c.c | 2 +- sound/soc/codecs/adau1701.c | 2 +- sound/soc/codecs/adau17x1.c | 2 +- sound/soc/codecs/pcm6240.c | 2 +- sound/soc/codecs/peb2466.c | 2 +- sound/soc/codecs/sigmadsp-i2c.c | 2 +- sound/soc/codecs/tas2781-fmwlib.c | 2 +- sound/soc/codecs/tas2781-i2c.c | 2 +- sound/soc/codecs/tas571x.c | 2 +- sound/soc/codecs/tlv320aic31xx.c | 2 +- sound/soc/codecs/wm5102.c | 2 +- sound/soc/codecs/wm8958-dsp2.c | 2 +- sound/soc/sof/iomem-utils.c | 2 +- sound/soc/sof/sof-utils.c | 2 +- tools/arch/x86/lib/insn.c | 2 +- tools/include/{asm-generic => linux}/unaligned.h | 6 +++--- tools/perf/check-headers.sh | 2 +- tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 2 +- tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c | 2 +- tools/testing/cxl/test/mem.c | 2 +- .../testing/selftests/bpf/progs/test_tcp_custom_syncookie.h | 2 +- 848 files changed, 851 insertions(+), 852 deletions(-) rename include/{asm-generic => linux}/unaligned.h (96%) rename tools/include/{asm-generic => linux}/unaligned.h (97%) diff --git a/Documentation/arch/arm/mem_alignment.rst b/Documentation/arch/arm/mem_alignment.rst index aa22893b62bc..64bd77959300 100644 --- a/Documentation/arch/arm/mem_alignment.rst +++ b/Documentation/arch/arm/mem_alignment.rst @@ -12,7 +12,7 @@ ones. Of course this is a bad idea to rely on the alignment trap to perform unaligned memory access in general. If those access are predictable, you -are better to use the macros provided by include/asm/unaligned.h. The +are better to use the macros provided by include/linux/unaligned.h. The alignment trap can fixup misaligned access for the exception cases, but at a high performance cost. It better be rare. diff --git a/Documentation/core-api/unaligned-memory-access.rst b/Documentation/core-api/unaligned-memory-access.rst index 1ee82419d8aa..5ceeb80eb539 100644 --- a/Documentation/core-api/unaligned-memory-access.rst +++ b/Documentation/core-api/unaligned-memory-access.rst @@ -203,7 +203,7 @@ Avoiding unaligned accesses =========================== The easiest way to avoid unaligned access is to use the get_unaligned() and -put_unaligned() macros provided by the header file. +put_unaligned() macros provided by the header file. Going back to an earlier example of code that potentially causes unaligned access:: diff --git a/Documentation/translations/zh_CN/core-api/unaligned-memory-access.rst b/Documentation/translations/zh_CN/core-api/unaligned-memory-access.rst index 29c33e7e0855..fbe0989a8ce5 100644 --- a/Documentation/translations/zh_CN/core-api/unaligned-memory-access.rst +++ b/Documentation/translations/zh_CN/core-api/unaligned-memory-access.rst @@ -175,7 +175,7 @@ field2会导致非对齐访问,这并不是不合理的。你会期望field2 避免非对齐访问 ============== -避免非对齐访问的最简单方法是使用头文件提供的get_unaligned()和 +避免非对齐访问的最简单方法是使用头文件提供的get_unaligned()和 put_unaligned()宏。 回到前面的一个可能导致非对齐访问的代码例子:: diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index 6afae65e9a8b..a9a38c80c4a7 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -22,7 +22,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/arc/include/asm/io.h b/arch/arc/include/asm/io.h index 4fdb7350636c..f57cb5a6b624 100644 --- a/arch/arc/include/asm/io.h +++ b/arch/arc/include/asm/io.h @@ -9,7 +9,7 @@ #include #include #include -#include +#include #ifdef CONFIG_ISA_ARCV2 #include diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c index 41af02081549..8d2ea2cbd98b 100644 --- a/arch/arc/kernel/traps.c +++ b/arch/arc/kernel/traps.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include "unaligned.h" diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c index 9270d0a713c3..d8969dab12d4 100644 --- a/arch/arc/kernel/unwind.c +++ b/arch/arc/kernel/unwind.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include extern char __start_unwind[], __end_unwind[]; diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c index f5b66f4cf45d..21df5e7f51f9 100644 --- a/arch/arm/crypto/aes-ce-glue.c +++ b/arch/arm/crypto/aes-ce-glue.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/crypto/crc32-ce-glue.c b/arch/arm/crypto/crc32-ce-glue.c index 4ff18044af07..20b4dff13e3a 100644 --- a/arch/arm/crypto/crc32-ce-glue.c +++ b/arch/arm/crypto/crc32-ce-glue.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #define PMULL_MIN_LEN 64L /* minimum size of buffer * for crc32_pmull_le_16 */ diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c index 3ddf05b4234d..3af997082534 100644 --- a/arch/arm/crypto/ghash-ce-glue.c +++ b/arch/arm/crypto/ghash-ce-glue.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/crypto/poly1305-glue.c b/arch/arm/crypto/poly1305-glue.c index 8482e302c45a..4464ffbf8fd1 100644 --- a/arch/arm/crypto/poly1305-glue.c +++ b/arch/arm/crypto/poly1305-glue.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/crypto/sha2-ce-glue.c b/arch/arm/crypto/sha2-ce-glue.c index c62ce89dd3e0..aeac45bfbf9f 100644 --- a/arch/arm/crypto/sha2-ce-glue.c +++ b/arch/arm/crypto/sha2-ce-glue.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include "sha256_glue.h" diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 6c9c16d767cf..f90be312418e 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index f8dd0b3cc8e0..3c6ddb1afdc4 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -22,7 +22,7 @@ #include #include -#include +#include #include #include "fault.h" diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c index ce9b28e3c7d6..a523b519700f 100644 --- a/arch/arm64/crypto/aes-ce-ccm-glue.c +++ b/arch/arm64/crypto/aes-ce-ccm-glue.c @@ -9,7 +9,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/arm64/crypto/aes-ce-glue.c b/arch/arm64/crypto/aes-ce-glue.c index e921823ca103..00b8749013c5 100644 --- a/arch/arm64/crypto/aes-ce-glue.c +++ b/arch/arm64/crypto/aes-ce-glue.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index 97331b454ea8..da7b7ec1a664 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c index 9c4bfd62e789..18883ea438f3 100644 --- a/arch/arm64/crypto/poly1305-glue.c +++ b/arch/arm64/crypto/poly1305-glue.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c index 1dd93e1fcb39..cbd14f208f83 100644 --- a/arch/arm64/crypto/sha1-ce-glue.c +++ b/arch/arm64/crypto/sha1-ce-glue.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c index 0a44d2e7ee1f..6b4866a88ded 100644 --- a/arch/arm64/crypto/sha2-ce-glue.c +++ b/arch/arm64/crypto/sha2-ce-glue.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/arm64/crypto/sha3-ce-glue.c b/arch/arm64/crypto/sha3-ce-glue.c index 250e1377c481..5662c3ac49e9 100644 --- a/arch/arm64/crypto/sha3-ce-glue.c +++ b/arch/arm64/crypto/sha3-ce-glue.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm64/crypto/sha512-ce-glue.c b/arch/arm64/crypto/sha512-ce-glue.c index f3431fc62315..071f64293227 100644 --- a/arch/arm64/crypto/sha512-ce-glue.c +++ b/arch/arm64/crypto/sha512-ce-glue.c @@ -11,7 +11,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/arm64/crypto/sm3-ce-glue.c b/arch/arm64/crypto/sm3-ce-glue.c index 54bf6ebcfffb..1a71788c4cda 100644 --- a/arch/arm64/crypto/sm3-ce-glue.c +++ b/arch/arm64/crypto/sm3-ce-glue.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/arm64/crypto/sm3-neon-glue.c b/arch/arm64/crypto/sm3-neon-glue.c index 7182ee683f14..8dd71ce79b69 100644 --- a/arch/arm64/crypto/sm3-neon-glue.c +++ b/arch/arm64/crypto/sm3-neon-glue.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/loongarch/crypto/crc32-loongarch.c b/arch/loongarch/crypto/crc32-loongarch.c index 3eebea3a7b47..b7d9782827f5 100644 --- a/arch/loongarch/crypto/crc32-loongarch.c +++ b/arch/loongarch/crypto/crc32-loongarch.c @@ -13,7 +13,7 @@ #include #include -#include +#include #define _CRC32(crc, value, size, type) \ do { \ diff --git a/arch/microblaze/include/asm/flat.h b/arch/microblaze/include/asm/flat.h index 79a749f4ad04..edff4306fa70 100644 --- a/arch/microblaze/include/asm/flat.h +++ b/arch/microblaze/include/asm/flat.h @@ -8,7 +8,7 @@ #ifndef _ASM_MICROBLAZE_FLAT_H #define _ASM_MICROBLAZE_FLAT_H -#include +#include /* * Microblaze works a little differently from other arches, because diff --git a/arch/mips/boot/compressed/decompress.c b/arch/mips/boot/compressed/decompress.c index adb6d5b0e6eb..90021c6a8cab 100644 --- a/arch/mips/boot/compressed/decompress.c +++ b/arch/mips/boot/compressed/decompress.c @@ -16,7 +16,7 @@ #include #include -#include +#include #include #include "decompress.h" diff --git a/arch/mips/crypto/crc32-mips.c b/arch/mips/crypto/crc32-mips.c index 2a59b85f88aa..a7a1d43a1b2c 100644 --- a/arch/mips/crypto/crc32-mips.c +++ b/arch/mips/crypto/crc32-mips.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include diff --git a/arch/mips/crypto/poly1305-glue.c b/arch/mips/crypto/poly1305-glue.c index 867728ee535a..c03ad0bbe69c 100644 --- a/arch/mips/crypto/poly1305-glue.c +++ b/arch/mips/crypto/poly1305-glue.c @@ -5,7 +5,7 @@ * Copyright (C) 2019 Linaro Ltd. */ -#include +#include #include #include #include diff --git a/arch/nios2/kernel/misaligned.c b/arch/nios2/kernel/misaligned.c index 23e0544e117c..2f2862eab3c6 100644 --- a/arch/nios2/kernel/misaligned.c +++ b/arch/nios2/kernel/misaligned.c @@ -23,7 +23,7 @@ #include #include -#include +#include /* instructions we emulate */ #define INST_LDHU 0x0b diff --git a/arch/parisc/boot/compressed/misc.c b/arch/parisc/boot/compressed/misc.c index d389359e22ac..9c83bd06ef15 100644 --- a/arch/parisc/boot/compressed/misc.c +++ b/arch/parisc/boot/compressed/misc.c @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include "sizes.h" diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index a111d7362d56..b9b3d527bc90 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -36,7 +36,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index 99897107d800..f4626943633a 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include "unaligned.h" diff --git a/arch/powerpc/crypto/aes-gcm-p10-glue.c b/arch/powerpc/crypto/aes-gcm-p10-glue.c index f62ee54076c0..f66ad56e765f 100644 --- a/arch/powerpc/crypto/aes-gcm-p10-glue.c +++ b/arch/powerpc/crypto/aes-gcm-p10-glue.c @@ -5,7 +5,7 @@ * Copyright 2022- IBM Inc. All rights reserved */ -#include +#include #include #include #include diff --git a/arch/powerpc/crypto/poly1305-p10-glue.c b/arch/powerpc/crypto/poly1305-p10-glue.c index 95dd708573ee..369686e9370b 100644 --- a/arch/powerpc/crypto/poly1305-p10-glue.c +++ b/arch/powerpc/crypto/poly1305-p10-glue.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index d95e03b3d3e3..9e297f88adc5 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #define BIND_ANY_ADDR (~0ul) diff --git a/arch/sh/include/asm/flat.h b/arch/sh/include/asm/flat.h index fee4f25555cb..70752c7bc55f 100644 --- a/arch/sh/include/asm/flat.h +++ b/arch/sh/include/asm/flat.h @@ -9,7 +9,7 @@ #ifndef __ASM_SH_FLAT_H #define __ASM_SH_FLAT_H -#include +#include static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags, u32 *addr) diff --git a/arch/sh/kernel/dwarf.c b/arch/sh/kernel/dwarf.c index 45c8ae20d109..a1b54bedc929 100644 --- a/arch/sh/kernel/dwarf.c +++ b/arch/sh/kernel/dwarf.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include /* Reserve enough memory for two stack frames */ diff --git a/arch/sh/kernel/module.c b/arch/sh/kernel/module.c index b9cee98a754e..a469a80840d3 100644 --- a/arch/sh/kernel/module.c +++ b/arch/sh/kernel/module.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include int apply_relocate_add(Elf32_Shdr *sechdrs, diff --git a/arch/sparc/crypto/crc32c_glue.c b/arch/sparc/crypto/crc32c_glue.c index 688db0dcb97d..913b9a09e885 100644 --- a/arch/sparc/crypto/crc32c_glue.c +++ b/arch/sparc/crypto/crc32c_glue.c @@ -20,7 +20,7 @@ #include #include -#include +#include #include "opcodes.h" diff --git a/arch/um/drivers/virt-pci.c b/arch/um/drivers/virt-pci.c index 6100819681b5..744e7f31e8ef 100644 --- a/arch/um/drivers/virt-pci.c +++ b/arch/um/drivers/virt-pci.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #define MAX_DEVICES 8 diff --git a/arch/um/include/asm/uaccess.h b/arch/um/include/asm/uaccess.h index 7d9d60e41e4e..1d4b6bbc1b65 100644 --- a/arch/um/include/asm/uaccess.h +++ b/arch/um/include/asm/uaccess.h @@ -8,7 +8,7 @@ #define __UM_UACCESS_H #include -#include +#include #define __under_task_size(addr, size) \ (((unsigned long) (addr) < TASK_SIZE) && \ diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c index d45e9c0c42ac..f110708c8038 100644 --- a/arch/x86/crypto/camellia_glue.c +++ b/arch/x86/crypto/camellia_glue.c @@ -8,7 +8,7 @@ * Copyright (C) 2006 NTT (Nippon Telegraph and Telephone Corporation) */ -#include +#include #include #include #include diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index 700ecaee9a08..41bc02e48916 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #define GHASH_BLOCK_SIZE 16 #define GHASH_DIGEST_SIZE 16 diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index 5952ab41c60f..6ffb931b9fb1 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c @@ -13,7 +13,7 @@ #endif #include /*__ignore_sync_check__ */ #include /* __ignore_sync_check__ */ -#include /* __ignore_sync_check__ */ +#include /* __ignore_sync_check__ */ #include #include diff --git a/arch/xtensa/include/asm/flat.h b/arch/xtensa/include/asm/flat.h index ed5870c779f9..4854419dcd86 100644 --- a/arch/xtensa/include/asm/flat.h +++ b/arch/xtensa/include/asm/flat.h @@ -2,7 +2,7 @@ #ifndef __ASM_XTENSA_FLAT_H #define __ASM_XTENSA_FLAT_H -#include +#include static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags, u32 *addr) diff --git a/block/partitions/ldm.h b/block/partitions/ldm.h index 0a747a0c782d..e259180c8914 100644 --- a/block/partitions/ldm.h +++ b/block/partitions/ldm.h @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include struct parsed_partitions; diff --git a/block/partitions/msdos.c b/block/partitions/msdos.c index b5d5c229cc3b..073be78ba0b0 100644 --- a/block/partitions/msdos.c +++ b/block/partitions/msdos.c @@ -36,7 +36,7 @@ * the nr_sects and start_sect partition table entries are * at a 2 (mod 4) address. */ -#include +#include static inline sector_t nr_sects(struct msdos_partition *p) { diff --git a/block/t10-pi.c b/block/t10-pi.c index e7052a728966..2d05421f0fa5 100644 --- a/block/t10-pi.c +++ b/block/t10-pi.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include "blk.h" struct blk_integrity_iter { diff --git a/crypto/aes_generic.c b/crypto/aes_generic.c index 666474b81c6a..3c66d425c97b 100644 --- a/crypto/aes_generic.c +++ b/crypto/aes_generic.c @@ -54,7 +54,7 @@ #include #include #include -#include +#include static inline u8 byte(const u32 x, const unsigned n) { diff --git a/crypto/blake2b_generic.c b/crypto/blake2b_generic.c index 32e380b714b6..04a712ddfb43 100644 --- a/crypto/blake2b_generic.c +++ b/crypto/blake2b_generic.c @@ -15,7 +15,7 @@ * More information about BLAKE2 can be found at https://blake2.net. */ -#include +#include #include #include #include diff --git a/crypto/blowfish_generic.c b/crypto/blowfish_generic.c index 0e74c7242e77..0146bc762c09 100644 --- a/crypto/blowfish_generic.c +++ b/crypto/blowfish_generic.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include diff --git a/crypto/camellia_generic.c b/crypto/camellia_generic.c index c04670cf51ac..197fcf3abc89 100644 --- a/crypto/camellia_generic.c +++ b/crypto/camellia_generic.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include static const u32 camellia_sp1110[256] = { 0x70707000, 0x82828200, 0x2c2c2c00, 0xececec00, diff --git a/crypto/cast5_generic.c b/crypto/cast5_generic.c index 085a1eedae03..f3e57775fa02 100644 --- a/crypto/cast5_generic.c +++ b/crypto/cast5_generic.c @@ -13,7 +13,7 @@ */ -#include +#include #include #include #include diff --git a/crypto/cast6_generic.c b/crypto/cast6_generic.c index 34f1ab53e3a7..11b725b12f27 100644 --- a/crypto/cast6_generic.c +++ b/crypto/cast6_generic.c @@ -10,7 +10,7 @@ */ -#include +#include #include #include #include diff --git a/crypto/chacha_generic.c b/crypto/chacha_generic.c index 8beea79ab117..ba7fcb47f9aa 100644 --- a/crypto/chacha_generic.c +++ b/crypto/chacha_generic.c @@ -6,7 +6,7 @@ * Copyright (C) 2018 Google LLC */ -#include +#include #include #include #include diff --git a/crypto/crc32_generic.c b/crypto/crc32_generic.c index a989cb44fd16..d1251663ed66 100644 --- a/crypto/crc32_generic.c +++ b/crypto/crc32_generic.c @@ -7,7 +7,7 @@ * This is crypto api shash wrappers to crc32_le. */ -#include +#include #include #include #include diff --git a/crypto/crc32c_generic.c b/crypto/crc32c_generic.c index 768614738541..a8c90b3f4c6c 100644 --- a/crypto/crc32c_generic.c +++ b/crypto/crc32c_generic.c @@ -30,7 +30,7 @@ * Copyright (c) 2008 Herbert Xu */ -#include +#include #include #include #include diff --git a/crypto/crc64_rocksoft_generic.c b/crypto/crc64_rocksoft_generic.c index 9e812bb26dba..ce0f3059b912 100644 --- a/crypto/crc64_rocksoft_generic.c +++ b/crypto/crc64_rocksoft_generic.c @@ -3,7 +3,7 @@ #include #include #include -#include +#include static int chksum_init(struct shash_desc *desc) { diff --git a/crypto/ecc.c b/crypto/ecc.c index 420decdad7d9..50ad2d4ed672 100644 --- a/crypto/ecc.c +++ b/crypto/ecc.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include #include "ecc_curve_defs.h" diff --git a/crypto/michael_mic.c b/crypto/michael_mic.c index f4c31049601c..0d14e980d4d6 100644 --- a/crypto/michael_mic.c +++ b/crypto/michael_mic.c @@ -7,7 +7,7 @@ * Copyright (c) 2004 Jouni Malinen */ #include -#include +#include #include #include #include diff --git a/crypto/nhpoly1305.c b/crypto/nhpoly1305.c index 8a3006c3b51b..a661d4f667cd 100644 --- a/crypto/nhpoly1305.c +++ b/crypto/nhpoly1305.c @@ -30,7 +30,7 @@ * (https://cr.yp.to/mac/poly1305-20050329.pdf) */ -#include +#include #include #include #include diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c index 94af47eb6fa6..e6f29a98725a 100644 --- a/crypto/poly1305_generic.c +++ b/crypto/poly1305_generic.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include static int crypto_poly1305_init(struct shash_desc *desc) { diff --git a/crypto/polyval-generic.c b/crypto/polyval-generic.c index 16bfa6925b31..4f98910bcdb5 100644 --- a/crypto/polyval-generic.c +++ b/crypto/polyval-generic.c @@ -44,7 +44,7 @@ * */ -#include +#include #include #include #include diff --git a/crypto/serpent_generic.c b/crypto/serpent_generic.c index c6bca47931e2..f6ef187be6fe 100644 --- a/crypto/serpent_generic.c +++ b/crypto/serpent_generic.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include diff --git a/crypto/sha256_generic.c b/crypto/sha256_generic.c index bf147b01e313..b00521f1a6d4 100644 --- a/crypto/sha256_generic.c +++ b/crypto/sha256_generic.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include const u8 sha224_zero_message_hash[SHA224_DIGEST_SIZE] = { 0xd1, 0x4a, 0x02, 0x8c, 0x2a, 0x3a, 0x2b, 0xc9, 0x47, diff --git a/crypto/sha3_generic.c b/crypto/sha3_generic.c index 3e4069935b53..b103642b56ea 100644 --- a/crypto/sha3_generic.c +++ b/crypto/sha3_generic.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include /* * On some 32-bit architectures (h8300), GCC ends up using diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c index be70e76d6d86..ed81813bd420 100644 --- a/crypto/sha512_generic.c +++ b/crypto/sha512_generic.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include const u8 sha384_zero_message_hash[SHA384_DIGEST_SIZE] = { 0x38, 0xb0, 0x60, 0xa7, 0x51, 0xac, 0x96, 0x38, diff --git a/crypto/sm3.c b/crypto/sm3.c index d473e358a873..18c2fb73ba16 100644 --- a/crypto/sm3.c +++ b/crypto/sm3.c @@ -9,7 +9,7 @@ */ #include -#include +#include #include static const u32 ____cacheline_aligned K[64] = { diff --git a/crypto/sm3_generic.c b/crypto/sm3_generic.c index a215c1c37e73..a2d23a46924e 100644 --- a/crypto/sm3_generic.c +++ b/crypto/sm3_generic.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include const u8 sm3_zero_message_hash[SM3_DIGEST_SIZE] = { 0x1A, 0xB2, 0x1D, 0x83, 0x55, 0xCF, 0xA1, 0x7F, diff --git a/crypto/sm4.c b/crypto/sm4.c index 2c44193bc27e..f4cd7edc11f0 100644 --- a/crypto/sm4.c +++ b/crypto/sm4.c @@ -8,7 +8,7 @@ */ #include -#include +#include #include static const u32 ____cacheline_aligned fk[4] = { diff --git a/crypto/sm4_generic.c b/crypto/sm4_generic.c index 560eba37dc55..7df86369ac00 100644 --- a/crypto/sm4_generic.c +++ b/crypto/sm4_generic.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include /** * sm4_setkey - Set the SM4 key. diff --git a/crypto/twofish_generic.c b/crypto/twofish_generic.c index 557915e4062d..19f2b365e140 100644 --- a/crypto/twofish_generic.c +++ b/crypto/twofish_generic.c @@ -24,7 +24,7 @@ * Third Edition. */ -#include +#include #include #include #include diff --git a/crypto/vmac.c b/crypto/vmac.c index 0a1d8efa6c1a..bd9d70eac22e 100644 --- a/crypto/vmac.c +++ b/crypto/vmac.c @@ -28,7 +28,7 @@ * Last modified: 17 APR 08, 1700 PDT */ -#include +#include #include #include #include diff --git a/crypto/xxhash_generic.c b/crypto/xxhash_generic.c index 55d1c8a76127..ac206ad4184d 100644 --- a/crypto/xxhash_generic.c +++ b/crypto/xxhash_generic.c @@ -4,7 +4,7 @@ #include #include #include -#include +#include #define XXHASH64_BLOCK_SIZE 32 #define XXHASH64_DIGEST_SIZE 8 diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c index c7c26872f4ce..9c84f3da7c09 100644 --- a/drivers/acpi/apei/apei-base.c +++ b/drivers/acpi/apei/apei-base.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "apei-internal.h" diff --git a/drivers/acpi/apei/einj-core.c b/drivers/acpi/apei/einj-core.c index 73903a497d73..5c22720f43cc 100644 --- a/drivers/acpi/apei/einj-core.c +++ b/drivers/acpi/apei/einj-core.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include "apei-internal.h" diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index f4599261cfc3..2c9289ac2824 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 5b06e236aabe..b73b3aa92f3f 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -41,7 +41,7 @@ #include #include #include -#include +#include #include diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index cdb20a700b55..c085dd81ebe7 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -50,7 +50,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/ata/libata-sata.c b/drivers/ata/libata-sata.c index c8b119a06bb2..9c76fb1ad2ec 100644 --- a/drivers/ata/libata-sata.c +++ b/drivers/ata/libata-sata.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include "libata.h" #include "libata-transport.h" diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index a4aedf7e1775..f915e3df57a9 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/auxdisplay/ht16k33.c b/drivers/auxdisplay/ht16k33.c index 8a7034b41d50..a816f9e10255 100644 --- a/drivers/auxdisplay/ht16k33.c +++ b/drivers/auxdisplay/ht16k33.c @@ -25,7 +25,7 @@ #include #include -#include +#include #include "line-display.h" diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 9ed842d17642..4ded93687c1f 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #define CREATE_TRACE_POINTS #include "trace.h" diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index cc9077b588d7..1ad4eb5c3c8f 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include "aoe.h" diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c index 923a134fd766..66e617664c14 100644 --- a/drivers/block/aoe/aoenet.c +++ b/drivers/block/aoe/aoenet.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include "aoe.h" #define NECODES 5 diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 5d65c9754d83..720fc30e2ecc 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -25,7 +25,7 @@ #include "drbd_protocol.h" #include "drbd_req.h" #include "drbd_state_change.h" -#include +#include #include #include diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 499c110465e3..65b96c083b3c 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -71,7 +71,7 @@ #include #include -#include +#include #define DRIVER_NAME "pktcdvd" diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c index ce97b336fbfb..fc796f1dbda9 100644 --- a/drivers/bluetooth/ath3k.c +++ b/drivers/bluetooth/ath3k.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #define VERSION "1.0" diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c index f9a7c790d7e2..eef00467905e 100644 --- a/drivers/bluetooth/btbcm.c +++ b/drivers/bluetooth/btbcm.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index 1ccbb5157515..438b92967bc3 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index fda47948c35d..5252125b003f 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/bluetooth/btmtk.c b/drivers/bluetooth/btmtk.c index 2b7c80043aa2..9bbf20502163 100644 --- a/drivers/bluetooth/btmtk.c +++ b/drivers/bluetooth/btmtk.c @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/bluetooth/btmtksdio.c b/drivers/bluetooth/btmtksdio.c index 497e4c87f5be..11d33cd7b08f 100644 --- a/drivers/bluetooth/btmtksdio.c +++ b/drivers/bluetooth/btmtksdio.c @@ -10,7 +10,7 @@ * */ -#include +#include #include #include #include diff --git a/drivers/bluetooth/btmtkuart.c b/drivers/bluetooth/btmtkuart.c index aa87c3e78871..64e4d835af52 100644 --- a/drivers/bluetooth/btmtkuart.c +++ b/drivers/bluetooth/btmtkuart.c @@ -8,7 +8,7 @@ * */ -#include +#include #include #include #include diff --git a/drivers/bluetooth/btnxpuart.c b/drivers/bluetooth/btnxpuart.c index 7c2030cec10e..5ea0d23e88c0 100644 --- a/drivers/bluetooth/btnxpuart.c +++ b/drivers/bluetooth/btnxpuart.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/bluetooth/btrsi.c b/drivers/bluetooth/btrsi.c index 0c91d7635ac3..6c1f584c8a33 100644 --- a/drivers/bluetooth/btrsi.c +++ b/drivers/bluetooth/btrsi.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #define RSI_DMA_ALIGN 8 diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c index 2d95b3ea046d..0bcb44cf7b31 100644 --- a/drivers/bluetooth/btrtl.c +++ b/drivers/bluetooth/btrtl.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 6c9c761d5b93..f23c8801ad5c 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/bluetooth/h4_recv.h b/drivers/bluetooth/h4_recv.h index 647d37ca4cdd..28cf2d8c2d48 100644 --- a/drivers/bluetooth/h4_recv.h +++ b/drivers/bluetooth/h4_recv.h @@ -6,7 +6,7 @@ * Copyright (C) 2015-2018 Intel Corporation */ -#include +#include struct h4_recv_pkt { u8 type; /* Packet type */ diff --git a/drivers/bluetooth/hci_bcm4377.c b/drivers/bluetooth/hci_bcm4377.c index 77a5454a8721..9bce53e49cfa 100644 --- a/drivers/bluetooth/hci_bcm4377.c +++ b/drivers/bluetooth/hci_bcm4377.c @@ -17,7 +17,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/bluetooth/hci_bcsp.c b/drivers/bluetooth/hci_bcsp.c index 2a5a27d713f8..76878119d910 100644 --- a/drivers/bluetooth/hci_bcsp.c +++ b/drivers/bluetooth/hci_bcsp.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/bluetooth/hci_h4.c b/drivers/bluetooth/hci_h4.c index 1d0cdf023243..9070e31a68bf 100644 --- a/drivers/bluetooth/hci_h4.c +++ b/drivers/bluetooth/hci_h4.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/bluetooth/hci_nokia.c b/drivers/bluetooth/hci_nokia.c index 62633d9ba7c4..49bbe4975be4 100644 --- a/drivers/bluetooth/hci_nokia.c +++ b/drivers/bluetooth/hci_nokia.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 678f150229e7..37fddf6055be 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c index aa6af351d02d..7651321d351c 100644 --- a/drivers/bluetooth/hci_vhci.c +++ b/drivers/bluetooth/hci_vhci.c @@ -9,7 +9,7 @@ */ #include -#include +#include #include #include diff --git a/drivers/char/tpm/tpm2-sessions.c b/drivers/char/tpm/tpm2-sessions.c index 44f60730cff4..511c67061728 100644 --- a/drivers/char/tpm/tpm2-sessions.c +++ b/drivers/char/tpm/tpm2-sessions.c @@ -71,7 +71,7 @@ #include "tpm.h" #include #include -#include +#include #include #include #include diff --git a/drivers/char/tpm/tpm2-space.c b/drivers/char/tpm/tpm2-space.c index 25a66870c165..60354cd53b5c 100644 --- a/drivers/char/tpm/tpm2-space.c +++ b/drivers/char/tpm/tpm2-space.c @@ -12,7 +12,7 @@ */ #include -#include +#include #include "tpm.h" enum tpm2_handle_types { diff --git a/drivers/clk/clk-si5341.c b/drivers/clk/clk-si5341.c index 6e8dd7387cfd..5004888c7eca 100644 --- a/drivers/clk/clk-si5341.c +++ b/drivers/clk/clk-si5341.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #define SI5341_NUM_INPUTS 4 diff --git a/drivers/comedi/drivers/usbduxsigma.c b/drivers/comedi/drivers/usbduxsigma.c index 2aaeaf44fbe5..3f215ae228b2 100644 --- a/drivers/comedi/drivers/usbduxsigma.c +++ b/drivers/comedi/drivers/usbduxsigma.c @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include /* timeout for the USB-transfer in ms*/ diff --git a/drivers/counter/104-quad-8.c b/drivers/counter/104-quad-8.c index ed1f57511955..4a6868b8f58b 100644 --- a/drivers/counter/104-quad-8.c +++ b/drivers/counter/104-quad-8.c @@ -22,7 +22,7 @@ #include #include -#include +#include #define QUAD8_EXTENT 32 diff --git a/drivers/counter/i8254.c b/drivers/counter/i8254.c index c41e4fdc9601..6d74e8ef92f0 100644 --- a/drivers/counter/i8254.c +++ b/drivers/counter/i8254.c @@ -15,7 +15,7 @@ #include #include -#include +#include #define I8254_COUNTER_REG(_counter) (_counter) #define I8254_CONTROL_REG 0x3 diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 1a5ad184d28f..2b8708475ac7 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -22,7 +22,7 @@ #include #include -#include +#include #include diff --git a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-hash.c b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-hash.c index f7893e4ac59d..434f2b271012 100644 --- a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-hash.c +++ b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-hash.c @@ -9,7 +9,7 @@ * You could find the datasheet in Documentation/arch/arm/sunxi.rst */ #include "sun4i-ss.h" -#include +#include #include /* This is a totally arbitrary value */ diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index 066f08a3a040..2cfb1b8d8c7c 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -56,7 +56,7 @@ #include "sg_sw_sec4.h" #include "key_gen.h" #include "caamalg_desc.h" -#include +#include #include #include #include diff --git a/drivers/crypto/caam/caamalg_qi.c b/drivers/crypto/caam/caamalg_qi.c index 13347dfecf7a..65f6adb6c673 100644 --- a/drivers/crypto/caam/caamalg_qi.c +++ b/drivers/crypto/caam/caamalg_qi.c @@ -19,7 +19,7 @@ #include "jr.h" #include "caamalg_desc.h" #include -#include +#include #include #include #include diff --git a/drivers/crypto/caam/caamalg_qi2.c b/drivers/crypto/caam/caamalg_qi2.c index 44e1f8f46967..e809d030ab11 100644 --- a/drivers/crypto/caam/caamalg_qi2.c +++ b/drivers/crypto/caam/caamalg_qi2.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #define CAAM_CRA_PRIORITY 2000 diff --git a/drivers/crypto/inside-secure/safexcel_cipher.c b/drivers/crypto/inside-secure/safexcel_cipher.c index 42677f7458b7..919e5a2cab95 100644 --- a/drivers/crypto/inside-secure/safexcel_cipher.c +++ b/drivers/crypto/inside-secure/safexcel_cipher.c @@ -5,7 +5,7 @@ * Antoine Tenart */ -#include +#include #include #include #include diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c index a235e6c300f1..69d6019d8abc 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c +++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c @@ -9,7 +9,7 @@ * Some ideas are from marvell/cesa.c and s5p-sss.c driver. */ -#include +#include #include #include #include diff --git a/drivers/crypto/stm32/stm32-crc32.c b/drivers/crypto/stm32/stm32-crc32.c index b0cf6d2fd352..e0faddbf8990 100644 --- a/drivers/crypto/stm32/stm32-crc32.c +++ b/drivers/crypto/stm32/stm32-crc32.c @@ -17,7 +17,7 @@ #include -#include +#include #define DRIVER_NAME "stm32-crc32" #define CHKSUM_DIGEST_SIZE 4 diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 946f8e44455f..5175138c4fb7 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index 9167cfba7f59..8672b42ee4d1 100644 --- a/drivers/cxl/core/trace.h +++ b/drivers/cxl/core/trace.h @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 37164174b5fb..188412d45e0d 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright(c) 2020 Intel Corporation. All rights reserved. */ -#include +#include #include #include #include diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index a6538a5f5c9f..d2d43a4fc053 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright(c) 2021 Intel Corporation. All rights reserved. */ #include -#include +#include #include #include #include diff --git a/drivers/cxl/security.c b/drivers/cxl/security.c index 452d1a9b9148..ab793e8577c7 100644 --- a/drivers/cxl/security.c +++ b/drivers/cxl/security.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright(c) 2022 Intel Corporation. All rights reserved. */ #include -#include +#include #include #include #include diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c index 7a4d1a478e33..1bf0e15c1540 100644 --- a/drivers/firewire/net.c +++ b/drivers/firewire/net.c @@ -28,7 +28,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/firmware/arm_scmi/common.h b/drivers/firmware/arm_scmi/common.h index 6d9227db473f..c4b8e7ff88aa 100644 --- a/drivers/firmware/arm_scmi/common.h +++ b/drivers/firmware/arm_scmi/common.h @@ -22,7 +22,7 @@ #include #include -#include +#include #include "protocols.h" #include "notify.h" diff --git a/drivers/firmware/arm_scmi/protocols.h b/drivers/firmware/arm_scmi/protocols.h index 8e95f53bd7b7..aaee57cdcd55 100644 --- a/drivers/firmware/arm_scmi/protocols.h +++ b/drivers/firmware/arm_scmi/protocols.h @@ -22,7 +22,7 @@ #include #include -#include +#include #define PROTOCOL_REV_MINOR_MASK GENMASK(15, 0) #define PROTOCOL_REV_MAJOR_MASK GENMASK(31, 16) diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index 23b002e4d4a0..fde0656481cc 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #ifndef SMBIOS_ENTRY_POINT_SCAN_START #define SMBIOS_ENTRY_POINT_SCAN_START 0xF0000 diff --git a/drivers/firmware/efi/fdtparams.c b/drivers/firmware/efi/fdtparams.c index 0ec83ba58097..b815d2a754ee 100644 --- a/drivers/firmware/efi/fdtparams.c +++ b/drivers/firmware/efi/fdtparams.c @@ -8,7 +8,7 @@ #include #include -#include +#include enum { SYSTAB, diff --git a/drivers/firmware/efi/libstub/riscv-stub.c b/drivers/firmware/efi/libstub/riscv-stub.c index c96d6dcee86c..e7d9204baee3 100644 --- a/drivers/firmware/efi/libstub/riscv-stub.c +++ b/drivers/firmware/efi/libstub/riscv-stub.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include "efistub.h" diff --git a/drivers/firmware/efi/libstub/riscv.c b/drivers/firmware/efi/libstub/riscv.c index 8022b104c3e6..f66f33ceb99e 100644 --- a/drivers/firmware/efi/libstub/riscv.c +++ b/drivers/firmware/efi/libstub/riscv.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include "efistub.h" diff --git a/drivers/firmware/efi/libstub/zboot.c b/drivers/firmware/efi/libstub/zboot.c index 1ceace956758..af23b3c50228 100644 --- a/drivers/firmware/efi/libstub/zboot.c +++ b/drivers/firmware/efi/libstub/zboot.c @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include "efistub.h" diff --git a/drivers/fpga/microchip-spi.c b/drivers/fpga/microchip-spi.c index 2a82c726d6e5..6134cea86ac8 100644 --- a/drivers/fpga/microchip-spi.c +++ b/drivers/fpga/microchip-spi.c @@ -3,7 +3,7 @@ * Microchip Polarfire FPGA programming over slave SPI interface. */ -#include +#include #include #include #include diff --git a/drivers/fsi/fsi-occ.c b/drivers/fsi/fsi-occ.c index f58b158d097c..a6d4c8f123a5 100644 --- a/drivers/fsi/fsi-occ.c +++ b/drivers/fsi/fsi-occ.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #define OCC_SRAM_BYTES 4096 #define OCC_CMD_DATA_BYTES 4090 diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c index 09715b506468..81d195d366ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.c +++ b/drivers/gpu/drm/amd/amdgpu/atom.c @@ -27,7 +27,7 @@ #include #include -#include +#include #include diff --git a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c index dee640ab1d3a..41f72d458487 100644 --- a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c +++ b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c @@ -47,7 +47,7 @@ #include #include -#include +#include #include "cdns-mhdp8546-core.h" #include "cdns-mhdp8546-hdcp.h" diff --git a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.c b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.c index 5e3b8edcf794..31832ba4017f 100644 --- a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.c +++ b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include diff --git a/drivers/gpu/drm/bridge/samsung-dsim.c b/drivers/gpu/drm/bridge/samsung-dsim.c index e7e53a9e42af..430f8adebf9c 100644 --- a/drivers/gpu/drm/bridge/samsung-dsim.c +++ b/drivers/gpu/drm/bridge/samsung-dsim.c @@ -10,7 +10,7 @@ * Tomasz Figa */ -#include +#include #include #include diff --git a/drivers/gpu/drm/bridge/sil-sii8620.c b/drivers/gpu/drm/bridge/sil-sii8620.c index 6bb755e9f0a5..26b8d137bce0 100644 --- a/drivers/gpu/drm/bridge/sil-sii8620.c +++ b/drivers/gpu/drm/bridge/sil-sii8620.c @@ -6,7 +6,7 @@ * Andrzej Hajda */ -#include +#include #include #include diff --git a/drivers/gpu/drm/bridge/tc358775.c b/drivers/gpu/drm/bridge/tc358775.c index 3b7cc3be2ccd..0b4efaca6d68 100644 --- a/drivers/gpu/drm/bridge/tc358775.c +++ b/drivers/gpu/drm/bridge/tc358775.c @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c index 84698a0b27a8..582cf4f73a74 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c index d8951464bd2b..f0e3be0fe420 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c @@ -32,7 +32,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/gpu/drm/nouveau/include/nvif/os.h b/drivers/gpu/drm/nouveau/include/nvif/os.h index a2eaf3929ac3..4a1123b81fee 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/os.h +++ b/drivers/gpu/drm/nouveau/include/nvif/os.h @@ -30,7 +30,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/gpu/drm/radeon/atom.c b/drivers/gpu/drm/radeon/atom.c index 5bc3e6b41c34..b31125eb9a65 100644 --- a/drivers/gpu/drm/radeon/atom.c +++ b/drivers/gpu/drm/radeon/atom.c @@ -27,7 +27,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/gpu/drm/udl/udl_transfer.c b/drivers/gpu/drm/udl/udl_transfer.c index 5ff1037a3453..62224992988f 100644 --- a/drivers/gpu/drm/udl/udl_transfer.c +++ b/drivers/gpu/drm/udl/udl_transfer.c @@ -7,7 +7,7 @@ * Copyright (C) 2009 Bernie Thompson */ -#include +#include #include "udl_drv.h" #include "udl_proto.h" diff --git a/drivers/greybus/es2.c b/drivers/greybus/es2.c index 69e46b1dff1f..7630a36ecf81 100644 --- a/drivers/greybus/es2.c +++ b/drivers/greybus/es2.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include "arpc.h" #include "greybus_trace.h" diff --git a/drivers/greybus/gb-beagleplay.c b/drivers/greybus/gb-beagleplay.c index 3a1ade84737c..473ac3f2d382 100644 --- a/drivers/greybus/gb-beagleplay.c +++ b/drivers/greybus/gb-beagleplay.c @@ -6,7 +6,7 @@ * Copyright (c) 2023 BeagleBoard.org Foundation */ -#include +#include #include #include #include diff --git a/drivers/hid/hid-alps.c b/drivers/hid/hid-alps.c index 669d769ea1dc..ba00f6e6324b 100644 --- a/drivers/hid/hid-alps.c +++ b/drivers/hid/hid-alps.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include "hid-ids.h" /* ALPS Device Product ID */ diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 30de92d0bf0f..612ee6ddfc8d 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/hid/hid-generic.c b/drivers/hid/hid-generic.c index f9db991d3c5a..d2439399fb35 100644 --- a/drivers/hid/hid-generic.c +++ b/drivers/hid/hid-generic.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/hid/hid-goodix-spi.c b/drivers/hid/hid-goodix-spi.c index 0e59663814dd..0f87bf9c67cf 100644 --- a/drivers/hid/hid-goodix-spi.c +++ b/drivers/hid/hid-goodix-spi.c @@ -4,7 +4,7 @@ * * Copyright (C) 2024 Godix, Inc. */ -#include +#include #include #include #include diff --git a/drivers/hid/hid-google-hammer.c b/drivers/hid/hid-google-hammer.c index 4e79fafeeafa..22683ec819aa 100644 --- a/drivers/hid/hid-google-hammer.c +++ b/drivers/hid/hid-google-hammer.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include "hid-ids.h" #include "hid-vivaldi-common.h" diff --git a/drivers/hid/hid-kye.c b/drivers/hid/hid-kye.c index 32344331282f..bd96bfa7af70 100644 --- a/drivers/hid/hid-kye.c +++ b/drivers/hid/hid-kye.c @@ -8,7 +8,7 @@ * Copyright (c) 2023 David Yang */ -#include +#include #include #include #include diff --git a/drivers/hid/hid-letsketch.c b/drivers/hid/hid-letsketch.c index 229820fda960..8602c63ed9c6 100644 --- a/drivers/hid/hid-letsketch.c +++ b/drivers/hid/hid-letsketch.c @@ -41,7 +41,7 @@ #include #include -#include +#include #include "hid-ids.h" diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c index d9580bc3e19a..34fa71ceec2b 100644 --- a/drivers/hid/hid-logitech-dj.c +++ b/drivers/hid/hid-logitech-dj.c @@ -13,7 +13,7 @@ #include #include #include /* For to_usb_interface for kvm extra intf check */ -#include +#include #include "hid-ids.h" #define DJ_MAX_PAIRED_DEVICES 7 diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index 0e33fa0eb8db..cf7a6986cf20 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include "usbhid/usbhid.h" #include "hid-ids.h" diff --git a/drivers/hid/hid-nintendo.c b/drivers/hid/hid-nintendo.c index 58cd0506e431..55153a2f7988 100644 --- a/drivers/hid/hid-nintendo.c +++ b/drivers/hid/hid-nintendo.c @@ -29,7 +29,7 @@ */ #include "hid-ids.h" -#include +#include #include #include #include diff --git a/drivers/hid/hid-playstation.c b/drivers/hid/hid-playstation.c index 0d90d7ee693c..1468fb11e39d 100644 --- a/drivers/hid/hid-playstation.c +++ b/drivers/hid/hid-playstation.c @@ -15,7 +15,7 @@ #include #include -#include +#include #include "hid-ids.h" diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c index df29c614e490..d2486f3734f0 100644 --- a/drivers/hid/hid-sony.c +++ b/drivers/hid/hid-sony.c @@ -40,7 +40,7 @@ #include #include #include -#include +#include #include "hid-ids.h" diff --git a/drivers/hid/hid-uclogic-params.c b/drivers/hid/hid-uclogic-params.c index 87fd4eb76c70..ef26c7defcf6 100644 --- a/drivers/hid/hid-uclogic-params.c +++ b/drivers/hid/hid-uclogic-params.c @@ -19,7 +19,7 @@ #include "hid-ids.h" #include #include -#include +#include /** * uclogic_params_pen_inrange_to_str() - Convert a pen in-range reporting type diff --git a/drivers/hid/hid-uclogic-rdesc.c b/drivers/hid/hid-uclogic-rdesc.c index 964d17e08f26..9b9cbc2aae36 100644 --- a/drivers/hid/hid-uclogic-rdesc.c +++ b/drivers/hid/hid-uclogic-rdesc.c @@ -16,7 +16,7 @@ #include "hid-uclogic-rdesc.h" #include -#include +#include #include /* Fixed WP4030U report descriptor */ diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c index 2f8a9d3f1e86..be5d342d5d13 100644 --- a/drivers/hid/i2c-hid/i2c-hid-core.c +++ b/drivers/hid/i2c-hid/i2c-hid-core.c @@ -36,7 +36,7 @@ #include #include #include -#include +#include #include diff --git a/drivers/hid/surface-hid/surface_hid.c b/drivers/hid/surface-hid/surface_hid.c index 61e5814b0ad7..eae47e0d95ed 100644 --- a/drivers/hid/surface-hid/surface_hid.c +++ b/drivers/hid/surface-hid/surface_hid.c @@ -8,7 +8,7 @@ * Maximilian Luz */ -#include +#include #include #include #include diff --git a/drivers/hid/surface-hid/surface_hid_core.c b/drivers/hid/surface-hid/surface_hid_core.c index a3e9cceddfac..6690c24f28f0 100644 --- a/drivers/hid/surface-hid/surface_hid_core.c +++ b/drivers/hid/surface-hid/surface_hid_core.c @@ -7,7 +7,7 @@ * Copyright (C) 2019-2021 Maximilian Luz */ -#include +#include #include #include #include diff --git a/drivers/hid/surface-hid/surface_kbd.c b/drivers/hid/surface-hid/surface_kbd.c index 8c0cbb2deb11..383200d9121a 100644 --- a/drivers/hid/surface-hid/surface_kbd.c +++ b/drivers/hid/surface-hid/surface_kbd.c @@ -7,7 +7,7 @@ * Copyright (C) 2019-2021 Maximilian Luz */ -#include +#include #include #include #include diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c index cb687ea7325c..a9e85bdd4cc6 100644 --- a/drivers/hid/usbhid/hid-core.c +++ b/drivers/hid/usbhid/hid-core.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/hid/wacom.h b/drivers/hid/wacom.h index 77c5fb26cd14..6f1443999d1d 100644 --- a/drivers/hid/wacom.h +++ b/drivers/hid/wacom.h @@ -89,7 +89,7 @@ #include #include #include -#include +#include /* * Version Information diff --git a/drivers/hwmon/adt7310.c b/drivers/hwmon/adt7310.c index 25281739aa3b..6a834a37bc65 100644 --- a/drivers/hwmon/adt7310.c +++ b/drivers/hwmon/adt7310.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include "adt7x10.h" diff --git a/drivers/hwmon/aquacomputer_d5next.c b/drivers/hwmon/aquacomputer_d5next.c index 8e55cd2f46f5..34cac27e4dde 100644 --- a/drivers/hwmon/aquacomputer_d5next.c +++ b/drivers/hwmon/aquacomputer_d5next.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #define USB_VENDOR_ID_AQUACOMPUTER 0x0c70 #define USB_PRODUCT_ID_AQUAERO 0xf001 diff --git a/drivers/hwmon/asus-ec-sensors.c b/drivers/hwmon/asus-ec-sensors.c index ee396f21fac5..9555366aeaf0 100644 --- a/drivers/hwmon/asus-ec-sensors.c +++ b/drivers/hwmon/asus-ec-sensors.c @@ -34,7 +34,7 @@ #include #include -#include +#include static char *mutex_path_override; diff --git a/drivers/hwmon/asus_rog_ryujin.c b/drivers/hwmon/asus_rog_ryujin.c index f8b20346a995..e5e93a20723c 100644 --- a/drivers/hwmon/asus_rog_ryujin.c +++ b/drivers/hwmon/asus_rog_ryujin.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #define DRIVER_NAME "asus_rog_ryujin" diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c index f9b3a3030f13..f5bdf842040e 100644 --- a/drivers/hwmon/dell-smm-hwmon.c +++ b/drivers/hwmon/dell-smm-hwmon.c @@ -38,7 +38,7 @@ #include #include -#include +#include #define I8K_SMM_FN_STATUS 0x0025 #define I8K_SMM_POWER_STATUS 0x0069 diff --git a/drivers/hwmon/gigabyte_waterforce.c b/drivers/hwmon/gigabyte_waterforce.c index 8129d7b3ceaf..27487e215bdd 100644 --- a/drivers/hwmon/gigabyte_waterforce.c +++ b/drivers/hwmon/gigabyte_waterforce.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #define DRIVER_NAME "gigabyte_waterforce" diff --git a/drivers/hwmon/nzxt-kraken2.c b/drivers/hwmon/nzxt-kraken2.c index 7caf387eb144..ed38645a1dc2 100644 --- a/drivers/hwmon/nzxt-kraken2.c +++ b/drivers/hwmon/nzxt-kraken2.c @@ -9,7 +9,7 @@ * Copyright 2019-2021 Jonas Malaco */ -#include +#include #include #include #include diff --git a/drivers/hwmon/nzxt-kraken3.c b/drivers/hwmon/nzxt-kraken3.c index 00f3ac90a290..d00409bcab93 100644 --- a/drivers/hwmon/nzxt-kraken3.c +++ b/drivers/hwmon/nzxt-kraken3.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #define USB_VENDOR_ID_NZXT 0x1e71 #define USB_PRODUCT_ID_X53 0x2007 diff --git a/drivers/hwmon/nzxt-smart2.c b/drivers/hwmon/nzxt-smart2.c index df6fa72a6b59..c2d1173f42fe 100644 --- a/drivers/hwmon/nzxt-smart2.c +++ b/drivers/hwmon/nzxt-smart2.c @@ -14,7 +14,7 @@ #include #include -#include +#include /* * The device has only 3 fan channels/connectors. But all HID reports have diff --git a/drivers/hwmon/occ/common.c b/drivers/hwmon/occ/common.c index dd690f700d49..9486db249c64 100644 --- a/drivers/hwmon/occ/common.c +++ b/drivers/hwmon/occ/common.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include "common.h" diff --git a/drivers/hwmon/occ/p8_i2c.c b/drivers/hwmon/occ/p8_i2c.c index 31159606cec7..5817a099c622 100644 --- a/drivers/hwmon/occ/p8_i2c.c +++ b/drivers/hwmon/occ/p8_i2c.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include "common.h" diff --git a/drivers/i2c/busses/i2c-nvidia-gpu.c b/drivers/i2c/busses/i2c-nvidia-gpu.c index 9bcaa29a7191..541d808d62d0 100644 --- a/drivers/i2c/busses/i2c-nvidia-gpu.c +++ b/drivers/i2c/busses/i2c-nvidia-gpu.c @@ -16,7 +16,7 @@ #include #include -#include +#include #include "i2c-ccgx-ucsi.h" diff --git a/drivers/iio/accel/adxl355_core.c b/drivers/iio/accel/adxl355_core.c index 0c9225d18fb2..eabaefa92f19 100644 --- a/drivers/iio/accel/adxl355_core.c +++ b/drivers/iio/accel/adxl355_core.c @@ -22,7 +22,7 @@ #include #include -#include +#include #include "adxl355.h" diff --git a/drivers/iio/accel/adxl367.c b/drivers/iio/accel/adxl367.c index 1c046e96aef9..e790a66d86c7 100644 --- a/drivers/iio/accel/adxl367.c +++ b/drivers/iio/accel/adxl367.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include "adxl367.h" diff --git a/drivers/iio/accel/adxl380.c b/drivers/iio/accel/adxl380.c index 98863e22bb6b..f80527d899be 100644 --- a/drivers/iio/accel/adxl380.c +++ b/drivers/iio/accel/adxl380.c @@ -13,7 +13,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/iio/accel/bma400_core.c b/drivers/iio/accel/bma400_core.c index 89db242f06e0..f4e43c3bbf1a 100644 --- a/drivers/iio/accel/bma400_core.c +++ b/drivers/iio/accel/bma400_core.c @@ -22,7 +22,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/iio/accel/bmi088-accel-core.c b/drivers/iio/accel/bmi088-accel-core.c index 469a1255d93c..fc1c1613d673 100644 --- a/drivers/iio/accel/bmi088-accel-core.c +++ b/drivers/iio/accel/bmi088-accel-core.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include "bmi088-accel.h" diff --git a/drivers/iio/accel/dmard09.c b/drivers/iio/accel/dmard09.c index 6644c1fec3e6..4ec70ca6910d 100644 --- a/drivers/iio/accel/dmard09.c +++ b/drivers/iio/accel/dmard09.c @@ -5,7 +5,7 @@ * Copyright (c) 2016, Jelle van der Waa */ -#include +#include #include #include #include diff --git a/drivers/iio/accel/sca3300.c b/drivers/iio/accel/sca3300.c index fca77d660625..ca0ce83e42b2 100644 --- a/drivers/iio/accel/sca3300.c +++ b/drivers/iio/accel/sca3300.c @@ -12,7 +12,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/iio/adc/ad4130.c b/drivers/iio/adc/ad4130.c index e134d6497827..de32cc9d18c5 100644 --- a/drivers/iio/adc/ad4130.c +++ b/drivers/iio/adc/ad4130.c @@ -23,7 +23,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/iio/adc/ad_sigma_delta.c b/drivers/iio/adc/ad_sigma_delta.c index e2bed2d648f2..ea4aabd3960a 100644 --- a/drivers/iio/adc/ad_sigma_delta.c +++ b/drivers/iio/adc/ad_sigma_delta.c @@ -23,7 +23,7 @@ #include #include -#include +#include #define AD_SD_COMM_CHAN_MASK 0x3 diff --git a/drivers/iio/adc/axp20x_adc.c b/drivers/iio/adc/axp20x_adc.c index d43c8d124a0c..6c1a5d1b0a83 100644 --- a/drivers/iio/adc/axp20x_adc.c +++ b/drivers/iio/adc/axp20x_adc.c @@ -5,7 +5,7 @@ * Quentin Schulz */ -#include +#include #include #include #include diff --git a/drivers/iio/adc/intel_mrfld_adc.c b/drivers/iio/adc/intel_mrfld_adc.c index 0590a126f321..30733252aa56 100644 --- a/drivers/iio/adc/intel_mrfld_adc.c +++ b/drivers/iio/adc/intel_mrfld_adc.c @@ -25,7 +25,7 @@ #include #include -#include +#include #define BCOVE_GPADCREQ 0xDC #define BCOVE_GPADCREQ_BUSY BIT(0) diff --git a/drivers/iio/adc/ltc2497.c b/drivers/iio/adc/ltc2497.c index f010b2fd1202..eb9d521e86e5 100644 --- a/drivers/iio/adc/ltc2497.c +++ b/drivers/iio/adc/ltc2497.c @@ -14,7 +14,7 @@ #include #include -#include +#include #include "ltc2497.h" diff --git a/drivers/iio/adc/max11100.c b/drivers/iio/adc/max11100.c index 2f07437caec3..520e37f75aac 100644 --- a/drivers/iio/adc/max11100.c +++ b/drivers/iio/adc/max11100.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/iio/adc/max11410.c b/drivers/iio/adc/max11410.c index 45368850b220..f0dc4b460903 100644 --- a/drivers/iio/adc/max11410.c +++ b/drivers/iio/adc/max11410.c @@ -15,7 +15,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/iio/adc/mcp3422.c b/drivers/iio/adc/mcp3422.c index 0778a8fb6866..50834fdcf738 100644 --- a/drivers/iio/adc/mcp3422.c +++ b/drivers/iio/adc/mcp3422.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/iio/adc/mcp3911.c b/drivers/iio/adc/mcp3911.c index d0e77971c5d3..b097f04172c8 100644 --- a/drivers/iio/adc/mcp3911.c +++ b/drivers/iio/adc/mcp3911.c @@ -23,7 +23,7 @@ #include #include -#include +#include #define MCP3911_REG_CHANNEL0 0x00 #define MCP3911_REG_CHANNEL1 0x03 diff --git a/drivers/iio/adc/mt6360-adc.c b/drivers/iio/adc/mt6360-adc.c index e2ec805e834f..83161e6d29b9 100644 --- a/drivers/iio/adc/mt6360-adc.c +++ b/drivers/iio/adc/mt6360-adc.c @@ -16,7 +16,7 @@ #include #include -#include +#include #define MT6360_REG_PMUCHGCTRL3 0x313 #define MT6360_REG_PMUADCCFG 0x356 diff --git a/drivers/iio/adc/pac1921.c b/drivers/iio/adc/pac1921.c index 4c2a1c07bc39..36e813d9c73f 100644 --- a/drivers/iio/adc/pac1921.c +++ b/drivers/iio/adc/pac1921.c @@ -5,7 +5,7 @@ * Copyright (C) 2024 Matteo Martelli */ -#include +#include #include #include #include diff --git a/drivers/iio/adc/pac1934.c b/drivers/iio/adc/pac1934.c index 8210728034d0..7ef249d83286 100644 --- a/drivers/iio/adc/pac1934.c +++ b/drivers/iio/adc/pac1934.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include /* * maximum accumulation time should be (17 * 60 * 1000) around 17 minutes@1024 sps diff --git a/drivers/iio/adc/qcom-spmi-rradc.c b/drivers/iio/adc/qcom-spmi-rradc.c index 6aa70b4629a7..63ebaf13ef19 100644 --- a/drivers/iio/adc/qcom-spmi-rradc.c +++ b/drivers/iio/adc/qcom-spmi-rradc.c @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/iio/adc/ti-ads124s08.c b/drivers/iio/adc/ti-ads124s08.c index 14941f384dad..425b48d8986f 100644 --- a/drivers/iio/adc/ti-ads124s08.c +++ b/drivers/iio/adc/ti-ads124s08.c @@ -21,7 +21,7 @@ #include #include -#include +#include /* Commands */ #define ADS124S08_CMD_NOP 0x00 diff --git a/drivers/iio/adc/ti-ads1298.c b/drivers/iio/adc/ti-ads1298.c index 13cb32125eef..0f9f75baaebb 100644 --- a/drivers/iio/adc/ti-ads1298.c +++ b/drivers/iio/adc/ti-ads1298.c @@ -23,7 +23,7 @@ #include #include -#include +#include /* Commands */ #define ADS1298_CMD_WAKEUP 0x02 diff --git a/drivers/iio/adc/ti-ads131e08.c b/drivers/iio/adc/ti-ads131e08.c index 91a427eb0882..31f1f229d97a 100644 --- a/drivers/iio/adc/ti-ads131e08.c +++ b/drivers/iio/adc/ti-ads131e08.c @@ -23,7 +23,7 @@ #include #include -#include +#include /* Commands */ #define ADS131E08_CMD_RESET 0x06 diff --git a/drivers/iio/adc/ti-tsc2046.c b/drivers/iio/adc/ti-tsc2046.c index 311d97001249..b56f2503f14c 100644 --- a/drivers/iio/adc/ti-tsc2046.c +++ b/drivers/iio/adc/ti-tsc2046.c @@ -13,7 +13,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/iio/addac/ad74115.c b/drivers/iio/addac/ad74115.c index 12dc43d487b4..3ee0dd5537c1 100644 --- a/drivers/iio/addac/ad74115.c +++ b/drivers/iio/addac/ad74115.c @@ -16,7 +16,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/iio/addac/ad74413r.c b/drivers/iio/addac/ad74413r.c index 2410d72da49b..69c586525d21 100644 --- a/drivers/iio/addac/ad74413r.c +++ b/drivers/iio/addac/ad74413r.c @@ -4,7 +4,7 @@ * Author: Cosmin Tanislav */ -#include +#include #include #include #include diff --git a/drivers/iio/amplifiers/ada4250.c b/drivers/iio/amplifiers/ada4250.c index 4b32d350dc5d..566f0e1c98a5 100644 --- a/drivers/iio/amplifiers/ada4250.c +++ b/drivers/iio/amplifiers/ada4250.c @@ -14,7 +14,7 @@ #include #include -#include +#include /* ADA4250 Register Map */ #define ADA4250_REG_GAIN_MUX 0x00 diff --git a/drivers/iio/cdc/ad7746.c b/drivers/iio/cdc/ad7746.c index d11bc3496dda..ba18dbbe0940 100644 --- a/drivers/iio/cdc/ad7746.c +++ b/drivers/iio/cdc/ad7746.c @@ -16,7 +16,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/iio/chemical/bme680_core.c b/drivers/iio/chemical/bme680_core.c index 5d2e750ca2b9..0b96534c6867 100644 --- a/drivers/iio/chemical/bme680_core.c +++ b/drivers/iio/chemical/bme680_core.c @@ -19,7 +19,7 @@ #include #include -#include +#include #include "bme680.h" diff --git a/drivers/iio/chemical/pms7003.c b/drivers/iio/chemical/pms7003.c index 43025866d5b7..d0bd94912e0a 100644 --- a/drivers/iio/chemical/pms7003.c +++ b/drivers/iio/chemical/pms7003.c @@ -5,7 +5,7 @@ * Copyright (c) Tomasz Duszynski */ -#include +#include #include #include #include diff --git a/drivers/iio/chemical/scd30_i2c.c b/drivers/iio/chemical/scd30_i2c.c index bd3b01ded246..b31dfaf52df9 100644 --- a/drivers/iio/chemical/scd30_i2c.c +++ b/drivers/iio/chemical/scd30_i2c.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include "scd30.h" diff --git a/drivers/iio/chemical/scd30_serial.c b/drivers/iio/chemical/scd30_serial.c index 2adb76dbb020..55044f07d5a3 100644 --- a/drivers/iio/chemical/scd30_serial.c +++ b/drivers/iio/chemical/scd30_serial.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include "scd30.h" diff --git a/drivers/iio/chemical/scd4x.c b/drivers/iio/chemical/scd4x.c index ca6b20270711..52cad54e8572 100644 --- a/drivers/iio/chemical/scd4x.c +++ b/drivers/iio/chemical/scd4x.c @@ -11,7 +11,7 @@ * https://www.sensirion.com/file/datasheet_scd4x */ -#include +#include #include #include #include diff --git a/drivers/iio/chemical/sps30_i2c.c b/drivers/iio/chemical/sps30_i2c.c index 5c31299813ec..1b21b6bcd0e7 100644 --- a/drivers/iio/chemical/sps30_i2c.c +++ b/drivers/iio/chemical/sps30_i2c.c @@ -6,7 +6,7 @@ * * I2C slave address: 0x69 */ -#include +#include #include #include #include diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c index c69399ac6657..1b4287991d00 100644 --- a/drivers/iio/common/st_sensors/st_sensors_core.c +++ b/drivers/iio/common/st_sensors/st_sensors_core.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include "st_sensors_core.h" diff --git a/drivers/iio/dac/ad3552r.c b/drivers/iio/dac/ad3552r.c index bd37d304ca70..7d61b2fe6624 100644 --- a/drivers/iio/dac/ad3552r.c +++ b/drivers/iio/dac/ad3552r.c @@ -5,7 +5,7 @@ * * Copyright 2021 Analog Devices Inc. */ -#include +#include #include #include #include diff --git a/drivers/iio/dac/ad5064.c b/drivers/iio/dac/ad5064.c index 7712dc6be608..905988724f27 100644 --- a/drivers/iio/dac/ad5064.c +++ b/drivers/iio/dac/ad5064.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/iio/dac/ad5446.c b/drivers/iio/dac/ad5446.c index 8103d2cd13f6..708629efc157 100644 --- a/drivers/iio/dac/ad5446.c +++ b/drivers/iio/dac/ad5446.c @@ -22,7 +22,7 @@ #include #include -#include +#include #define MODE_PWRDWN_1k 0x1 #define MODE_PWRDWN_100k 0x2 diff --git a/drivers/iio/dac/ad5449.c b/drivers/iio/dac/ad5449.c index 953fcfa2110b..1c996016756a 100644 --- a/drivers/iio/dac/ad5449.c +++ b/drivers/iio/dac/ad5449.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/iio/dac/ad5593r.c b/drivers/iio/dac/ad5593r.c index fae5e5a0e72f..62e1fbb9e910 100644 --- a/drivers/iio/dac/ad5593r.c +++ b/drivers/iio/dac/ad5593r.c @@ -13,7 +13,7 @@ #include #include -#include +#include #define AD5593R_MODE_CONF (0 << 4) #define AD5593R_MODE_DAC_WRITE (1 << 4) diff --git a/drivers/iio/dac/ad5624r_spi.c b/drivers/iio/dac/ad5624r_spi.c index 7e6f824de299..9304d0499bae 100644 --- a/drivers/iio/dac/ad5624r_spi.c +++ b/drivers/iio/dac/ad5624r_spi.c @@ -18,7 +18,7 @@ #include #include -#include +#include #include "ad5624r.h" diff --git a/drivers/iio/dac/ad5766.c b/drivers/iio/dac/ad5766.c index 899894523752..f658ac8086aa 100644 --- a/drivers/iio/dac/ad5766.c +++ b/drivers/iio/dac/ad5766.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #define AD5766_UPPER_WORD_SPI_MASK GENMASK(31, 16) #define AD5766_LOWER_WORD_SPI_MASK GENMASK(15, 0) diff --git a/drivers/iio/dac/ad7293.c b/drivers/iio/dac/ad7293.c index 06f05750d921..1d4032670482 100644 --- a/drivers/iio/dac/ad7293.c +++ b/drivers/iio/dac/ad7293.c @@ -16,7 +16,7 @@ #include #include -#include +#include #define AD7293_R1B BIT(16) #define AD7293_R2B BIT(17) diff --git a/drivers/iio/dac/ltc2632.c b/drivers/iio/dac/ltc2632.c index 3a3c4f4874e4..a4fb2509c950 100644 --- a/drivers/iio/dac/ltc2632.c +++ b/drivers/iio/dac/ltc2632.c @@ -13,7 +13,7 @@ #include #include -#include +#include #define LTC2632_CMD_WRITE_INPUT_N 0x0 #define LTC2632_CMD_UPDATE_DAC_N 0x1 diff --git a/drivers/iio/dac/mcp4821.c b/drivers/iio/dac/mcp4821.c index 782e8f6b7782..c1a59bbbba3c 100644 --- a/drivers/iio/dac/mcp4821.c +++ b/drivers/iio/dac/mcp4821.c @@ -23,7 +23,7 @@ #include #include -#include +#include #define MCP4821_ACTIVE_MODE BIT(12) #define MCP4802_SECOND_CHAN BIT(15) diff --git a/drivers/iio/frequency/adf4377.c b/drivers/iio/frequency/adf4377.c index 25fbc2cef1f7..45ceeb828d6b 100644 --- a/drivers/iio/frequency/adf4377.c +++ b/drivers/iio/frequency/adf4377.c @@ -20,7 +20,7 @@ #include #include -#include +#include /* ADF4377 REG0000 Map */ #define ADF4377_0000_SOFT_RESET_R_MSK BIT(7) diff --git a/drivers/iio/frequency/admv1013.c b/drivers/iio/frequency/admv1013.c index c0cd5d9844fe..8ef583680ad0 100644 --- a/drivers/iio/frequency/admv1013.c +++ b/drivers/iio/frequency/admv1013.c @@ -18,7 +18,7 @@ #include #include -#include +#include /* ADMV1013 Register Map */ #define ADMV1013_REG_SPI_CONTROL 0x00 diff --git a/drivers/iio/frequency/admv1014.c b/drivers/iio/frequency/admv1014.c index b46b73b89eb7..986b87a72577 100644 --- a/drivers/iio/frequency/admv1014.c +++ b/drivers/iio/frequency/admv1014.c @@ -19,7 +19,7 @@ #include #include -#include +#include /* ADMV1014 Register Map */ #define ADMV1014_REG_SPI_CONTROL 0x00 diff --git a/drivers/iio/frequency/admv4420.c b/drivers/iio/frequency/admv4420.c index 863ba8e98c95..3ae462b4f5c9 100644 --- a/drivers/iio/frequency/admv4420.c +++ b/drivers/iio/frequency/admv4420.c @@ -13,7 +13,7 @@ #include #include -#include +#include /* ADMV4420 Register Map */ #define ADMV4420_SPI_CONFIG_1 0x00 diff --git a/drivers/iio/frequency/adrf6780.c b/drivers/iio/frequency/adrf6780.c index 3f46032c9275..57ee908fc747 100644 --- a/drivers/iio/frequency/adrf6780.c +++ b/drivers/iio/frequency/adrf6780.c @@ -16,7 +16,7 @@ #include #include -#include +#include /* ADRF6780 Register Map */ #define ADRF6780_REG_CONTROL 0x00 diff --git a/drivers/iio/gyro/adis16130.c b/drivers/iio/gyro/adis16130.c index 33cde9e6fca5..2535e3c94037 100644 --- a/drivers/iio/gyro/adis16130.c +++ b/drivers/iio/gyro/adis16130.c @@ -12,7 +12,7 @@ #include -#include +#include #define ADIS16130_CON 0x0 #define ADIS16130_CON_RD (1 << 6) diff --git a/drivers/iio/health/afe4403.c b/drivers/iio/health/afe4403.c index 85637e8ac45f..13e1dd4dd62c 100644 --- a/drivers/iio/health/afe4403.c +++ b/drivers/iio/health/afe4403.c @@ -23,7 +23,7 @@ #include #include -#include +#include #include "afe440x.h" diff --git a/drivers/iio/humidity/ens210.c b/drivers/iio/humidity/ens210.c index e9167574203a..77418d97f30d 100644 --- a/drivers/iio/humidity/ens210.c +++ b/drivers/iio/humidity/ens210.c @@ -22,7 +22,7 @@ #include #include -#include +#include /* register definitions */ #define ENS210_REG_PART_ID 0x00 diff --git a/drivers/iio/humidity/hdc3020.c b/drivers/iio/humidity/hdc3020.c index a82dcc3da421..ffb25596d3a8 100644 --- a/drivers/iio/humidity/hdc3020.c +++ b/drivers/iio/humidity/hdc3020.c @@ -26,7 +26,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/iio/imu/adis.c b/drivers/iio/imu/adis.c index 876848b42f69..99410733c1ca 100644 --- a/drivers/iio/imu/adis.c +++ b/drivers/iio/imu/adis.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/iio/imu/bmi323/bmi323_core.c b/drivers/iio/imu/bmi323/bmi323_core.c index 671401ce80dc..beda8d2de53f 100644 --- a/drivers/iio/imu/bmi323/bmi323_core.c +++ b/drivers/iio/imu/bmi323/bmi323_core.c @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/iio/light/apds9306.c b/drivers/iio/light/apds9306.c index 66a063ea3db4..079e02be1005 100644 --- a/drivers/iio/light/apds9306.c +++ b/drivers/iio/light/apds9306.c @@ -28,7 +28,7 @@ #include #include -#include +#include #define APDS9306_MAIN_CTRL_REG 0x00 #define APDS9306_ALS_MEAS_RATE_REG 0x04 diff --git a/drivers/iio/light/gp2ap020a00f.c b/drivers/iio/light/gp2ap020a00f.c index b3f87dded040..81e718cdeae3 100644 --- a/drivers/iio/light/gp2ap020a00f.c +++ b/drivers/iio/light/gp2ap020a00f.c @@ -43,7 +43,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/iio/light/ltr390.c b/drivers/iio/light/ltr390.c index 7e58b50f3660..4f6975e63a8f 100644 --- a/drivers/iio/light/ltr390.c +++ b/drivers/iio/light/ltr390.c @@ -27,7 +27,7 @@ #include -#include +#include #define LTR390_MAIN_CTRL 0x00 #define LTR390_ALS_UVS_MEAS_RATE 0x04 diff --git a/drivers/iio/light/ltrf216a.c b/drivers/iio/light/ltrf216a.c index bc8444516689..37eecff571b9 100644 --- a/drivers/iio/light/ltrf216a.c +++ b/drivers/iio/light/ltrf216a.c @@ -26,7 +26,7 @@ #include -#include +#include #define LTRF216A_ALS_RESET_MASK BIT(4) #define LTRF216A_ALS_DATA_STATUS BIT(3) diff --git a/drivers/iio/light/si1133.c b/drivers/iio/light/si1133.c index eeff6cc792f2..44fa152dbd24 100644 --- a/drivers/iio/light/si1133.c +++ b/drivers/iio/light/si1133.c @@ -17,7 +17,7 @@ #include -#include +#include #define SI1133_REG_PART_ID 0x00 #define SI1133_REG_REV_ID 0x01 diff --git a/drivers/iio/light/tsl2591.c b/drivers/iio/light/tsl2591.c index 7bdbfe72f0f0..850c2465992f 100644 --- a/drivers/iio/light/tsl2591.c +++ b/drivers/iio/light/tsl2591.c @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/iio/light/zopt2201.c b/drivers/iio/light/zopt2201.c index 327f94e447af..604be60e92ac 100644 --- a/drivers/iio/light/zopt2201.c +++ b/drivers/iio/light/zopt2201.c @@ -19,7 +19,7 @@ #include #include -#include +#include #define ZOPT2201_DRV_NAME "zopt2201" diff --git a/drivers/iio/magnetometer/rm3100-core.c b/drivers/iio/magnetometer/rm3100-core.c index 0e03a772fa43..baab918b3825 100644 --- a/drivers/iio/magnetometer/rm3100-core.c +++ b/drivers/iio/magnetometer/rm3100-core.c @@ -22,7 +22,7 @@ #include #include -#include +#include #include "rm3100.h" diff --git a/drivers/iio/magnetometer/yamaha-yas530.c b/drivers/iio/magnetometer/yamaha-yas530.c index 7b041bb38693..65011a8598d3 100644 --- a/drivers/iio/magnetometer/yamaha-yas530.c +++ b/drivers/iio/magnetometer/yamaha-yas530.c @@ -43,7 +43,7 @@ #include #include -#include +#include /* Commonly used registers */ #define YAS5XX_DEVICE_ID 0x80 diff --git a/drivers/iio/pressure/bmp280-core.c b/drivers/iio/pressure/bmp280-core.c index da379230c837..a8b97b9b0461 100644 --- a/drivers/iio/pressure/bmp280-core.c +++ b/drivers/iio/pressure/bmp280-core.c @@ -46,7 +46,7 @@ #include #include -#include +#include #include "bmp280.h" diff --git a/drivers/iio/pressure/dlhl60d.c b/drivers/iio/pressure/dlhl60d.c index c1cea9d40424..e99e97ea6300 100644 --- a/drivers/iio/pressure/dlhl60d.c +++ b/drivers/iio/pressure/dlhl60d.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include /* Commands */ #define DLH_START_SINGLE 0xAA diff --git a/drivers/iio/pressure/hp206c.c b/drivers/iio/pressure/hp206c.c index 261af1562827..442740941933 100644 --- a/drivers/iio/pressure/hp206c.c +++ b/drivers/iio/pressure/hp206c.c @@ -18,7 +18,7 @@ #include #include -#include +#include /* I2C commands: */ #define HP206C_CMD_SOFT_RST 0x06 diff --git a/drivers/iio/pressure/hsc030pa.c b/drivers/iio/pressure/hsc030pa.c index 1682b90d4557..4e6f10eeabc3 100644 --- a/drivers/iio/pressure/hsc030pa.c +++ b/drivers/iio/pressure/hsc030pa.c @@ -28,7 +28,7 @@ #include #include -#include +#include #include "hsc030pa.h" diff --git a/drivers/iio/pressure/mprls0025pa.c b/drivers/iio/pressure/mprls0025pa.c index 33a15d4c642c..3b6145348c2e 100644 --- a/drivers/iio/pressure/mprls0025pa.c +++ b/drivers/iio/pressure/mprls0025pa.c @@ -26,7 +26,7 @@ #include -#include +#include #include "mprls0025pa.h" diff --git a/drivers/iio/pressure/ms5611_i2c.c b/drivers/iio/pressure/ms5611_i2c.c index 9a0f52321fcb..7e2cb8b6afa2 100644 --- a/drivers/iio/pressure/ms5611_i2c.c +++ b/drivers/iio/pressure/ms5611_i2c.c @@ -16,7 +16,7 @@ #include #include -#include +#include #include "ms5611.h" diff --git a/drivers/iio/pressure/ms5611_spi.c b/drivers/iio/pressure/ms5611_spi.c index cc9d1f68c53c..87181963a3e3 100644 --- a/drivers/iio/pressure/ms5611_spi.c +++ b/drivers/iio/pressure/ms5611_spi.c @@ -11,7 +11,7 @@ #include #include -#include +#include #include "ms5611.h" diff --git a/drivers/iio/pressure/sdp500.c b/drivers/iio/pressure/sdp500.c index 6ff32e3fa637..9828c73c4855 100644 --- a/drivers/iio/pressure/sdp500.c +++ b/drivers/iio/pressure/sdp500.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #define SDP500_CRC8_POLYNOMIAL 0x31 /* x8+x5+x4+1 (normalized to 0x31) */ #define SDP500_READ_SIZE 3 diff --git a/drivers/iio/pressure/st_pressure_core.c b/drivers/iio/pressure/st_pressure_core.c index 80176e3083af..597bf268ea51 100644 --- a/drivers/iio/pressure/st_pressure_core.c +++ b/drivers/iio/pressure/st_pressure_core.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include "st_pressure.h" diff --git a/drivers/iio/pressure/zpa2326.c b/drivers/iio/pressure/zpa2326.c index dcc87a9015e8..950f8dee2b26 100644 --- a/drivers/iio/pressure/zpa2326.c +++ b/drivers/iio/pressure/zpa2326.c @@ -64,7 +64,7 @@ #include #include #include -#include +#include #include "zpa2326.h" /* 200 ms should be enough for the longest conversion time in one-shot mode. */ diff --git a/drivers/iio/proximity/aw96103.c b/drivers/iio/proximity/aw96103.c index db9d78e961fd..707ba0a510aa 100644 --- a/drivers/iio/proximity/aw96103.c +++ b/drivers/iio/proximity/aw96103.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #define AW_DATA_PROCESS_FACTOR 1024 #define AW96103_CHIP_ID 0xa961 diff --git a/drivers/iio/proximity/cros_ec_mkbp_proximity.c b/drivers/iio/proximity/cros_ec_mkbp_proximity.c index cff57d851762..c25472b14d4b 100644 --- a/drivers/iio/proximity/cros_ec_mkbp_proximity.c +++ b/drivers/iio/proximity/cros_ec_mkbp_proximity.c @@ -21,7 +21,7 @@ #include #include -#include +#include struct cros_ec_mkbp_proximity_data { struct cros_ec_device *ec; diff --git a/drivers/iio/proximity/hx9023s.c b/drivers/iio/proximity/hx9023s.c index 8b9f84400e00..d8fb34060d3d 100644 --- a/drivers/iio/proximity/hx9023s.c +++ b/drivers/iio/proximity/hx9023s.c @@ -29,7 +29,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/iio/proximity/irsd200.c b/drivers/iio/proximity/irsd200.c index 323ac6dac90e..6e96b764fed8 100644 --- a/drivers/iio/proximity/irsd200.c +++ b/drivers/iio/proximity/irsd200.c @@ -5,7 +5,7 @@ * Copyright (C) 2023 Axis Communications AB */ -#include +#include #include #include #include diff --git a/drivers/iio/temperature/ltc2983.c b/drivers/iio/temperature/ltc2983.c index 21f2cfc55bf8..f8ea2219ab48 100644 --- a/drivers/iio/temperature/ltc2983.c +++ b/drivers/iio/temperature/ltc2983.c @@ -22,7 +22,7 @@ #include #include -#include +#include /* register map */ #define LTC2983_STATUS_REG 0x0000 diff --git a/drivers/iio/temperature/max31856.c b/drivers/iio/temperature/max31856.c index 8307aae2cb45..7ddec5cbe558 100644 --- a/drivers/iio/temperature/max31856.c +++ b/drivers/iio/temperature/max31856.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include /* * The MSB of the register value determines whether the following byte will diff --git a/drivers/iio/temperature/max31865.c b/drivers/iio/temperature/max31865.c index 29e23652ba5a..5a6fbe3c80e5 100644 --- a/drivers/iio/temperature/max31865.c +++ b/drivers/iio/temperature/max31865.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include /* * The MSB of the register value determines whether the following byte will diff --git a/drivers/input/joystick/adafruit-seesaw.c b/drivers/input/joystick/adafruit-seesaw.c index 5c775ca886a5..c248c15b849d 100644 --- a/drivers/input/joystick/adafruit-seesaw.c +++ b/drivers/input/joystick/adafruit-seesaw.c @@ -15,7 +15,7 @@ * - Add interrupt support */ -#include +#include #include #include #include diff --git a/drivers/input/joystick/adc-joystick.c b/drivers/input/joystick/adc-joystick.c index 02713e624df1..ff44f9978b71 100644 --- a/drivers/input/joystick/adc-joystick.c +++ b/drivers/input/joystick/adc-joystick.c @@ -11,7 +11,7 @@ #include #include -#include +#include struct adc_joystick_axis { u32 code; diff --git a/drivers/input/joystick/iforce/iforce-main.c b/drivers/input/joystick/iforce/iforce-main.c index 84b87526b7ba..55e6321adab9 100644 --- a/drivers/input/joystick/iforce/iforce-main.c +++ b/drivers/input/joystick/iforce/iforce-main.c @@ -6,7 +6,7 @@ * USB/RS232 I-Force joysticks and wheels. */ -#include +#include #include "iforce.h" MODULE_AUTHOR("Vojtech Pavlik , Johann Deneux "); diff --git a/drivers/input/joystick/iforce/iforce-packets.c b/drivers/input/joystick/iforce/iforce-packets.c index 763642c8cee9..08c889a72f6c 100644 --- a/drivers/input/joystick/iforce/iforce-packets.c +++ b/drivers/input/joystick/iforce/iforce-packets.c @@ -6,7 +6,7 @@ * USB/RS232 I-Force joysticks and wheels. */ -#include +#include #include "iforce.h" static struct { diff --git a/drivers/input/joystick/spaceball.c b/drivers/input/joystick/spaceball.c index 49101f1c858b..4f2221001a95 100644 --- a/drivers/input/joystick/spaceball.c +++ b/drivers/input/joystick/spaceball.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #define DRIVER_DESC "SpaceTec SpaceBall 2003/3003/4000 FLX driver" diff --git a/drivers/input/keyboard/applespi.c b/drivers/input/keyboard/applespi.c index 707c5a8ae736..2c993fa8306a 100644 --- a/drivers/input/keyboard/applespi.c +++ b/drivers/input/keyboard/applespi.c @@ -57,7 +57,7 @@ #include #include -#include +#include #define CREATE_TRACE_POINTS #include "applespi.h" diff --git a/drivers/input/keyboard/cros_ec_keyb.c b/drivers/input/keyboard/cros_ec_keyb.c index 12eb9df180ee..4c81b20ff6af 100644 --- a/drivers/input/keyboard/cros_ec_keyb.c +++ b/drivers/input/keyboard/cros_ec_keyb.c @@ -27,7 +27,7 @@ #include #include -#include +#include /** * struct cros_ec_keyb - Structure representing EC keyboard device diff --git a/drivers/input/misc/ims-pcu.c b/drivers/input/misc/ims-pcu.c index 058f3470b7ae..4215f9b9c2b0 100644 --- a/drivers/input/misc/ims-pcu.c +++ b/drivers/input/misc/ims-pcu.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #define IMS_PCU_KEYMAP_LEN 32 diff --git a/drivers/input/misc/iqs7222.c b/drivers/input/misc/iqs7222.c index 9ca5a743f19f..be80a31de9f8 100644 --- a/drivers/input/misc/iqs7222.c +++ b/drivers/input/misc/iqs7222.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #define IQS7222_PROD_NUM 0x00 #define IQS7222_PROD_NUM_A 840 diff --git a/drivers/input/mouse/cyapa_gen3.c b/drivers/input/mouse/cyapa_gen3.c index 60c83bc71d84..fc3fb954523b 100644 --- a/drivers/input/mouse/cyapa_gen3.c +++ b/drivers/input/mouse/cyapa_gen3.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include "cyapa.h" diff --git a/drivers/input/mouse/cyapa_gen5.c b/drivers/input/mouse/cyapa_gen5.c index 2e6bcb07257e..3b4439f10635 100644 --- a/drivers/input/mouse/cyapa_gen5.c +++ b/drivers/input/mouse/cyapa_gen5.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include "cyapa.h" diff --git a/drivers/input/mouse/cyapa_gen6.c b/drivers/input/mouse/cyapa_gen6.c index 4ffe08fee10c..570c06dcef78 100644 --- a/drivers/input/mouse/cyapa_gen6.c +++ b/drivers/input/mouse/cyapa_gen6.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include "cyapa.h" diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index ce96513b34f6..7521981274bd 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -36,7 +36,7 @@ #include #include #include -#include +#include #include "elan_i2c.h" diff --git a/drivers/input/mouse/elan_i2c_i2c.c b/drivers/input/mouse/elan_i2c_i2c.c index 13dc097eb6c6..15cf4463b64e 100644 --- a/drivers/input/mouse/elan_i2c_i2c.c +++ b/drivers/input/mouse/elan_i2c_i2c.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include "elan_i2c.h" diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index b4723ea395eb..79ad98cc1e79 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include "psmouse.h" #include "elantech.h" #include "elan_i2c.h" diff --git a/drivers/input/rmi4/rmi_f01.c b/drivers/input/rmi4/rmi_f01.c index cc1d4b424640..47be64284b25 100644 --- a/drivers/input/rmi4/rmi_f01.c +++ b/drivers/input/rmi4/rmi_f01.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include "rmi_driver.h" #define RMI_PRODUCT_ID_LENGTH 10 diff --git a/drivers/input/rmi4/rmi_f34.c b/drivers/input/rmi4/rmi_f34.c index 3b3ac71e53dc..e2468bc04a5c 100644 --- a/drivers/input/rmi4/rmi_f34.c +++ b/drivers/input/rmi4/rmi_f34.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include "rmi_driver.h" diff --git a/drivers/input/rmi4/rmi_f34v7.c b/drivers/input/rmi4/rmi_f34v7.c index 886557b01eba..fd49acc02071 100644 --- a/drivers/input/rmi4/rmi_f34v7.c +++ b/drivers/input/rmi4/rmi_f34v7.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include "rmi_driver.h" #include "rmi_f34.h" diff --git a/drivers/input/tablet/aiptek.c b/drivers/input/tablet/aiptek.c index 2d176fbab251..2b3fbb0455d5 100644 --- a/drivers/input/tablet/aiptek.c +++ b/drivers/input/tablet/aiptek.c @@ -63,7 +63,7 @@ #include #include #include -#include +#include /* * Aiptek status packet: diff --git a/drivers/input/tablet/kbtab.c b/drivers/input/tablet/kbtab.c index 38d36d25f6f4..794caa102909 100644 --- a/drivers/input/tablet/kbtab.c +++ b/drivers/input/tablet/kbtab.c @@ -3,7 +3,7 @@ #include #include #include -#include +#include /* * Pressure-threshold modules param code from Alex Perry diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c index f89c0dd15d8b..607f18af7010 100644 --- a/drivers/input/touchscreen/ads7846.c +++ b/drivers/input/touchscreen/ads7846.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include /* * This code has been heavily tested on a Nokia 770, and lightly diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index cfc92157701f..3ddabc5a2c99 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/input/touchscreen/chipone_icn8505.c b/drivers/input/touchscreen/chipone_icn8505.c index c1b4fc28fa8d..cde0e4789503 100644 --- a/drivers/input/touchscreen/chipone_icn8505.c +++ b/drivers/input/touchscreen/chipone_icn8505.c @@ -8,7 +8,7 @@ * Hans de Goede */ -#include +#include #include #include #include diff --git a/drivers/input/touchscreen/cy8ctma140.c b/drivers/input/touchscreen/cy8ctma140.c index 567c9dcaac91..2d4b6e343203 100644 --- a/drivers/input/touchscreen/cy8ctma140.c +++ b/drivers/input/touchscreen/cy8ctma140.c @@ -16,7 +16,7 @@ * same. */ -#include +#include #include #include #include diff --git a/drivers/input/touchscreen/cyttsp5.c b/drivers/input/touchscreen/cyttsp5.c index 3ca246ab192e..eafe5a9b8964 100644 --- a/drivers/input/touchscreen/cyttsp5.c +++ b/drivers/input/touchscreen/cyttsp5.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #define CYTTSP5_NAME "cyttsp5" #define CY_I2C_DATA_SIZE (2 * 256) diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c index e70415f189a5..fda49b2fe088 100644 --- a/drivers/input/touchscreen/edt-ft5x06.c +++ b/drivers/input/touchscreen/edt-ft5x06.c @@ -32,7 +32,7 @@ #include #include -#include +#include #define WORK_REGISTER_THRESHOLD 0x00 #define WORK_REGISTER_REPORT_RATE 0x08 diff --git a/drivers/input/touchscreen/eeti_ts.c b/drivers/input/touchscreen/eeti_ts.c index 48c69788b84a..87eb18977b71 100644 --- a/drivers/input/touchscreen/eeti_ts.c +++ b/drivers/input/touchscreen/eeti_ts.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include struct eeti_ts { struct i2c_client *client; diff --git a/drivers/input/touchscreen/elants_i2c.c b/drivers/input/touchscreen/elants_i2c.c index 365765d40e62..3fd170f75b4a 100644 --- a/drivers/input/touchscreen/elants_i2c.c +++ b/drivers/input/touchscreen/elants_i2c.c @@ -40,7 +40,7 @@ #include #include #include -#include +#include /* Device, Driver information */ #define DEVICE_NAME "elants_i2c" diff --git a/drivers/input/touchscreen/exc3000.c b/drivers/input/touchscreen/exc3000.c index 2e77cfb63f32..fdda8412b164 100644 --- a/drivers/input/touchscreen/exc3000.c +++ b/drivers/input/touchscreen/exc3000.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #define EXC3000_NUM_SLOTS 10 #define EXC3000_SLOTS_PER_FRAME 5 diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index 435714f18c23..a3e8a51c9144 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include "goodix.h" #define GOODIX_GPIO_INT_NAME "irq" diff --git a/drivers/input/touchscreen/goodix_berlin_core.c b/drivers/input/touchscreen/goodix_berlin_core.c index 0bfca897ce5a..3fc03cf0ca23 100644 --- a/drivers/input/touchscreen/goodix_berlin_core.c +++ b/drivers/input/touchscreen/goodix_berlin_core.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include "goodix_berlin.h" diff --git a/drivers/input/touchscreen/goodix_berlin_spi.c b/drivers/input/touchscreen/goodix_berlin_spi.c index a2d80e84391b..0662e87b8692 100644 --- a/drivers/input/touchscreen/goodix_berlin_spi.c +++ b/drivers/input/touchscreen/goodix_berlin_spi.c @@ -7,7 +7,7 @@ * * Based on goodix_ts_berlin driver. */ -#include +#include #include #include #include diff --git a/drivers/input/touchscreen/hideep.c b/drivers/input/touchscreen/hideep.c index 682abbbe5bd6..a73369e15dda 100644 --- a/drivers/input/touchscreen/hideep.c +++ b/drivers/input/touchscreen/hideep.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #define HIDEEP_TS_NAME "HiDeep Touchscreen" #define HIDEEP_I2C_NAME "hideep_ts" diff --git a/drivers/input/touchscreen/hycon-hy46xx.c b/drivers/input/touchscreen/hycon-hy46xx.c index 2e01d87977c1..b2ff7a45b908 100644 --- a/drivers/input/touchscreen/hycon-hy46xx.c +++ b/drivers/input/touchscreen/hycon-hy46xx.c @@ -15,7 +15,7 @@ #include #include -#include +#include #define HY46XX_CHKSUM_CODE 0x1 #define HY46XX_FINGER_NUM 0x2 diff --git a/drivers/input/touchscreen/hynitron_cstxxx.c b/drivers/input/touchscreen/hynitron_cstxxx.c index f72834859282..1d8ca90dcda6 100644 --- a/drivers/input/touchscreen/hynitron_cstxxx.c +++ b/drivers/input/touchscreen/hynitron_cstxxx.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include /* Per chip data */ struct hynitron_ts_chip_data { diff --git a/drivers/input/touchscreen/ili210x.c b/drivers/input/touchscreen/ili210x.c index 4573844c3395..260c83dc23a2 100644 --- a/drivers/input/touchscreen/ili210x.c +++ b/drivers/input/touchscreen/ili210x.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #define ILI2XXX_POLL_PERIOD 15 diff --git a/drivers/input/touchscreen/ilitek_ts_i2c.c b/drivers/input/touchscreen/ilitek_ts_i2c.c index 5569641f05f6..0dd632724a00 100644 --- a/drivers/input/touchscreen/ilitek_ts_i2c.c +++ b/drivers/input/touchscreen/ilitek_ts_i2c.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #define ILITEK_TS_NAME "ilitek_ts" diff --git a/drivers/input/touchscreen/iqs5xx.c b/drivers/input/touchscreen/iqs5xx.c index 4d226118f3cc..4ebd7565ae6e 100644 --- a/drivers/input/touchscreen/iqs5xx.c +++ b/drivers/input/touchscreen/iqs5xx.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #define IQS5XX_FW_FILE_LEN 64 #define IQS5XX_NUM_RETRIES 10 diff --git a/drivers/input/touchscreen/iqs7211.c b/drivers/input/touchscreen/iqs7211.c index f0a56cde899e..c5d447ee6f53 100644 --- a/drivers/input/touchscreen/iqs7211.c +++ b/drivers/input/touchscreen/iqs7211.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #define IQS7211_PROD_NUM 0x00 diff --git a/drivers/input/touchscreen/melfas_mip4.c b/drivers/input/touchscreen/melfas_mip4.c index b99a0e3c4084..a6946e3d8376 100644 --- a/drivers/input/touchscreen/melfas_mip4.c +++ b/drivers/input/touchscreen/melfas_mip4.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #define MIP4_DEVICE_NAME "mip4_ts" diff --git a/drivers/input/touchscreen/novatek-nvt-ts.c b/drivers/input/touchscreen/novatek-nvt-ts.c index 1a797e410a3f..0afee41ac9de 100644 --- a/drivers/input/touchscreen/novatek-nvt-ts.c +++ b/drivers/input/touchscreen/novatek-nvt-ts.c @@ -15,7 +15,7 @@ #include #include -#include +#include #define NVT_TS_TOUCH_START 0x00 #define NVT_TS_TOUCH_SIZE 6 diff --git a/drivers/input/touchscreen/pixcir_i2c_ts.c b/drivers/input/touchscreen/pixcir_i2c_ts.c index 4ede0687beb0..83bf27085ebc 100644 --- a/drivers/input/touchscreen/pixcir_i2c_ts.c +++ b/drivers/input/touchscreen/pixcir_i2c_ts.c @@ -5,7 +5,7 @@ * Copyright (C) 2010-2011 Pixcir, Inc. */ -#include +#include #include #include #include diff --git a/drivers/input/touchscreen/raydium_i2c_ts.c b/drivers/input/touchscreen/raydium_i2c_ts.c index 92d75057de2d..f975b53e8825 100644 --- a/drivers/input/touchscreen/raydium_i2c_ts.c +++ b/drivers/input/touchscreen/raydium_i2c_ts.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include /* Slave I2C mode */ #define RM_BOOT_BLDR 0x02 diff --git a/drivers/input/touchscreen/s6sy761.c b/drivers/input/touchscreen/s6sy761.c index a529217e748f..e1518a75a51b 100644 --- a/drivers/input/touchscreen/s6sy761.c +++ b/drivers/input/touchscreen/s6sy761.c @@ -4,7 +4,7 @@ // Copyright (c) 2017 Samsung Electronics Co., Ltd. // Copyright (c) 2017 Andi Shyti -#include +#include #include #include #include diff --git a/drivers/input/touchscreen/silead.c b/drivers/input/touchscreen/silead.c index 6a42b27c4599..5ccc96764742 100644 --- a/drivers/input/touchscreen/silead.c +++ b/drivers/input/touchscreen/silead.c @@ -24,7 +24,7 @@ #include #include -#include +#include #define SILEAD_TS_NAME "silead_ts" diff --git a/drivers/input/touchscreen/sis_i2c.c b/drivers/input/touchscreen/sis_i2c.c index 2023c6df416f..a625f2ad809d 100644 --- a/drivers/input/touchscreen/sis_i2c.c +++ b/drivers/input/touchscreen/sis_i2c.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #define SIS_I2C_NAME "sis_i2c_ts" diff --git a/drivers/input/touchscreen/surface3_spi.c b/drivers/input/touchscreen/surface3_spi.c index 7efbcd0fde4f..6074b7730e86 100644 --- a/drivers/input/touchscreen/surface3_spi.c +++ b/drivers/input/touchscreen/surface3_spi.c @@ -18,7 +18,7 @@ #include #include -#include +#include #define SURFACE3_PACKET_SIZE 264 diff --git a/drivers/input/touchscreen/wacom_i2c.c b/drivers/input/touchscreen/wacom_i2c.c index 486230985bf0..fd97a83f5664 100644 --- a/drivers/input/touchscreen/wacom_i2c.c +++ b/drivers/input/touchscreen/wacom_i2c.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include /* Bitmasks (for data[3]) */ #define WACOM_TIP_SWITCH BIT(0) diff --git a/drivers/input/touchscreen/wdt87xx_i2c.c b/drivers/input/touchscreen/wdt87xx_i2c.c index 698fc7e0ee7f..27941245e962 100644 --- a/drivers/input/touchscreen/wdt87xx_i2c.c +++ b/drivers/input/touchscreen/wdt87xx_i2c.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #define WDT87XX_NAME "wdt87xx_i2c" #define WDT87XX_FW_NAME "wdt87xx_fw.bin" diff --git a/drivers/input/touchscreen/zet6223.c b/drivers/input/touchscreen/zet6223.c index 27333fded9a9..943634ba9cd9 100644 --- a/drivers/input/touchscreen/zet6223.c +++ b/drivers/input/touchscreen/zet6223.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #define ZET6223_MAX_FINGERS 16 #define ZET6223_MAX_PKT_SIZE (3 + 4 * ZET6223_MAX_FINGERS) diff --git a/drivers/input/touchscreen/zforce_ts.c b/drivers/input/touchscreen/zforce_ts.c index 4b8c4ebfff96..df42fdf36ae3 100644 --- a/drivers/input/touchscreen/zforce_ts.c +++ b/drivers/input/touchscreen/zforce_ts.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #define WAIT_TIMEOUT msecs_to_jiffies(1000) diff --git a/drivers/isdn/hardware/mISDN/avmfritz.c b/drivers/isdn/hardware/mISDN/avmfritz.c index 509b362d6465..044e4961376c 100644 --- a/drivers/isdn/hardware/mISDN/avmfritz.c +++ b/drivers/isdn/hardware/mISDN/avmfritz.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include "ipac.h" diff --git a/drivers/leds/rgb/leds-mt6370-rgb.c b/drivers/leds/rgb/leds-mt6370-rgb.c index 359ef00498b4..10a0b5b45227 100644 --- a/drivers/leds/rgb/leds-mt6370-rgb.c +++ b/drivers/leds/rgb/leds-mt6370-rgb.c @@ -21,7 +21,7 @@ #include #include -#include +#include enum { MT6370_LED_ISNK1 = 0, diff --git a/drivers/macintosh/adb-iop.c b/drivers/macintosh/adb-iop.c index 2633bc254935..126dd1cfba8e 100644 --- a/drivers/macintosh/adb-iop.c +++ b/drivers/macintosh/adb-iop.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 5228b03b6fe0..1ae2c71bb383 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/md/dm-vdo/murmurhash3.c b/drivers/md/dm-vdo/murmurhash3.c index 3a989efae142..13008b089206 100644 --- a/drivers/md/dm-vdo/murmurhash3.c +++ b/drivers/md/dm-vdo/murmurhash3.c @@ -8,7 +8,7 @@ #include "murmurhash3.h" -#include +#include static inline u64 rotl64(u64 x, s8 r) { diff --git a/drivers/md/dm-vdo/numeric.h b/drivers/md/dm-vdo/numeric.h index dc8c400b21d2..f568dc59e6f1 100644 --- a/drivers/md/dm-vdo/numeric.h +++ b/drivers/md/dm-vdo/numeric.h @@ -6,7 +6,7 @@ #ifndef UDS_NUMERIC_H #define UDS_NUMERIC_H -#include +#include #include #include diff --git a/drivers/media/dvb-frontends/mxl5xx.c b/drivers/media/dvb-frontends/mxl5xx.c index 91e9c378397c..930da176e5bf 100644 --- a/drivers/media/dvb-frontends/mxl5xx.c +++ b/drivers/media/dvb-frontends/mxl5xx.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include "mxl5xx.h" diff --git a/drivers/media/i2c/ccs/ccs-reg-access.c b/drivers/media/i2c/ccs/ccs-reg-access.c index ed79075505e6..a696a0ec8ff5 100644 --- a/drivers/media/i2c/ccs/ccs-reg-access.c +++ b/drivers/media/i2c/ccs/ccs-reg-access.c @@ -9,7 +9,7 @@ * Contact: Sakari Ailus */ -#include +#include #include #include diff --git a/drivers/media/i2c/hi556.c b/drivers/media/i2c/hi556.c index b440f386f062..f31f9886c924 100644 --- a/drivers/media/i2c/hi556.c +++ b/drivers/media/i2c/hi556.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2019 Intel Corporation. -#include +#include #include #include #include diff --git a/drivers/media/i2c/hi846.c b/drivers/media/i2c/hi846.c index 52d9ca68a86c..172772decd3d 100644 --- a/drivers/media/i2c/hi846.c +++ b/drivers/media/i2c/hi846.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2021 Purism SPC -#include +#include #include #include #include diff --git a/drivers/media/i2c/hi847.c b/drivers/media/i2c/hi847.c index 72c60747a839..546833f5b5f5 100644 --- a/drivers/media/i2c/hi847.c +++ b/drivers/media/i2c/hi847.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2022 Intel Corporation. -#include +#include #include #include #include diff --git a/drivers/media/i2c/imx208.c b/drivers/media/i2c/imx208.c index 639e05340dbb..2184c90f7864 100644 --- a/drivers/media/i2c/imx208.c +++ b/drivers/media/i2c/imx208.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include #define IMX208_REG_MODE_SELECT 0x0100 #define IMX208_MODE_STANDBY 0x00 diff --git a/drivers/media/i2c/imx258.c b/drivers/media/i2c/imx258.c index 1a99eaaff21a..9e30fce1f223 100644 --- a/drivers/media/i2c/imx258.c +++ b/drivers/media/i2c/imx258.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #define IMX258_REG_MODE_SELECT CCI_REG8(0x0100) #define IMX258_MODE_STANDBY 0x00 diff --git a/drivers/media/i2c/imx290.c b/drivers/media/i2c/imx290.c index 4150e6e4b9a6..458905dfb3e1 100644 --- a/drivers/media/i2c/imx290.c +++ b/drivers/media/i2c/imx290.c @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/media/i2c/imx319.c b/drivers/media/i2c/imx319.c index 8fe3933f3146..dd1b4ff983dc 100644 --- a/drivers/media/i2c/imx319.c +++ b/drivers/media/i2c/imx319.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (C) 2018 Intel Corporation -#include +#include #include #include #include diff --git a/drivers/media/i2c/imx334.c b/drivers/media/i2c/imx334.c index 40863d87d341..a544fc3df39c 100644 --- a/drivers/media/i2c/imx334.c +++ b/drivers/media/i2c/imx334.c @@ -4,7 +4,7 @@ * * Copyright (C) 2021 Intel Corporation */ -#include +#include #include #include diff --git a/drivers/media/i2c/imx335.c b/drivers/media/i2c/imx335.c index 54a1de53d497..fcfd1d851bd4 100644 --- a/drivers/media/i2c/imx335.c +++ b/drivers/media/i2c/imx335.c @@ -4,7 +4,7 @@ * * Copyright (C) 2021 Intel Corporation */ -#include +#include #include #include diff --git a/drivers/media/i2c/imx355.c b/drivers/media/i2c/imx355.c index 0dd25eeea60b..b2dce67c0b6b 100644 --- a/drivers/media/i2c/imx355.c +++ b/drivers/media/i2c/imx355.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (C) 2018 Intel Corporation -#include +#include #include #include #include diff --git a/drivers/media/i2c/imx412.c b/drivers/media/i2c/imx412.c index 7d1f7af0a9df..0bfe3046fcc8 100644 --- a/drivers/media/i2c/imx412.c +++ b/drivers/media/i2c/imx412.c @@ -4,7 +4,7 @@ * * Copyright (C) 2021 Intel Corporation */ -#include +#include #include #include diff --git a/drivers/media/i2c/ir-kbd-i2c.c b/drivers/media/i2c/ir-kbd-i2c.c index b37a2aaf8ac0..c84e1e0e6109 100644 --- a/drivers/media/i2c/ir-kbd-i2c.c +++ b/drivers/media/i2c/ir-kbd-i2c.c @@ -35,7 +35,7 @@ * Copyright (C) 2011 Andy Walls */ -#include +#include #include #include #include diff --git a/drivers/media/i2c/og01a1b.c b/drivers/media/i2c/og01a1b.c index e906435fc49a..78d5d406e4b7 100644 --- a/drivers/media/i2c/og01a1b.c +++ b/drivers/media/i2c/og01a1b.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2022 Intel Corporation. -#include +#include #include #include #include diff --git a/drivers/media/i2c/ov01a10.c b/drivers/media/i2c/ov01a10.c index 5606437f37d0..0b9fb1ddbe59 100644 --- a/drivers/media/i2c/ov01a10.c +++ b/drivers/media/i2c/ov01a10.c @@ -3,7 +3,7 @@ * Copyright (c) 2023 Intel Corporation. */ -#include +#include #include #include diff --git a/drivers/media/i2c/ov08x40.c b/drivers/media/i2c/ov08x40.c index 48df077522ad..7ead3c720e0e 100644 --- a/drivers/media/i2c/ov08x40.c +++ b/drivers/media/i2c/ov08x40.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2022 Intel Corporation. -#include +#include #include #include #include diff --git a/drivers/media/i2c/ov2740.c b/drivers/media/i2c/ov2740.c index c48dbcde9877..bd0b2f0f0d45 100644 --- a/drivers/media/i2c/ov2740.c +++ b/drivers/media/i2c/ov2740.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2020 Intel Corporation. -#include +#include #include #include #include diff --git a/drivers/media/i2c/ov5670.c b/drivers/media/i2c/ov5670.c index 2aee85965cf7..f051045d340f 100644 --- a/drivers/media/i2c/ov5670.c +++ b/drivers/media/i2c/ov5670.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2017 Intel Corporation. -#include +#include #include #include #include diff --git a/drivers/media/i2c/ov5675.c b/drivers/media/i2c/ov5675.c index 5b5127f8953f..2833b14ee139 100644 --- a/drivers/media/i2c/ov5675.c +++ b/drivers/media/i2c/ov5675.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2019 Intel Corporation. -#include +#include #include #include #include diff --git a/drivers/media/i2c/ov8856.c b/drivers/media/i2c/ov8856.c index 6ffe10e57b5b..3b94338f55ed 100644 --- a/drivers/media/i2c/ov8856.c +++ b/drivers/media/i2c/ov8856.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2019 Intel Corporation. -#include +#include #include #include #include diff --git a/drivers/media/i2c/ov8858.c b/drivers/media/i2c/ov8858.c index 174c65f76886..326f50a5ab51 100644 --- a/drivers/media/i2c/ov8858.c +++ b/drivers/media/i2c/ov8858.c @@ -5,7 +5,7 @@ * Copyright (C) 2017 Fuzhou Rockchip Electronics Co., Ltd. */ -#include +#include #include #include diff --git a/drivers/media/i2c/ov9282.c b/drivers/media/i2c/ov9282.c index 251a4b534914..9f52af6f047f 100644 --- a/drivers/media/i2c/ov9282.c +++ b/drivers/media/i2c/ov9282.c @@ -4,7 +4,7 @@ * * Copyright (C) 2021 Intel Corporation */ -#include +#include #include #include diff --git a/drivers/media/i2c/ov9734.c b/drivers/media/i2c/ov9734.c index d99728597431..bf9e2adbff34 100644 --- a/drivers/media/i2c/ov9734.c +++ b/drivers/media/i2c/ov9734.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2020 Intel Corporation. -#include +#include #include #include #include diff --git a/drivers/media/i2c/thp7312.c b/drivers/media/i2c/thp7312.c index 75225ff5eff6..c77440ff098c 100644 --- a/drivers/media/i2c/thp7312.c +++ b/drivers/media/i2c/thp7312.c @@ -4,7 +4,7 @@ * Copyright (C) 2023 Ideas on Board Oy */ -#include +#include #include #include diff --git a/drivers/media/i2c/vgxy61.c b/drivers/media/i2c/vgxy61.c index 30378e962016..409d2d4ffb4b 100644 --- a/drivers/media/i2c/vgxy61.c +++ b/drivers/media/i2c/vgxy61.c @@ -16,7 +16,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/media/pci/bt8xx/bttv-cards.c b/drivers/media/pci/bt8xx/bttv-cards.c index 867c1308de23..365b04e5ae4d 100644 --- a/drivers/media/pci/bt8xx/bttv-cards.c +++ b/drivers/media/pci/bt8xx/bttv-cards.c @@ -24,7 +24,7 @@ #include #include -#include +#include #include #include "bttvp.h" diff --git a/drivers/media/platform/chips-media/coda/coda-jpeg.c b/drivers/media/platform/chips-media/coda/coda-jpeg.c index ba8f41002917..5746892658b1 100644 --- a/drivers/media/platform/chips-media/coda/coda-jpeg.c +++ b/drivers/media/platform/chips-media/coda/coda-jpeg.c @@ -5,7 +5,7 @@ * Copyright (C) 2014 Philipp Zabel, Pengutronix */ -#include +#include #include #include #include diff --git a/drivers/media/platform/renesas/rcar_jpu.c b/drivers/media/platform/renesas/rcar_jpu.c index fff349e45067..e50fe7525a73 100644 --- a/drivers/media/platform/renesas/rcar_jpu.c +++ b/drivers/media/platform/renesas/rcar_jpu.c @@ -14,7 +14,7 @@ * 3) V4L2_CID_JPEG_ACTIVE_MARKER */ -#include +#include #include #include #include diff --git a/drivers/media/platform/verisilicon/hantro_g1_mpeg2_dec.c b/drivers/media/platform/verisilicon/hantro_g1_mpeg2_dec.c index 9aea331e1a3c..e0d6bd0a6e44 100644 --- a/drivers/media/platform/verisilicon/hantro_g1_mpeg2_dec.c +++ b/drivers/media/platform/verisilicon/hantro_g1_mpeg2_dec.c @@ -5,7 +5,7 @@ * Copyright (C) 2018 Rockchip Electronics Co., Ltd. */ -#include +#include #include #include #include "hantro.h" diff --git a/drivers/media/platform/verisilicon/hantro_h1_jpeg_enc.c b/drivers/media/platform/verisilicon/hantro_h1_jpeg_enc.c index 12d69503d6ba..86cc1a07026f 100644 --- a/drivers/media/platform/verisilicon/hantro_h1_jpeg_enc.c +++ b/drivers/media/platform/verisilicon/hantro_h1_jpeg_enc.c @@ -5,7 +5,7 @@ * Copyright (C) 2018 Rockchip Electronics Co., Ltd. */ -#include +#include #include #include "hantro_jpeg.h" #include "hantro.h" diff --git a/drivers/media/platform/verisilicon/rockchip_vpu2_hw_jpeg_enc.c b/drivers/media/platform/verisilicon/rockchip_vpu2_hw_jpeg_enc.c index 8395c4d48dd0..61621b1be8a2 100644 --- a/drivers/media/platform/verisilicon/rockchip_vpu2_hw_jpeg_enc.c +++ b/drivers/media/platform/verisilicon/rockchip_vpu2_hw_jpeg_enc.c @@ -22,7 +22,7 @@ * zigzag, nor linear. */ -#include +#include #include #include "hantro_jpeg.h" #include "hantro.h" diff --git a/drivers/media/platform/verisilicon/rockchip_vpu2_hw_mpeg2_dec.c b/drivers/media/platform/verisilicon/rockchip_vpu2_hw_mpeg2_dec.c index b66737fab46b..50a3a3eeaa00 100644 --- a/drivers/media/platform/verisilicon/rockchip_vpu2_hw_mpeg2_dec.c +++ b/drivers/media/platform/verisilicon/rockchip_vpu2_hw_mpeg2_dec.c @@ -5,7 +5,7 @@ * Copyright (C) 2018 Rockchip Electronics Co., Ltd. */ -#include +#include #include #include #include "hantro.h" diff --git a/drivers/media/radio/radio-raremono.c b/drivers/media/radio/radio-raremono.c index c3180d53c282..64c7452c05b5 100644 --- a/drivers/media/radio/radio-raremono.c +++ b/drivers/media/radio/radio-raremono.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/media/radio/si470x/radio-si470x.h b/drivers/media/radio/si470x/radio-si470x.h index e57ab54a27fc..2915c0023fcd 100644 --- a/drivers/media/radio/si470x/radio-si470x.h +++ b/drivers/media/radio/si470x/radio-si470x.h @@ -26,7 +26,7 @@ #include #include #include -#include +#include diff --git a/drivers/media/rc/ir_toy.c b/drivers/media/rc/ir_toy.c index 69e630d85262..533faa117517 100644 --- a/drivers/media/rc/ir_toy.c +++ b/drivers/media/rc/ir_toy.c @@ -12,7 +12,7 @@ * Copyright (C) 2011 Peter Kooiman */ -#include +#include #include #include #include diff --git a/drivers/media/rc/redrat3.c b/drivers/media/rc/redrat3.c index 9f2947af33aa..d89a4cfe3c89 100644 --- a/drivers/media/rc/redrat3.c +++ b/drivers/media/rc/redrat3.c @@ -31,7 +31,7 @@ * -- */ -#include +#include #include #include #include diff --git a/drivers/media/tuners/xc2028.c b/drivers/media/tuners/xc2028.c index 352b8a3679b7..8e6638e5f688 100644 --- a/drivers/media/tuners/xc2028.c +++ b/drivers/media/tuners/xc2028.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include "tuner-i2c.h" #include "xc2028.h" #include "xc2028-types.h" diff --git a/drivers/media/tuners/xc4000.c b/drivers/media/tuners/xc4000.c index 29bc63021c5a..3cf54d776d36 100644 --- a/drivers/media/tuners/xc4000.c +++ b/drivers/media/tuners/xc4000.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include diff --git a/drivers/media/usb/dvb-usb/m920x.c b/drivers/media/usb/dvb-usb/m920x.c index c88a202daf5f..a2054b1b100f 100644 --- a/drivers/media/usb/dvb-usb/m920x.c +++ b/drivers/media/usb/dvb-usb/m920x.c @@ -17,7 +17,7 @@ #include #include "tuner-simple.h" -#include +#include /* debug */ static int dvb_usb_m920x_debug; diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c index f0febdc08c2d..0fac689c6350 100644 --- a/drivers/media/usb/uvc/uvc_driver.c +++ b/drivers/media/usb/uvc/uvc_driver.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/media/usb/uvc/uvc_video.c b/drivers/media/usb/uvc/uvc_video.c index cd9c29532fb0..e00f38dd07d9 100644 --- a/drivers/media/usb/uvc/uvc_video.c +++ b/drivers/media/usb/uvc/uvc_video.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include diff --git a/drivers/media/v4l2-core/v4l2-cci.c b/drivers/media/v4l2-core/v4l2-cci.c index 1ff94affbaf3..e9ecf4785946 100644 --- a/drivers/media/v4l2-core/v4l2-cci.c +++ b/drivers/media/v4l2-core/v4l2-cci.c @@ -12,7 +12,7 @@ #include #include -#include +#include #include diff --git a/drivers/media/v4l2-core/v4l2-jpeg.c b/drivers/media/v4l2-core/v4l2-jpeg.c index b8bece739d07..6e2647323522 100644 --- a/drivers/media/v4l2-core/v4l2-jpeg.c +++ b/drivers/media/v4l2-core/v4l2-jpeg.c @@ -9,7 +9,7 @@ * [1] https://www.w3.org/Graphics/JPEG/itu-t81.pdf */ -#include +#include #include #include #include diff --git a/drivers/memstick/host/rtsx_usb_ms.c b/drivers/memstick/host/rtsx_usb_ms.c index 246876ac713c..ffdd8de9ec5d 100644 --- a/drivers/memstick/host/rtsx_usb_ms.c +++ b/drivers/memstick/host/rtsx_usb_ms.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include struct rtsx_usb_ms { struct platform_device *pdev; diff --git a/drivers/mfd/gateworks-gsc.c b/drivers/mfd/gateworks-gsc.c index 6ca867b8f5f1..a3301502ce6a 100644 --- a/drivers/mfd/gateworks-gsc.c +++ b/drivers/mfd/gateworks-gsc.c @@ -20,7 +20,7 @@ #include #include -#include +#include /* * The GSC suffers from an errata where occasionally during diff --git a/drivers/mfd/iqs62x.c b/drivers/mfd/iqs62x.c index 1b465590567c..ee017617d1d1 100644 --- a/drivers/mfd/iqs62x.c +++ b/drivers/mfd/iqs62x.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #define IQS62X_PROD_NUM 0x00 diff --git a/drivers/mfd/ntxec.c b/drivers/mfd/ntxec.c index 4416cd37e539..08c68de0f01b 100644 --- a/drivers/mfd/ntxec.c +++ b/drivers/mfd/ntxec.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #define NTXEC_REG_VERSION 0x00 #define NTXEC_REG_POWEROFF 0x50 diff --git a/drivers/mfd/rave-sp.c b/drivers/mfd/rave-sp.c index ef326d6d566e..c1b78d127a26 100644 --- a/drivers/mfd/rave-sp.c +++ b/drivers/mfd/rave-sp.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include /* * UART protocol using following entities: diff --git a/drivers/mfd/si476x-cmd.c b/drivers/mfd/si476x-cmd.c index c9a0ec084aa8..3bb2decfebd3 100644 --- a/drivers/mfd/si476x-cmd.c +++ b/drivers/mfd/si476x-cmd.c @@ -20,7 +20,7 @@ #include -#include +#include #define msb(x) ((u8)((u16) x >> 8)) #define lsb(x) ((u8)((u16) x & 0x00FF)) diff --git a/drivers/misc/altera-stapl/altera.c b/drivers/misc/altera-stapl/altera.c index 587427b73914..bbe3967c3a4c 100644 --- a/drivers/misc/altera-stapl/altera.c +++ b/drivers/misc/altera-stapl/altera.c @@ -9,7 +9,7 @@ * Copyright (C) 2010,2011 Igor M. Liplianin */ -#include +#include #include #include #include diff --git a/drivers/misc/bcm-vk/bcm_vk_sg.c b/drivers/misc/bcm-vk/bcm_vk_sg.c index 2e9daaf3e492..d309216ee181 100644 --- a/drivers/misc/bcm-vk/bcm_vk_sg.c +++ b/drivers/misc/bcm-vk/bcm_vk_sg.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include diff --git a/drivers/misc/cardreader/rtsx_pcr.c b/drivers/misc/cardreader/rtsx_pcr.c index 117b3c24f910..be3d4e0e50cc 100644 --- a/drivers/misc/cardreader/rtsx_pcr.c +++ b/drivers/misc/cardreader/rtsx_pcr.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/misc/lattice-ecp3-config.c b/drivers/misc/lattice-ecp3-config.c index bac4df2e5231..93949df3bcff 100644 --- a/drivers/misc/lattice-ecp3-config.c +++ b/drivers/misc/lattice-ecp3-config.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #define FIRMWARE_NAME "lattice-ecp3.bit" diff --git a/drivers/misc/mei/platform-vsc.c b/drivers/misc/mei/platform-vsc.c index d02f6e881139..20a11b299bcd 100644 --- a/drivers/misc/mei/platform-vsc.c +++ b/drivers/misc/mei/platform-vsc.c @@ -19,7 +19,7 @@ #include #include -#include +#include #include "mei_dev.h" #include "vsc-tp.h" diff --git a/drivers/misc/mei/vsc-fw-loader.c b/drivers/misc/mei/vsc-fw-loader.c index 084d0205f97d..9f129bc641f6 100644 --- a/drivers/misc/mei/vsc-fw-loader.c +++ b/drivers/misc/mei/vsc-fw-loader.c @@ -15,7 +15,7 @@ #include #include -#include +#include #include "vsc-tp.h" diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c index 6490df54a6f5..cdbd2edf4b2e 100644 --- a/drivers/mmc/host/atmel-mci.c +++ b/drivers/mmc/host/atmel-mci.c @@ -37,7 +37,7 @@ #include #include -#include +#include #define ATMCI_MAX_NR_SLOTS 2 diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c index c9caa1ece7ef..8fee7052f2ef 100644 --- a/drivers/mmc/host/mmc_spi.c +++ b/drivers/mmc/host/mmc_spi.c @@ -26,7 +26,7 @@ #include #include -#include +#include /* NOTES: diff --git a/drivers/mmc/host/mvsdio.c b/drivers/mmc/host/mvsdio.c index af7f21888e27..d859b1a3ab71 100644 --- a/drivers/mmc/host/mvsdio.c +++ b/drivers/mmc/host/mvsdio.c @@ -22,7 +22,7 @@ #include #include -#include +#include #include "mvsdio.h" diff --git a/drivers/mmc/host/rtsx_pci_sdmmc.c b/drivers/mmc/host/rtsx_pci_sdmmc.c index 7dfe7c4e0077..20e79109be16 100644 --- a/drivers/mmc/host/rtsx_pci_sdmmc.c +++ b/drivers/mmc/host/rtsx_pci_sdmmc.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include struct realtek_pci_sdmmc { diff --git a/drivers/mmc/host/rtsx_usb_sdmmc.c b/drivers/mmc/host/rtsx_usb_sdmmc.c index ded9b6849e35..4e86f0a705b6 100644 --- a/drivers/mmc/host/rtsx_usb_sdmmc.c +++ b/drivers/mmc/host/rtsx_usb_sdmmc.c @@ -21,7 +21,7 @@ #include #include -#include +#include #if defined(CONFIG_LEDS_CLASS) || (defined(CONFIG_LEDS_CLASS_MODULE) && \ defined(CONFIG_MMC_REALTEK_USB_MODULE)) diff --git a/drivers/mtd/nand/raw/intel-nand-controller.c b/drivers/mtd/nand/raw/intel-nand-controller.c index 78174c463b36..f0f0522b2fa2 100644 --- a/drivers/mtd/nand/raw/intel-nand-controller.c +++ b/drivers/mtd/nand/raw/intel-nand-controller.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #define EBU_CLC 0x000 #define EBU_CLC_RST 0x00000000u diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c index 26648b72e691..aa113a5d88c8 100644 --- a/drivers/mtd/nand/raw/marvell_nand.c +++ b/drivers/mtd/nand/raw/marvell_nand.c @@ -84,7 +84,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 3e7526274e34..3bc56517fe7a 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -12,7 +12,7 @@ // Copyright (c) 2019 Martin Sperl // -#include +#include #include #include #include diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c index 65150e762007..8c5be8d1c519 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c @@ -8,7 +8,7 @@ #include "mcp251xfd.h" -#include +#include static const struct regmap_config mcp251xfd_regmap_crc; diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c index 83c18035b2a2..e684991fa391 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c @@ -12,7 +12,7 @@ // Copyright (c) 2019 Martin Sperl // -#include +#include #include "mcp251xfd.h" #include "mcp251xfd-ram.h" diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c index b1de8052a45c..747ae3e8a768 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c @@ -12,7 +12,7 @@ // Copyright (c) 2019 Martin Sperl // -#include +#include #include #include "mcp251xfd.h" diff --git a/drivers/net/can/usb/etas_es58x/es581_4.c b/drivers/net/can/usb/etas_es58x/es581_4.c index 4151b18fd045..1888ca1de7b6 100644 --- a/drivers/net/can/usb/etas_es58x/es581_4.c +++ b/drivers/net/can/usb/etas_es58x/es581_4.c @@ -9,7 +9,7 @@ * Copyright (c) 2020-2022 Vincent Mailhol */ -#include +#include #include #include diff --git a/drivers/net/can/usb/etas_es58x/es58x_core.c b/drivers/net/can/usb/etas_es58x/es58x_core.c index 5e3a72b7c469..71f24dc0a927 100644 --- a/drivers/net/can/usb/etas_es58x/es58x_core.c +++ b/drivers/net/can/usb/etas_es58x/es58x_core.c @@ -10,7 +10,7 @@ * Copyright (c) 2020-2022 Vincent Mailhol */ -#include +#include #include #include #include diff --git a/drivers/net/can/usb/etas_es58x/es58x_fd.c b/drivers/net/can/usb/etas_es58x/es58x_fd.c index fa87b0b78e3e..84ffa1839bac 100644 --- a/drivers/net/can/usb/etas_es58x/es58x_fd.c +++ b/drivers/net/can/usb/etas_es58x/es58x_fd.c @@ -11,7 +11,7 @@ * Copyright (c) 2020-2022 Vincent Mailhol */ -#include +#include #include #include diff --git a/drivers/net/can/usb/f81604.c b/drivers/net/can/usb/f81604.c index ec8cef7fd2d5..bc0c8903fe77 100644 --- a/drivers/net/can/usb/f81604.c +++ b/drivers/net/can/usb/f81604.c @@ -13,7 +13,7 @@ #include #include -#include +#include /* vendor and product id */ #define F81604_VENDOR_ID 0x2c42 diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c index 47619e9cb005..41c0a1c399bf 100644 --- a/drivers/net/can/usb/mcba_usb.c +++ b/drivers/net/can/usb/mcba_usb.c @@ -6,7 +6,7 @@ * This driver is inspired by the 4.6.2 version of net/can/usb/usb_8dev.c */ -#include +#include #include #include #include diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c index b211b6e283a2..c75df1755b3b 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb.c @@ -8,7 +8,7 @@ * * Many thanks to Klaus Hitschler */ -#include +#include #include #include diff --git a/drivers/net/dsa/b53/b53_spi.c b/drivers/net/dsa/b53/b53_spi.c index 308f15d3832e..467da057579e 100644 --- a/drivers/net/dsa/b53/b53_spi.c +++ b/drivers/net/dsa/b53/b53_spi.c @@ -16,7 +16,7 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#include +#include #include #include diff --git a/drivers/net/dsa/microchip/ksz_spi.c b/drivers/net/dsa/microchip/ksz_spi.c index e3e341431f09..1c6652f2b9fe 100644 --- a/drivers/net/dsa/microchip/ksz_spi.c +++ b/drivers/net/dsa/microchip/ksz_spi.c @@ -6,7 +6,7 @@ * Tristram Ha */ -#include +#include #include #include diff --git a/drivers/net/ethernet/adi/adin1110.c b/drivers/net/ethernet/adi/adin1110.c index 3431a7e62b0d..a98b3139606a 100644 --- a/drivers/net/ethernet/adi/adin1110.c +++ b/drivers/net/ethernet/adi/adin1110.c @@ -26,7 +26,7 @@ #include -#include +#include #define ADIN1110_PHY_ID 0x1 diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp_ethtool.c b/drivers/net/ethernet/broadcom/asp2/bcmasp_ethtool.c index 484fc2b5626f..ca163c8e3729 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp_ethtool.c +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp_ethtool.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #define pr_fmt(fmt) "bcmasp_ethtool: " fmt -#include +#include #include #include #include diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index c7e7dac057a3..f7be886570d8 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -37,7 +37,7 @@ #include #include -#include +#include #include "bcmgenet.h" diff --git a/drivers/net/ethernet/dec/tulip/de2104x.c b/drivers/net/ethernet/dec/tulip/de2104x.c index cd3dc4b89518..0a161a4db242 100644 --- a/drivers/net/ethernet/dec/tulip/de2104x.c +++ b/drivers/net/ethernet/dec/tulip/de2104x.c @@ -49,7 +49,7 @@ #include #include #include -#include +#include MODULE_AUTHOR("Jeff Garzik "); MODULE_DESCRIPTION("Intel/Digital 21040/1 series PCI Ethernet driver"); diff --git a/drivers/net/ethernet/dec/tulip/eeprom.c b/drivers/net/ethernet/dec/tulip/eeprom.c index d5657ff15e3c..71ff9e6db209 100644 --- a/drivers/net/ethernet/dec/tulip/eeprom.c +++ b/drivers/net/ethernet/dec/tulip/eeprom.c @@ -13,7 +13,7 @@ #include #include #include "tulip.h" -#include +#include diff --git a/drivers/net/ethernet/dec/tulip/tulip.h b/drivers/net/ethernet/dec/tulip/tulip.h index bd786dfbc066..5e010e1fa6f7 100644 --- a/drivers/net/ethernet/dec/tulip/tulip.h +++ b/drivers/net/ethernet/dec/tulip/tulip.h @@ -23,7 +23,7 @@ #include #include #include -#include +#include diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c index ecfad43df45a..27e01d780cd0 100644 --- a/drivers/net/ethernet/dec/tulip/tulip_core.c +++ b/drivers/net/ethernet/dec/tulip/tulip_core.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #ifdef CONFIG_SPARC diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index 11b14555802c..8f6b0bf48139 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) /* Copyright 2017-2019 NXP */ -#include +#include #include #include #include diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index aa139b67a55b..3a5bbda235cb 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -146,7 +146,7 @@ #include #include #include -#include +#include #define DRV_NAME "e100" diff --git a/drivers/net/ethernet/intel/ice/ice_fw_update.c b/drivers/net/ethernet/intel/ice/ice_fw_update.c index f81db6c107c8..2702a0da5c3e 100644 --- a/drivers/net/ethernet/intel/ice/ice_fw_update.c +++ b/drivers/net/ethernet/intel/ice/ice_fw_update.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2018-2019, Intel Corporation. */ -#include +#include #include #include #include diff --git a/drivers/net/ethernet/mediatek/mtk_wed_mcu.c b/drivers/net/ethernet/mediatek/mtk_wed_mcu.c index ea0884186d76..c06e5ad18b01 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed_mcu.c +++ b/drivers/net/ethernet/mediatek/mtk_wed_mcu.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include "mtk_wed_regs.h" #include "mtk_wed_wo.h" diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c b/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c index ef05ae8f5039..0072d612215e 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) Meta Platforms, Inc. and affiliates. */ -#include +#include #include #include #include diff --git a/drivers/net/ethernet/netronome/nfp/crypto/ipsec.c b/drivers/net/ethernet/netronome/nfp/crypto/ipsec.c index cc54faca2283..515069d5637b 100644 --- a/drivers/net/ethernet/netronome/nfp/crypto/ipsec.c +++ b/drivers/net/ethernet/netronome/nfp/crypto/ipsec.c @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c index 3f10c5365c80..7c2200b49ce4 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c @@ -15,7 +15,7 @@ * abstraction builds upon this BAR interface. */ -#include +#include #include #include #include diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c index a8286d0032d1..669f9f8fb507 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c @@ -9,7 +9,7 @@ * Rolf Neugebauer */ -#include +#include #include #include #include diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c index 508ae6b571ca..addf02c63b1a 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c @@ -9,7 +9,7 @@ * Rolf Neugebauer */ -#include +#include #include #include #include diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_hwinfo.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_hwinfo.c index f05dd34ab89f..cfa4db5d3f85 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_hwinfo.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_hwinfo.c @@ -15,7 +15,7 @@ */ #include -#include +#include #include #include #include diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c index df0234a338a8..0bd6477292a6 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c @@ -7,7 +7,7 @@ * Jason McMullan */ -#include +#include #include #include #include diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c index 2260c2403a83..68862ac062d2 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c @@ -10,7 +10,7 @@ * Francois H. Theron */ -#include +#include #include #include #include diff --git a/drivers/net/ethernet/packetengines/hamachi.c b/drivers/net/ethernet/packetengines/hamachi.c index 1cc001087193..a36d422b5173 100644 --- a/drivers/net/ethernet/packetengines/hamachi.c +++ b/drivers/net/ethernet/packetengines/hamachi.c @@ -163,7 +163,7 @@ static int tx_params[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1}; #include #include /* Processor type for cache alignment. */ #include -#include +#include #include static const char version[] = diff --git a/drivers/net/ethernet/packetengines/yellowfin.c b/drivers/net/ethernet/packetengines/yellowfin.c index 640ac01689fb..c0515dc63246 100644 --- a/drivers/net/ethernet/packetengines/yellowfin.c +++ b/drivers/net/ethernet/packetengines/yellowfin.c @@ -102,7 +102,7 @@ static int gx_fix; #include #include #include /* Processor type for cache alignment. */ -#include +#include #include /* These identify the driver base version and may not be removed. */ diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 305ec19ccef1..0cc9baaecb1b 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/net/ethernet/smsc/smsc9420.c b/drivers/net/ethernet/smsc/smsc9420.c index 15cb96c2506d..f30d4b17c7fb 100644 --- a/drivers/net/ethernet/smsc/smsc9420.c +++ b/drivers/net/ethernet/smsc/smsc9420.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include "smsc9420.h" #define DRV_NAME "smsc9420" diff --git a/drivers/net/ieee802154/cc2520.c b/drivers/net/ieee802154/cc2520.c index a94d8dd71aad..2b7034193a00 100644 --- a/drivers/net/ieee802154/cc2520.c +++ b/drivers/net/ieee802154/cc2520.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/net/mctp/mctp-i3c.c b/drivers/net/mctp/mctp-i3c.c index 8e989c157caa..1bc87a062686 100644 --- a/drivers/net/mctp/mctp-i3c.c +++ b/drivers/net/mctp/mctp-i3c.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/net/phy/air_en8811h.c b/drivers/net/phy/air_en8811h.c index 3cdc8c6b30b6..8d076b9609fd 100644 --- a/drivers/net/phy/air_en8811h.c +++ b/drivers/net/phy/air_en8811h.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #define EN8811H_PHY_ID 0x03a2a411 diff --git a/drivers/net/phy/aquantia/aquantia_firmware.c b/drivers/net/phy/aquantia/aquantia_firmware.c index dac6464b5fe2..dab3af80593f 100644 --- a/drivers/net/phy/aquantia/aquantia_firmware.c +++ b/drivers/net/phy/aquantia/aquantia_firmware.c @@ -6,7 +6,7 @@ #include #include -#include +#include #include "aquantia.h" diff --git a/drivers/net/phy/bcm-phy-ptp.c b/drivers/net/phy/bcm-phy-ptp.c index 874a1b64b115..208e8f561e06 100644 --- a/drivers/net/phy/bcm-phy-ptp.c +++ b/drivers/net/phy/bcm-phy-ptp.c @@ -4,7 +4,7 @@ * Copyright (C) 2022 Jonathan Lemon */ -#include +#include #include #include #include diff --git a/drivers/net/phy/mscc/mscc_ptp.c b/drivers/net/phy/mscc/mscc_ptp.c index c1ddae36a2ae..738a8822fcf0 100644 --- a/drivers/net/phy/mscc/mscc_ptp.c +++ b/drivers/net/phy/mscc/mscc_ptp.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include "mscc.h" #include "mscc_ptp.h" diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c index c33c3db3cc08..a940b9a67107 100644 --- a/drivers/net/ppp/ppp_async.c +++ b/drivers/net/ppp/ppp_async.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/net/ppp/ppp_deflate.c b/drivers/net/ppp/ppp_deflate.c index 4d2ff63f2ee2..d93aeacc0dba 100644 --- a/drivers/net/ppp/ppp_deflate.c +++ b/drivers/net/ppp/ppp_deflate.c @@ -16,7 +16,7 @@ #include #include -#include +#include /* * State for a Deflate (de)compressor. diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 4b2971e2bf48..453f8437afe8 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -44,7 +44,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/net/ppp/ppp_mppe.c b/drivers/net/ppp/ppp_mppe.c index 208f6e24f37c..bcc1eaedf58f 100644 --- a/drivers/net/ppp/ppp_mppe.c +++ b/drivers/net/ppp/ppp_mppe.c @@ -56,7 +56,7 @@ #include #include #include -#include +#include #include "ppp_mppe.h" diff --git a/drivers/net/ppp/ppp_synctty.c b/drivers/net/ppp/ppp_synctty.c index 45bf59ac8f57..644e99fc3623 100644 --- a/drivers/net/ppp/ppp_synctty.c +++ b/drivers/net/ppp/ppp_synctty.c @@ -43,7 +43,7 @@ #include #include #include -#include +#include #include #define PPP_VERSION "2.4.2" diff --git a/drivers/net/slip/slhc.c b/drivers/net/slip/slhc.c index 18df7ca66198..252cd757d3a2 100644 --- a/drivers/net/slip/slhc.c +++ b/drivers/net/slip/slhc.c @@ -77,7 +77,7 @@ #include #include #include -#include +#include static unsigned char *encode(unsigned char *cp, unsigned short n); static long decode(unsigned char **cpp); diff --git a/drivers/net/usb/net1080.c b/drivers/net/usb/net1080.c index b0c0c9dd6a02..5d4a1fd2b524 100644 --- a/drivers/net/usb/net1080.c +++ b/drivers/net/usb/net1080.c @@ -17,7 +17,7 @@ #include #include -#include +#include /* diff --git a/drivers/net/usb/sierra_net.c b/drivers/net/usb/sierra_net.c index 673d3aa83792..3d239b8d1a1b 100644 --- a/drivers/net/usb/sierra_net.c +++ b/drivers/net/usb/sierra_net.c @@ -30,7 +30,7 @@ static const char driver_name[] = "sierra_net"; #include #include #include -#include +#include #include #define SWI_USB_REQUEST_GET_FW_ATTR 0x06 diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c index abe41330fb69..4d88b02ffa79 100644 --- a/drivers/net/wireless/ath/ath5k/base.c +++ b/drivers/net/wireless/ath/ath5k/base.c @@ -59,7 +59,7 @@ #include #include -#include +#include #include #include "base.h" diff --git a/drivers/net/wireless/ath/ath5k/mac80211-ops.c b/drivers/net/wireless/ath/ath5k/mac80211-ops.c index eea4bda77608..d81b2ad0b095 100644 --- a/drivers/net/wireless/ath/ath5k/mac80211-ops.c +++ b/drivers/net/wireless/ath/ath5k/mac80211-ops.c @@ -44,7 +44,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include -#include +#include #include "ath5k.h" #include "base.h" diff --git a/drivers/net/wireless/ath/ath5k/pcu.c b/drivers/net/wireless/ath/ath5k/pcu.c index 3f4ce4e9c532..90e0859a8e50 100644 --- a/drivers/net/wireless/ath/ath5k/pcu.c +++ b/drivers/net/wireless/ath/ath5k/pcu.c @@ -24,7 +24,7 @@ * Protocol Control Unit Functions * \*********************************/ -#include +#include #include "ath5k.h" #include "reg.h" diff --git a/drivers/net/wireless/ath/ath5k/phy.c b/drivers/net/wireless/ath/ath5k/phy.c index 7ee4e1616f45..4825f9cb9cb8 100644 --- a/drivers/net/wireless/ath/ath5k/phy.c +++ b/drivers/net/wireless/ath/ath5k/phy.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include "ath5k.h" #include "reg.h" diff --git a/drivers/net/wireless/ath/ath5k/reset.c b/drivers/net/wireless/ath/ath5k/reset.c index 9fdb5283b39c..c67f163c0858 100644 --- a/drivers/net/wireless/ath/ath5k/reset.c +++ b/drivers/net/wireless/ath/ath5k/reset.c @@ -25,7 +25,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include +#include #include /* To determine if a card is pci-e */ #include diff --git a/drivers/net/wireless/ath/ath6kl/htc_mbox.c b/drivers/net/wireless/ath/ath6kl/htc_mbox.c index fb5144e2d86c..f8a94d764be6 100644 --- a/drivers/net/wireless/ath/ath6kl/htc_mbox.c +++ b/drivers/net/wireless/ath/ath6kl/htc_mbox.c @@ -21,7 +21,7 @@ #include "hif-ops.h" #include "trace.h" -#include +#include #define CALC_TXRX_PADDED_LEN(dev, len) (__ALIGN_MASK((len), (dev)->block_mask)) diff --git a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c index 944f46cdf34c..73c38a6b4880 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c @@ -14,7 +14,7 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#include +#include #include #include "hw.h" #include "ar9003_phy.h" diff --git a/drivers/net/wireless/ath/ath9k/debug.c b/drivers/net/wireless/ath/ath9k/debug.c index 51abc470125b..eff894958a73 100644 --- a/drivers/net/wireless/ath/ath9k/debug.c +++ b/drivers/net/wireless/ath/ath9k/debug.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include "ath9k.h" diff --git a/drivers/net/wireless/ath/ath9k/eeprom_4k.c b/drivers/net/wireless/ath/ath9k/eeprom_4k.c index 27b860b0c769..3e16cfe059f3 100644 --- a/drivers/net/wireless/ath/ath9k/eeprom_4k.c +++ b/drivers/net/wireless/ath/ath9k/eeprom_4k.c @@ -14,7 +14,7 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#include +#include #include "hw.h" #include "ar9002_phy.h" diff --git a/drivers/net/wireless/ath/ath9k/eeprom_9287.c b/drivers/net/wireless/ath/ath9k/eeprom_9287.c index d85472ee4d85..c139ac49ccf6 100644 --- a/drivers/net/wireless/ath/ath9k/eeprom_9287.c +++ b/drivers/net/wireless/ath/ath9k/eeprom_9287.c @@ -14,7 +14,7 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#include +#include #include "hw.h" #include "ar9002_phy.h" diff --git a/drivers/net/wireless/ath/ath9k/eeprom_def.c b/drivers/net/wireless/ath/ath9k/eeprom_def.c index 84b31caf8ca6..5ba467cb7425 100644 --- a/drivers/net/wireless/ath/ath9k/eeprom_def.c +++ b/drivers/net/wireless/ath/ath9k/eeprom_def.c @@ -14,7 +14,7 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#include +#include #include "hw.h" #include "ar9002_phy.h" diff --git a/drivers/net/wireless/ath/ath9k/hif_usb.c b/drivers/net/wireless/ath/ath9k/hif_usb.c index a3733c9b484e..7265766cddbd 100644 --- a/drivers/net/wireless/ath/ath9k/hif_usb.c +++ b/drivers/net/wireless/ath/ath9k/hif_usb.c @@ -14,7 +14,7 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#include +#include #include "htc.h" MODULE_FIRMWARE(HTC_7010_MODULE_FW); diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c index 04a4b9ea61c3..c3a6368bfc68 100644 --- a/drivers/net/wireless/ath/ath9k/hw.c +++ b/drivers/net/wireless/ath/ath9k/hw.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include "hw.h" #include "hw-ops.h" diff --git a/drivers/net/wireless/ath/carl9170/mac.c b/drivers/net/wireless/ath/carl9170/mac.c index 6cdbee5beb07..20ceed0dd4be 100644 --- a/drivers/net/wireless/ath/carl9170/mac.c +++ b/drivers/net/wireless/ath/carl9170/mac.c @@ -36,7 +36,7 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#include +#include #include "carl9170.h" #include "cmd.h" diff --git a/drivers/net/wireless/ath/hw.c b/drivers/net/wireless/ath/hw.c index 85955572a705..b301e6fbce6c 100644 --- a/drivers/net/wireless/ath/hw.c +++ b/drivers/net/wireless/ath/hw.c @@ -15,7 +15,7 @@ */ #include -#include +#include #include "ath.h" #include "reg.h" diff --git a/drivers/net/wireless/ath/key.c b/drivers/net/wireless/ath/key.c index 21a93fec284d..0ae436bd9b66 100644 --- a/drivers/net/wireless/ath/key.c +++ b/drivers/net/wireless/ath/key.c @@ -16,7 +16,7 @@ */ #include -#include +#include #include #include "ath.h" diff --git a/drivers/net/wireless/broadcom/b43/main.c b/drivers/net/wireless/broadcom/b43/main.c index 8e56dcf9309d..25b4ef9d3c9a 100644 --- a/drivers/net/wireless/broadcom/b43/main.c +++ b/drivers/net/wireless/broadcom/b43/main.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include "b43.h" #include "main.h" diff --git a/drivers/net/wireless/broadcom/b43legacy/main.c b/drivers/net/wireless/broadcom/b43legacy/main.c index 441d6440671b..2370a2e6a2e3 100644 --- a/drivers/net/wireless/broadcom/b43legacy/main.c +++ b/drivers/net/wireless/broadcom/b43legacy/main.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include "b43legacy.h" #include "main.h" diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.h index 9ca1b2aadcb5..eed439b84010 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.h @@ -7,7 +7,7 @@ #ifndef FWEH_H_ #define FWEH_H_ -#include +#include #include #include #include diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c index ce482a3877e9..5dee54819fbd 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index 1461dc453ac2..7b936668c1b6 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/xtlv.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/xtlv.c index 2f8908074303..08841b9a5b81 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/xtlv.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/xtlv.c @@ -3,7 +3,7 @@ * Copyright (c) 2019 Broadcom */ -#include +#include #include #include diff --git a/drivers/net/wireless/intel/iwlegacy/3945.c b/drivers/net/wireless/intel/iwlegacy/3945.c index e95800b77f6b..14d2331ee6cb 100644 --- a/drivers/net/wireless/intel/iwlegacy/3945.c +++ b/drivers/net/wireless/intel/iwlegacy/3945.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include "common.h" diff --git a/drivers/net/wireless/intel/iwlegacy/4965.c b/drivers/net/wireless/intel/iwlegacy/4965.c index c34729f576cd..b63e29590b04 100644 --- a/drivers/net/wireless/intel/iwlegacy/4965.c +++ b/drivers/net/wireless/intel/iwlegacy/4965.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include "common.h" #include "4965.h" diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/led.c b/drivers/net/wireless/intel/iwlwifi/dvm/led.c index 71f67a019cf6..5ca85d90a8d6 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/led.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/led.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include "iwl-io.h" #include "iwl-trans.h" #include "iwl-modparams.h" diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/rx.c b/drivers/net/wireless/intel/iwlwifi/dvm/rx.c index e9d2717362cf..7f67e602940c 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/rx.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include "iwl-trans.h" #include "iwl-io.h" diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c index 047c020f8efa..1a0b5f8d4339 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c @@ -4,7 +4,7 @@ * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2016-2017 Intel Deutschland GmbH */ -#include +#include #include #include #include "iwl-trans.h" diff --git a/drivers/net/wireless/marvell/libertas/cfg.c b/drivers/net/wireless/marvell/libertas/cfg.c index b700c213d10c..afe9bcd3ad46 100644 --- a/drivers/net/wireless/marvell/libertas/cfg.c +++ b/drivers/net/wireless/marvell/libertas/cfg.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include "decl.h" #include "cfg.h" diff --git a/drivers/net/wireless/marvell/libertas/cmdresp.c b/drivers/net/wireless/marvell/libertas/cmdresp.c index 74cb7551f427..f2aa659e7714 100644 --- a/drivers/net/wireless/marvell/libertas/cmdresp.c +++ b/drivers/net/wireless/marvell/libertas/cmdresp.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include "cfg.h" diff --git a/drivers/net/wireless/marvell/mwifiex/cmdevt.c b/drivers/net/wireless/marvell/mwifiex/cmdevt.c index 7894102f03eb..1cff001bdc51 100644 --- a/drivers/net/wireless/marvell/mwifiex/cmdevt.c +++ b/drivers/net/wireless/marvell/mwifiex/cmdevt.c @@ -5,7 +5,7 @@ * Copyright 2011-2020 NXP */ -#include +#include #include "decl.h" #include "ioctl.h" #include "util.h" diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c index bcd24c9072ec..4de45a56812d 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include "mt76x0.h" #include "eeprom.h" #include "../mt76x02_phy.h" diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_eeprom.c b/drivers/net/wireless/mediatek/mt76/mt76x02_eeprom.c index 5d402cf2951c..a5e3392c0b48 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_eeprom.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_eeprom.c @@ -4,7 +4,7 @@ * Copyright (C) 2018 Lorenzo Bianconi */ -#include +#include #include "mt76x02_eeprom.h" diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c index 1fe5f5a02f93..156b16c17b2b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c @@ -5,7 +5,7 @@ #include #include -#include +#include #include "mt76x2.h" #include "eeprom.h" diff --git a/drivers/net/wireless/mediatek/mt7601u/dma.h b/drivers/net/wireless/mediatek/mt7601u/dma.h index 81e559ec1c7b..cda9c267516e 100644 --- a/drivers/net/wireless/mediatek/mt7601u/dma.h +++ b/drivers/net/wireless/mediatek/mt7601u/dma.h @@ -7,7 +7,7 @@ #ifndef __MT7601U_DMA_H #define __MT7601U_DMA_H -#include +#include #include #define MT_DMA_HDR_LEN 4 diff --git a/drivers/net/wireless/mediatek/mt7601u/eeprom.c b/drivers/net/wireless/mediatek/mt7601u/eeprom.c index 625bebe60538..d4d31a546556 100644 --- a/drivers/net/wireless/mediatek/mt7601u/eeprom.c +++ b/drivers/net/wireless/mediatek/mt7601u/eeprom.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include "mt7601u.h" #include "eeprom.h" #include "mac.h" diff --git a/drivers/net/wireless/purelifi/plfxlc/usb.c b/drivers/net/wireless/purelifi/plfxlc/usb.c index 15334940287d..56d1139ba8bc 100644 --- a/drivers/net/wireless/purelifi/plfxlc/usb.c +++ b/drivers/net/wireless/purelifi/plfxlc/usb.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include "mac.h" diff --git a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c index a8a94edf2a70..9ae10f65f2af 100644 --- a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c +++ b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include "zd_def.h" #include "zd_mac.h" diff --git a/drivers/nfc/nfcmrvl/fw_dnld.c b/drivers/nfc/nfcmrvl/fw_dnld.c index e83f65596a88..93094418fd24 100644 --- a/drivers/nfc/nfcmrvl/fw_dnld.c +++ b/drivers/nfc/nfcmrvl/fw_dnld.c @@ -6,7 +6,7 @@ */ #include -#include +#include #include #include #include diff --git a/drivers/nfc/nxp-nci/firmware.c b/drivers/nfc/nxp-nci/firmware.c index 119bf305c642..381b5bb75477 100644 --- a/drivers/nfc/nxp-nci/firmware.c +++ b/drivers/nfc/nxp-nci/firmware.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include "nxp-nci.h" diff --git a/drivers/nfc/nxp-nci/i2c.c b/drivers/nfc/nxp-nci/i2c.c index a8aced0b8010..049662ffdf97 100644 --- a/drivers/nfc/nxp-nci/i2c.c +++ b/drivers/nfc/nxp-nci/i2c.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include diff --git a/drivers/nfc/pn544/i2c.c b/drivers/nfc/pn544/i2c.c index e2a6575b9ff7..a0dfb3f98d5a 100644 --- a/drivers/nfc/pn544/i2c.c +++ b/drivers/nfc/pn544/i2c.c @@ -17,7 +17,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/nvme/common/auth.c b/drivers/nvme/common/auth.c index a3455f1d67fa..9b7126e1a19d 100644 --- a/drivers/nvme/common/auth.c +++ b/drivers/nvme/common/auth.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index 371e14f0a203..5ea0e21709da 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include "nvme.h" diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index ba6508455e18..43d73d31c66f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include "nvme.h" #include "fabrics.h" diff --git a/drivers/nvme/host/hwmon.c b/drivers/nvme/host/hwmon.c index 8df73a0b3980..89a1a1043d63 100644 --- a/drivers/nvme/host/hwmon.c +++ b/drivers/nvme/host/hwmon.c @@ -6,7 +6,7 @@ #include #include -#include +#include #include "nvme.h" diff --git a/drivers/nvme/host/pr.c b/drivers/nvme/host/pr.c index 7347ddf85f00..dc7922f22600 100644 --- a/drivers/nvme/host/pr.c +++ b/drivers/nvme/host/pr.c @@ -5,7 +5,7 @@ */ #include #include -#include +#include #include "nvme.h" diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index c8fd0e8f0237..24a2759798d0 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/nvme/host/trace.c b/drivers/nvme/host/trace.c index 0288315f0050..87c437fc070d 100644 --- a/drivers/nvme/host/trace.c +++ b/drivers/nvme/host/trace.c @@ -4,7 +4,7 @@ * Copyright (c) 2018 Johannes Thumshirn, SUSE Linux GmbH */ -#include +#include #include "trace.h" static const char *nvme_trace_delete_sq(struct trace_seq *p, u8 *cdw10) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 954d4c074770..081f0473cd9e 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include "nvmet.h" u32 nvmet_get_log_page_len(struct nvme_command *cmd) diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c index 7897d02c681d..29f8639cfe7f 100644 --- a/drivers/nvme/target/auth.c +++ b/drivers/nvme/target/auth.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include "nvmet.h" diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 1b6264fa5803..ade285308450 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/nvme/target/trace.c b/drivers/nvme/target/trace.c index 8d1806a82887..9a3548179a8e 100644 --- a/drivers/nvme/target/trace.c +++ b/drivers/nvme/target/trace.c @@ -4,7 +4,7 @@ * Copyright (c) 2018 Johannes Thumshirn, SUSE Linux GmbH */ -#include +#include #include "trace.h" static const char *nvmet_trace_admin_identify(struct trace_seq *p, u8 *cdw10) diff --git a/drivers/pci/vpd.c b/drivers/pci/vpd.c index 485a642b9304..e4300f5f304f 100644 --- a/drivers/pci/vpd.c +++ b/drivers/pci/vpd.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include "pci.h" #define PCI_VPD_LRDT_TAG_SIZE 3 diff --git a/drivers/pcmcia/cistpl.c b/drivers/pcmcia/cistpl.c index 948b763dc451..d018f36f3a89 100644 --- a/drivers/pcmcia/cistpl.c +++ b/drivers/pcmcia/cistpl.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/peci/controller/peci-aspeed.c b/drivers/peci/controller/peci-aspeed.c index de7046e6b9c4..b93eb6f43b98 100644 --- a/drivers/peci/controller/peci-aspeed.c +++ b/drivers/peci/controller/peci-aspeed.c @@ -2,7 +2,7 @@ // Copyright (c) 2012-2017 ASPEED Technology Inc. // Copyright (c) 2018-2021 Intel Corporation -#include +#include #include #include diff --git a/drivers/peci/request.c b/drivers/peci/request.c index 8d6dd7b6b559..87eefe66743f 100644 --- a/drivers/peci/request.c +++ b/drivers/peci/request.c @@ -8,7 +8,7 @@ #include #include -#include +#include #include "internal.h" diff --git a/drivers/platform/arm64/acer-aspire1-ec.c b/drivers/platform/arm64/acer-aspire1-ec.c index dbb1cce13965..2df42406430d 100644 --- a/drivers/platform/arm64/acer-aspire1-ec.c +++ b/drivers/platform/arm64/acer-aspire1-ec.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2024, Nikita Travkin */ -#include +#include #include #include #include diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c index 73f75958e15c..5c9a53dffcf9 100644 --- a/drivers/platform/chrome/cros_ec_proto.c +++ b/drivers/platform/chrome/cros_ec_proto.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include "cros_ec_trace.h" diff --git a/drivers/platform/chrome/cros_ec_proto_test.c b/drivers/platform/chrome/cros_ec_proto_test.c index 7ca9895a0065..3f281996a686 100644 --- a/drivers/platform/chrome/cros_ec_proto_test.c +++ b/drivers/platform/chrome/cros_ec_proto_test.c @@ -5,7 +5,7 @@ #include -#include +#include #include #include diff --git a/drivers/platform/chrome/wilco_ec/properties.c b/drivers/platform/chrome/wilco_ec/properties.c index c2bf4c95c5d2..9951c8db04da 100644 --- a/drivers/platform/chrome/wilco_ec/properties.c +++ b/drivers/platform/chrome/wilco_ec/properties.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include /* Operation code; what the EC should do with the property */ enum ec_property_op { diff --git a/drivers/platform/cznic/turris-omnia-mcu-gpio.c b/drivers/platform/cznic/turris-omnia-mcu-gpio.c index 91da56a704c7..88e208d45882 100644 --- a/drivers/platform/cznic/turris-omnia-mcu-gpio.c +++ b/drivers/platform/cznic/turris-omnia-mcu-gpio.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include "turris-omnia-mcu.h" diff --git a/drivers/platform/cznic/turris-omnia-mcu.h b/drivers/platform/cznic/turris-omnia-mcu.h index fed0d357fea3..57ef5d350043 100644 --- a/drivers/platform/cznic/turris-omnia-mcu.h +++ b/drivers/platform/cznic/turris-omnia-mcu.h @@ -18,7 +18,7 @@ #include #include #include -#include +#include struct i2c_client; struct rtc_device; diff --git a/drivers/platform/surface/aggregator/ssh_msgb.h b/drivers/platform/surface/aggregator/ssh_msgb.h index 438873e06098..80aa568a0759 100644 --- a/drivers/platform/surface/aggregator/ssh_msgb.h +++ b/drivers/platform/surface/aggregator/ssh_msgb.h @@ -8,7 +8,7 @@ #ifndef _SURFACE_AGGREGATOR_SSH_MSGB_H #define _SURFACE_AGGREGATOR_SSH_MSGB_H -#include +#include #include #include diff --git a/drivers/platform/surface/aggregator/ssh_packet_layer.c b/drivers/platform/surface/aggregator/ssh_packet_layer.c index d726b1a86319..6081b0146d5f 100644 --- a/drivers/platform/surface/aggregator/ssh_packet_layer.c +++ b/drivers/platform/surface/aggregator/ssh_packet_layer.c @@ -5,7 +5,7 @@ * Copyright (C) 2019-2022 Maximilian Luz */ -#include +#include #include #include #include diff --git a/drivers/platform/surface/aggregator/ssh_parser.c b/drivers/platform/surface/aggregator/ssh_parser.c index a6f668694365..6cfda85d3b33 100644 --- a/drivers/platform/surface/aggregator/ssh_parser.c +++ b/drivers/platform/surface/aggregator/ssh_parser.c @@ -5,7 +5,7 @@ * Copyright (C) 2019-2022 Maximilian Luz */ -#include +#include #include #include #include diff --git a/drivers/platform/surface/aggregator/ssh_request_layer.c b/drivers/platform/surface/aggregator/ssh_request_layer.c index 90634dcacabf..879ca9ee7ff6 100644 --- a/drivers/platform/surface/aggregator/ssh_request_layer.c +++ b/drivers/platform/surface/aggregator/ssh_request_layer.c @@ -5,7 +5,7 @@ * Copyright (C) 2019-2022 Maximilian Luz */ -#include +#include #include #include #include diff --git a/drivers/platform/surface/aggregator/trace.h b/drivers/platform/surface/aggregator/trace.h index 55cc61bba1da..caf7d3cb5d8b 100644 --- a/drivers/platform/surface/aggregator/trace.h +++ b/drivers/platform/surface/aggregator/trace.h @@ -13,7 +13,7 @@ #include -#include +#include #include TRACE_DEFINE_ENUM(SSH_FRAME_TYPE_DATA_SEQ); diff --git a/drivers/platform/surface/surface3_power.c b/drivers/platform/surface/surface3_power.c index 4c0f92562a79..1ee5239269ae 100644 --- a/drivers/platform/surface/surface3_power.c +++ b/drivers/platform/surface/surface3_power.c @@ -40,7 +40,7 @@ #include #include #include -#include +#include #define SURFACE_3_POLL_INTERVAL (2 * HZ) #define SURFACE_3_STRLEN 10 diff --git a/drivers/platform/surface/surface_acpi_notify.c b/drivers/platform/surface/surface_acpi_notify.c index 20f3870915d2..14a9d8a267cb 100644 --- a/drivers/platform/surface/surface_acpi_notify.c +++ b/drivers/platform/surface/surface_acpi_notify.c @@ -11,7 +11,7 @@ * Copyright (C) 2019-2022 Maximilian Luz */ -#include +#include #include #include #include diff --git a/drivers/platform/surface/surface_aggregator_tabletsw.c b/drivers/platform/surface/surface_aggregator_tabletsw.c index c0a1a5869246..ffa36ed92897 100644 --- a/drivers/platform/surface/surface_aggregator_tabletsw.c +++ b/drivers/platform/surface/surface_aggregator_tabletsw.c @@ -5,7 +5,7 @@ * Copyright (C) 2022 Maximilian Luz */ -#include +#include #include #include #include diff --git a/drivers/platform/surface/surface_platform_profile.c b/drivers/platform/surface/surface_platform_profile.c index 3de864bc6610..08db878f1d7d 100644 --- a/drivers/platform/surface/surface_platform_profile.c +++ b/drivers/platform/surface/surface_platform_profile.c @@ -6,7 +6,7 @@ * Copyright (C) 2021-2022 Maximilian Luz */ -#include +#include #include #include #include diff --git a/drivers/platform/x86/asus-tf103c-dock.c b/drivers/platform/x86/asus-tf103c-dock.c index b441d8ca72d3..ca4670d0dc67 100644 --- a/drivers/platform/x86/asus-tf103c-dock.c +++ b/drivers/platform/x86/asus-tf103c-dock.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include static bool fnlock; module_param(fnlock, bool, 0644); diff --git a/drivers/platform/x86/dell/dell-wmi-ddv.c b/drivers/platform/x86/dell/dell-wmi-ddv.c index 0b2299f7a2de..e75cd6e1efe6 100644 --- a/drivers/platform/x86/dell/dell-wmi-ddv.c +++ b/drivers/platform/x86/dell/dell-wmi-ddv.c @@ -31,7 +31,7 @@ #include -#include +#include #define DRIVER_NAME "dell-wmi-ddv" diff --git a/drivers/platform/x86/msi-wmi-platform.c b/drivers/platform/x86/msi-wmi-platform.c index 436fb91a47db..9b5c7f8c79b0 100644 --- a/drivers/platform/x86/msi-wmi-platform.c +++ b/drivers/platform/x86/msi-wmi-platform.c @@ -22,7 +22,7 @@ #include #include -#include +#include #define DRIVER_NAME "msi-wmi-platform" diff --git a/drivers/platform/x86/quickstart.c b/drivers/platform/x86/quickstart.c index df496c7e7171..8d540a1c8602 100644 --- a/drivers/platform/x86/quickstart.c +++ b/drivers/platform/x86/quickstart.c @@ -26,7 +26,7 @@ #include #include -#include +#include #define DRIVER_NAME "quickstart" diff --git a/drivers/power/supply/axp288_fuel_gauge.c b/drivers/power/supply/axp288_fuel_gauge.c index 95d9a35243c2..a3d71fc72064 100644 --- a/drivers/power/supply/axp288_fuel_gauge.c +++ b/drivers/power/supply/axp288_fuel_gauge.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #define PS_STAT_VBUS_TRIGGER (1 << 0) diff --git a/drivers/power/supply/bq27xxx_battery_i2c.c b/drivers/power/supply/bq27xxx_battery_i2c.c index c1737f964840..ba0d22d90429 100644 --- a/drivers/power/supply/bq27xxx_battery_i2c.c +++ b/drivers/power/supply/bq27xxx_battery_i2c.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include diff --git a/drivers/power/supply/cros_peripheral_charger.c b/drivers/power/supply/cros_peripheral_charger.c index d406f2a78449..962a6fd29832 100644 --- a/drivers/power/supply/cros_peripheral_charger.c +++ b/drivers/power/supply/cros_peripheral_charger.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #define DRV_NAME "cros-ec-pchg" #define PCHG_DIR_PREFIX "peripheral" diff --git a/drivers/power/supply/max1720x_battery.c b/drivers/power/supply/max1720x_battery.c index 2bc3dce963a3..33105419e242 100644 --- a/drivers/power/supply/max1720x_battery.c +++ b/drivers/power/supply/max1720x_battery.c @@ -14,7 +14,7 @@ #include #include -#include +#include /* Nonvolatile registers */ #define MAX1720X_NXTABLE0 0x80 diff --git a/drivers/power/supply/rk817_charger.c b/drivers/power/supply/rk817_charger.c index a3d377a32b49..57b6ddefad28 100644 --- a/drivers/power/supply/rk817_charger.c +++ b/drivers/power/supply/rk817_charger.c @@ -8,7 +8,7 @@ * Chris Morgan */ -#include +#include #include #include #include diff --git a/drivers/power/supply/surface_battery.c b/drivers/power/supply/surface_battery.c index 196d290dc596..ebd1edde28f1 100644 --- a/drivers/power/supply/surface_battery.c +++ b/drivers/power/supply/surface_battery.c @@ -6,7 +6,7 @@ * Copyright (C) 2019-2021 Maximilian Luz */ -#include +#include #include #include #include diff --git a/drivers/power/supply/surface_charger.c b/drivers/power/supply/surface_charger.c index 7a6c62d6f883..90b823848c99 100644 --- a/drivers/power/supply/surface_charger.c +++ b/drivers/power/supply/surface_charger.c @@ -6,7 +6,7 @@ * Copyright (C) 2019-2021 Maximilian Luz */ -#include +#include #include #include #include diff --git a/drivers/ptp/ptp_clockmatrix.c b/drivers/ptp/ptp_clockmatrix.c index 209a45a76e6b..b6f1941308b1 100644 --- a/drivers/ptp/ptp_clockmatrix.c +++ b/drivers/ptp/ptp_clockmatrix.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include "ptp_private.h" #include "ptp_clockmatrix.h" diff --git a/drivers/ptp/ptp_fc3.c b/drivers/ptp/ptp_fc3.c index 6ef982862e27..e14e149b746e 100644 --- a/drivers/ptp/ptp_fc3.c +++ b/drivers/ptp/ptp_fc3.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include "ptp_private.h" #include "ptp_fc3.h" diff --git a/drivers/rtc/rtc-max31335.c b/drivers/rtc/rtc-max31335.c index 9a456f537d3b..3fbcf5f6b92f 100644 --- a/drivers/rtc/rtc-max31335.c +++ b/drivers/rtc/rtc-max31335.c @@ -8,7 +8,7 @@ * */ -#include +#include #include #include #include diff --git a/drivers/rtc/rtc-pm8xxx.c b/drivers/rtc/rtc-pm8xxx.c index f6b779c12ca7..c32fba550c8e 100644 --- a/drivers/rtc/rtc-pm8xxx.c +++ b/drivers/rtc/rtc-pm8xxx.c @@ -17,7 +17,7 @@ #include #include -#include +#include /* RTC_CTRL register bit fields */ #define PM8xxx_RTC_ENABLE BIT(7) diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c index ec3834bda111..abf6a82b74af 100644 --- a/drivers/scsi/aacraid/aachba.c +++ b/drivers/scsi/aacraid/aachba.c @@ -27,7 +27,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/scsi/csiostor/csio_lnode.c b/drivers/scsi/csiostor/csio_lnode.c index 5b3ffefae476..6cc1d53165a0 100644 --- a/drivers/scsi/csiostor/csio_lnode.c +++ b/drivers/scsi/csiostor/csio_lnode.c @@ -38,7 +38,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/scsi/csiostor/csio_scsi.c b/drivers/scsi/csiostor/csio_scsi.c index 05e1a63e00c3..8329f0cab4e7 100644 --- a/drivers/scsi/csiostor/csio_scsi.c +++ b/drivers/scsi/csiostor/csio_scsi.c @@ -41,7 +41,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/scsi/cxlflash/lunmgt.c b/drivers/scsi/cxlflash/lunmgt.c index 52405c6462f8..962c797fda07 100644 --- a/drivers/scsi/cxlflash/lunmgt.c +++ b/drivers/scsi/cxlflash/lunmgt.c @@ -8,7 +8,7 @@ * Copyright (C) 2015 IBM Corporation */ -#include +#include #include #include diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c index e4b45b7e3277..60d62b93d624 100644 --- a/drivers/scsi/cxlflash/main.c +++ b/drivers/scsi/cxlflash/main.c @@ -13,7 +13,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/scsi/cxlflash/superpipe.c b/drivers/scsi/cxlflash/superpipe.c index 2d356fe2457a..b375509d1470 100644 --- a/drivers/scsi/cxlflash/superpipe.c +++ b/drivers/scsi/cxlflash/superpipe.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/scsi/cxlflash/vlun.c b/drivers/scsi/cxlflash/vlun.c index 35326e311991..32e807703377 100644 --- a/drivers/scsi/cxlflash/vlun.c +++ b/drivers/scsi/cxlflash/vlun.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index 4eb0837298d4..1bf5948d1188 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index e044ed09d7e0..0c49414c1f35 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -51,7 +51,7 @@ #include #include #include -#include +#include #include #include "hpsa_cmd.h" #include "hpsa.h" diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h index b2b643c6dbbe..fde7145835de 100644 --- a/drivers/scsi/ipr.h +++ b/drivers/scsi/ipr.h @@ -13,7 +13,7 @@ #ifndef _IPR_H #define _IPR_H -#include +#include #include #include #include diff --git a/drivers/scsi/libfc/fc_disc.c b/drivers/scsi/libfc/fc_disc.c index 384f48ff64d7..60d621ad0024 100644 --- a/drivers/scsi/libfc/fc_disc.c +++ b/drivers/scsi/libfc/fc_disc.c @@ -26,7 +26,7 @@ #include #include -#include +#include #include diff --git a/drivers/scsi/libfc/fc_elsct.c b/drivers/scsi/libfc/fc_elsct.c index 8d3006edbe12..4fa18a317f77 100644 --- a/drivers/scsi/libfc/fc_elsct.c +++ b/drivers/scsi/libfc/fc_elsct.c @@ -10,7 +10,7 @@ */ #include -#include +#include #include #include #include diff --git a/drivers/scsi/libfc/fc_encode.h b/drivers/scsi/libfc/fc_encode.h index 6b7e4ca6b7b5..02e31db31d68 100644 --- a/drivers/scsi/libfc/fc_encode.h +++ b/drivers/scsi/libfc/fc_encode.h @@ -7,7 +7,7 @@ #ifndef _FC_ENCODE_H_ #define _FC_ENCODE_H_ -#include +#include #include #include diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c index ab06e9aeb613..310fa5add5f0 100644 --- a/drivers/scsi/libfc/fc_lport.c +++ b/drivers/scsi/libfc/fc_lport.c @@ -79,7 +79,7 @@ #include #include #include -#include +#include #include diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c index 308cb4872f96..c25979d96808 100644 --- a/drivers/scsi/libfc/fc_rport.c +++ b/drivers/scsi/libfc/fc_rport.c @@ -55,7 +55,7 @@ #include #include -#include +#include #include diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 0fda8905eabd..2b1bf990a9dc 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 4e6bb3d0f163..2b8004eb6f1b 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include "sas_internal.h" diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index d70da2736c94..fec23c723730 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 0cef5d089f34..55c3e2c2bf8f 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 0eaede8275da..11c974bffa72 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 4ecf5284c0fc..8e75e2e279a4 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/scsi/mpi3mr/mpi3mr.h b/drivers/scsi/mpi3mr/mpi3mr.h index fcb0fa31536b..14eeac08660f 100644 --- a/drivers/scsi/mpi3mr/mpi3mr.h +++ b/drivers/scsi/mpi3mr/mpi3mr.h @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 728cced42b0e..f2a55aa5fe65 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -54,7 +54,7 @@ #include #include #include -#include +#include #include "mpt3sas_base.h" diff --git a/drivers/scsi/mpt3sas/mpt3sas_warpdrive.c b/drivers/scsi/mpt3sas/mpt3sas_warpdrive.c index 1d64e5056a8a..2b04f0852dec 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_warpdrive.c +++ b/drivers/scsi/mpt3sas/mpt3sas_warpdrive.c @@ -42,7 +42,7 @@ #include #include #include -#include +#include #include "mpt3sas_base.h" diff --git a/drivers/scsi/mvsas/mv_sas.h b/drivers/scsi/mvsas/mv_sas.h index 68df771e2975..19b01f7c4767 100644 --- a/drivers/scsi/mvsas/mv_sas.h +++ b/drivers/scsi/mvsas/mv_sas.h @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/scsi/myrb.c b/drivers/scsi/myrb.c index bfc2b835e612..a7e64b867c8e 100644 --- a/drivers/scsi/myrb.c +++ b/drivers/scsi/myrb.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/scsi/myrs.c b/drivers/scsi/myrs.c index 3392feb15cb4..1469d0c54e45 100644 --- a/drivers/scsi/myrs.c +++ b/drivers/scsi/myrs.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/scsi/qla2xxx/qla_dsd.h b/drivers/scsi/qla2xxx/qla_dsd.h index 20788054b91b..52e060f83b37 100644 --- a/drivers/scsi/qla2xxx/qla_dsd.h +++ b/drivers/scsi/qla2xxx/qla_dsd.h @@ -1,7 +1,7 @@ #ifndef _QLA_DSD_H_ #define _QLA_DSD_H_ -#include +#include /* 32-bit data segment descriptor (8 bytes) */ struct dsd32 { diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index d7551b1443e4..11eadb3bd36e 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 7e7460a747a4..ceaf1c7b1d17 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index ee69bd35889d..a77e0499b738 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -55,7 +55,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/scsi/scsi_common.c b/drivers/scsi/scsi_common.c index 04749fde1636..e1a2a62b6910 100644 --- a/drivers/scsi/scsi_common.c +++ b/drivers/scsi/scsi_common.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include MODULE_DESCRIPTION("SCSI functions used by both the initiator and the target code"); diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index d95f417e24c0..de15fc0df104 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -47,7 +47,7 @@ #include -#include +#include #include #include diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 612489afe8d2..10154d78e336 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -48,7 +48,7 @@ #include -#include +#include /* * These should *probably* be handled by the host itself. diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 0561b318dade..adee6f60c966 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/scsi/scsi_proto_test.c b/drivers/scsi/scsi_proto_test.c index 7fa0a78a2ad1..c093389edabb 100644 --- a/drivers/scsi/scsi_proto_test.c +++ b/drivers/scsi/scsi_proto_test.c @@ -3,7 +3,7 @@ * Copyright 2023 Google LLC */ #include -#include +#include #include static void test_scsi_proto(struct kunit *test) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index c0b72199b4fa..042329b74c6e 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -35,7 +35,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/scsi/scsi_trace.c b/drivers/scsi/scsi_trace.c index 3e47c4472a80..b3baae91e7a2 100644 --- a/drivers/scsi/scsi_trace.c +++ b/drivers/scsi/scsi_trace.c @@ -5,7 +5,7 @@ */ #include #include -#include +#include #include #define SERVICE_ACTION16(cdb) (cdb[1] & 0x1f) diff --git a/drivers/scsi/scsicam.c b/drivers/scsi/scsicam.c index dd69342bbe78..19e6c3852d50 100644 --- a/drivers/scsi/scsicam.c +++ b/drivers/scsi/scsicam.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 41e2dfa2d67d..ca4bc0ac76ad 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -57,7 +57,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index c8b9654d30f0..ee2b74238758 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -13,7 +13,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/scsi/ses.c b/drivers/scsi/ses.c index e22c7f5e652b..2c61624cb4b0 100644 --- a/drivers/scsi/ses.c +++ b/drivers/scsi/ses.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index 7fd5a8c813dc..870f37b70546 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include "smartpqi.h" #include "smartpqi_sis.h" diff --git a/drivers/scsi/smartpqi/smartpqi_sas_transport.c b/drivers/scsi/smartpqi/smartpqi_sas_transport.c index a981d0377948..93e96705754e 100644 --- a/drivers/scsi/smartpqi/smartpqi_sas_transport.c +++ b/drivers/scsi/smartpqi/smartpqi_sas_transport.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include "smartpqi.h" static struct pqi_sas_phy *pqi_alloc_sas_phy(struct pqi_sas_port *pqi_sas_port) diff --git a/drivers/scsi/smartpqi/smartpqi_sis.c b/drivers/scsi/smartpqi/smartpqi_sis.c index ca1df36b83f7..ae5a264d062d 100644 --- a/drivers/scsi/smartpqi/smartpqi_sis.c +++ b/drivers/scsi/smartpqi/smartpqi_sis.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include "smartpqi.h" #include "smartpqi_sis.h" diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 3f491019103e..198bec87bb8e 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -52,7 +52,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index d50bad3a2ce9..beb88f25dbb9 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -46,7 +46,7 @@ static const char *verstr = "20160209"; #include #include -#include +#include #include #include diff --git a/drivers/soc/qcom/socinfo.c b/drivers/soc/qcom/socinfo.c index 24c3971f2ef1..64fc4f41da77 100644 --- a/drivers/soc/qcom/socinfo.c +++ b/drivers/soc/qcom/socinfo.c @@ -17,7 +17,7 @@ #include #include -#include +#include #include diff --git a/drivers/spi/spi-airoha-snfi.c b/drivers/spi/spi-airoha-snfi.c index 94458df53eae..1369691a997b 100644 --- a/drivers/spi/spi-airoha-snfi.c +++ b/drivers/spi/spi-airoha-snfi.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include /* SPI */ #define REG_SPI_CTRL_BASE 0x1FA10000 diff --git a/drivers/spi/spi-dln2.c b/drivers/spi/spi-dln2.c index d319dc357fef..4ba1d9245c9f 100644 --- a/drivers/spi/spi-dln2.c +++ b/drivers/spi/spi-dln2.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #define DLN2_SPI_MODULE_ID 0x02 #define DLN2_SPI_CMD(cmd) DLN2_CMD(cmd, DLN2_SPI_MODULE_ID) diff --git a/drivers/spi/spi-npcm-pspi.c b/drivers/spi/spi-npcm-pspi.c index a7feb20b06ee..30aa37b0c3b8 100644 --- a/drivers/spi/spi-npcm-pspi.c +++ b/drivers/spi/spi-npcm-pspi.c @@ -12,7 +12,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/spi/spi-orion.c b/drivers/spi/spi-orion.c index eee9ff4bfa5b..4730e4ba8901 100644 --- a/drivers/spi/spi-orion.c +++ b/drivers/spi/spi-orion.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #define DRIVER_NAME "orion_spi" diff --git a/drivers/spi/spi-rpc-if.c b/drivers/spi/spi-rpc-if.c index b468a95972bf..c24dad51a0e9 100644 --- a/drivers/spi/spi-rpc-if.c +++ b/drivers/spi/spi-rpc-if.c @@ -14,7 +14,7 @@ #include -#include +#include static void rpcif_spi_mem_prepare(struct spi_device *spi_dev, const struct spi_mem_op *spi_op, diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c index 6f12e4fb2e2e..3519656515ea 100644 --- a/drivers/spi/spi-sh-msiof.c +++ b/drivers/spi/spi-sh-msiof.c @@ -27,7 +27,7 @@ #include #include -#include +#include #define SH_MSIOF_FLAG_FIXED_DTDL_200 BIT(0) diff --git a/drivers/spi/spi-uniphier.c b/drivers/spi/spi-uniphier.c index 4a18cf896194..07b155980e71 100644 --- a/drivers/spi/spi-uniphier.c +++ b/drivers/spi/spi-uniphier.c @@ -15,7 +15,7 @@ #include #include -#include +#include #define SSI_TIMEOUT_MS 2000 #define SSI_POLL_TIMEOUT_US 200 diff --git a/drivers/spi/spi-xcomm.c b/drivers/spi/spi-xcomm.c index 846f00e23b71..3bd0149d8f4e 100644 --- a/drivers/spi/spi-xcomm.c +++ b/drivers/spi/spi-xcomm.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #define SPI_XCOMM_SETTINGS_LEN_OFFSET 10 #define SPI_XCOMM_SETTINGS_3WIRE BIT(6) diff --git a/drivers/staging/media/av7110/av7110.c b/drivers/staging/media/av7110/av7110.c index 728b3892a20c..bc9a2a40afcb 100644 --- a/drivers/staging/media/av7110/av7110.c +++ b/drivers/staging/media/av7110/av7110.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/staging/rtl8192e/rtl819x_BAProc.c b/drivers/staging/rtl8192e/rtl819x_BAProc.c index 834329886ea2..c400d4f8ff9a 100644 --- a/drivers/staging/rtl8192e/rtl819x_BAProc.c +++ b/drivers/staging/rtl8192e/rtl819x_BAProc.c @@ -5,7 +5,7 @@ * Contact Information: wlanfae */ #include -#include +#include #include #include "rtllib.h" #include "rtl819x_BA.h" diff --git a/drivers/staging/rtl8723bs/core/rtw_ap.c b/drivers/staging/rtl8723bs/core/rtw_ap.c index e55b4f7e0aef..a6dc88dd4ba1 100644 --- a/drivers/staging/rtl8723bs/core/rtw_ap.c +++ b/drivers/staging/rtl8723bs/core/rtw_ap.c @@ -6,7 +6,7 @@ ******************************************************************************/ #include -#include +#include void init_mlme_ap_info(struct adapter *padapter) { diff --git a/drivers/staging/rtl8723bs/core/rtw_ieee80211.c b/drivers/staging/rtl8723bs/core/rtw_ieee80211.c index 5a76069a8222..0ed420f3d096 100644 --- a/drivers/staging/rtl8723bs/core/rtw_ieee80211.c +++ b/drivers/staging/rtl8723bs/core/rtw_ieee80211.c @@ -7,7 +7,7 @@ #include #include -#include +#include u8 RTW_WPA_OUI_TYPE[] = { 0x00, 0x50, 0xf2, 1 }; u16 RTW_WPA_VERSION = 1; diff --git a/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c b/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c index bbdd5fce28a1..4d4bec47d187 100644 --- a/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c +++ b/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include static struct mlme_handler mlme_sta_tbl[] = { {WIFI_ASSOCREQ, "OnAssocReq", &OnAssocReq}, diff --git a/drivers/staging/rtl8723bs/core/rtw_recv.c b/drivers/staging/rtl8723bs/core/rtw_recv.c index b30f026789b6..a389ba5ecc6f 100644 --- a/drivers/staging/rtl8723bs/core/rtw_recv.c +++ b/drivers/staging/rtl8723bs/core/rtw_recv.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include static u8 SNAP_ETH_TYPE_IPX[2] = {0x81, 0x37}; static u8 SNAP_ETH_TYPE_APPLETALK_AARP[2] = {0x80, 0xf3}; diff --git a/drivers/staging/rtl8723bs/os_dep/recv_linux.c b/drivers/staging/rtl8723bs/os_dep/recv_linux.c index 746f45cf9aac..ca808ded61ac 100644 --- a/drivers/staging/rtl8723bs/os_dep/recv_linux.c +++ b/drivers/staging/rtl8723bs/os_dep/recv_linux.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include void rtw_os_free_recvframe(union recv_frame *precvframe) { diff --git a/drivers/target/iscsi/cxgbit/cxgbit_target.c b/drivers/target/iscsi/cxgbit/cxgbit_target.c index acfc39683c87..3698f2eb097e 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_target.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_target.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 1d25e64b068a..6002283cbeba 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/target/iscsi/iscsi_target_tmr.c b/drivers/target/iscsi/iscsi_target_tmr.c index 9c4aa01b6351..f60b156ede12 100644 --- a/drivers/target/iscsi/iscsi_target_tmr.c +++ b/drivers/target/iscsi/iscsi_target_tmr.c @@ -8,7 +8,7 @@ * ******************************************************************************/ -#include +#include #include #include #include diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c index b604fcae21e1..3b89b5a70331 100644 --- a/drivers/target/sbp/sbp_target.c +++ b/drivers/target/sbp/sbp_target.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include "sbp_target.h" diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index 01751faad386..10250aca5a81 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index bf4892544cfd..7d43d92c44d4 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/target/target_core_fabric_lib.c b/drivers/target/target_core_fabric_lib.c index 6600ae44f29d..43f47e3aa448 100644 --- a/drivers/target/target_core_fabric_lib.c +++ b/drivers/target/target_core_fabric_lib.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index 94e6cd4e7e43..2d78ef74633c 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index a3e09adc4e76..c8dc92a7d63e 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 80b7d85030d0..4f4ad6af416c 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index f98ebb18666b..440e07b1d5cd 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index 6a02561cc20c..fe8beb7dbab1 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c index 50290abc07bc..ea14a3835681 100644 --- a/drivers/target/target_core_spc.c +++ b/drivers/target/target_core_spc.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 73d0d6133ac8..05d29201b730 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index 4128631c9dfd..877ce58c0a70 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c index 21783cd71c15..34ab628809e8 100644 --- a/drivers/target/tcm_fc/tfc_cmd.c +++ b/drivers/target/tcm_fc/tfc_cmd.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/target/tcm_fc/tfc_conf.c b/drivers/target/tcm_fc/tfc_conf.c index 5ee03d1cba2b..639fc358ed0f 100644 --- a/drivers/target/tcm_fc/tfc_conf.c +++ b/drivers/target/tcm_fc/tfc_conf.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/target/tcm_fc/tfc_io.c b/drivers/target/tcm_fc/tfc_io.c index bbe2e29612fa..45329284f52f 100644 --- a/drivers/target/tcm_fc/tfc_io.c +++ b/drivers/target/tcm_fc/tfc_io.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/target/tcm_fc/tfc_sess.c b/drivers/target/tcm_fc/tfc_sess.c index 593540da9346..d6afaba52ea5 100644 --- a/drivers/target/tcm_fc/tfc_sess.c +++ b/drivers/target/tcm_fc/tfc_sess.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/thermal/qcom/qcom-spmi-adc-tm5.c b/drivers/thermal/qcom/qcom-spmi-adc-tm5.c index 7c9f4023babc..5e94a45eba3e 100644 --- a/drivers/thermal/qcom/qcom-spmi-adc-tm5.c +++ b/drivers/thermal/qcom/qcom-spmi-adc-tm5.c @@ -18,7 +18,7 @@ #include #include -#include +#include #include "../thermal_hwmon.h" diff --git a/drivers/tty/serial/max3100.c b/drivers/tty/serial/max3100.c index fda63918d1eb..cde5f1c86353 100644 --- a/drivers/tty/serial/max3100.c +++ b/drivers/tty/serial/max3100.c @@ -32,7 +32,7 @@ #include #include -#include +#include #define MAX3100_C (1<<14) #define MAX3100_D (0<<14) diff --git a/drivers/tty/vt/vc_screen.c b/drivers/tty/vt/vc_screen.c index da33c6c4691c..79b33d998d43 100644 --- a/drivers/tty/vt/vc_screen.c +++ b/drivers/tty/vt/vc_screen.c @@ -48,7 +48,7 @@ #include #include -#include +#include #define HEADER_SIZE 4u #define CON_BUF_SIZE (IS_ENABLED(CONFIG_BASE_SMALL) ? 256 : PAGE_SIZE) diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c index 5891cdacd0b3..8402151330fe 100644 --- a/drivers/ufs/core/ufs-mcq.c +++ b/drivers/ufs/core/ufs-mcq.c @@ -7,7 +7,7 @@ * Can Guo */ -#include +#include #include #include #include diff --git a/drivers/ufs/core/ufs-sysfs.c b/drivers/ufs/core/ufs-sysfs.c index fe313800aed0..265f21133b63 100644 --- a/drivers/ufs/core/ufs-sysfs.c +++ b/drivers/ufs/core/ufs-sysfs.c @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 24a32e2fd75e..fc55fdab526b 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -36,7 +36,7 @@ #include "ufs-fault-injection.h" #include "ufs_bsg.h" #include "ufshcd-crypto.h" -#include +#include #define CREATE_TRACE_POINTS #include "ufs_trace.h" diff --git a/drivers/ufs/host/ufs-exynos.c b/drivers/ufs/host/ufs-exynos.c index 9ec318ef52bf..5867e6338562 100644 --- a/drivers/ufs/host/ufs-exynos.c +++ b/drivers/ufs/host/ufs-exynos.c @@ -8,7 +8,7 @@ * */ -#include +#include #include #include #include diff --git a/drivers/usb/atm/cxacru.c b/drivers/usb/atm/cxacru.c index 8f3b9a0a38e1..0dd85d2635b9 100644 --- a/drivers/usb/atm/cxacru.c +++ b/drivers/usb/atm/cxacru.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include "usbatm.h" diff --git a/drivers/usb/atm/ueagle-atm.c b/drivers/usb/atm/ueagle-atm.c index 16703815be0c..e8e43c38aa1b 100644 --- a/drivers/usb/atm/ueagle-atm.c +++ b/drivers/usb/atm/ueagle-atm.c @@ -25,7 +25,7 @@ #include #include -#include +#include #include "usbatm.h" diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 605fea461102..6b37d1c47fce 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -35,7 +35,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 6830be4419e2..86ee39db013f 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #define DRIVER_AUTHOR "Oliver Neukum" diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 1ff7d901fede..500dc35e6477 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/usb/fotg210/fotg210-hcd.c b/drivers/usb/fotg210/fotg210-hcd.c index 8c5aaf860635..3d404d19a205 100644 --- a/drivers/usb/fotg210/fotg210-hcd.c +++ b/drivers/usb/fotg210/fotg210-hcd.c @@ -36,7 +36,7 @@ #include #include -#include +#include #include "fotg210.h" diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index f45d5bedda68..f25dd2cb5d03 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include "u_os_desc.h" diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index c626bb73ea59..2920f8000bbd 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c index e11d8c0edf06..08e0d1c511e8 100644 --- a/drivers/usb/gadget/function/f_mass_storage.c +++ b/drivers/usb/gadget/function/f_mass_storage.c @@ -188,7 +188,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/usb/gadget/function/f_printer.c b/drivers/usb/gadget/function/f_printer.c index ef2ffde625c3..d295ade8fa67 100644 --- a/drivers/usb/gadget/function/f_printer.c +++ b/drivers/usb/gadget/function/f_printer.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/usb/gadget/function/f_tcm.c b/drivers/usb/gadget/function/f_tcm.c index 90906d714736..15bb3aa12aa8 100644 --- a/drivers/usb/gadget/function/f_tcm.c +++ b/drivers/usb/gadget/function/f_tcm.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include "tcm.h" #include "u_tcm.h" diff --git a/drivers/usb/gadget/function/rndis.c b/drivers/usb/gadget/function/rndis.c index 12c5d9cf450c..afd75d72412c 100644 --- a/drivers/usb/gadget/function/rndis.c +++ b/drivers/usb/gadget/function/rndis.c @@ -31,7 +31,7 @@ #include #include -#include +#include #include "u_rndis.h" diff --git a/drivers/usb/gadget/function/storage_common.h b/drivers/usb/gadget/function/storage_common.h index 0a544a82cbf8..ced5d2b09234 100644 --- a/drivers/usb/gadget/function/storage_common.h +++ b/drivers/usb/gadget/function/storage_common.h @@ -5,7 +5,7 @@ #include #include #include -#include +#include #ifndef DEBUG #undef VERBOSE_DEBUG diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c index a9edd60fbbf7..57a851151225 100644 --- a/drivers/usb/gadget/function/uvc_video.c +++ b/drivers/usb/gadget/function/uvc_video.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include diff --git a/drivers/usb/gadget/legacy/tcm_usb_gadget.c b/drivers/usb/gadget/legacy/tcm_usb_gadget.c index 40870227999a..fc1e06246d9d 100644 --- a/drivers/usb/gadget/legacy/tcm_usb_gadget.c +++ b/drivers/usb/gadget/legacy/tcm_usb_gadget.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include "u_tcm.h" diff --git a/drivers/usb/gadget/u_os_desc.h b/drivers/usb/gadget/u_os_desc.h index 5d7d35c8cc31..f8b9f0faa9b1 100644 --- a/drivers/usb/gadget/u_os_desc.h +++ b/drivers/usb/gadget/u_os_desc.h @@ -13,7 +13,7 @@ #ifndef __U_OS_DESC_H__ #define __U_OS_DESC_H__ -#include +#include #include #define USB_EXT_PROP_DW_SIZE 0 diff --git a/drivers/usb/gadget/udc/bdc/bdc.h b/drivers/usb/gadget/udc/bdc/bdc.h index 8d00b1239f21..2f4abf6f8f77 100644 --- a/drivers/usb/gadget/udc/bdc/bdc.h +++ b/drivers/usb/gadget/udc/bdc/bdc.h @@ -20,7 +20,7 @@ #include #include #include -#include +#include #define BRCM_BDC_NAME "bdc" #define BRCM_BDC_DESC "Broadcom USB Device Controller driver" diff --git a/drivers/usb/gadget/udc/bdc/bdc_ep.c b/drivers/usb/gadget/udc/bdc/bdc_ep.c index fa88f210ecd5..f995cfa9b99e 100644 --- a/drivers/usb/gadget/udc/bdc/bdc_ep.c +++ b/drivers/usb/gadget/udc/bdc/bdc_ep.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/usb/gadget/udc/bdc/bdc_udc.c b/drivers/usb/gadget/udc/bdc/bdc_udc.c index 53ffaf4e2e37..23826fd7a8e6 100644 --- a/drivers/usb/gadget/udc/bdc/bdc_udc.c +++ b/drivers/usb/gadget/udc/bdc/bdc_udc.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include "bdc.h" diff --git a/drivers/usb/gadget/udc/cdns2/cdns2-ep0.c b/drivers/usb/gadget/udc/cdns2/cdns2-ep0.c index fa12a5d46f2e..a5a9d395fd0d 100644 --- a/drivers/usb/gadget/udc/cdns2/cdns2-ep0.c +++ b/drivers/usb/gadget/udc/cdns2/cdns2-ep0.c @@ -8,7 +8,7 @@ */ #include -#include +#include #include "cdns2-gadget.h" #include "cdns2-trace.h" diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c index ff7bee78bcc4..8820d9924448 100644 --- a/drivers/usb/gadget/udc/dummy_hcd.c +++ b/drivers/usb/gadget/udc/dummy_hcd.c @@ -42,7 +42,7 @@ #include #include #include -#include +#include #define DRIVER_DESC "USB Host+Gadget Emulator" #define DRIVER_VERSION "02 May 2005" diff --git a/drivers/usb/gadget/udc/fsl_udc_core.c b/drivers/usb/gadget/udc/fsl_udc_core.c index 3432ebfae978..0cabd4eee6ac 100644 --- a/drivers/usb/gadget/udc/fsl_udc_core.c +++ b/drivers/usb/gadget/udc/fsl_udc_core.c @@ -39,7 +39,7 @@ #include #include -#include +#include #include #include "fsl_usb2_udc.h" diff --git a/drivers/usb/gadget/udc/goku_udc.c b/drivers/usb/gadget/udc/goku_udc.c index 5ffb3d5c635b..b860c2e76449 100644 --- a/drivers/usb/gadget/udc/goku_udc.c +++ b/drivers/usb/gadget/udc/goku_udc.c @@ -40,7 +40,7 @@ #include #include #include -#include +#include #include "goku_udc.h" diff --git a/drivers/usb/gadget/udc/mv_udc_core.c b/drivers/usb/gadget/udc/mv_udc_core.c index 78308b64955d..71012b282891 100644 --- a/drivers/usb/gadget/udc/mv_udc_core.c +++ b/drivers/usb/gadget/udc/mv_udc_core.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include "mv_udc.h" diff --git a/drivers/usb/gadget/udc/net2272.c b/drivers/usb/gadget/udc/net2272.c index 19bbc38f3d35..9230db57dab7 100644 --- a/drivers/usb/gadget/udc/net2272.c +++ b/drivers/usb/gadget/udc/net2272.c @@ -28,7 +28,7 @@ #include #include -#include +#include #include "net2272.h" diff --git a/drivers/usb/gadget/udc/net2280.c b/drivers/usb/gadget/udc/net2280.c index 1b929c519cd7..b2903e4bbf54 100644 --- a/drivers/usb/gadget/udc/net2280.c +++ b/drivers/usb/gadget/udc/net2280.c @@ -56,7 +56,7 @@ #include #include -#include +#include #define DRIVER_DESC "PLX NET228x/USB338x USB Peripheral Controller" #define DRIVER_VERSION "2005 Sept 27/v3.0" diff --git a/drivers/usb/gadget/udc/omap_udc.c b/drivers/usb/gadget/udc/omap_udc.c index e13b8ec8ef8a..61a45e4657d5 100644 --- a/drivers/usb/gadget/udc/omap_udc.c +++ b/drivers/usb/gadget/udc/omap_udc.c @@ -36,7 +36,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/usb/gadget/udc/pxa25x_udc.c b/drivers/usb/gadget/udc/pxa25x_udc.c index 1ac26cb49ecf..7c96fc9f680f 100644 --- a/drivers/usb/gadget/udc/pxa25x_udc.c +++ b/drivers/usb/gadget/udc/pxa25x_udc.c @@ -38,7 +38,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/usb/gadget/udc/snps_udc_core.c b/drivers/usb/gadget/udc/snps_udc_core.c index 2fc5d4d277bc..cd89532adec2 100644 --- a/drivers/usb/gadget/udc/snps_udc_core.c +++ b/drivers/usb/gadget/udc/snps_udc_core.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include "amd5536udc.h" static void udc_setup_endpoints(struct udc *dev); diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c index 802bfafb1012..cbc0b86fcc36 100644 --- a/drivers/usb/host/ehci-hcd.c +++ b/drivers/usb/host/ehci-hcd.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #if defined(CONFIG_PPC_PS3) #include diff --git a/drivers/usb/host/isp1362-hcd.c b/drivers/usb/host/isp1362-hcd.c index a52c3d858f3e..31059c8f94e6 100644 --- a/drivers/usb/host/isp1362-hcd.c +++ b/drivers/usb/host/isp1362-hcd.c @@ -83,7 +83,7 @@ #include #include -#include +#include static int dbg_level; #ifdef ISP1362_DEBUG diff --git a/drivers/usb/host/ohci-da8xx.c b/drivers/usb/host/ohci-da8xx.c index d9adae53466b..d2b67da76762 100644 --- a/drivers/usb/host/ohci-da8xx.c +++ b/drivers/usb/host/ohci-da8xx.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include "ohci.h" diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c index 5cec7640e913..9b24181fee60 100644 --- a/drivers/usb/host/ohci-hcd.c +++ b/drivers/usb/host/ohci-hcd.c @@ -44,7 +44,7 @@ #include #include -#include +#include #include diff --git a/drivers/usb/host/oxu210hp-hcd.c b/drivers/usb/host/oxu210hp-hcd.c index 3f871fe62b90..ca3859463ba1 100644 --- a/drivers/usb/host/oxu210hp-hcd.c +++ b/drivers/usb/host/oxu210hp-hcd.c @@ -27,7 +27,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/usb/host/sl811-hcd.c b/drivers/usb/host/sl811-hcd.c index 2b871540bb50..92f2d1238448 100644 --- a/drivers/usb/host/sl811-hcd.c +++ b/drivers/usb/host/sl811-hcd.c @@ -54,7 +54,7 @@ #include #include #include -#include +#include #include "sl811.h" diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index d27c30ac17fd..8d774f19271e 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -10,7 +10,7 @@ #include -#include +#include #include #include "xhci.h" diff --git a/drivers/usb/host/xhci-pci-renesas.c b/drivers/usb/host/xhci-pci-renesas.c index 30cc5a1380a5..65fc9319d5e7 100644 --- a/drivers/usb/host/xhci-pci-renesas.c +++ b/drivers/usb/host/xhci-pci-renesas.c @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include "xhci.h" #include "xhci-trace.h" diff --git a/drivers/usb/isp1760/isp1760-hcd.c b/drivers/usb/isp1760/isp1760-hcd.c index 0e5e4cb74c87..add2d2e3b61b 100644 --- a/drivers/usb/isp1760/isp1760-hcd.c +++ b/drivers/usb/isp1760/isp1760-hcd.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include "isp1760-core.h" diff --git a/drivers/usb/misc/usb-ljca.c b/drivers/usb/misc/usb-ljca.c index 1a8d5e80b9ae..01ceafc4ab78 100644 --- a/drivers/usb/misc/usb-ljca.c +++ b/drivers/usb/misc/usb-ljca.c @@ -18,7 +18,7 @@ #include #include -#include +#include /* command flags */ #define LJCA_ACK_FLAG BIT(0) diff --git a/drivers/usb/musb/musb_virthub.c b/drivers/usb/musb/musb_virthub.c index 2b2164e028b3..ce6f25a9650b 100644 --- a/drivers/usb/musb/musb_virthub.c +++ b/drivers/usb/musb/musb_virthub.c @@ -14,7 +14,7 @@ #include #include -#include +#include #include "musb_core.h" diff --git a/drivers/usb/phy/phy-fsl-usb.c b/drivers/usb/phy/phy-fsl-usb.c index 1ebbf189a535..c5c6b818998e 100644 --- a/drivers/usb/phy/phy-fsl-usb.c +++ b/drivers/usb/phy/phy-fsl-usb.c @@ -27,7 +27,7 @@ #include #include -#include +#include #include "phy-fsl-usb.h" diff --git a/drivers/usb/serial/aircable.c b/drivers/usb/serial/aircable.c index aa517242d060..2a76f1f0ee4f 100644 --- a/drivers/usb/serial/aircable.c +++ b/drivers/usb/serial/aircable.c @@ -35,7 +35,7 @@ * */ -#include +#include #include #include #include diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index 02945ccf531d..d10e4c4848a0 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #define DEFAULT_BAUD_RATE 9600 #define DEFAULT_TIMEOUT 1000 diff --git a/drivers/usb/serial/cypress_m8.c b/drivers/usb/serial/cypress_m8.c index ce9134bb30f3..e29569d65991 100644 --- a/drivers/usb/serial/cypress_m8.c +++ b/drivers/usb/serial/cypress_m8.c @@ -36,7 +36,7 @@ #include #include #include -#include +#include #include "cypress_m8.h" diff --git a/drivers/usb/serial/kl5kusb105.c b/drivers/usb/serial/kl5kusb105.c index a2c0bebc041f..d36155b6d2bf 100644 --- a/drivers/usb/serial/kl5kusb105.c +++ b/drivers/usb/serial/kl5kusb105.c @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include #include #include "kl5kusb105.h" diff --git a/drivers/usb/serial/mct_u232.c b/drivers/usb/serial/mct_u232.c index e5a139ed5d90..2bce8cc03aca 100644 --- a/drivers/usb/serial/mct_u232.c +++ b/drivers/usb/serial/mct_u232.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/usb/serial/mxuport.c b/drivers/usb/serial/mxuport.c index 57e4f2b215d8..ad5fdf55a02e 100644 --- a/drivers/usb/serial/mxuport.c +++ b/drivers/usb/serial/mxuport.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include /* Definitions for the vendor ID and device ID */ #define MX_USBSERIAL_VID 0x110A diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index ab48f8875249..ad41363e3cea 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include "pl2303.h" diff --git a/drivers/usb/serial/quatech2.c b/drivers/usb/serial/quatech2.c index 4167a45d1be3..a317bdbd00ad 100644 --- a/drivers/usb/serial/quatech2.c +++ b/drivers/usb/serial/quatech2.c @@ -9,7 +9,7 @@ * */ -#include +#include #include #include #include diff --git a/drivers/usb/typec/ucsi/ucsi.h b/drivers/usb/typec/ucsi/ucsi.h index 4a017eb6a65b..1cf5aad4c23a 100644 --- a/drivers/usb/typec/ucsi/ucsi.h +++ b/drivers/usb/typec/ucsi/ucsi.h @@ -11,7 +11,7 @@ #include #include #include -#include +#include /* -------------------------------------------------------------------------- */ diff --git a/drivers/usb/typec/ucsi/ucsi_ccg.c b/drivers/usb/typec/ucsi/ucsi_ccg.c index b3ec799fc873..ba58d11907bc 100644 --- a/drivers/usb/typec/ucsi/ucsi_ccg.c +++ b/drivers/usb/typec/ucsi/ucsi_ccg.c @@ -18,7 +18,7 @@ #include #include -#include +#include #include "ucsi.h" enum enum_fw_mode { diff --git a/drivers/usb/typec/ucsi/ucsi_stm32g0.c b/drivers/usb/typec/ucsi/ucsi_stm32g0.c index ddbec2b78c8e..6923fad31d79 100644 --- a/drivers/usb/typec/ucsi/ucsi_stm32g0.c +++ b/drivers/usb/typec/ucsi/ucsi_stm32g0.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include "ucsi.h" diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 006ffacf1c56..7db9bbdfb038 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/video/fbdev/aty/mach64_accel.c b/drivers/video/fbdev/aty/mach64_accel.c index e4b2c89baee2..826fb04de64c 100644 --- a/drivers/video/fbdev/aty/mach64_accel.c +++ b/drivers/video/fbdev/aty/mach64_accel.c @@ -5,7 +5,7 @@ */ #include -#include +#include #include #include