From 80f0a1f99983296be587325004acf72dd11eccd8 Mon Sep 17 00:00:00 2001 From: Rolf Eike Beer Date: Mon, 13 Sep 2021 12:02:56 +0200 Subject: [PATCH 001/235] workqueue: annotate alloc_workqueue() as printf This also enables checking of alloc_ordered_workqueue(). Signed-off-by: Rolf Eike Beer Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 2ebef6b1a3d6..74d3c1efd9bb 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -399,9 +399,8 @@ extern struct workqueue_struct *system_freezable_power_efficient_wq; * RETURNS: * Pointer to the allocated workqueue on success, %NULL on failure. */ -struct workqueue_struct *alloc_workqueue(const char *fmt, - unsigned int flags, - int max_active, ...); +__printf(1, 4) struct workqueue_struct * +alloc_workqueue(const char *fmt, unsigned int flags, int max_active, ...); /** * alloc_ordered_workqueue - allocate an ordered workqueue From 22b1255792c033781dbe42b63e28501d38032b7e Mon Sep 17 00:00:00 2001 From: Chunguang Xu Date: Mon, 13 Sep 2021 13:09:14 +0800 Subject: [PATCH 002/235] docs/cgroup: remove some duplicate words When I tried to add some new entries to cgroup-v2.rst, I found that the description of memory.events had some repetitive words, so I tried to delete them. Signed-off-by: Chunguang Xu Signed-off-by: Tejun Heo --- Documentation/admin-guide/cgroup-v2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index babbe04c8d37..fc53ae0e96b9 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1226,7 +1226,7 @@ PAGE_SIZE multiple when read back. Note that all fields in this file are hierarchical and the file modified event can be generated due to an event down the - hierarchy. 
For for the local events at the cgroup level see + hierarchy. For the local events at the cgroup level see memory.events.local. low From b94f9ac79a7395c2d6171cc753cc27942df0be73 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 9 Sep 2021 22:42:56 -0400 Subject: [PATCH 003/235] cgroup/cpuset: Change references of cpuset_mutex to cpuset_rwsem Since commit 1243dc518c9d ("cgroup/cpuset: Convert cpuset_mutex to percpu_rwsem"), cpuset_mutex has been replaced by cpuset_rwsem which is a percpu rwsem. However, the comments in kernel/cgroup/cpuset.c still reference cpuset_mutex which are now incorrect. Change all the references of cpuset_mutex to cpuset_rwsem. Fixes: 1243dc518c9d ("cgroup/cpuset: Convert cpuset_mutex to percpu_rwsem") Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 56 ++++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index df1ccf4558f8..2a9695ccb65f 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -311,17 +311,19 @@ static struct cpuset top_cpuset = { if (is_cpuset_online(((des_cs) = css_cs((pos_css))))) /* - * There are two global locks guarding cpuset structures - cpuset_mutex and + * There are two global locks guarding cpuset structures - cpuset_rwsem and * callback_lock. We also require taking task_lock() when dereferencing a * task's cpuset pointer. See "The task_lock() exception", at the end of this - * comment. + * comment. The cpuset code uses only cpuset_rwsem write lock. Other + * kernel subsystems can use cpuset_read_lock()/cpuset_read_unlock() to + * prevent change to cpuset structures. * * A task must hold both locks to modify cpusets. If a task holds - * cpuset_mutex, then it blocks others wanting that mutex, ensuring that it + * cpuset_rwsem, it blocks others wanting that rwsem, ensuring that it * is the only task able to also acquire callback_lock and be able to * modify cpusets. 
It can perform various checks on the cpuset structure * first, knowing nothing will change. It can also allocate memory while - * just holding cpuset_mutex. While it is performing these checks, various + * just holding cpuset_rwsem. While it is performing these checks, various * callback routines can briefly acquire callback_lock to query cpusets. * Once it is ready to make the changes, it takes callback_lock, blocking * everyone else. @@ -393,7 +395,7 @@ static inline bool is_in_v2_mode(void) * One way or another, we guarantee to return some non-empty subset * of cpu_online_mask. * - * Call with callback_lock or cpuset_mutex held. + * Call with callback_lock or cpuset_rwsem held. */ static void guarantee_online_cpus(struct task_struct *tsk, struct cpumask *pmask) @@ -435,7 +437,7 @@ static void guarantee_online_cpus(struct task_struct *tsk, * One way or another, we guarantee to return some non-empty subset * of node_states[N_MEMORY]. * - * Call with callback_lock or cpuset_mutex held. + * Call with callback_lock or cpuset_rwsem held. */ static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask) { @@ -447,7 +449,7 @@ static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask) /* * update task's spread flag if cpuset's page/slab spread flag is set * - * Call with callback_lock or cpuset_mutex held. + * Call with callback_lock or cpuset_rwsem held. */ static void cpuset_update_task_spread_flag(struct cpuset *cs, struct task_struct *tsk) @@ -468,7 +470,7 @@ static void cpuset_update_task_spread_flag(struct cpuset *cs, * * One cpuset is a subset of another if all its allowed CPUs and * Memory Nodes are a subset of the other, and its exclusive flags - * are only set if the other's are set. Call holding cpuset_mutex. + * are only set if the other's are set. Call holding cpuset_rwsem. 
*/ static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q) @@ -577,7 +579,7 @@ static inline void free_cpuset(struct cpuset *cs) * If we replaced the flag and mask values of the current cpuset * (cur) with those values in the trial cpuset (trial), would * our various subset and exclusive rules still be valid? Presumes - * cpuset_mutex held. + * cpuset_rwsem held. * * 'cur' is the address of an actual, in-use cpuset. Operations * such as list traversal that depend on the actual address of the @@ -700,7 +702,7 @@ static void update_domain_attr_tree(struct sched_domain_attr *dattr, rcu_read_unlock(); } -/* Must be called with cpuset_mutex held. */ +/* Must be called with cpuset_rwsem held. */ static inline int nr_cpusets(void) { /* jump label reference count + the top-level cpuset */ @@ -726,7 +728,7 @@ static inline int nr_cpusets(void) * domains when operating in the severe memory shortage situations * that could cause allocation failures below. * - * Must be called with cpuset_mutex held. + * Must be called with cpuset_rwsem held. * * The three key local variables below are: * cp - cpuset pointer, used (together with pos_css) to perform a @@ -1005,7 +1007,7 @@ partition_and_rebuild_sched_domains(int ndoms_new, cpumask_var_t doms_new[], * 'cpus' is removed, then call this routine to rebuild the * scheduler's dynamic sched domains. * - * Call with cpuset_mutex held. Takes cpus_read_lock(). + * Call with cpuset_rwsem held. Takes cpus_read_lock(). */ static void rebuild_sched_domains_locked(void) { @@ -1078,7 +1080,7 @@ void rebuild_sched_domains(void) * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed * * Iterate through each task of @cs updating its cpus_allowed to the - * effective cpuset's. As this function is called with cpuset_mutex held, + * effective cpuset's. As this function is called with cpuset_rwsem held, * cpuset membership stays stable. 
*/ static void update_tasks_cpumask(struct cpuset *cs) @@ -1347,7 +1349,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd, * * On legacy hierarchy, effective_cpus will be the same with cpu_allowed. * - * Called with cpuset_mutex held + * Called with cpuset_rwsem held */ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp) { @@ -1704,12 +1706,12 @@ static void *cpuset_being_rebound; * @cs: the cpuset in which each task's mems_allowed mask needs to be changed * * Iterate through each task of @cs updating its mems_allowed to the - * effective cpuset's. As this function is called with cpuset_mutex held, + * effective cpuset's. As this function is called with cpuset_rwsem held, * cpuset membership stays stable. */ static void update_tasks_nodemask(struct cpuset *cs) { - static nodemask_t newmems; /* protected by cpuset_mutex */ + static nodemask_t newmems; /* protected by cpuset_rwsem */ struct css_task_iter it; struct task_struct *task; @@ -1722,7 +1724,7 @@ static void update_tasks_nodemask(struct cpuset *cs) * take while holding tasklist_lock. Forks can happen - the * mpol_dup() cpuset_being_rebound check will catch such forks, * and rebind their vma mempolicies too. Because we still hold - * the global cpuset_mutex, we know that no other rebind effort + * the global cpuset_rwsem, we know that no other rebind effort * will be contending for the global variable cpuset_being_rebound. * It's ok if we rebind the same mm twice; mpol_rebind_mm() * is idempotent. Also migrate pages in each mm to new nodes. @@ -1768,7 +1770,7 @@ static void update_tasks_nodemask(struct cpuset *cs) * * On legacy hierarchy, effective_mems will be the same with mems_allowed. 
* - * Called with cpuset_mutex held + * Called with cpuset_rwsem held */ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems) { @@ -1821,7 +1823,7 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems) * mempolicies and if the cpuset is marked 'memory_migrate', * migrate the tasks pages to the new memory. * - * Call with cpuset_mutex held. May take callback_lock during call. + * Call with cpuset_rwsem held. May take callback_lock during call. * Will take tasklist_lock, scan tasklist for tasks in cpuset cs, * lock each such tasks mm->mmap_lock, scan its vma's and rebind * their mempolicies to the cpusets new mems_allowed. @@ -1911,7 +1913,7 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val) * @cs: the cpuset in which each task's spread flags needs to be changed * * Iterate through each task of @cs updating its spread flags. As this - * function is called with cpuset_mutex held, cpuset membership stays + * function is called with cpuset_rwsem held, cpuset membership stays * stable. */ static void update_tasks_flags(struct cpuset *cs) @@ -1931,7 +1933,7 @@ static void update_tasks_flags(struct cpuset *cs) * cs: the cpuset to update * turning_on: whether the flag is being set or cleared * - * Call with cpuset_mutex held. + * Call with cpuset_rwsem held. */ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, @@ -1980,7 +1982,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, * cs: the cpuset to update * new_prs: new partition root state * - * Call with cpuset_mutex held. + * Call with cpuset_rwsem held. 
*/ static int update_prstate(struct cpuset *cs, int new_prs) { @@ -2167,7 +2169,7 @@ static int fmeter_getrate(struct fmeter *fmp) static struct cpuset *cpuset_attach_old_cs; -/* Called by cgroups to determine if a cpuset is usable; cpuset_mutex held */ +/* Called by cgroups to determine if a cpuset is usable; cpuset_rwsem held */ static int cpuset_can_attach(struct cgroup_taskset *tset) { struct cgroup_subsys_state *css; @@ -2219,7 +2221,7 @@ static void cpuset_cancel_attach(struct cgroup_taskset *tset) } /* - * Protected by cpuset_mutex. cpus_attach is used only by cpuset_attach() + * Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach() * but we can't allocate it dynamically there. Define it global and * allocate from cpuset_init(). */ @@ -2227,7 +2229,7 @@ static cpumask_var_t cpus_attach; static void cpuset_attach(struct cgroup_taskset *tset) { - /* static buf protected by cpuset_mutex */ + /* static buf protected by cpuset_rwsem */ static nodemask_t cpuset_attach_nodemask_to; struct task_struct *task; struct task_struct *leader; @@ -2417,7 +2419,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of, * operation like this one can lead to a deadlock through kernfs * active_ref protection. Let's break the protection. Losing the * protection is okay as we check whether @cs is online after - * grabbing cpuset_mutex anyway. This only happens on the legacy + * grabbing cpuset_rwsem anyway. This only happens on the legacy * hierarchies. */ css_get(&cs->css); @@ -3672,7 +3674,7 @@ void __cpuset_memory_pressure_bump(void) * - Used for /proc/<pid>/cpuset. * - No need to task_lock(tsk) on this tsk->cpuset reference, as it * doesn't really matter if tsk->cpuset changes after we read it, - * and we take cpuset_mutex, keeping cpuset_attach() from changing it + * and we take cpuset_rwsem, keeping cpuset_attach() from changing it * anyway. 
*/ int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns, From c0002d11d79900f8aa5c8375336434940d6afedf Mon Sep 17 00:00:00 2001 From: ArthurChiao Date: Wed, 8 Sep 2021 16:08:15 +0800 Subject: [PATCH 004/235] cgroupv2, docs: fix misinformation in "device controller" section Hotmail was rejected by the mailing list, switched to gmail to resend. 1. Clarify cgroup BPF program type and attach type; 2. Fix broken file path. Signed-off-by: ArthurChiao Signed-off-by: Tejun Heo --- Documentation/admin-guide/cgroup-v2.rst | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index fc53ae0e96b9..4d8c27eca96b 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -2170,19 +2170,19 @@ existing device files. Cgroup v2 device controller has no interface files and is implemented on top of cgroup BPF. To control access to device files, a user may -create bpf programs of the BPF_CGROUP_DEVICE type and attach them -to cgroups. On an attempt to access a device file, corresponding -BPF programs will be executed, and depending on the return value -the attempt will succeed or fail with -EPERM. +create bpf programs of type BPF_PROG_TYPE_CGROUP_DEVICE and attach +them to cgroups with BPF_CGROUP_DEVICE flag. On an attempt to access a +device file, corresponding BPF programs will be executed, and depending +on the return value the attempt will succeed or fail with -EPERM. -A BPF_CGROUP_DEVICE program takes a pointer to the bpf_cgroup_dev_ctx -structure, which describes the device access attempt: access type -(mknod/read/write) and device (type, major and minor numbers). -If the program returns 0, the attempt fails with -EPERM, otherwise -it succeeds. 
+A BPF_PROG_TYPE_CGROUP_DEVICE program takes a pointer to the +bpf_cgroup_dev_ctx structure, which describes the device access attempt: +access type (mknod/read/write) and device (type, major and minor numbers). +If the program returns 0, the attempt fails with -EPERM, otherwise it +succeeds. -An example of BPF_CGROUP_DEVICE program may be found in the kernel -source tree in the tools/testing/selftests/bpf/progs/dev_cgroup.c file. +An example of BPF_PROG_TYPE_CGROUP_DEVICE program may be found in +tools/testing/selftests/bpf/progs/dev_cgroup.c in the kernel source tree. RDMA From d67ed2510d28a1eb33171010d35cf52178cfcbdd Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 24 Sep 2021 20:29:51 -0700 Subject: [PATCH 005/235] xtensa: use CONFIG_USE_OF instead of CONFIG_OF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CONFIG_OF can be set by a randconfig or by a user -- without setting the early flattree option (OF_EARLY_FLATTREE). This causes build errors. However, if randconfig or a user sets USE_OF in the Xtensa config, the right kconfig symbols are set to fix the build. Fixes these build errors: ../arch/xtensa/kernel/setup.c:67:19: error: ‘__dtb_start’ undeclared here (not in a function); did you mean ‘dtb_start’? 67 | void *dtb_start = __dtb_start; | ^~~~~~~~~~~ ../arch/xtensa/kernel/setup.c: In function 'xtensa_dt_io_area': ../arch/xtensa/kernel/setup.c:201:14: error: implicit declaration of function 'of_flat_dt_is_compatible'; did you mean 'of_machine_is_compatible'? 
[-Werror=implicit-function-declaration] 201 | if (!of_flat_dt_is_compatible(node, "simple-bus")) ../arch/xtensa/kernel/setup.c:204:18: error: implicit declaration of function 'of_get_flat_dt_prop' [-Werror=implicit-function-declaration] 204 | ranges = of_get_flat_dt_prop(node, "ranges", &len); ../arch/xtensa/kernel/setup.c:204:16: error: assignment to 'const __be32 *' {aka 'const unsigned int *'} from 'int' makes pointer from integer without a cast [-Werror=int-conversion] 204 | ranges = of_get_flat_dt_prop(node, "ranges", &len); | ^ ../arch/xtensa/kernel/setup.c: In function 'early_init_devtree': ../arch/xtensa/kernel/setup.c:228:9: error: implicit declaration of function 'early_init_dt_scan'; did you mean 'early_init_devtree'? [-Werror=implicit-function-declaration] 228 | early_init_dt_scan(params); ../arch/xtensa/kernel/setup.c:229:9: error: implicit declaration of function 'of_scan_flat_dt' [-Werror=implicit-function-declaration] 229 | of_scan_flat_dt(xtensa_dt_io_area, NULL); xtensa-elf-ld: arch/xtensa/mm/mmu.o:(.text+0x0): undefined reference to `xtensa_kio_paddr' Fixes: da844a81779e ("xtensa: add device trees support") Fixes: 6cb971114f63 ("xtensa: remap io area defined in device tree") Signed-off-by: Randy Dunlap Signed-off-by: Max Filippov --- arch/xtensa/include/asm/kmem_layout.h | 2 +- arch/xtensa/kernel/setup.c | 12 ++++++------ arch/xtensa/mm/mmu.c | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/xtensa/include/asm/kmem_layout.h b/arch/xtensa/include/asm/kmem_layout.h index 7cbf68ca7106..6fc05cba61a2 100644 --- a/arch/xtensa/include/asm/kmem_layout.h +++ b/arch/xtensa/include/asm/kmem_layout.h @@ -78,7 +78,7 @@ #endif #define XCHAL_KIO_SIZE 0x10000000 -#if (!XCHAL_HAVE_PTP_MMU || XCHAL_HAVE_SPANNING_WAY) && defined(CONFIG_OF) +#if (!XCHAL_HAVE_PTP_MMU || XCHAL_HAVE_SPANNING_WAY) && defined(CONFIG_USE_OF) #define XCHAL_KIO_PADDR xtensa_get_kio_paddr() #ifndef __ASSEMBLY__ extern unsigned long xtensa_kio_paddr; diff --git 
a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index ed184106e4cf..ee9082a142fe 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -63,7 +63,7 @@ extern unsigned long initrd_end; extern int initrd_below_start_ok; #endif -#ifdef CONFIG_OF +#ifdef CONFIG_USE_OF void *dtb_start = __dtb_start; #endif @@ -125,7 +125,7 @@ __tagtable(BP_TAG_INITRD, parse_tag_initrd); #endif /* CONFIG_BLK_DEV_INITRD */ -#ifdef CONFIG_OF +#ifdef CONFIG_USE_OF static int __init parse_tag_fdt(const bp_tag_t *tag) { @@ -135,7 +135,7 @@ static int __init parse_tag_fdt(const bp_tag_t *tag) __tagtable(BP_TAG_FDT, parse_tag_fdt); -#endif /* CONFIG_OF */ +#endif /* CONFIG_USE_OF */ static int __init parse_tag_cmdline(const bp_tag_t* tag) { @@ -183,7 +183,7 @@ static int __init parse_bootparam(const bp_tag_t *tag) } #endif -#ifdef CONFIG_OF +#ifdef CONFIG_USE_OF #if !XCHAL_HAVE_PTP_MMU || XCHAL_HAVE_SPANNING_WAY unsigned long xtensa_kio_paddr = XCHAL_KIO_DEFAULT_PADDR; @@ -232,7 +232,7 @@ void __init early_init_devtree(void *params) strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); } -#endif /* CONFIG_OF */ +#endif /* CONFIG_USE_OF */ /* * Initialize architecture. 
(Early stage) @@ -253,7 +253,7 @@ void __init init_arch(bp_tag_t *bp_start) if (bp_start) parse_bootparam(bp_start); -#ifdef CONFIG_OF +#ifdef CONFIG_USE_OF early_init_devtree(dtb_start); #endif diff --git a/arch/xtensa/mm/mmu.c b/arch/xtensa/mm/mmu.c index 7e4d97dc8bd8..38acda4f04e8 100644 --- a/arch/xtensa/mm/mmu.c +++ b/arch/xtensa/mm/mmu.c @@ -101,7 +101,7 @@ void init_mmu(void) void init_kio(void) { -#if XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY && defined(CONFIG_OF) +#if XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY && defined(CONFIG_USE_OF) /* * Update the IO area mapping in case xtensa_kio_paddr has changed */ From 6489f8d0e1d93a3603d8dad8125797559e4cf2a2 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Mon, 27 Sep 2021 09:46:33 -0700 Subject: [PATCH 006/235] xtensa: call irqchip_init only when CONFIG_USE_OF is selected During boot time kernel configured with OF=y but USE_OF=n displays the following warnings and hangs shortly after starting userspace: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 0 at kernel/irq/irqdomain.c:695 irq_create_mapping_affinity+0x29/0xc0 irq_create_mapping_affinity(, 6) called with NULL domain CPU: 0 PID: 0 Comm: swapper Not tainted 5.15.0-rc3-00001-gd67ed2510d28 #30 Call Trace: __warn+0x69/0xc4 warn_slowpath_fmt+0x6c/0x94 irq_create_mapping_affinity+0x29/0xc0 local_timer_setup+0x40/0x88 time_init+0xb1/0xe8 start_kernel+0x31d/0x3f4 _startup+0x13b/0x13b ---[ end trace 1e6630e1c5eda35b ]--- ------------[ cut here ]------------ WARNING: CPU: 0 PID: 0 at arch/xtensa/kernel/time.c:141 local_timer_setup+0x58/0x88 error: can't map timer irq CPU: 0 PID: 0 Comm: swapper Tainted: G W 5.15.0-rc3-00001-gd67ed2510d28 #30 Call Trace: __warn+0x69/0xc4 warn_slowpath_fmt+0x6c/0x94 local_timer_setup+0x58/0x88 time_init+0xb1/0xe8 start_kernel+0x31d/0x3f4 _startup+0x13b/0x13b ---[ end trace 1e6630e1c5eda35c ]--- Failed to request irq 0 (timer) Fix that by calling irqchip_init only when CONFIG_USE_OF is selected and calling legacy 
interrupt controller init otherwise. Fixes: da844a81779e ("xtensa: add device trees support") Signed-off-by: Max Filippov --- arch/xtensa/kernel/irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c index 764b54bef701..15051a8a1539 100644 --- a/arch/xtensa/kernel/irq.c +++ b/arch/xtensa/kernel/irq.c @@ -143,7 +143,7 @@ unsigned xtensa_get_ext_irq_no(unsigned irq) void __init init_IRQ(void) { -#ifdef CONFIG_OF +#ifdef CONFIG_USE_OF irqchip_init(); #else #ifdef CONFIG_HAVE_SMP From 0d67e332e6df72f43eaa21228daa3a79e23093f3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 27 Sep 2021 14:15:10 +0200 Subject: [PATCH 007/235] module: fix clang CFI with MODULE_UNLOAD=n When CONFIG_MODULE_UNLOAD is disabled, the module->exit member is not defined, causing a build failure: kernel/module.c:4493:8: error: no member named 'exit' in 'struct module' mod->exit = *exit; add an #ifdef block around this. Fixes: cf68fffb66d6 ("add support for Clang CFI") Acked-by: Kees Cook Reviewed-by: Sami Tolvanen Reviewed-by: Miroslav Benes Signed-off-by: Arnd Bergmann Signed-off-by: Jessica Yu --- kernel/module.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/module.c b/kernel/module.c index 40ec9a030eec..5c26a76e800b 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -4489,8 +4489,10 @@ static void cfi_init(struct module *mod) /* Fix init/exit functions to point to the CFI jump table */ if (init) mod->init = *init; +#ifdef CONFIG_MODULE_UNLOAD if (exit) mod->exit = *exit; +#endif cfi_module_add(mod, module_addr_min); #endif From 23c216b335d1fbd716076e8263b54a714ea3cf0e Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 30 Sep 2021 13:44:54 +1000 Subject: [PATCH 008/235] powerpc/iommu: Report the correct most efficient DMA mask for PCI devices According to dma-api.rst, the dma_get_required_mask() helper should return "the mask that the platform requires to operate efficiently". 
Which in the case of PPC64 means the bypass mask and not a mask from an IOMMU table which is shorter and slower to use due to map/unmap operations (especially expensive on "pseries"). However the existing implementation ignores the possibility of bypassing and returns the IOMMU table mask on the pseries platform which makes some drivers (mpt3sas is one example) choose 32bit DMA even though bypass is supported. The powernv platform sort of handles it by having a bigger default window with a mask >=40 but it only works as drivers choose 63/64bit if the required mask is >32 which is rather pointless. This reintroduces the bypass capability check to let drivers make a better choice of the DMA mask. Fixes: f1565c24b596 ("powerpc: use the generic dma_ops_bypass mode") Signed-off-by: Alexey Kardashevskiy Reviewed-by: Christoph Hellwig Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210930034454.95794-1-aik@ozlabs.ru --- arch/powerpc/kernel/dma-iommu.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index 111249fd619d..038ce8d9061d 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -184,6 +184,15 @@ u64 dma_iommu_get_required_mask(struct device *dev) struct iommu_table *tbl = get_iommu_table_base(dev); u64 mask; + if (dev_is_pci(dev)) { + u64 bypass_mask = dma_direct_get_required_mask(dev); + + if (dma_iommu_dma_supported(dev, bypass_mask)) { + dev_info(dev, "%s: returning bypass mask 0x%llx\n", __func__, bypass_mask); + return bypass_mask; + } + } + if (!tbl) return 0; From 8ec59ac3ad29891c0afef627640df36f2daa0349 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 29 Sep 2021 09:35:40 +0200 Subject: [PATCH 009/235] ALSA: usb-audio: Fix a missing error check in scarlett gen2 mixer The check of the returned error code is missing in scarlett2_update_monitor_other(). Let's fix it. 
Fixes: d5bda7e03982 ("ALSA: usb-audio: scarlett2: Add support for the talkback feature") Reported-by: kernel test robot Cc: Link: https://lore.kernel.org/r/202109131831.9IodEzRx-lkp@intel.com Link: https://lore.kernel.org/r/20210929073540.9611-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/mixer_scarlett_gen2.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c index 3d5848d5481b..53ebabf42472 100644 --- a/sound/usb/mixer_scarlett_gen2.c +++ b/sound/usb/mixer_scarlett_gen2.c @@ -2450,6 +2450,8 @@ static int scarlett2_update_monitor_other(struct usb_mixer_interface *mixer) err = scarlett2_usb_get_config(mixer, SCARLETT2_CONFIG_TALKBACK_MAP, 1, &bitmap); + if (err < 0) + return err; for (i = 0; i < num_mixes; i++, bitmap >>= 1) private->talkback_map[i] = bitmap & 1; } From eb676622846b34a751e2ff9b5910a5322a4e0000 Mon Sep 17 00:00:00 2001 From: John Liu Date: Thu, 30 Sep 2021 13:53:16 +0200 Subject: [PATCH 010/235] ALSA: hda/realtek: Enable 4-speaker output for Dell Precision 5560 laptop The Dell Precision 5560 laptop appears to use the 4-speakers-on-ALC289 audio just like its sibling product XPS 9510, so it requires the same quirk to enable woofer output. Tested on my Dell Precision 5560. 
Signed-off-by: John Liu Cc: Link: https://lore.kernel.org/r/20210930115316.659-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 4407f7da57c4..f17bfc6007d3 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8452,6 +8452,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0a30, "Dell", ALC236_FIXUP_DELL_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1028, 0x0a58, "Dell", ALC255_FIXUP_DELL_HEADSET_MIC), SND_PCI_QUIRK(0x1028, 0x0a61, "Dell XPS 15 9510", ALC289_FIXUP_DUAL_SPK), + SND_PCI_QUIRK(0x1028, 0x0a62, "Dell Precision 5560", ALC289_FIXUP_DUAL_SPK), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), From 1f8763c59c4ec6254d629fe77c0a52220bd907aa Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 30 Sep 2021 13:41:14 +0200 Subject: [PATCH 011/235] ALSA: seq: Fix a potential UAF by wrong private_free call order John Keeping reported and posted a patch for a potential UAF in rawmidi sequencer destruction: the snd_rawmidi_dev_seq_free() may be called after the associated rawmidi object got already freed. After a deeper look, it turned out that the bug is rather the incorrect private_free call order for a snd_seq_device. The snd_seq_device private_free gets called at the release callback of the sequencer device object, while this was rather expected to be executed at the snd_device call chains that runs at the beginning of the whole card-free procedure. It's been broken since the rewrite of sequencer-device binding (although it hasn't surfaced because the sequencer device release happens usually right along with the card device release). 
This patch corrects the private_free call to be done in the right place, at snd_seq_device_dev_free(). Fixes: 7c37ae5c625a ("ALSA: seq: Rewrite sequencer device binding with standard bus") Reported-and-tested-by: John Keeping Cc: Link: https://lore.kernel.org/r/20210930114114.8645-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/seq_device.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/sound/core/seq_device.c b/sound/core/seq_device.c index 382275c5b193..7f3fd8eb016f 100644 --- a/sound/core/seq_device.c +++ b/sound/core/seq_device.c @@ -156,6 +156,8 @@ static int snd_seq_device_dev_free(struct snd_device *device) struct snd_seq_device *dev = device->device_data; cancel_autoload_drivers(); + if (dev->private_free) + dev->private_free(dev); put_device(&dev->dev); return 0; } @@ -183,11 +185,7 @@ static int snd_seq_device_dev_disconnect(struct snd_device *device) static void snd_seq_dev_release(struct device *dev) { - struct snd_seq_device *sdev = to_seq_dev(dev); - - if (sdev->private_free) - sdev->private_free(sdev); - kfree(sdev); + kfree(to_seq_dev(dev)); } /* From 2b987fe84429361c7f189568c476d1bd00d2ff7e Mon Sep 17 00:00:00 2001 From: Chris Chiu Date: Fri, 1 Oct 2021 14:28:56 +0800 Subject: [PATCH 012/235] ALSA: hda - Enable headphone mic on Dell Latitude laptops with ALC3254 The headphone mic is not working on Dell Latitude laptops with ALC3254. The codec vendor id is 0x10ec0295 and shares the same pincfg as defined in ALC295_STANDARD_PINS. So the ALC269_FIXUP_DELL1_MIC_NO_PRESENCE will be applied per alc269_pin_fixup_tbl[] but actually the headphone mic is using NID 0x1b instead of 0x1a. The ALC269_FIXUP_DELL4_MIC_NO_PRESENCE needs to be applied instead. Use ALC269_FIXUP_DELL4_MIC_NO_PRESENCE for particular models before a generic fixup comes out. 
Signed-off-by: Chris Chiu Cc: Link: https://lore.kernel.org/r/20211001062856.1037901-1-chris.chiu@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f17bfc6007d3..6bf6151347d8 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8453,6 +8453,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0a58, "Dell", ALC255_FIXUP_DELL_HEADSET_MIC), SND_PCI_QUIRK(0x1028, 0x0a61, "Dell XPS 15 9510", ALC289_FIXUP_DUAL_SPK), SND_PCI_QUIRK(0x1028, 0x0a62, "Dell Precision 5560", ALC289_FIXUP_DUAL_SPK), + SND_PCI_QUIRK(0x1028, 0x0a9d, "Dell Latitude 5430", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1028, 0x0a9e, "Dell Latitude 5430", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), From 06f2ac3d4219bbbfd93d79e01966a42053084f11 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 30 Sep 2021 23:42:01 -0500 Subject: [PATCH 013/235] x86/sev: Return an error on a returned non-zero SW_EXITINFO1[31:0] After returning from a VMGEXIT NAE event, SW_EXITINFO1[31:0] is checked for a value of 1, which indicates an error and that SW_EXITINFO2 contains exception information. However, future versions of the GHCB specification may define new values for SW_EXITINFO1[31:0], so really any non-zero value should be treated as an error. 
Fixes: 597cfe48212a ("x86/boot/compressed/64: Setup a GHCB-based VC Exception handler") Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov Cc: # 5.10+ Link: https://lkml.kernel.org/r/efc772af831e9e7f517f0439b13b41f56bad8784.1633063321.git.thomas.lendacky@amd.com --- arch/x86/kernel/sev-shared.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index 9f90f460a28c..bf1033a62e48 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -130,6 +130,8 @@ static enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, } else { ret = ES_VMM_ERROR; } + } else if (ghcb->save.sw_exit_info_1 & 0xffffffff) { + ret = ES_VMM_ERROR; } else { ret = ES_OK; } From 6e3cd95234dc1eda488f4f487c281bac8fef4d9b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 30 Sep 2021 19:21:39 +0200 Subject: [PATCH 014/235] x86/hpet: Use another crystalball to evaluate HPET usability On recent Intel systems the HPET stops working when the system reaches PC10 idle state. The approach of adding PCI ids to the early quirks to disable HPET on these systems is a whack a mole game which makes no sense. Check for PC10 instead and force disable HPET if supported. The check is overbroad as it does not take ACPI, intel_idle enablement and command line parameters into account. That's fine as long as there is at least PMTIMER available to calibrate the TSC frequency. The decision can be overruled by adding "hpet=force" on the kernel command line. Remove the related early PCI quirks for affected Ice Cake and Coffin Lake systems as they are no longer required. That should also cover all other systems, i.e. Tiger Rag and newer generations, which are most likely affected by this as well. Fixes: Yet another hardware trainwreck Reported-by: Jakub Kicinski Signed-off-by: Thomas Gleixner Tested-by: Jakub Kicinski Reviewed-by: Rafael J.
Wysocki Cc: stable@vger.kernel.org Cc: Kai-Heng Feng Cc: Bjorn Helgaas --- arch/x86/kernel/early-quirks.c | 6 --- arch/x86/kernel/hpet.c | 81 ++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 38837dad46e6..391a4e2b8604 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -714,12 +714,6 @@ static struct chipset early_qrk[] __initdata = { */ { PCI_VENDOR_ID_INTEL, 0x0f00, PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, - { PCI_VENDOR_ID_INTEL, 0x3e20, - PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, - { PCI_VENDOR_ID_INTEL, 0x3ec4, - PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, - { PCI_VENDOR_ID_INTEL, 0x8a12, - PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, { PCI_VENDOR_ID_BROADCOM, 0x4331, PCI_CLASS_NETWORK_OTHER, PCI_ANY_ID, 0, apple_airport_reset}, {} diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 42fc41dd0e1f..882213df3713 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -10,6 +10,7 @@ #include #include #include +#include #undef pr_fmt #define pr_fmt(fmt) "hpet: " fmt @@ -916,6 +917,83 @@ static bool __init hpet_counting(void) return false; } +static bool __init mwait_pc10_supported(void) +{ + unsigned int eax, ebx, ecx, mwait_substates; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return false; + + if (!cpu_feature_enabled(X86_FEATURE_MWAIT)) + return false; + + if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) + return false; + + cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates); + + return (ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) && + (ecx & CPUID5_ECX_INTERRUPT_BREAK) && + (mwait_substates & (0xF << 28)); +} + +/* + * Check whether the system supports PC10. If so force disable HPET as that + * stops counting in PC10. 
This check is overbroad as it does not take any + * of the following into account: + * + * - ACPI tables + * - Enablement of intel_idle + * - Command line arguments which limit intel_idle C-state support + * + * That's perfectly fine. HPET is a piece of hardware designed by committee + * and the only reasons why it is still in use on modern systems is the + * fact that it is impossible to reliably query TSC and CPU frequency via + * CPUID or firmware. + * + * If HPET is functional it is useful for calibrating TSC, but this can be + * done via PMTIMER as well which seems to be the last remaining timer on + * X86/INTEL platforms that has not been completely wreckaged by feature + * creep. + * + * In theory HPET support should be removed altogether, but there are older + * systems out there which depend on it because TSC and APIC timer are + * dysfunctional in deeper C-states. + * + * It's only 20 years now that hardware people have been asked to provide + * reliable and discoverable facilities which can be used for timekeeping + * and per CPU timer interrupts. + * + * The probability that this problem is going to be solved in the + * forseeable future is close to zero, so the kernel has to be cluttered + * with heuristics to keep up with the ever growing amount of hardware and + * firmware trainwrecks. Hopefully some day hardware people will understand + * that the approach of "This can be fixed in software" is not sustainable. + * Hope dies last... + */ +static bool __init hpet_is_pc10_damaged(void) +{ + unsigned long long pcfg; + + /* Check whether PC10 substates are supported */ + if (!mwait_pc10_supported()) + return false; + + /* Check whether PC10 is enabled in PKG C-state limit */ + rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, pcfg); + if ((pcfg & 0xF) < 8) + return false; + + if (hpet_force_user) { + pr_warn("HPET force enabled via command line, but dysfunctional in PC10.\n"); + return false; + } + + pr_info("HPET dysfunctional in PC10. 
Force disabled.\n"); + boot_hpet_disable = true; + return true; +} + /** * hpet_enable - Try to setup the HPET timer. Returns 1 on success. */ @@ -929,6 +1007,9 @@ int __init hpet_enable(void) if (!is_hpet_capable()) return 0; + if (hpet_is_pc10_damaged()) + return 0; + hpet_set_mapping(); if (!hpet_virt_address) return 0; From d8c23ead708b40a16413163f5f93e07fbd4f077d Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Wed, 22 Sep 2021 09:39:21 -0700 Subject: [PATCH 015/235] kunit: tool: better handling of quasi-bool args (--json, --raw_output) Problem: What does this do? $ kunit.py run --json Well, it runs all the tests and prints test results out as JSON. And next is $ kunit.py run my-test-suite --json This runs just `my-test-suite` and prints results out as JSON. But what about? $ kunit.py run --json my-test-suite This runs all the tests and stores the json results in a "my-test-suite" file. Why: --json, and now --raw_output are actually string flags. They just have a default value. --json in particular takes the name of an output file. It was intended that you'd do $ kunit.py run --json=my_output_file my-test-suite if you ever wanted to specify the value. Workaround: It doesn't seem like there's a way to make https://docs.python.org/3/library/argparse.html only accept arg values after a '='. I believe that `--json` should "just work" regardless of where it is. So this patch automatically rewrites a bare `--json` to `--json=stdout`. That makes the examples above work the same way. Add a regression test that can catch this for --raw_output. 
Fixes: 6a499c9c42d0 ("kunit: tool: make --raw_output support only showing kunit output") Signed-off-by: Daniel Latypov Tested-by: David Gow Reviewed-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit.py | 24 ++++++++++++++++++++++-- tools/testing/kunit/kunit_tool_test.py | 8 ++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index 5a931456e718..ac35c61f65f5 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -16,7 +16,7 @@ assert sys.version_info >= (3, 7), "Python version is too old" from collections import namedtuple from enum import Enum, auto -from typing import Iterable +from typing import Iterable, Sequence import kunit_config import kunit_json @@ -186,6 +186,26 @@ def run_tests(linux: kunit_kernel.LinuxSourceTree, exec_result.elapsed_time)) return parse_result +# Problem: +# $ kunit.py run --json +# works as one would expect and prints the parsed test results as JSON. +# $ kunit.py run --json suite_name +# would *not* pass suite_name as the filter_glob and print as json. +# argparse will consider it to be another way of writing +# $ kunit.py run --json=suite_name +# i.e. it would run all tests, and dump the json to a `suite_name` file. 
+# So we hackily automatically rewrite --json => --json=stdout +pseudo_bool_flag_defaults = { + '--json': 'stdout', + '--raw_output': 'kunit', +} +def massage_argv(argv: Sequence[str]) -> Sequence[str]: + def massage_arg(arg: str) -> str: + if arg not in pseudo_bool_flag_defaults: + return arg + return f'{arg}={pseudo_bool_flag_defaults[arg]}' + return list(map(massage_arg, argv)) + def add_common_opts(parser) -> None: parser.add_argument('--build_dir', help='As in the make command, it specifies the build ' @@ -303,7 +323,7 @@ def main(argv, linux=None): help='Specifies the file to read results from.', type=str, nargs='?', metavar='input_file') - cli_args = parser.parse_args(argv) + cli_args = parser.parse_args(massage_argv(argv)) if get_kernel_root_path(): os.chdir(get_kernel_root_path()) diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index 619c4554cbff..1edcc8373b4e 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -408,6 +408,14 @@ class KUnitMainTest(unittest.TestCase): self.assertNotEqual(call, mock.call(StrContains('Testing complete.'))) self.assertNotEqual(call, mock.call(StrContains(' 0 tests run'))) + def test_run_raw_output_does_not_take_positional_args(self): + # --raw_output is a string flag, but we don't want it to consume + # any positional arguments, only ones after an '=' + self.linux_source_mock.run_kernel = mock.Mock(return_value=[]) + kunit.main(['run', '--raw_output', 'filter_glob'], self.linux_source_mock) + self.linux_source_mock.run_kernel.assert_called_once_with( + args=None, build_dir='.kunit', filter_glob='filter_glob', timeout=300) + def test_exec_timeout(self): timeout = 3453 kunit.main(['exec', '--timeout', str(timeout)], self.linux_source_mock) From f62314b1ced25c58b86e044fc951cd6a1ea234cf Mon Sep 17 00:00:00 2001 From: Xiyu Yang Date: Thu, 9 Sep 2021 15:24:36 +0800 Subject: [PATCH 016/235] kunit: fix reference count leak in kfree_at_end The 
reference counting issue happens in the normal path of kfree_at_end(). When kunit_alloc_and_get_resource() is invoked, the function forgets to handle the returned resource object, whose refcount increased inside, causing a refcount leak. Fix this issue by calling kunit_alloc_resource() instead of kunit_alloc_and_get_resource(). Fixed the following when applying: Shuah Khan CHECK: Alignment should match open parenthesis + kunit_alloc_resource(test, NULL, kfree_res_free, GFP_KERNEL, (void *)to_free); Signed-off-by: Xiyu Yang Signed-off-by: Xin Tan Reviewed-by: Daniel Latypov Reviewed-by: Brendan Higgins Signed-off-by: Shuah Khan --- lib/kunit/executor_test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/kunit/executor_test.c b/lib/kunit/executor_test.c index cdbe54b16501..e14a18af573d 100644 --- a/lib/kunit/executor_test.c +++ b/lib/kunit/executor_test.c @@ -116,8 +116,8 @@ static void kfree_at_end(struct kunit *test, const void *to_free) /* kfree() handles NULL already, but avoid allocating a no-op cleanup. */ if (IS_ERR_OR_NULL(to_free)) return; - kunit_alloc_and_get_resource(test, NULL, kfree_res_free, GFP_KERNEL, - (void *)to_free); + kunit_alloc_resource(test, NULL, kfree_res_free, GFP_KERNEL, + (void *)to_free); } static struct kunit_suite *alloc_fake_suite(struct kunit *test, From 6558b646ce1c2a872fe1c2c7cb116f05a2c1950f Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Wed, 22 Sep 2021 17:57:18 +0100 Subject: [PATCH 017/235] i2c: acpi: fix resource leak in reconfiguration device addition acpi_i2c_find_adapter_by_handle() calls bus_find_device() which takes a reference on the adapter which is never released which will result in a reference count leak and render the adapter unremovable. Make sure to put the adapter after creating the client in the same manner that we do for OF. 
Fixes: 525e6fabeae2 ("i2c / ACPI: add support for ACPI reconfigure notifications") Signed-off-by: Jamie Iles Acked-by: Mika Westerberg [wsa: fixed title] Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-acpi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c index aaeeacc12121..546cc935e035 100644 --- a/drivers/i2c/i2c-core-acpi.c +++ b/drivers/i2c/i2c-core-acpi.c @@ -454,6 +454,7 @@ static int i2c_acpi_notify(struct notifier_block *nb, unsigned long value, break; i2c_acpi_register_device(adapter, adev, &info); + put_device(&adapter->dev); break; case ACPI_RECONFIG_DEVICE_REMOVE: if (!acpi_device_enumerated(adev)) From 3bce7703c7ba648bd9e174dc1413f422b7998833 Mon Sep 17 00:00:00 2001 From: Kewei Xu Date: Fri, 17 Sep 2021 18:14:14 +0800 Subject: [PATCH 018/235] i2c: mediatek: Add OFFSET_EXT_CONF setting back In the commit be5ce0e97cc7 ("i2c: mediatek: Add i2c ac-timing adjust support"), we miss setting OFFSET_EXT_CONF register if i2c->dev_comp->timing_adjust is false, now add it back. 
Fixes: be5ce0e97cc7 ("i2c: mediatek: Add i2c ac-timing adjust support") Signed-off-by: Kewei Xu Reviewed-by: Qii Wang Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mt65xx.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c index 477480d1de6b..7d4b3eb7077a 100644 --- a/drivers/i2c/busses/i2c-mt65xx.c +++ b/drivers/i2c/busses/i2c-mt65xx.c @@ -41,6 +41,8 @@ #define I2C_HANDSHAKE_RST 0x0020 #define I2C_FIFO_ADDR_CLR 0x0001 #define I2C_DELAY_LEN 0x0002 +#define I2C_ST_START_CON 0x8001 +#define I2C_FS_START_CON 0x1800 #define I2C_TIME_CLR_VALUE 0x0000 #define I2C_TIME_DEFAULT_VALUE 0x0003 #define I2C_WRRD_TRANAC_VALUE 0x0002 @@ -480,6 +482,7 @@ static void mtk_i2c_init_hw(struct mtk_i2c *i2c) { u16 control_reg; u16 intr_stat_reg; + u16 ext_conf_val; mtk_i2c_writew(i2c, I2C_CHN_CLR_FLAG, OFFSET_START); intr_stat_reg = mtk_i2c_readw(i2c, OFFSET_INTR_STAT); @@ -518,8 +521,13 @@ static void mtk_i2c_init_hw(struct mtk_i2c *i2c) if (i2c->dev_comp->ltiming_adjust) mtk_i2c_writew(i2c, i2c->ltiming_reg, OFFSET_LTIMING); + if (i2c->speed_hz <= I2C_MAX_STANDARD_MODE_FREQ) + ext_conf_val = I2C_ST_START_CON; + else + ext_conf_val = I2C_FS_START_CON; + if (i2c->dev_comp->timing_adjust) { - mtk_i2c_writew(i2c, i2c->ac_timing.ext, OFFSET_EXT_CONF); + ext_conf_val = i2c->ac_timing.ext; mtk_i2c_writew(i2c, i2c->ac_timing.inter_clk_div, OFFSET_CLOCK_DIV); mtk_i2c_writew(i2c, I2C_SCL_MIS_COMP_VALUE, @@ -544,6 +552,7 @@ static void mtk_i2c_init_hw(struct mtk_i2c *i2c) OFFSET_HS_STA_STO_AC_TIMING); } } + mtk_i2c_writew(i2c, ext_conf_val, OFFSET_EXT_CONF); /* If use i2c pin from PMIC mt6397 side, need set PATH_DIR first */ if (i2c->have_pmic) From 06cc978d3ff226072780f74897800b33e78abb57 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sat, 2 Oct 2021 18:23:02 +0900 Subject: [PATCH 019/235] block: genhd: fix double kfree() in __alloc_disk_node() syzbot is reporting use-after-free read at 
bdev_free_inode() [1], for kfree() from __alloc_disk_node() is called before bdev_free_inode() (which is called after RCU grace period) reads bdev->bd_disk and calls kfree(bdev->bd_disk). Fix use-after-free read followed by double kfree() problem by making sure that bdev->bd_disk is NULL when calling iput(). Link: https://syzkaller.appspot.com/bug?extid=8281086e8a6fbfbd952a [1] Reported-by: syzbot Signed-off-by: Tetsuo Handa Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/e6dd13c5-8db0-4392-6e78-a42ee5d2a1c4@i-love.sakura.ne.jp Signed-off-by: Jens Axboe --- block/bdev.c | 2 +- block/genhd.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/block/bdev.c b/block/bdev.c index cf2780cb44a7..485a258b0ab3 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -490,7 +490,6 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno) bdev = I_BDEV(inode); mutex_init(&bdev->bd_fsfreeze_mutex); spin_lock_init(&bdev->bd_size_lock); - bdev->bd_disk = disk; bdev->bd_partno = partno; bdev->bd_inode = inode; bdev->bd_stats = alloc_percpu(struct disk_stats); @@ -498,6 +497,7 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno) iput(inode); return NULL; } + bdev->bd_disk = disk; return bdev; } diff --git a/block/genhd.c b/block/genhd.c index 7b6e5e1cf956..496e8458c357 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1268,6 +1268,7 @@ struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id, out_destroy_part_tbl: xa_destroy(&disk->part_tbl); + disk->part0->bd_disk = NULL; iput(disk->part0->bd_inode); out_free_bdi: bdi_put(disk->bdi); From bb4a23c994aebcd96c567a0be8e964d516bd4a61 Mon Sep 17 00:00:00 2001 From: Tong Tiangen Date: Wed, 1 Sep 2021 02:46:19 +0000 Subject: [PATCH 020/235] riscv/vdso: Refactor asm/vdso.h The asm/vdso.h will be included in vdso.lds.S in the next patch, the following cleanup is needed to avoid syntax error: 1.the declaration of sys_riscv_flush_icache() is moved into asm/syscall.h. 
2.the definition of struct vdso_data is moved into kernel/vdso.c. 3.the definition of VDSO_SYMBOL is placed under "#ifndef __ASSEMBLY__". Also remove the redundant linux/types.h include. Signed-off-by: Tong Tiangen Reviewed-by: Kefeng Wang Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/syscall.h | 1 + arch/riscv/include/asm/vdso.h | 16 ++++++++++------ arch/riscv/kernel/syscall_table.c | 1 - arch/riscv/kernel/vdso.c | 5 ++++- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h index b933b1583c9f..34fbb3ea21d5 100644 --- a/arch/riscv/include/asm/syscall.h +++ b/arch/riscv/include/asm/syscall.h @@ -82,4 +82,5 @@ static inline int syscall_get_arch(struct task_struct *task) #endif } +asmlinkage long sys_riscv_flush_icache(uintptr_t, uintptr_t, uintptr_t); #endif /* _ASM_RISCV_SYSCALL_H */ diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h index 893e47195e30..a4a979c89ea0 100644 --- a/arch/riscv/include/asm/vdso.h +++ b/arch/riscv/include/asm/vdso.h @@ -16,18 +16,22 @@ #ifdef CONFIG_MMU #include -#include +/* + * All systems with an MMU have a VDSO, but systems without an MMU don't + * support shared libraries and therefor don't have one.
+ */ +#ifdef CONFIG_MMU -#ifndef CONFIG_GENERIC_TIME_VSYSCALL -struct vdso_data { -}; -#endif +#ifndef __ASSEMBLY__ +#include #define VDSO_SYMBOL(base, name) \ (void __user *)((unsigned long)(base) + __vdso_##name##_offset) #endif /* CONFIG_MMU */ -asmlinkage long sys_riscv_flush_icache(uintptr_t, uintptr_t, uintptr_t); +#endif /* !__ASSEMBLY__ */ + +#endif /* CONFIG_MMU */ #endif /* _ASM_RISCV_VDSO_H */ diff --git a/arch/riscv/kernel/syscall_table.c b/arch/riscv/kernel/syscall_table.c index a63c667c27b3..44b1420a2270 100644 --- a/arch/riscv/kernel/syscall_table.c +++ b/arch/riscv/kernel/syscall_table.c @@ -7,7 +7,6 @@ #include #include #include -#include #include #undef __SYSCALL diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c index 25a3b8849599..72e93d218335 100644 --- a/arch/riscv/kernel/vdso.c +++ b/arch/riscv/kernel/vdso.c @@ -12,10 +12,13 @@ #include #include #include +#include + #ifdef CONFIG_GENERIC_TIME_VSYSCALL #include #else -#include +struct vdso_data { +}; #endif extern char vdso_start[], vdso_end[]; From 78a743cd82a35ca0724179fc22834f06a2151fc2 Mon Sep 17 00:00:00 2001 From: Tong Tiangen Date: Wed, 1 Sep 2021 02:46:20 +0000 Subject: [PATCH 021/235] riscv/vdso: Move vdso data page up front As commit 601255ae3c98 ("arm64: vdso: move data page before code pages"), the same issue exists on riscv, testcase is shown below, make sure that vdso.so is bigger than page size, struct timespec tp; clock_gettime(5, &tp); printf("tv_sec: %ld, tv_nsec: %ld\n", tp.tv_sec, tp.tv_nsec); without this patch, test result : tv_sec: 0, tv_nsec: 0 with this patch, test result : tv_sec: 1629271537, tv_nsec: 748000000 Move the vdso data page in front of the VDSO area to fix the issue. 
Fixes: ad5d1122b82fb ("riscv: use vDSO common flow to reduce the latency of the time-related functions") Signed-off-by: Tong Tiangen Reviewed-by: Kefeng Wang Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/vdso.h | 2 ++ arch/riscv/kernel/vdso.c | 48 ++++++++++++++++++------------- arch/riscv/kernel/vdso/vdso.lds.S | 3 +- 3 files changed, 32 insertions(+), 21 deletions(-) diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h index a4a979c89ea0..208e31bc5d1c 100644 --- a/arch/riscv/include/asm/vdso.h +++ b/arch/riscv/include/asm/vdso.h @@ -22,6 +22,8 @@ */ #ifdef CONFIG_MMU +#define __VVAR_PAGES 1 + #ifndef __ASSEMBLY__ #include diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c index 72e93d218335..e7bd92d8749b 100644 --- a/arch/riscv/kernel/vdso.c +++ b/arch/riscv/kernel/vdso.c @@ -23,6 +23,13 @@ struct vdso_data { extern char vdso_start[], vdso_end[]; +enum vvar_pages { + VVAR_DATA_PAGE_OFFSET, + VVAR_NR_PAGES, +}; + +#define VVAR_SIZE (VVAR_NR_PAGES << PAGE_SHIFT) + static unsigned int vdso_pages __ro_after_init; static struct page **vdso_pagelist __ro_after_init; @@ -41,7 +48,7 @@ static int __init vdso_init(void) vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT; vdso_pagelist = - kcalloc(vdso_pages + 1, sizeof(struct page *), GFP_KERNEL); + kcalloc(vdso_pages + VVAR_NR_PAGES, sizeof(struct page *), GFP_KERNEL); if (unlikely(vdso_pagelist == NULL)) { pr_err("vdso: pagelist allocation failed\n"); return -ENOMEM; @@ -66,7 +73,9 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, unsigned long vdso_base, vdso_len; int ret; - vdso_len = (vdso_pages + 1) << PAGE_SHIFT; + BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES); + + vdso_len = (vdso_pages + VVAR_NR_PAGES) << PAGE_SHIFT; mmap_write_lock(mm); vdso_base = get_unmapped_area(NULL, 0, vdso_len, 0, 0); @@ -75,29 +84,28 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, goto end; } + mm->context.vdso = NULL; + ret = install_special_mapping(mm, vdso_base, 
VVAR_SIZE, + (VM_READ | VM_MAYREAD), &vdso_pagelist[vdso_pages]); + if (unlikely(ret)) + goto end; + + ret = + install_special_mapping(mm, vdso_base + VVAR_SIZE, + vdso_pages << PAGE_SHIFT, + (VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC), + vdso_pagelist); + + if (unlikely(ret)) + goto end; + /* * Put vDSO base into mm struct. We need to do this before calling * install_special_mapping or the perf counter mmap tracking code * will fail to recognise it as a vDSO (since arch_vma_name fails). */ - mm->context.vdso = (void *)vdso_base; + mm->context.vdso = (void *)vdso_base + VVAR_SIZE; - ret = - install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT, - (VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC), - vdso_pagelist); - - if (unlikely(ret)) { - mm->context.vdso = NULL; - goto end; - } - - vdso_base += (vdso_pages << PAGE_SHIFT); - ret = install_special_mapping(mm, vdso_base, PAGE_SIZE, - (VM_READ | VM_MAYREAD), &vdso_pagelist[vdso_pages]); - - if (unlikely(ret)) - mm->context.vdso = NULL; end: mmap_write_unlock(mm); return ret; @@ -108,7 +116,7 @@ const char *arch_vma_name(struct vm_area_struct *vma) if (vma->vm_mm && (vma->vm_start == (long)vma->vm_mm->context.vdso)) return "[vdso]"; if (vma->vm_mm && (vma->vm_start == - (long)vma->vm_mm->context.vdso + PAGE_SIZE)) + (long)vma->vm_mm->context.vdso - VVAR_SIZE)) return "[vdso_data]"; return NULL; } diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index e6f558bca71b..e9111f700af0 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -3,12 +3,13 @@ * Copyright (C) 2012 Regents of the University of California */ #include +#include OUTPUT_ARCH(riscv) SECTIONS { - PROVIDE(_vdso_data = . + PAGE_SIZE); + PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); . 
= SIZEOF_HEADERS; .hash : { *(.hash) } :text From 8bb0ab3ae7a4dbe6cf32deb830cf2bdbf5736867 Mon Sep 17 00:00:00 2001 From: Tong Tiangen Date: Wed, 1 Sep 2021 02:46:21 +0000 Subject: [PATCH 022/235] riscv/vdso: make arch_setup_additional_pages wait for mmap_sem for write killable The riscv architecture relies on mmap_sem for write in its arch_setup_additional_pages. If the waiting task gets killed by the oom killer it would block oom_reaper from asynchronous address space reclaim and reduce the chances of timely OOM resolving. Wait for the lock in the killable mode and return with EINTR if the task got killed while waiting. Signed-off-by: Tong Tiangen Reviewed-by: Kefeng Wang Fixes: 76d2a0493a17 ("RISC-V: Init and Halt Code") Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/vdso.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c index e7bd92d8749b..b70956d80408 100644 --- a/arch/riscv/kernel/vdso.c +++ b/arch/riscv/kernel/vdso.c @@ -77,7 +77,9 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, vdso_len = (vdso_pages + VVAR_NR_PAGES) << PAGE_SHIFT; - mmap_write_lock(mm); + if (mmap_write_lock_killable(mm)) + return -EINTR; + vdso_base = get_unmapped_area(NULL, 0, vdso_len, 0, 0); if (IS_ERR_VALUE(vdso_base)) { ret = vdso_base; From 1f8d398e1cd8813f8ec16d55c086e8270a9c18ab Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Fri, 1 Oct 2021 15:31:10 +0200 Subject: [PATCH 023/235] ALSA: hda/realtek: Complete partial device name to avoid ambiguity The string "Clevo X170" is not enough to unambiguously identify the correct device. Fixing it so another Clevo barebone name starting with "X170" can be added without causing confusion.
Signed-off-by: Werner Sembach Cc: Link: https://lore.kernel.org/r/20211001133111.428249-2-wse@tuxedocomputers.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 6bf6151347d8..45e1f570f1e6 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2537,7 +2537,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x67e1, "Clevo PB71[DE][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x67e5, "Clevo PC70D[PRS](?:-D|-G)?", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x70d1, "Clevo PC70[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), - SND_PCI_QUIRK(0x1558, 0x7714, "Clevo X170", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x7714, "Clevo X170SM", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x9501, "Clevo P950HR", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1558, 0x9506, "Clevo P955HQ", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1558, 0x950a, "Clevo P955H[PR]", ALC1220_FIXUP_CLEVO_P950), From cc03069a397005da24f6783835c274d5aedf6043 Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Fri, 1 Oct 2021 15:31:11 +0200 Subject: [PATCH 024/235] ALSA: hda/realtek: Add quirk for Clevo X170KM-G This applies a SND_PCI_QUIRK(...) to the Clevo X170KM-G barebone. This fixes the issue of the devices internal Speaker not working. 
Signed-off-by: Werner Sembach Cc: Link: https://lore.kernel.org/r/20211001133111.428249-3-wse@tuxedocomputers.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 45e1f570f1e6..0331fae5525e 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2538,6 +2538,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x67e5, "Clevo PC70D[PRS](?:-D|-G)?", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x70d1, "Clevo PC70[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x7714, "Clevo X170SM", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x7715, "Clevo X170KM-G", ALC1220_FIXUP_CLEVO_PB51ED), SND_PCI_QUIRK(0x1558, 0x9501, "Clevo P950HR", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1558, 0x9506, "Clevo P955HQ", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1558, 0x950a, "Clevo P955H[PR]", ALC1220_FIXUP_CLEVO_P950), From 686cb8b9f6b46787f035afe8fbd132a74e6b1bdd Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Mon, 27 Sep 2021 15:06:14 +0800 Subject: [PATCH 025/235] bpf, s390: Fix potential memory leak about jit_data Make sure to free jit_data through kfree() in the error path. 
Fixes: 1c8f9b91c456 ("bpf: s390: add JIT support for multi-function programs") Signed-off-by: Tiezhu Yang Acked-by: Ilya Leoshkevich Reviewed-by: Christian Borntraeger Signed-off-by: Christian Borntraeger Signed-off-by: Vasily Gorbik --- arch/s390/net/bpf_jit_comp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 840d8594437d..1a374d021e25 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1826,7 +1826,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) jit.addrs = kvcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL); if (jit.addrs == NULL) { fp = orig_fp; - goto out; + goto free_addrs; } /* * Three initial passes: From a46044a92add6a400f4dada7b943b30221f7cc80 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Wed, 22 Sep 2021 15:55:12 +0200 Subject: [PATCH 026/235] s390/pci: fix zpci_zdev_put() on reserve Since commit 2a671f77ee49 ("s390/pci: fix use after free of zpci_dev") the reference count of a zpci_dev is incremented between pcibios_add_device() and pcibios_release_device() which was supposed to prevent the zpci_dev from being freed while the common PCI code has access to it. It was missed however that the handling of zPCI availability events assumed that once zpci_zdev_put() was called no later availability event would still see the device. With the previously mentioned commit however this assumption no longer holds and we must make sure that we only drop the initial long-lived reference the zPCI subsystem holds exactly once. Do so by introducing a zpci_device_reserved() function that handles when a device is reserved. Here we make sure the zpci_dev will not be considered for further events by removing it from the zpci_list. This also means that the device actually stays in the ZPCI_FN_STATE_RESERVED state between the time we know it has been reserved and the final reference going away. 
We thus need to consider it a real state instead of just a conceptual state after the removal. The final cleanup of PCI resources, removal from zbus, and destruction of the IOMMU stays in zpci_release_device() to make sure holders of the reference do see valid data until the release. Fixes: 2a671f77ee49 ("s390/pci: fix use after free of zpci_dev") Cc: stable@vger.kernel.org Signed-off-by: Niklas Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/pci.h | 2 ++ arch/s390/pci/pci.c | 45 ++++++++++++++++++++++++++---- arch/s390/pci/pci_event.c | 4 +-- drivers/pci/hotplug/s390_pci_hpc.c | 9 +----- 4 files changed, 45 insertions(+), 15 deletions(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index e4803ec51110..6b3c366af78e 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -207,6 +207,8 @@ int zpci_enable_device(struct zpci_dev *); int zpci_disable_device(struct zpci_dev *); int zpci_scan_configured_device(struct zpci_dev *zdev, u32 fh); int zpci_deconfigure_device(struct zpci_dev *zdev); +void zpci_device_reserved(struct zpci_dev *zdev); +bool zpci_is_device_configured(struct zpci_dev *zdev); int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64); int zpci_unregister_ioat(struct zpci_dev *, u8); diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index e7e6788d75a8..b833155ce838 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -92,7 +92,7 @@ void zpci_remove_reserved_devices(void) spin_unlock(&zpci_list_lock); list_for_each_entry_safe(zdev, tmp, &remove, entry) - zpci_zdev_put(zdev); + zpci_device_reserved(zdev); } int pci_domain_nr(struct pci_bus *bus) @@ -751,6 +751,14 @@ struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state) return ERR_PTR(rc); } +bool zpci_is_device_configured(struct zpci_dev *zdev) +{ + enum zpci_state state = zdev->state; + + return state != ZPCI_FN_STATE_RESERVED && + state != ZPCI_FN_STATE_STANDBY; +} + /** * 
zpci_scan_configured_device() - Scan a freshly configured zpci_dev * @zdev: The zpci_dev to be configured @@ -822,6 +830,31 @@ int zpci_deconfigure_device(struct zpci_dev *zdev) return 0; } +/** + * zpci_device_reserved() - Mark device as resverved + * @zdev: the zpci_dev that was reserved + * + * Handle the case that a given zPCI function was reserved by another system. + * After a call to this function the zpci_dev can not be found via + * get_zdev_by_fid() anymore but may still be accessible via existing + * references though it will not be functional anymore. + */ +void zpci_device_reserved(struct zpci_dev *zdev) +{ + if (zdev->has_hp_slot) + zpci_exit_slot(zdev); + /* + * Remove device from zpci_list as it is going away. This also + * makes sure we ignore subsequent zPCI events for this device. + */ + spin_lock(&zpci_list_lock); + list_del(&zdev->entry); + spin_unlock(&zpci_list_lock); + zdev->state = ZPCI_FN_STATE_RESERVED; + zpci_dbg(3, "rsv fid:%x\n", zdev->fid); + zpci_zdev_put(zdev); +} + void zpci_release_device(struct kref *kref) { struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref); @@ -843,6 +876,12 @@ void zpci_release_device(struct kref *kref) case ZPCI_FN_STATE_STANDBY: if (zdev->has_hp_slot) zpci_exit_slot(zdev); + spin_lock(&zpci_list_lock); + list_del(&zdev->entry); + spin_unlock(&zpci_list_lock); + zpci_dbg(3, "rsv fid:%x\n", zdev->fid); + fallthrough; + case ZPCI_FN_STATE_RESERVED: if (zdev->has_resources) zpci_cleanup_bus_resources(zdev); zpci_bus_device_unregister(zdev); @@ -851,10 +890,6 @@ void zpci_release_device(struct kref *kref) default: break; } - - spin_lock(&zpci_list_lock); - list_del(&zdev->entry); - spin_unlock(&zpci_list_lock); zpci_dbg(3, "rem fid:%x\n", zdev->fid); kfree(zdev); } diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index c856f80cb21b..5b8d647523f9 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -140,7 +140,7 @@ static void __zpci_event_availability(struct 
zpci_ccdf_avail *ccdf) /* The 0x0304 event may immediately reserve the device */ if (!clp_get_state(zdev->fid, &state) && state == ZPCI_FN_STATE_RESERVED) { - zpci_zdev_put(zdev); + zpci_device_reserved(zdev); } } break; @@ -151,7 +151,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) case 0x0308: /* Standby -> Reserved */ if (!zdev) break; - zpci_zdev_put(zdev); + zpci_device_reserved(zdev); break; default: break; diff --git a/drivers/pci/hotplug/s390_pci_hpc.c b/drivers/pci/hotplug/s390_pci_hpc.c index 014868752cd4..dcefdb42ac46 100644 --- a/drivers/pci/hotplug/s390_pci_hpc.c +++ b/drivers/pci/hotplug/s390_pci_hpc.c @@ -62,14 +62,7 @@ static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value) struct zpci_dev *zdev = container_of(hotplug_slot, struct zpci_dev, hotplug_slot); - switch (zdev->state) { - case ZPCI_FN_STATE_STANDBY: - *value = 0; - break; - default: - *value = 1; - break; - } + *value = zpci_is_device_configured(zdev) ? 1 : 0; return 0; } From 1dbdd99b511c966be9147ad72991a2856ac76f22 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Mon, 4 Oct 2021 16:22:07 +0900 Subject: [PATCH 027/235] block: decode QUEUE_FLAG_HCTX_ACTIVE in debugfs output While debugging an issue we've found that $DEBUGFS/block/$disk/state doesn't decode QUEUE_FLAG_HCTX_ACTIVE but only displays its numerical value. Add QUEUE_FLAG(HCTX_ACTIVE) to the blk_queue_flag_name array so it'll get decoded properly. 
Signed-off-by: Johannes Thumshirn Link: https://lore.kernel.org/r/4351076388918075bd80ef07756f9d2ce63be12c.1633332053.git.johannes.thumshirn@wdc.com Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 4b66d2776eda..3b38d15723de 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -129,6 +129,7 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(PCI_P2PDMA), QUEUE_FLAG_NAME(ZONE_RESETALL), QUEUE_FLAG_NAME(RQ_ALLOC_TIME), + QUEUE_FLAG_NAME(HCTX_ACTIVE), QUEUE_FLAG_NAME(NOWAIT), }; #undef QUEUE_FLAG_NAME From 52f57396c75acd77ebcdf3d20aed24ed248e9f79 Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Mon, 23 Aug 2021 17:45:01 +0300 Subject: [PATCH 028/235] i2c: mlxcpld: Fix criteria for frequency setting The value used to determine the frequency capability was wrongly taken from the register offset instead of the register value. Fixes: 66b0c2846ba8 ("i2c: mlxcpld: Add support for I2C bus frequency setting") Signed-off-by: Vadim Pasternak Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mlxcpld.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-mlxcpld.c b/drivers/i2c/busses/i2c-mlxcpld.c index 4e0b7c2882ce..6d41c3db8a2b 100644 --- a/drivers/i2c/busses/i2c-mlxcpld.c +++ b/drivers/i2c/busses/i2c-mlxcpld.c @@ -495,7 +495,7 @@ mlxcpld_i2c_set_frequency(struct mlxcpld_i2c_priv *priv, return err; /* Set frequency only if it is not 100KHz, which is default. */ - switch ((data->reg & data->mask) >> data->bit) { + switch ((regval & data->mask) >> data->bit) { case MLXCPLD_I2C_FREQ_1000KHZ: freq = MLXCPLD_I2C_FREQ_1000KHZ_SET; break; From fa1049135c15b4930ce7ea757a81b1b78908f304 Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Mon, 23 Aug 2021 17:45:02 +0300 Subject: [PATCH 029/235] i2c: mlxcpld: Modify register setting for 400KHz frequency Change the register setting for 400KHz frequency support to a more accurate value.
Fixes: 66b0c2846ba8 ("i2c: mlxcpld: Add support for I2C bus frequency setting") Signed-off-by: Vadim Pasternak Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mlxcpld.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-mlxcpld.c b/drivers/i2c/busses/i2c-mlxcpld.c index 6d41c3db8a2b..015e11c4663f 100644 --- a/drivers/i2c/busses/i2c-mlxcpld.c +++ b/drivers/i2c/busses/i2c-mlxcpld.c @@ -49,7 +49,7 @@ #define MLXCPLD_LPCI2C_NACK_IND 2 #define MLXCPLD_I2C_FREQ_1000KHZ_SET 0x04 -#define MLXCPLD_I2C_FREQ_400KHZ_SET 0x0f +#define MLXCPLD_I2C_FREQ_400KHZ_SET 0x0c #define MLXCPLD_I2C_FREQ_100KHZ_SET 0x42 enum mlxcpld_i2c_frequency { From 59a4e0d5511ba61353ea9a4efdb1b86c23ecf134 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Sat, 2 Oct 2021 17:21:20 -0700 Subject: [PATCH 030/235] RISC-V: Include clone3() on rv32 As far as I can tell this should be enabled on rv32 as well, I'm not sure why it's rv64-only. checksyscalls is complaining about our lack of clone3() on rv32. 
Fixes: 56ac5e213933 ("riscv: enable sys_clone3 syscall for rv64") Signed-off-by: Palmer Dabbelt Reviewed-by: Arnd Bergmann Acked-by: Christian Brauner Signed-off-by: Palmer Dabbelt --- arch/riscv/include/uapi/asm/unistd.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/riscv/include/uapi/asm/unistd.h b/arch/riscv/include/uapi/asm/unistd.h index 4b989ae15d59..8062996c2dfd 100644 --- a/arch/riscv/include/uapi/asm/unistd.h +++ b/arch/riscv/include/uapi/asm/unistd.h @@ -18,9 +18,10 @@ #ifdef __LP64__ #define __ARCH_WANT_NEW_STAT #define __ARCH_WANT_SET_GET_RLIMIT -#define __ARCH_WANT_SYS_CLONE3 #endif /* __LP64__ */ +#define __ARCH_WANT_SYS_CLONE3 + #include /* From bb8958d5dc79acbd071397abb57b8756375fe1ce Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Sat, 18 Sep 2021 18:02:21 +0200 Subject: [PATCH 031/235] riscv: Flush current cpu icache before other cpus On SiFive Unmatched, I recently fell onto the following BUG when booting: [ 0.000000] ftrace: allocating 36610 entries in 144 pages [ 0.000000] Oops - illegal instruction [#1] [ 0.000000] Modules linked in: [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 5.13.1+ #5 [ 0.000000] Hardware name: SiFive HiFive Unmatched A00 (DT) [ 0.000000] epc : riscv_cpuid_to_hartid_mask+0x6/0xae [ 0.000000] ra : __sbi_rfence_v02+0xc8/0x10a [ 0.000000] epc : ffffffff80007240 ra : ffffffff80009964 sp : ffffffff81803e10 [ 0.000000] gp : ffffffff81a1ea70 tp : ffffffff8180f500 t0 : ffffffe07fe30000 [ 0.000000] t1 : 0000000000000004 t2 : 0000000000000000 s0 : ffffffff81803e60 [ 0.000000] s1 : 0000000000000000 a0 : ffffffff81a22238 a1 : ffffffff81803e10 [ 0.000000] a2 : 0000000000000000 a3 : 0000000000000000 a4 : 0000000000000000 [ 0.000000] a5 : 0000000000000000 a6 : ffffffff8000989c a7 : 0000000052464e43 [ 0.000000] s2 : ffffffff81a220c8 s3 : 0000000000000000 s4 : 0000000000000000 [ 0.000000] s5 : 0000000000000000 s6 : 0000000200000100 s7 : 0000000000000001 [ 0.000000] s8 : ffffffe07fe04040 s9 : 
ffffffff81a22c80 s10: 0000000000001000 [ 0.000000] s11: 0000000000000004 t3 : 0000000000000001 t4 : 0000000000000008 [ 0.000000] t5 : ffffffcf04000808 t6 : ffffffe3ffddf188 [ 0.000000] status: 0000000200000100 badaddr: 0000000000000000 cause: 0000000000000002 [ 0.000000] [] riscv_cpuid_to_hartid_mask+0x6/0xae [ 0.000000] [] sbi_remote_fence_i+0x1e/0x26 [ 0.000000] [] flush_icache_all+0x12/0x1a [ 0.000000] [] patch_text_nosync+0x26/0x32 [ 0.000000] [] ftrace_init_nop+0x52/0x8c [ 0.000000] [] ftrace_process_locs.isra.0+0x29c/0x360 [ 0.000000] [] ftrace_init+0x80/0x130 [ 0.000000] [] start_kernel+0x5c4/0x8f6 [ 0.000000] ---[ end trace f67eb9af4d8d492b ]--- [ 0.000000] Kernel panic - not syncing: Attempted to kill the idle task! [ 0.000000] ---[ end Kernel panic - not syncing: Attempted to kill the idle task! ]--- While ftrace is looping over a list of addresses to patch, it always failed when patching the same function: riscv_cpuid_to_hartid_mask. Looking at the backtrace, the illegal instruction is encountered in this same function. However, patch_text_nosync, after patching the instructions, calls flush_icache_range. But looking at what happens in this function: flush_icache_range -> flush_icache_all -> sbi_remote_fence_i -> __sbi_rfence_v02 -> riscv_cpuid_to_hartid_mask The icache and dcache of the current cpu are never synchronized between the patching of riscv_cpuid_to_hartid_mask and calling this same function. So fix this by flushing the current cpu's icache before asking for the other cpus to do the same. 
Signed-off-by: Alexandre Ghiti Fixes: fab957c11efe ("RISC-V: Atomic and Locking Code") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/cacheflush.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c index 094118663285..89f81067e09e 100644 --- a/arch/riscv/mm/cacheflush.c +++ b/arch/riscv/mm/cacheflush.c @@ -16,6 +16,8 @@ static void ipi_remote_fence_i(void *info) void flush_icache_all(void) { + local_flush_icache_all(); + if (IS_ENABLED(CONFIG_RISCV_SBI)) sbi_remote_fence_i(NULL); else From c5336400ca8b5f83123d965ce1eb9ac4a604cc95 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 30 Sep 2021 19:51:57 +0800 Subject: [PATCH 032/235] scsi: acornscsi: Remove scsi_cmd_to_tag() reference Commit 756fb6a895af ("scsi: acornscsi: Remove tagged queuing vestiges") mistakenly introduced a reference to function scsi_cmd_to_tag(). This function does not exist as it was removed from an earlier series version when I upstreamed the named commit - originally authored by Hannes - but this reference still remained. Fix by replacing the reference to scsi_cmd_to_tag() with scsi_cmd_to_rq(scsi_scmd)->tag, which scsi_cmd_to_tag() was a wrapper for. Link: https://lore.kernel.org/r/1633002717-79765-1-git-send-email-john.garry@huawei.com Fixes: 756fb6a895af ("scsi: acornscsi: Remove tagged queuing vestiges") Reported-by: Arnd Bergmann Tested-by: Arnd Bergmann Signed-off-by: John Garry Signed-off-by: Martin K.
Petersen --- drivers/scsi/arm/acornscsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/arm/acornscsi.c b/drivers/scsi/arm/acornscsi.c index b4cb5fb19998..0cc62c1b0825 100644 --- a/drivers/scsi/arm/acornscsi.c +++ b/drivers/scsi/arm/acornscsi.c @@ -1776,7 +1776,7 @@ int acornscsi_reconnect_finish(AS_Host *host) host->scsi.disconnectable = 0; if (host->SCpnt->device->id == host->scsi.reconnected.target && host->SCpnt->device->lun == host->scsi.reconnected.lun && - scsi_cmd_to_tag(host->SCpnt) == host->scsi.reconnected.tag) { + scsi_cmd_to_rq(host->SCpnt)->tag == host->scsi.reconnected.tag) { #if (DEBUG & (DEBUG_QUEUES|DEBUG_DISCON)) DBG(host->SCpnt, printk("scsi%d.%c: reconnected", host->host->host_no, acornscsi_target(host))); From f5ef336fd2e4c36dedae4e7ca66cf5349d6fda62 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 22 Sep 2021 12:10:59 +0300 Subject: [PATCH 033/235] scsi: ufs: core: Fix task management completion The UFS driver uses blk_mq_tagset_busy_iter() when identifying task management requests to complete, however blk_mq_tagset_busy_iter() doesn't work. blk_mq_tagset_busy_iter() only iterates requests dispatched by the block layer. That appears as if it might have started since commit 37f4a24c2469 ("blk-mq: centralise related handling into blk_mq_get_driver_tag") which removed 'data->hctx->tags->rqs[rq->tag] = rq' from blk_mq_rq_ctx_init() which gets called: blk_get_request blk_mq_alloc_request __blk_mq_alloc_request blk_mq_rq_ctx_init Since UFS task management requests are not dispatched by the block layer, hctx->tags->rqs[rq->tag] remains NULL, and since blk_mq_tagset_busy_iter() relies on finding requests using hctx->tags->rqs[rq->tag], UFS task management requests are never found by blk_mq_tagset_busy_iter(). By using blk_mq_tagset_busy_iter(), the UFS driver was relying on internal details of the block layer, which was fragile and subsequently got broken. 
Fix by removing the use of blk_mq_tagset_busy_iter() and having the driver keep track of task management requests. Link: https://lore.kernel.org/r/20210922091059.4040-1-adrian.hunter@intel.com Fixes: 1235fc569e0b ("scsi: ufs: core: Fix task management request completion timeout") Fixes: 69a6c269c097 ("scsi: ufs: Use blk_{get,put}_request() to allocate and free TMFs") Cc: stable@vger.kernel.org Tested-by: Bart Van Assche Reviewed-by: Bart Van Assche Signed-off-by: Adrian Hunter Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 52 +++++++++++++++++---------------------- drivers/scsi/ufs/ufshcd.h | 1 + 2 files changed, 23 insertions(+), 30 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 188de6f91050..95be7ecdfe10 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -6377,27 +6377,6 @@ static irqreturn_t ufshcd_check_errors(struct ufs_hba *hba, u32 intr_status) return retval; } -struct ctm_info { - struct ufs_hba *hba; - unsigned long pending; - unsigned int ncpl; -}; - -static bool ufshcd_compl_tm(struct request *req, void *priv, bool reserved) -{ - struct ctm_info *const ci = priv; - struct completion *c; - - WARN_ON_ONCE(reserved); - if (test_bit(req->tag, &ci->pending)) - return true; - ci->ncpl++; - c = req->end_io_data; - if (c) - complete(c); - return true; -} - /** * ufshcd_tmc_handler - handle task management function completion * @hba: per adapter instance @@ -6408,18 +6387,24 @@ static bool ufshcd_compl_tm(struct request *req, void *priv, bool reserved) */ static irqreturn_t ufshcd_tmc_handler(struct ufs_hba *hba) { - unsigned long flags; - struct request_queue *q = hba->tmf_queue; - struct ctm_info ci = { - .hba = hba, - }; + unsigned long flags, pending, issued; + irqreturn_t ret = IRQ_NONE; + int tag; + + pending = ufshcd_readl(hba, REG_UTP_TASK_REQ_DOOR_BELL); spin_lock_irqsave(hba->host->host_lock, flags); - ci.pending = ufshcd_readl(hba, REG_UTP_TASK_REQ_DOOR_BELL); - 
blk_mq_tagset_busy_iter(q->tag_set, ufshcd_compl_tm, &ci); + issued = hba->outstanding_tasks & ~pending; + for_each_set_bit(tag, &issued, hba->nutmrs) { + struct request *req = hba->tmf_rqs[tag]; + struct completion *c = req->end_io_data; + + complete(c); + ret = IRQ_HANDLED; + } spin_unlock_irqrestore(hba->host->host_lock, flags); - return ci.ncpl ? IRQ_HANDLED : IRQ_NONE; + return ret; } /** @@ -6542,9 +6527,9 @@ static int __ufshcd_issue_tm_cmd(struct ufs_hba *hba, ufshcd_hold(hba, false); spin_lock_irqsave(host->host_lock, flags); - blk_mq_start_request(req); task_tag = req->tag; + hba->tmf_rqs[req->tag] = req; treq->upiu_req.req_header.dword_0 |= cpu_to_be32(task_tag); memcpy(hba->utmrdl_base_addr + task_tag, treq, sizeof(*treq)); @@ -6585,6 +6570,7 @@ static int __ufshcd_issue_tm_cmd(struct ufs_hba *hba, } spin_lock_irqsave(hba->host->host_lock, flags); + hba->tmf_rqs[req->tag] = NULL; __clear_bit(task_tag, &hba->outstanding_tasks); spin_unlock_irqrestore(hba->host->host_lock, flags); @@ -9635,6 +9621,12 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) err = PTR_ERR(hba->tmf_queue); goto free_tmf_tag_set; } + hba->tmf_rqs = devm_kcalloc(hba->dev, hba->nutmrs, + sizeof(*hba->tmf_rqs), GFP_KERNEL); + if (!hba->tmf_rqs) { + err = -ENOMEM; + goto free_tmf_queue; + } /* Reset the attached device */ ufshcd_device_reset(hba); diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index f0da5d3db1fa..41f6e06f9185 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -828,6 +828,7 @@ struct ufs_hba { struct blk_mq_tag_set tmf_tag_set; struct request_queue *tmf_queue; + struct request **tmf_rqs; struct uic_command *active_uic_cmd; struct mutex uic_cmd_mutex; From a013c71c6315d6e9d6364d12251b98c75c9a2861 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 4 Oct 2021 13:38:51 +0300 Subject: [PATCH 034/235] scsi: elx: efct: Delete stray unlock statement It's not holding the lock at this stage and the 
IRQ "flags" are not correct so it would restore something bogus. Delete the unlock statement. Link: https://lore.kernel.org/r/20211004103851.GE25015@kili Fixes: 3e6414003bf9 ("scsi: elx: efct: SCSI I/O handling routines") Signed-off-by: Dan Carpenter Signed-off-by: Martin K. Petersen --- drivers/scsi/elx/efct/efct_scsi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/elx/efct/efct_scsi.c b/drivers/scsi/elx/efct/efct_scsi.c index 40fb3a724c76..cf2e41dd354c 100644 --- a/drivers/scsi/elx/efct/efct_scsi.c +++ b/drivers/scsi/elx/efct/efct_scsi.c @@ -32,7 +32,7 @@ efct_scsi_io_alloc(struct efct_node *node) struct efct *efct; struct efct_xport *xport; struct efct_io *io; - unsigned long flags = 0; + unsigned long flags; efct = node->efct; @@ -44,7 +44,6 @@ efct_scsi_io_alloc(struct efct_node *node) if (!io) { efc_log_err(efct, "IO alloc Failed\n"); atomic_add_return(1, &xport->io_alloc_failed_count); - spin_unlock_irqrestore(&node->active_ios_lock, flags); return NULL; } From 69a3a7bc7239170557dfc2c4ad5786a0c3d8759a Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 4 Oct 2021 16:12:10 -0700 Subject: [PATCH 035/235] scsi: lpfc: Fix memory overwrite during FC-GS I/O abort handling When an FC-GS I/O is aborted by lpfc, the driver requires a node pointer for a dereference operation. In the abort I/O routine, the driver miscasts a context pointer to the wrong data type and overwrites a single byte outside of the allocated space. This miscast is done in the abort I/O function handler because the handler works on both FC-GS and FC-LS commands. However, the code neglected to get the correct job location for the node. Fix this by acquiring the necessary node pointer from the correct job structure depending on the I/O type. Link: https://lore.kernel.org/r/20211004231210.35524-1-jsmart2021@gmail.com Co-developed-by: Justin Tee Signed-off-by: Justin Tee Signed-off-by: James Smart Signed-off-by: Martin K. 
Petersen --- drivers/scsi/lpfc/lpfc_sli.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 78ce38d7251c..026a1196a54d 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -12292,12 +12292,12 @@ void lpfc_ignore_els_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, struct lpfc_iocbq *rspiocb) { - struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) cmdiocb->context1; + struct lpfc_nodelist *ndlp = NULL; IOCB_t *irsp = &rspiocb->iocb; /* ELS cmd tag completes */ lpfc_printf_log(phba, KERN_INFO, LOG_ELS, - "0139 Ignoring ELS cmd tag x%x completion Data: " + "0139 Ignoring ELS cmd code x%x completion Data: " "x%x x%x x%x\n", irsp->ulpIoTag, irsp->ulpStatus, irsp->un.ulpWord[4], irsp->ulpTimeout); @@ -12305,10 +12305,13 @@ lpfc_ignore_els_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, * Deref the ndlp after free_iocb. sli_release_iocb will access the ndlp * if exchange is busy. */ - if (cmdiocb->iocb.ulpCommand == CMD_GEN_REQUEST64_CR) + if (cmdiocb->iocb.ulpCommand == CMD_GEN_REQUEST64_CR) { + ndlp = cmdiocb->context_un.ndlp; lpfc_ct_free_iocb(phba, cmdiocb); - else + } else { + ndlp = (struct lpfc_nodelist *) cmdiocb->context1; lpfc_els_free_iocb(phba, cmdiocb); + } lpfc_nlp_put(ndlp); } From 258aad75c62146453d03028a44f2f1590d58e1f6 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 4 Oct 2021 16:06:08 -0500 Subject: [PATCH 036/235] scsi: iscsi: Fix iscsi_task use after free Commit d39df158518c ("scsi: iscsi: Have abort handler get ref to conn") added iscsi_get_conn()/iscsi_put_conn() calls during abort handling but then also changed the handling of the case where we detect an already completed task, where we now end up doing a goto to the common put/cleanup code. This results in an iscsi_task use after free, because the common cleanup code will do a put on the iscsi_task.
This reverts the goto and moves the iscsi_get_conn() to after we've checked if the iscsi_task is valid. Link: https://lore.kernel.org/r/20211004210608.9962-1-michael.christie@oracle.com Fixes: d39df158518c ("scsi: iscsi: Have abort handler get ref to conn") Signed-off-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/scsi/libiscsi.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 4683c183e9d4..5bc91d34df63 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -2281,11 +2281,6 @@ int iscsi_eh_abort(struct scsi_cmnd *sc) return FAILED; } - conn = session->leadconn; - iscsi_get_conn(conn->cls_conn); - conn->eh_abort_cnt++; - age = session->age; - spin_lock(&session->back_lock); task = (struct iscsi_task *)sc->SCp.ptr; if (!task || !task->sc) { @@ -2293,8 +2288,16 @@ int iscsi_eh_abort(struct scsi_cmnd *sc) ISCSI_DBG_EH(session, "sc completed while abort in progress\n"); spin_unlock(&session->back_lock); - goto success; + spin_unlock_bh(&session->frwd_lock); + mutex_unlock(&session->eh_mutex); + return SUCCESS; } + + conn = session->leadconn; + iscsi_get_conn(conn->cls_conn); + conn->eh_abort_cnt++; + age = session->age; + ISCSI_DBG_EH(session, "aborting [sc %p itt 0x%x]\n", sc, task->itt); __iscsi_get_task(task); spin_unlock(&session->back_lock); From 5963e5262180129f1be7556bd96994b6e52f3178 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 4 Oct 2021 09:40:50 +0200 Subject: [PATCH 037/235] ALSA: usb-audio: Enable rate validation for Scarlett devices The Scarlett device series from Focusrite Novation seems to require the same sample rate validation that we already do for MOTU devices; otherwise the driver probes invalid audioformat entries that contain sample rates that don't actually work, and this may result in an incomplete setup, as reported recently.
This patch adds the needed quirk flag for enabling the sample rate validation for Focusrite Novation devices. Fixes: fe773b8711e3 ("ALSA: usb-audio: workaround for iface reset issue") BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=214493 Link: https://lore.kernel.org/r/20211004074050.28241-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 6ee6d24c847f..889c855addfc 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1900,6 +1900,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_CTL_MSG_DELAY | QUIRK_FLAG_IFACE_DELAY), VENDOR_FLG(0x07fd, /* MOTU */ QUIRK_FLAG_VALIDATE_RATES), + VENDOR_FLG(0x1235, /* Focusrite Novation */ + QUIRK_FLAG_VALIDATE_RATES), VENDOR_FLG(0x152a, /* Thesycon devices */ QUIRK_FLAG_DSD_RAW), VENDOR_FLG(0x1de7, /* Phoenix Audio */ From 0432523f4807a83902857347bd73eb817ef0a742 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 22 Sep 2021 12:16:35 +0200 Subject: [PATCH 038/235] xen/privcmd: replace kcalloc() by kvcalloc() when allocating empty pages Osstest has been suffering test failures for a little while from order-4 allocation failures, resulting from alloc_empty_pages() calling kcalloc(). As there's no need for physically contiguous space here, switch to kvcalloc(). 
Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/6d698901-98a4-05be-c421-bcd0713f5335@suse.com Signed-off-by: Juergen Gross --- drivers/xen/privcmd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 720a7b7abd46..a3f2031aa3d9 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -420,7 +420,7 @@ static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs) int rc; struct page **pages; - pages = kcalloc(numpgs, sizeof(pages[0]), GFP_KERNEL); + pages = kvcalloc(numpgs, sizeof(pages[0]), GFP_KERNEL); if (pages == NULL) return -ENOMEM; @@ -428,7 +428,7 @@ static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs) if (rc != 0) { pr_warn("%s Could not alloc %d pfns rc:%d\n", __func__, numpgs, rc); - kfree(pages); + kvfree(pages); return -ENOMEM; } BUG_ON(vma->vm_private_data != NULL); @@ -912,7 +912,7 @@ static void privcmd_close(struct vm_area_struct *vma) else pr_crit("unable to unmap MFN range: leaking %d pages. rc=%d\n", numpgs, rc); - kfree(pages); + kvfree(pages); } static vm_fault_t privcmd_fault(struct vm_fault *vmf) From e11423d6721dd63b23fb41ade5e8d0b448b17780 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 22 Sep 2021 12:17:48 +0200 Subject: [PATCH 039/235] xen/privcmd: fix error handling in mmap-resource processing xen_pfn_t is the same size as int only on 32-bit builds (and not even on Arm32). Hence pfns[] can't be used directly to read individual error values returned from xen_remap_domain_mfn_array(); every other error indicator would be skipped/ignored on 64-bit. 
Fixes: 3ad0876554ca ("xen/privcmd: add IOCTL_PRIVCMD_MMAP_RESOURCE") Cc: stable@vger.kernel.org Signed-off-by: Jan Beulich Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/aa6d6a67-6889-338a-a910-51e889f792d5@suse.com Signed-off-by: Juergen Gross --- drivers/xen/privcmd.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index a3f2031aa3d9..5af2a295e32f 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -803,11 +803,12 @@ static long privcmd_ioctl_mmap_resource(struct file *file, unsigned int domid = (xdata.flags & XENMEM_rsrc_acq_caller_owned) ? DOMID_SELF : kdata.dom; - int num; + int num, *errs = (int *)pfns; + BUILD_BUG_ON(sizeof(*errs) > sizeof(*pfns)); num = xen_remap_domain_mfn_array(vma, kdata.addr & PAGE_MASK, - pfns, kdata.num, (int *)pfns, + pfns, kdata.num, errs, vma->vm_page_prot, domid, vma->vm_private_data); @@ -817,7 +818,7 @@ static long privcmd_ioctl_mmap_resource(struct file *file, unsigned int i; for (i = 0; i < num; i++) { - rc = pfns[i]; + rc = errs[i]; if (rc < 0) break; } From 97315723c463679a9ecf803d6479fca24c3efda0 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 22 Sep 2021 12:18:25 +0200 Subject: [PATCH 040/235] xen/privcmd: drop "pages" parameter from xen_remap_pfn() The function doesn't use it and all of its callers say in a comment that their respective arguments are to be non-NULL only in auto-translated mode. Since xen_remap_domain_mfn_array() isn't supposed to be used by non-PV, drop the parameter there as well. It was bogusly passed as non- NULL (PRIV_VMA_LOCKED) by its only caller anyway. For xen_remap_domain_gfn_range(), otoh, it's not clear at all why this wouldn't want / might not need to gain auto-translated support down the road, so the parameter is retained there despite now remaining unused (and the only caller passing NULL); correct a respective comment as well. 
Signed-off-by: Jan Beulich Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/036ad8a2-46f9-ac3d-6219-bdc93ab9e10b@suse.com Signed-off-by: Juergen Gross --- arch/x86/xen/mmu_pv.c | 2 +- drivers/xen/privcmd.c | 5 ++--- include/xen/xen-ops.h | 15 ++++++--------- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 8d751939c6f3..3359c23573c5 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -2398,7 +2398,7 @@ static int remap_area_pfn_pte_fn(pte_t *ptep, unsigned long addr, void *data) int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr, xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot, - unsigned int domid, bool no_translate, struct page **pages) + unsigned int domid, bool no_translate) { int err = 0; struct remap_data rmd; diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 5af2a295e32f..3369734108af 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -257,7 +257,7 @@ static long privcmd_ioctl_mmap(struct file *file, void __user *udata) LIST_HEAD(pagelist); struct mmap_gfn_state state; - /* We only support privcmd_ioctl_mmap_batch for auto translated. */ + /* We only support privcmd_ioctl_mmap_batch for non-auto-translated. 
*/ if (xen_feature(XENFEAT_auto_translated_physmap)) return -ENOSYS; @@ -810,8 +810,7 @@ static long privcmd_ioctl_mmap_resource(struct file *file, kdata.addr & PAGE_MASK, pfns, kdata.num, errs, vma->vm_page_prot, - domid, - vma->vm_private_data); + domid); if (num < 0) rc = num; else if (num != kdata.num) { diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index db28e79b77ee..a3584a357f35 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -52,12 +52,12 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order); #if defined(CONFIG_XEN_PV) int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr, xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot, - unsigned int domid, bool no_translate, struct page **pages); + unsigned int domid, bool no_translate); #else static inline int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr, xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot, unsigned int domid, - bool no_translate, struct page **pages) + bool no_translate) { BUG(); return 0; @@ -134,7 +134,7 @@ static inline int xen_remap_domain_gfn_array(struct vm_area_struct *vma, */ BUG_ON(err_ptr == NULL); return xen_remap_pfn(vma, addr, gfn, nr, err_ptr, prot, domid, - false, pages); + false); } /* @@ -146,7 +146,6 @@ static inline int xen_remap_domain_gfn_array(struct vm_area_struct *vma, * @err_ptr: Returns per-MFN error status. * @prot: page protection mask * @domid: Domain owning the pages - * @pages: Array of pages if this domain has an auto-translated physmap * * @mfn and @err_ptr may point to the same buffer, the MFNs will be * overwritten by the error codes after they are mapped. 
@@ -157,14 +156,13 @@ static inline int xen_remap_domain_gfn_array(struct vm_area_struct *vma, static inline int xen_remap_domain_mfn_array(struct vm_area_struct *vma, unsigned long addr, xen_pfn_t *mfn, int nr, int *err_ptr, - pgprot_t prot, unsigned int domid, - struct page **pages) + pgprot_t prot, unsigned int domid) { if (xen_feature(XENFEAT_auto_translated_physmap)) return -EOPNOTSUPP; return xen_remap_pfn(vma, addr, mfn, nr, err_ptr, prot, domid, - true, pages); + true); } /* xen_remap_domain_gfn_range() - map a range of foreign frames @@ -188,8 +186,7 @@ static inline int xen_remap_domain_gfn_range(struct vm_area_struct *vma, if (xen_feature(XENFEAT_auto_translated_physmap)) return -EOPNOTSUPP; - return xen_remap_pfn(vma, addr, &gfn, nr, NULL, prot, domid, false, - pages); + return xen_remap_pfn(vma, addr, &gfn, nr, NULL, prot, domid, false); } int xen_unmap_domain_gfn_range(struct vm_area_struct *vma, From 9172b5c4a778da1f855b2e3780b1afabb3cfd523 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 30 Sep 2021 14:16:15 +0200 Subject: [PATCH 041/235] xen/x86: prevent PVH type from getting clobbered Like xen_start_flags, xen_domain_type gets set before .bss gets cleared. Hence this variable also needs to be prevented from getting put in .bss, which is possible because XEN_NATIVE is an enumerator evaluating to zero. Any use prior to init_hvm_pv_info() setting the variable again would lead to wrong decisions; one such case is xenboot_console_setup() when called as a result of "earlyprintk=xen". Use __ro_after_init as more applicable than either __section(".data") or __read_mostly. 
Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/d301677b-6f22-5ae6-bd36-458e1f323d0b@suse.com Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index c79bd0af2e8c..f252faf5028f 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -52,9 +52,6 @@ DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); DEFINE_PER_CPU(uint32_t, xen_vcpu_id); EXPORT_PER_CPU_SYMBOL(xen_vcpu_id); -enum xen_domain_type xen_domain_type = XEN_NATIVE; -EXPORT_SYMBOL_GPL(xen_domain_type); - unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START; EXPORT_SYMBOL(machine_to_phys_mapping); unsigned long machine_to_phys_nr; @@ -69,9 +66,11 @@ __read_mostly int xen_have_vector_callback; EXPORT_SYMBOL_GPL(xen_have_vector_callback); /* - * NB: needs to live in .data because it's used by xen_prepare_pvh which runs - * before clearing the bss. + * NB: These need to live in .data or alike because they're used by + * xen_prepare_pvh() which runs before clearing the bss. */ +enum xen_domain_type __ro_after_init xen_domain_type = XEN_NATIVE; +EXPORT_SYMBOL_GPL(xen_domain_type); uint32_t xen_start_flags __section(".data") = 0; EXPORT_SYMBOL(xen_start_flags); From cae7d81a3730dfe08623f8c1083230c8d0987639 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 30 Sep 2021 14:16:47 +0200 Subject: [PATCH 042/235] xen/x86: allow PVH Dom0 without XEN_PV=y Decouple XEN_DOM0 from XEN_PV, converting some existing uses of XEN_DOM0 to a new XEN_PV_DOM0. (I'm not convinced all are really / should really be PV-specific, but for starters I've tried to be conservative.) For PVH Dom0 the hypervisor populates MADT with only x2APIC entries, so without x2APIC support enabled in the kernel things aren't going to work very well. (As opposed, DomU-s would only ever see LAPIC entries in MADT as of now.) 
Note that this then requires PVH Dom0 to be 64-bit, as X86_X2APIC depends on X86_64. In the course of this xen_running_on_version_or_later() needs to be available more broadly. Move it from a PV-specific to a generic file, considering that what it does isn't really PV-specific at all anyway. Note that xen/interface/version.h cannot be included on its own; in enlighten.c, which uses SCHEDOP_* anyway, include xen/interface/sched.h first to resolve the apparently sole missing type (xen_ulong_t). Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/983bb72f-53df-b6af-14bd-5e088bd06a08@suse.com Signed-off-by: Juergen Gross --- arch/x86/include/asm/xen/pci.h | 11 +++++++---- arch/x86/pci/xen.c | 15 +++++++++------ arch/x86/xen/Kconfig | 19 ++++++++++++------- arch/x86/xen/Makefile | 2 +- arch/x86/xen/enlighten.c | 17 +++++++++++++++++ arch/x86/xen/enlighten_pv.c | 16 ---------------- arch/x86/xen/xen-ops.h | 2 +- drivers/xen/Kconfig | 4 ++-- 8 files changed, 49 insertions(+), 37 deletions(-) diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h index 3506d8c598c1..4557f7cb0fa6 100644 --- a/arch/x86/include/asm/xen/pci.h +++ b/arch/x86/include/asm/xen/pci.h @@ -14,16 +14,19 @@ static inline int pci_xen_hvm_init(void) return -1; } #endif -#if defined(CONFIG_XEN_DOM0) +#ifdef CONFIG_XEN_PV_DOM0 int __init pci_xen_initial_domain(void); -int xen_find_device_domain_owner(struct pci_dev *dev); -int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain); -int xen_unregister_device_domain_owner(struct pci_dev *dev); #else static inline int __init pci_xen_initial_domain(void) { return -1; } +#endif +#ifdef CONFIG_XEN_DOM0 +int xen_find_device_domain_owner(struct pci_dev *dev); +int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain); +int xen_unregister_device_domain_owner(struct pci_dev *dev); +#else static inline int xen_find_device_domain_owner(struct pci_dev *dev) { return -1; diff --git 
a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 3d41a09c2c14..5debe4ac6f81 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -113,7 +113,7 @@ static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi, false /* no mapping of GSI to PIRQ */); } -#ifdef CONFIG_XEN_DOM0 +#ifdef CONFIG_XEN_PV_DOM0 static int xen_register_gsi(u32 gsi, int triggering, int polarity) { int rc, irq; @@ -261,7 +261,7 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) return irq; } -#ifdef CONFIG_XEN_DOM0 +#ifdef CONFIG_XEN_PV_DOM0 static bool __read_mostly pci_seg_supported = true; static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) @@ -375,10 +375,10 @@ static void xen_initdom_restore_msi_irqs(struct pci_dev *dev) WARN(ret && ret != -ENOSYS, "restore_msi -> %d\n", ret); } } -#else /* CONFIG_XEN_DOM0 */ +#else /* CONFIG_XEN_PV_DOM0 */ #define xen_initdom_setup_msi_irqs NULL #define xen_initdom_restore_msi_irqs NULL -#endif /* !CONFIG_XEN_DOM0 */ +#endif /* !CONFIG_XEN_PV_DOM0 */ static void xen_teardown_msi_irqs(struct pci_dev *dev) { @@ -555,7 +555,7 @@ int __init pci_xen_hvm_init(void) return 0; } -#ifdef CONFIG_XEN_DOM0 +#ifdef CONFIG_XEN_PV_DOM0 int __init pci_xen_initial_domain(void) { int irq; @@ -583,6 +583,9 @@ int __init pci_xen_initial_domain(void) } return 0; } +#endif + +#ifdef CONFIG_XEN_DOM0 struct xen_device_domain_owner { domid_t domain; @@ -656,4 +659,4 @@ int xen_unregister_device_domain_owner(struct pci_dev *dev) return 0; } EXPORT_SYMBOL_GPL(xen_unregister_device_domain_owner); -#endif +#endif /* CONFIG_XEN_DOM0 */ diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index afc1da68b06d..6bcd3d8ca6ac 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -43,13 +43,9 @@ config XEN_PV_SMP def_bool y depends on XEN_PV && SMP -config XEN_DOM0 - bool "Xen PV Dom0 support" - default y - depends on XEN_PV && PCI_XEN && SWIOTLB_XEN - depends on X86_IO_APIC && ACPI && PCI - help - Support running as 
a Xen PV Dom0 guest. +config XEN_PV_DOM0 + def_bool y + depends on XEN_PV && XEN_DOM0 config XEN_PVHVM def_bool y @@ -86,3 +82,12 @@ config XEN_PVH def_bool n help Support for running as a Xen PVH guest. + +config XEN_DOM0 + bool "Xen Dom0 support" + default XEN_PV + depends on (XEN_PV && SWIOTLB_XEN) || (XEN_PVH && X86_64) + depends on X86_IO_APIC && ACPI && PCI + select X86_X2APIC if XEN_PVH && X86_64 + help + Support running as a Xen Dom0 guest. diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 40b5779fce21..4953260e281c 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -45,7 +45,7 @@ obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o -obj-$(CONFIG_XEN_DOM0) += vga.o +obj-$(CONFIG_XEN_PV_DOM0) += vga.o obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index f252faf5028f..501466038075 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -10,6 +10,8 @@ #include #include +#include +#include #include #include @@ -257,6 +259,21 @@ int xen_vcpu_setup(int cpu) return ((per_cpu(xen_vcpu, cpu) == NULL) ? 
-ENODEV : 0); } +/* Check if running on Xen version (major, minor) or later */ +bool xen_running_on_version_or_later(unsigned int major, unsigned int minor) +{ + unsigned int version; + + if (!xen_domain()) + return false; + + version = HYPERVISOR_xen_version(XENVER_version, NULL); + if ((((version >> 16) == major) && ((version & 0xffff) >= minor)) || + ((version >> 16) > major)) + return true; + return false; +} + void xen_reboot(int reason) { struct sched_shutdown r = { .reason = reason }; diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 6e0d0754f94f..e46953249e39 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -142,22 +142,6 @@ static void __init xen_pv_guest_late_init(void) #endif } -/* Check if running on Xen version (major, minor) or later */ -bool -xen_running_on_version_or_later(unsigned int major, unsigned int minor) -{ - unsigned int version; - - if (!xen_domain()) - return false; - - version = HYPERVISOR_xen_version(XENVER_version, NULL); - if ((((version >> 16) == major) && ((version & 0xffff) >= minor)) || - ((version >> 16) > major)) - return true; - return false; -} - static __read_mostly unsigned int cpuid_leaf5_ecx_val; static __read_mostly unsigned int cpuid_leaf5_edx_val; diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 8d7ec49a35fb..6d9b2b3e0c1c 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -109,7 +109,7 @@ static inline void xen_uninit_lock_cpu(int cpu) struct dom0_vga_console_info; -#ifdef CONFIG_XEN_DOM0 +#ifdef CONFIG_XEN_PV_DOM0 void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size); #else static inline void __init xen_init_vga(const struct dom0_vga_console_info *info, diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 22f5aff0c136..1b2c3aca6887 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -241,7 +241,7 @@ config XEN_PRIVCMD config XEN_ACPI_PROCESSOR tristate "Xen ACPI processor" - depends on XEN 
&& XEN_DOM0 && X86 && ACPI_PROCESSOR && CPU_FREQ + depends on XEN && XEN_PV_DOM0 && X86 && ACPI_PROCESSOR && CPU_FREQ default m help This ACPI processor uploads Power Management information to the Xen @@ -259,7 +259,7 @@ config XEN_ACPI_PROCESSOR config XEN_MCE_LOG bool "Xen platform mcelog" - depends on XEN_DOM0 && X86_MCE + depends on XEN_PV_DOM0 && X86_MCE help Allow kernel fetching MCE error from Xen platform and converting it into Linux mcelog format for mcelog tools From adf330a7cd64a8bb959dc48a9c282285c1d5b4d5 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 30 Sep 2021 14:17:41 +0200 Subject: [PATCH 043/235] xen/x86: make "earlyprintk=xen" work better for PVH Dom0 The xen_hvm_early_write() path better wouldn't be taken in this case; while port 0xE9 can be used, the hypercall path is quite a bit more efficient. Put that first, as it may also work for DomU-s (see also xen_raw_console_write()). While there also bail from the function when the first domU_write_console() failed - later ones aren't going to succeed. 
Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/4fd89dcb-cfc5-c740-2e94-bb271e432d3e@suse.com Signed-off-by: Juergen Gross --- drivers/tty/hvc/hvc_xen.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index 92c9a476defc..ee7ea8d762fa 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -621,17 +621,16 @@ static void xenboot_write_console(struct console *console, const char *string, unsigned int linelen, off = 0; const char *pos; + if (dom0_write_console(0, string, len) >= 0) + return; + if (!xen_pv_domain()) { xen_hvm_early_write(0, string, len); return; } - dom0_write_console(0, string, len); - - if (xen_initial_domain()) + if (domU_write_console(0, "(early) ", 8) < 0) return; - - domU_write_console(0, "(early) ", 8); while (off < len && NULL != (pos = strchr(string+off, '\n'))) { linelen = pos-string+off; if (off + linelen > len) From 8e24d9bfc44d3bd884669ef8b344112fe41c9826 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 30 Sep 2021 14:18:12 +0200 Subject: [PATCH 044/235] xen/x86: allow "earlyprintk=xen" to work for PV Dom0 With preferred consoles "tty" and "hvc" announced as preferred, registering "xenboot" early won't result in use of the console: It also needs to be registered as preferred. Generalize this from being DomU- only so far. 
Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/d4a34540-a476-df2c-bca6-732d0d58c5f0@suse.com Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten_pv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index e46953249e39..b1c0e06dcaa8 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1348,7 +1348,6 @@ asmlinkage __visible void __init xen_start_kernel(void) boot_params.hdr.hardware_subarch = X86_SUBARCH_XEN; if (!xen_initial_domain()) { - add_preferred_console("xenboot", 0, NULL); if (pci_xen) x86_init.pci.arch_init = pci_xen_init; x86_platform.set_legacy_features = @@ -1393,6 +1392,7 @@ asmlinkage __visible void __init xen_start_kernel(void) #endif } + add_preferred_console("xenboot", 0, NULL); if (!boot_params.screen_info.orig_video_isVGA) add_preferred_console("tty", 0, NULL); add_preferred_console("hvc", 0, NULL); From 42bc9716bc1df21b55b303fe243f8575b3af24f9 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 30 Sep 2021 14:18:45 +0200 Subject: [PATCH 045/235] xen/x86: make "earlyprintk=xen" work for HVM/PVH DomU xenboot_write_console() is dealing with these quite fine so I don't see why xenboot_console_setup() would return -ENOENT in this case. Adjust documentation accordingly. 
Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/3d212583-700e-8b2d-727a-845ef33ac265@suse.com Signed-off-by: Juergen Gross --- Documentation/admin-guide/kernel-parameters.txt | 2 +- drivers/tty/hvc/hvc_xen.c | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index bdb22006f713..259d03fc38d1 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1255,7 +1255,7 @@ The VGA and EFI output is eventually overwritten by the real console. - The xen output can only be used by Xen PV guests. + The xen option can only be used in Xen domains. The sclp output can only be used on s390. diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index ee7ea8d762fa..f1c99b4b89b2 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -607,10 +607,8 @@ static int __init xenboot_console_setup(struct console *console, char *string) { static struct xencons_info xenboot; - if (xen_initial_domain()) + if (xen_initial_domain() || !xen_pv_domain()) return 0; - if (!xen_pv_domain()) - return -ENODEV; return xencons_info_pv_init(&xenboot, 0); } From 4d1ab432acc9391a5ae13c629dbb5882c29fd1b0 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 30 Sep 2021 14:19:16 +0200 Subject: [PATCH 046/235] xen/x86: generalize preferred console model from PV to PVH Dom0 Without announcing hvc0 as preferred it won't get used as long as tty0 gets registered earlier. This is particularly problematic with there not being any screen output for PVH Dom0 when the screen is in graphics mode, as the necessary information doesn't get conveyed yet from the hypervisor. Follow PV's model, but be conservative and do this for Dom0 only for now. 
Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/582328b6-c86c-37f3-d802-5539b7a86736@suse.com Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten.c | 12 ++++++++++++ arch/x86/xen/enlighten_pv.c | 8 +------- arch/x86/xen/enlighten_pvh.c | 3 +++ arch/x86/xen/xen-ops.h | 2 ++ 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 501466038075..c5b1fd606c6f 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -3,6 +3,7 @@ #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG #include #endif +#include #include #include #include @@ -18,6 +19,7 @@ #include #include #include +#include #include "xen-ops.h" #include "smp.h" @@ -274,6 +276,16 @@ bool xen_running_on_version_or_later(unsigned int major, unsigned int minor) return false; } +void __init xen_add_preferred_consoles(void) +{ + add_preferred_console("xenboot", 0, NULL); + if (!boot_params.screen_info.orig_video_isVGA) + add_preferred_console("tty", 0, NULL); + add_preferred_console("hvc", 0, NULL); + if (boot_params.screen_info.orig_video_isVGA) + add_preferred_console("tty", 0, NULL); +} + void xen_reboot(int reason) { struct sched_shutdown r = { .reason = reason }; diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index b1c0e06dcaa8..fb3095ba3bf1 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include @@ -1392,12 +1391,7 @@ asmlinkage __visible void __init xen_start_kernel(void) #endif } - add_preferred_console("xenboot", 0, NULL); - if (!boot_params.screen_info.orig_video_isVGA) - add_preferred_console("tty", 0, NULL); - add_preferred_console("hvc", 0, NULL); - if (boot_params.screen_info.orig_video_isVGA) - add_preferred_console("tty", 0, NULL); + xen_add_preferred_consoles(); #ifdef CONFIG_PCI /* PCI BIOS service won't work from a PV guest. 
*/ diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c index 0d5e34b9e6f9..9029c5ab60e2 100644 --- a/arch/x86/xen/enlighten_pvh.c +++ b/arch/x86/xen/enlighten_pvh.c @@ -36,6 +36,9 @@ void __init xen_pvh_init(struct boot_params *boot_params) pfn = __pa(hypercall_page); wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); + if (xen_initial_domain()) + x86_init.oem.arch_setup = xen_add_preferred_consoles; + xen_efi_init(boot_params); } diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 6d9b2b3e0c1c..524d1243a0ce 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -118,6 +118,8 @@ static inline void __init xen_init_vga(const struct dom0_vga_console_info *info, } #endif +void xen_add_preferred_consoles(void); + void __init xen_init_apic(void); #ifdef CONFIG_XEN_EFI From 079c4baa2aad05e8007faa24b2411c1457f60d74 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 30 Sep 2021 14:19:48 +0200 Subject: [PATCH 047/235] xen/x86: hook up xen_banner() also for PVH This was effectively lost while dropping PVHv1 code. Move the function and arrange for it to be called the same way as done in PV mode. Clearly this then needs re-introducing the XENFEAT_mmu_pt_update_preserve_ad check that was recently removed, as that's a PV-only feature. Since the string pointed at by pv_info.name describes the mode, drop "paravirtualized" from the log message while moving the code. 
Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/de03054d-a20d-2114-bb86-eec28e17b3b8@suse.com Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten.c | 14 ++++++++++++++ arch/x86/xen/enlighten_pv.c | 11 ----------- arch/x86/xen/enlighten_pvh.c | 1 + arch/x86/xen/xen-ops.h | 1 + 4 files changed, 16 insertions(+), 11 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index c5b1fd606c6f..57efb484e7df 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -261,6 +261,20 @@ int xen_vcpu_setup(int cpu) return ((per_cpu(xen_vcpu, cpu) == NULL) ? -ENODEV : 0); } +void __init xen_banner(void) +{ + unsigned version = HYPERVISOR_xen_version(XENVER_version, NULL); + struct xen_extraversion extra; + + HYPERVISOR_xen_version(XENVER_extraversion, &extra); + + pr_info("Booting kernel on %s\n", pv_info.name); + pr_info("Xen version: %u.%u%s%s\n", + version >> 16, version & 0xffff, extra.extraversion, + xen_feature(XENFEAT_mmu_pt_update_preserve_ad) + ? 
" (preserve-AD)" : ""); +} + /* Check if running on Xen version (major, minor) or later */ bool xen_running_on_version_or_later(unsigned int major, unsigned int minor) { diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index fb3095ba3bf1..a7b7d674f500 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -108,17 +108,6 @@ struct tls_descs { */ static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc); -static void __init xen_banner(void) -{ - unsigned version = HYPERVISOR_xen_version(XENVER_version, NULL); - struct xen_extraversion extra; - HYPERVISOR_xen_version(XENVER_extraversion, &extra); - - pr_info("Booting paravirtualized kernel on %s\n", pv_info.name); - pr_info("Xen version: %d.%d%s (preserve-AD)\n", - version >> 16, version & 0xffff, extra.extraversion); -} - static void __init xen_pv_init_platform(void) { populate_extra_pte(fix_to_virt(FIX_PARAVIRT_BOOTMAP)); diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c index 9029c5ab60e2..b20bd5439837 100644 --- a/arch/x86/xen/enlighten_pvh.c +++ b/arch/x86/xen/enlighten_pvh.c @@ -38,6 +38,7 @@ void __init xen_pvh_init(struct boot_params *boot_params) if (xen_initial_domain()) x86_init.oem.arch_setup = xen_add_preferred_consoles; + x86_init.oem.banner = xen_banner; xen_efi_init(boot_params); } diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 524d1243a0ce..8bc8b72a205d 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -51,6 +51,7 @@ void __init xen_remap_memory(void); phys_addr_t __init xen_find_free_area(phys_addr_t size); char * __init xen_memory_setup(void); void __init xen_arch_setup(void); +void xen_banner(void); void xen_enable_sysenter(void); void xen_enable_syscall(void); void xen_vcpu_restore(void); From 59f7e5374175ce5d776efeb12a1e61cd6b1f82fb Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 30 Sep 2021 14:20:42 +0200 Subject: [PATCH 048/235] x86/PVH: adjust function/data placement Two of the 
variables can live in .init.data, allowing the open-coded placing in .data to go away. Another "variable" is used to communicate a size value only to very early assembly code, which hence can be both const and live in .init.*. Additionally two functions were lacking __init annotations. Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/3b0bb22e-43f4-e459-c5cb-169f996b5669@suse.com Signed-off-by: Juergen Gross --- arch/x86/platform/pvh/enlighten.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/platform/pvh/enlighten.c b/arch/x86/platform/pvh/enlighten.c index 9ac7457f52a3..ed0442e35434 100644 --- a/arch/x86/platform/pvh/enlighten.c +++ b/arch/x86/platform/pvh/enlighten.c @@ -16,15 +16,15 @@ /* * PVH variables. * - * pvh_bootparams and pvh_start_info need to live in the data segment since + * pvh_bootparams and pvh_start_info need to live in a data segment since * they are used after startup_{32|64}, which clear .bss, are invoked. 
*/ -struct boot_params pvh_bootparams __section(".data"); -struct hvm_start_info pvh_start_info __section(".data"); +struct boot_params __initdata pvh_bootparams; +struct hvm_start_info __initdata pvh_start_info; -unsigned int pvh_start_info_sz = sizeof(pvh_start_info); +const unsigned int __initconst pvh_start_info_sz = sizeof(pvh_start_info); -static u64 pvh_get_root_pointer(void) +static u64 __init pvh_get_root_pointer(void) { return pvh_start_info.rsdp_paddr; } @@ -107,7 +107,7 @@ void __init __weak xen_pvh_init(struct boot_params *boot_params) BUG(); } -static void hypervisor_specific_init(bool xen_guest) +static void __init hypervisor_specific_init(bool xen_guest) { if (xen_guest) xen_pvh_init(&pvh_bootparams); From 9c11112c0ec7ec322cd495320c3ab9fa8bdc1bbc Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 30 Sep 2021 14:21:13 +0200 Subject: [PATCH 049/235] xen/x86: adjust data placement Both xen_pvh and xen_start_flags get written just once early during init. Using the respective annotation then allows the open-coded placing in .data to go away. Additionally the former, like the latter, wants exporting, or else xen_pvh_domain() can't be used from modules. 
Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/8155ed26-5a1d-c06f-42d8-596d26e75849@suse.com Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten.c | 2 +- arch/x86/xen/enlighten_pvh.c | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 57efb484e7df..95d970359e17 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -75,7 +75,7 @@ EXPORT_SYMBOL_GPL(xen_have_vector_callback); */ enum xen_domain_type __ro_after_init xen_domain_type = XEN_NATIVE; EXPORT_SYMBOL_GPL(xen_domain_type); -uint32_t xen_start_flags __section(".data") = 0; +uint32_t __ro_after_init xen_start_flags; EXPORT_SYMBOL(xen_start_flags); /* diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c index b20bd5439837..bcae606bbc5c 100644 --- a/arch/x86/xen/enlighten_pvh.c +++ b/arch/x86/xen/enlighten_pvh.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include @@ -18,10 +19,11 @@ /* * PVH variables. * - * The variable xen_pvh needs to live in the data segment since it is used + * The variable xen_pvh needs to live in a data segment since it is used * after startup_{32|64} is invoked, which will clear the .bss segment. */ -bool xen_pvh __section(".data") = 0; +bool __ro_after_init xen_pvh; +EXPORT_SYMBOL_GPL(xen_pvh); void __init xen_pvh_init(struct boot_params *boot_params) { From 5aec98913095ed3b4424ed6c5fdeb6964e9734da Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Tue, 5 Oct 2021 14:35:14 +0800 Subject: [PATCH 050/235] ALSA: hda/realtek - ALC236 headset MIC recording issue In power save mode, the recording voice from the headset mic will have 2s more delay. Adding this patch will solve this issue.
[ minor coding style fix by tiwai ] Signed-off-by: Kailang Yang Tested-by: Kai-Heng Feng Cc: Link: https://lore.kernel.org/r/ccb0cdd5bbd7486eabbd8d987d384cb0@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 0331fae5525e..0689f43fc7af 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -526,6 +526,8 @@ static void alc_shutup_pins(struct hda_codec *codec) struct alc_spec *spec = codec->spec; switch (codec->core.vendor_id) { + case 0x10ec0236: + case 0x10ec0256: case 0x10ec0283: case 0x10ec0286: case 0x10ec0288: @@ -3529,7 +3531,8 @@ static void alc256_shutup(struct hda_codec *codec) /* If disable 3k pulldown control for alc257, the Mic detection will not work correctly * when booting with headset plugged. So skip setting it for the codec alc257 */ - if (codec->core.vendor_id != 0x10ec0257) + if (spec->codec_variant != ALC269_TYPE_ALC257 && + spec->codec_variant != ALC269_TYPE_ALC256) alc_update_coef_idx(codec, 0x46, 0, 3 << 12); if (!spec->no_shutup_pins) From ffac30be2a06b2516b2ce2afa2dcb2cf8af65a52 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Mon, 6 Sep 2021 12:12:59 +0800 Subject: [PATCH 051/235] drm/i915/audio: Use BIOS provided value for RKL HDA link Commit 989634fb49ad ("drm/i915/audio: set HDA link parameters in driver") makes HDMI audio on Lenovo P350 disappear. So in addition to TGL, extend the logic to RKL to use BIOS provided value to fix the regression. 
Fixes: 989634fb49ad ("drm/i915/audio: set HDA link parameters in driver") Reviewed-by: Kai Vehmanen Signed-off-by: Kai-Heng Feng Link: https://patchwork.freedesktop.org/patch/msgid/20210906041300.508458-1-kai.heng.feng@canonical.com (cherry picked from commit c6b40ee330fe09b332715bb7ec1467e4fcbe2e65) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_audio.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c index 532237588511..4e0f96bf6158 100644 --- a/drivers/gpu/drm/i915/display/intel_audio.c +++ b/drivers/gpu/drm/i915/display/intel_audio.c @@ -1308,8 +1308,9 @@ static void i915_audio_component_init(struct drm_i915_private *dev_priv) else aud_freq = aud_freq_init; - /* use BIOS provided value for TGL unless it is a known bad value */ - if (IS_TIGERLAKE(dev_priv) && aud_freq_init != AUD_FREQ_TGL_BROKEN) + /* use BIOS provided value for TGL and RKL unless it is a known bad value */ + if ((IS_TIGERLAKE(dev_priv) || IS_ROCKETLAKE(dev_priv)) && + aud_freq_init != AUD_FREQ_TGL_BROKEN) aud_freq = aud_freq_init; drm_dbg_kms(&dev_priv->drm, "use AUD_FREQ_CNTRL of 0x%x (init value 0x%x)\n", From 0c94777386495d6e0a9735d48ffd2abb8d680d7f Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 28 Jul 2021 14:41:31 +0200 Subject: [PATCH 052/235] drm/i915: Fix runtime pm handling in i915_gem_shrink MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We forgot to call intel_runtime_pm_put on error, fix it! 
Signed-off-by: Maarten Lankhorst Fixes: cf41a8f1dc1e ("drm/i915: Finally remove obj->mm.lock.") Cc: Thomas Hellström Cc: Daniel Vetter Cc: # v5.13+ Reviewed-by: Thomas Hellström Reviewed-by: Niranjana Vishwanathapura Link: https://patchwork.freedesktop.org/patch/msgid/20210830121006.2978297-9-maarten.lankhorst@linux.intel.com (cherry picked from commit 239f3c2ee18376587026efecaea5250fa5926d20) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index e382b7f2353b..5ab136ffdeb2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -118,7 +118,7 @@ i915_gem_shrink(struct i915_gem_ww_ctx *ww, intel_wakeref_t wakeref = 0; unsigned long count = 0; unsigned long scanned = 0; - int err; + int err = 0; /* CHV + VTD workaround use stop_machine(); need to trylock vm->mutex */ bool trylock_vm = !ww && intel_vm_no_concurrent_access_wa(i915); @@ -242,12 +242,15 @@ i915_gem_shrink(struct i915_gem_ww_ctx *ww, list_splice_tail(&still_in_list, phase->list); spin_unlock_irqrestore(&i915->mm.obj_lock, flags); if (err) - return err; + break; } if (shrink & I915_SHRINK_BOUND) intel_runtime_pm_put(&i915->runtime_pm, wakeref); + if (err) + return err; + if (nr_scanned) *nr_scanned += scanned; return count; From a532cde31de3cae6ed60e60d6f9379771f652809 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 29 Sep 2021 16:28:27 +0300 Subject: [PATCH 053/235] drm/i915/tc: Fix TypeC port init/resume time sanitization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Atm during driver loading and system resume TypeC ports are accessed before their HW/SW state is synced. Move the TypeC port sanitization to the encoder's sync_state hook to fix this. 
v2: Handle the encoder disabled case in gen11_dsi_sync_state() as well (Jose, Jani) Fixes: f9e76a6e68d3 ("drm/i915: Add an encoder hook to sanitize its state during init/resume") Cc: José Roberto de Souza Cc: Ville Syrjälä Signed-off-by: Imre Deak Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20210929132833.2253961-1-imre.deak@intel.com (cherry picked from commit 7194dc998dfffca096c30b3cd39625158608992d) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/icl_dsi.c | 10 ++++++++-- drivers/gpu/drm/i915/display/intel_ddi.c | 8 +++++++- drivers/gpu/drm/i915/display/intel_display.c | 20 +++++--------------- 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index 43ec7fcd3f5d..a3eae3f3eadc 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -1577,8 +1577,14 @@ static void gen11_dsi_sync_state(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc); - enum pipe pipe = intel_crtc->pipe; + struct intel_crtc *intel_crtc; + enum pipe pipe; + + if (!crtc_state) + return; + + intel_crtc = to_intel_crtc(crtc_state->uapi.crtc); + pipe = intel_crtc->pipe; /* wa verify 1409054076:icl,jsl,ehl */ if (DISPLAY_VER(dev_priv) == 11 && pipe == PIPE_B && diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 9903a78df896..bd184325d0c7 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -3807,7 +3807,13 @@ void hsw_ddi_get_config(struct intel_encoder *encoder, static void intel_ddi_sync_state(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { - if (intel_crtc_has_dp_encoder(crtc_state)) + struct drm_i915_private *i915 = 
to_i915(encoder->base.dev); + enum phy phy = intel_port_to_phy(i915, encoder->port); + + if (intel_phy_is_tc(i915, phy)) + intel_tc_port_sanitize(enc_to_dig_port(encoder)); + + if (crtc_state && intel_crtc_has_dp_encoder(crtc_state)) intel_dp_sync_state(encoder, crtc_state); } diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 134a6acbd8fb..17f44ffea586 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -13082,18 +13082,16 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) readout_plane_state(dev_priv); for_each_intel_encoder(dev, encoder) { + struct intel_crtc_state *crtc_state = NULL; + pipe = 0; if (encoder->get_hw_state(encoder, &pipe)) { - struct intel_crtc_state *crtc_state; - crtc = intel_get_crtc_for_pipe(dev_priv, pipe); crtc_state = to_intel_crtc_state(crtc->base.state); encoder->base.crtc = &crtc->base; intel_encoder_get_config(encoder, crtc_state); - if (encoder->sync_state) - encoder->sync_state(encoder, crtc_state); /* read out to slave crtc as well for bigjoiner */ if (crtc_state->bigjoiner) { @@ -13108,6 +13106,9 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) encoder->base.crtc = NULL; } + if (encoder->sync_state) + encoder->sync_state(encoder, crtc_state); + drm_dbg_kms(&dev_priv->drm, "[ENCODER:%d:%s] hw state readout: %s, pipe %c\n", encoder->base.base.id, encoder->base.name, @@ -13390,17 +13391,6 @@ intel_modeset_setup_hw_state(struct drm_device *dev, intel_modeset_readout_hw_state(dev); /* HW state is read out, now we need to sanitize this mess. */ - - /* Sanitize the TypeC port mode upfront, encoders depend on this */ - for_each_intel_encoder(dev, encoder) { - enum phy phy = intel_port_to_phy(dev_priv, encoder->port); - - /* We need to sanitize only the MST primary port. 
*/ - if (encoder->type != INTEL_OUTPUT_DP_MST && - intel_phy_is_tc(dev_priv, phy)) - intel_tc_port_sanitize(enc_to_dig_port(encoder)); - } - get_encoder_power_domains(dev_priv); if (HAS_PCH_IBX(dev_priv)) From fdddf8c3a477f77b3a623f220e78d45e89fc50d5 Mon Sep 17 00:00:00 2001 From: Lukasz Majczak Date: Thu, 30 Sep 2021 15:46:06 +0200 Subject: [PATCH 054/235] drm/i915/bdb: Fix version check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With patch "drm/i915/vbt: Fix backlight parsing for VBT 234+" the size of bdb_lfp_backlight_data structure has been increased, causing if-statement in the parse_lfp_backlight function that compares this structure size to the one retrieved from BDB, always to fail for older revisions. This patch calculates expected size of the structure for a given BDB version and compares it with the value gathered from BDB. Tested on Chromebook Pixelbook (Nocturne) (reports bdb->version = 221) Fixes: d381baad29b4 ("drm/i915/vbt: Fix backlight parsing for VBT 234+") Tested-by: Lukasz Majczak Signed-off-by: Lukasz Majczak Reviewed-by: José Roberto de Souza Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20210930134606.227234-1-lma@semihalf.com (cherry picked from commit 4378daf5d04eed59724e6d0e74755e17dce2e105) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_bios.c | 22 ++++++++++++++----- drivers/gpu/drm/i915/display/intel_vbt_defs.h | 5 +++++ 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index e86e6ed2d3bf..fd71346aac7b 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -451,13 +451,23 @@ parse_lfp_backlight(struct drm_i915_private *i915, } i915->vbt.backlight.type = INTEL_BACKLIGHT_DISPLAY_DDI; - if (bdb->version >= 191 && - get_blocksize(backlight_data) >= sizeof(*backlight_data)) { - const
struct lfp_backlight_control_method *method; + if (bdb->version >= 191) { + size_t exp_size; - method = &backlight_data->backlight_control[panel_type]; - i915->vbt.backlight.type = method->type; - i915->vbt.backlight.controller = method->controller; + if (bdb->version >= 236) + exp_size = sizeof(struct bdb_lfp_backlight_data); + else if (bdb->version >= 234) + exp_size = EXP_BDB_LFP_BL_DATA_SIZE_REV_234; + else + exp_size = EXP_BDB_LFP_BL_DATA_SIZE_REV_191; + + if (get_blocksize(backlight_data) >= exp_size) { + const struct lfp_backlight_control_method *method; + + method = &backlight_data->backlight_control[panel_type]; + i915->vbt.backlight.type = method->type; + i915->vbt.backlight.controller = method->controller; + } } i915->vbt.backlight.pwm_freq_hz = entry->pwm_freq_hz; diff --git a/drivers/gpu/drm/i915/display/intel_vbt_defs.h b/drivers/gpu/drm/i915/display/intel_vbt_defs.h index 330077c2e588..a2108a8f544d 100644 --- a/drivers/gpu/drm/i915/display/intel_vbt_defs.h +++ b/drivers/gpu/drm/i915/display/intel_vbt_defs.h @@ -814,6 +814,11 @@ struct lfp_brightness_level { u16 reserved; } __packed; +#define EXP_BDB_LFP_BL_DATA_SIZE_REV_191 \ + offsetof(struct bdb_lfp_backlight_data, brightness_level) +#define EXP_BDB_LFP_BL_DATA_SIZE_REV_234 \ + offsetof(struct bdb_lfp_backlight_data, brightness_precision_bits) + struct bdb_lfp_backlight_data { u8 entry_size; struct lfp_backlight_data_entry data[16]; From b2d73debfdc16b742e64948dc4461876af3f8c10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 30 Sep 2021 22:09:42 +0300 Subject: [PATCH 055/235] drm/i915: Extend the async flip VT-d w/a to skl/bxt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Looks like skl/bxt/derivatives also need the plane stride stretch w/a when using async flips and VT-d is enabled, or else we get corruption on screen. 
To my surprise this was even documented in bspec, but only as a note on the CHICKEN_PIPESL register description rather than on the w/a list. So very much the same thing as on HSW/BDW, except the bits moved yet again. Cc: stable@vger.kernel.org Cc: Karthik B S Fixes: 55ea1cb178ef ("drm/i915: Enable async flips in i915") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210930190943.17547-1-ville.syrjala@linux.intel.com Reviewed-by: Matt Roper (cherry picked from commit d08df3b0bdb25546e86dc9a6c4e3ec0c43832299) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_reg.h | 5 +++++ drivers/gpu/drm/i915/intel_pm.c | 12 ++++++++++++ 2 files changed, 17 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 664970f2bc62..4037030f0984 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -8193,6 +8193,11 @@ enum { #define HSW_SPR_STRETCH_MAX_X1 REG_FIELD_PREP(HSW_SPR_STRETCH_MAX_MASK, 3) #define HSW_FBCQ_DIS (1 << 22) #define BDW_DPRS_MASK_VBLANK_SRD (1 << 0) +#define SKL_PLANE1_STRETCH_MAX_MASK REG_GENMASK(1, 0) +#define SKL_PLANE1_STRETCH_MAX_X8 REG_FIELD_PREP(SKL_PLANE1_STRETCH_MAX_MASK, 0) +#define SKL_PLANE1_STRETCH_MAX_X4 REG_FIELD_PREP(SKL_PLANE1_STRETCH_MAX_MASK, 1) +#define SKL_PLANE1_STRETCH_MAX_X2 REG_FIELD_PREP(SKL_PLANE1_STRETCH_MAX_MASK, 2) +#define SKL_PLANE1_STRETCH_MAX_X1 REG_FIELD_PREP(SKL_PLANE1_STRETCH_MAX_MASK, 3) #define CHICKEN_PIPESL_1(pipe) _MMIO_PIPE(pipe, _CHICKEN_PIPESL_1_A, _CHICKEN_PIPESL_1_B) #define _CHICKEN_TRANS_A 0x420c0 diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 65bc3709f54c..a725792d5248 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -76,6 +76,8 @@ struct intel_wm_config { static void gen9_init_clock_gating(struct drm_i915_private *dev_priv) { + enum pipe pipe; + if (HAS_LLC(dev_priv)) { /* * WaCompressedResourceDisplayNewHashMode:skl,kbl @@
-89,6 +91,16 @@ static void gen9_init_clock_gating(struct drm_i915_private *dev_priv) SKL_DE_COMPRESSED_HASH_MODE); } + for_each_pipe(dev_priv, pipe) { + /* + * "Plane N strech max must be programmed to 11b (x1) + * when Async flips are enabled on that plane." + */ + if (!IS_GEMINILAKE(dev_priv) && intel_vtd_active()) + intel_uncore_rmw(&dev_priv->uncore, CHICKEN_PIPESL_1(pipe), + SKL_PLANE1_STRETCH_MAX_MASK, SKL_PLANE1_STRETCH_MAX_X1); + } + /* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,bxt,kbl,cfl */ intel_uncore_write(&dev_priv->uncore, CHICKEN_PAR1_1, intel_uncore_read(&dev_priv->uncore, CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP); From 3ea75b3f57e5b2837b980a2cbcf014773d00ae51 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 5 Oct 2021 13:20:28 +0200 Subject: [PATCH 056/235] usb: xhci: tegra: mark PM functions as __maybe_unused The added #ifdefs in the PM rework were almost correct, but still cause warnings in some randconfig builds: drivers/usb/host/xhci-tegra.c:2147:12: error: 'tegra_xusb_resume' defined but not used [-Werror=unused-function] 2147 | static int tegra_xusb_resume(struct device *dev) | ^~~~~~~~~~~~~~~~~ drivers/usb/host/xhci-tegra.c:2105:12: error: 'tegra_xusb_suspend' defined but not used [-Werror=unused-function] 2105 | static int tegra_xusb_suspend(struct device *dev) Replace the #ifdef checks with simpler __maybe_unused annotations to reliably shut up these warnings. 
Link: https://lore.kernel.org/all/20210421135613.3560777-2-arnd@kernel.org/ Fixes: 971ee247060d ("usb: xhci: tegra: Enable ELPG for runtime/system PM") Reviewed-by: JC Kuo Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20211005112057.2700888-1-arnd@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-tegra.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c index 575fa89a783f..1bf494b649bd 100644 --- a/drivers/usb/host/xhci-tegra.c +++ b/drivers/usb/host/xhci-tegra.c @@ -1787,7 +1787,6 @@ static int tegra_xusb_remove(struct platform_device *pdev) return 0; } -#if IS_ENABLED(CONFIG_PM) || IS_ENABLED(CONFIG_PM_SLEEP) static bool xhci_hub_ports_suspended(struct xhci_hub *hub) { struct device *dev = hub->hcd->self.controller; @@ -2102,7 +2101,7 @@ static int tegra_xusb_exit_elpg(struct tegra_xusb *tegra, bool runtime) return err; } -static int tegra_xusb_suspend(struct device *dev) +static __maybe_unused int tegra_xusb_suspend(struct device *dev) { struct tegra_xusb *tegra = dev_get_drvdata(dev); int err; @@ -2144,7 +2143,7 @@ static int tegra_xusb_suspend(struct device *dev) return err; } -static int tegra_xusb_resume(struct device *dev) +static __maybe_unused int tegra_xusb_resume(struct device *dev) { struct tegra_xusb *tegra = dev_get_drvdata(dev); int err; @@ -2174,10 +2173,8 @@ static int tegra_xusb_resume(struct device *dev) return 0; } -#endif -#ifdef CONFIG_PM -static int tegra_xusb_runtime_suspend(struct device *dev) +static __maybe_unused int tegra_xusb_runtime_suspend(struct device *dev) { struct tegra_xusb *tegra = dev_get_drvdata(dev); int ret; @@ -2190,7 +2187,7 @@ static int tegra_xusb_runtime_suspend(struct device *dev) return ret; } -static int tegra_xusb_runtime_resume(struct device *dev) +static __maybe_unused int tegra_xusb_runtime_resume(struct device *dev) { struct tegra_xusb *tegra = dev_get_drvdata(dev); int err; @@ -2201,7 
+2198,6 @@ static int tegra_xusb_runtime_resume(struct device *dev) return err; } -#endif static const struct dev_pm_ops tegra_xusb_pm_ops = { SET_RUNTIME_PM_OPS(tegra_xusb_runtime_suspend, From 268bbde716e3a79a747a0f4ebbeb9f63d861737d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 4 Oct 2021 17:18:39 +0300 Subject: [PATCH 057/235] usb: dwc3: gadget: Revert "set gadgets parent to the right controller" The commit c6e23b89a95d ("usb: dwc3: gadget: set gadgets parent to the right controller") changed the device for the UDC and broke the user space scripts that instantiate the USB gadget(s) via ConfigFS. Revert it for now until the better solution will be proposed. Fixes: c6e23b89a95d ("usb: dwc3: gadget: set gadgets parent to the right controller") Tested-by: Ferry Toth Cc: Michael Grzeschik Cc: Felipe Balbi Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20211004141839.49079-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 804b50548163..4519d06c9ca2 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -4243,7 +4243,7 @@ int dwc3_gadget_init(struct dwc3 *dwc) } - usb_initialize_gadget(dwc->sysdev, dwc->gadget, dwc_gadget_release); + usb_initialize_gadget(dwc->dev, dwc->gadget, dwc_gadget_release); dev = &dwc->gadget->dev; dev->platform_data = dwc; dwc->gadget->ops = &dwc3_gadget_ops; From 4d1aa9112c8e6995ef2c8a76972c9671332ccfea Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 21 Sep 2021 16:34:42 +0200 Subject: [PATCH 058/235] Partially revert "usb: Kconfig: using select for USB_COMMON dependency" This reverts commit cb9c1cfc86926d0e86d19c8e34f6c23458cd3478 for USB_LED_TRIG. This config symbol has bool type and enables extra code in usb_common itself, not a separate driver. Enabling it should not force usb_common to be built-in! 
Fixes: cb9c1cfc8692 ("usb: Kconfig: using select for USB_COMMON dependency") Cc: stable Signed-off-by: Ben Hutchings Signed-off-by: Salvatore Bonaccorso Link: https://lore.kernel.org/r/20210921143442.340087-1-carnil@debian.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/common/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/usb/common/Kconfig b/drivers/usb/common/Kconfig index 5e8a04e3dd3c..b856622431a7 100644 --- a/drivers/usb/common/Kconfig +++ b/drivers/usb/common/Kconfig @@ -6,8 +6,7 @@ config USB_COMMON config USB_LED_TRIG bool "USB LED Triggers" - depends on LEDS_CLASS && LEDS_TRIGGERS - select USB_COMMON + depends on LEDS_CLASS && USB_COMMON && LEDS_TRIGGERS help This option adds LED triggers for USB host and/or gadget activity. From b87d8d0d4c43c29ccdc57d15b2ebc1df886a34b4 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Thu, 30 Sep 2021 15:47:58 +0300 Subject: [PATCH 059/235] usb: typec: tipd: Remove dependency on "connector" child fwnode There is no "connector" child node available on every platform, so the driver can't fail to probe when it's missing. 
Fixes: 57560ee95cb7 ("usb: typec: tipd: Don't block probing of consumer of "connector" nodes") Cc: stable@vger.kernel.org # 5.14+ Reported-by: "Regupathy, Rajaram" Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20210930124758.23233-1-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tipd/core.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c index 21b3ae25c76d..ea4cc0a6e40c 100644 --- a/drivers/usb/typec/tipd/core.c +++ b/drivers/usb/typec/tipd/core.c @@ -625,10 +625,6 @@ static int tps6598x_probe(struct i2c_client *client) if (ret < 0) return ret; - fwnode = device_get_named_child_node(&client->dev, "connector"); - if (!fwnode) - return -ENODEV; - /* * This fwnode has a "compatible" property, but is never populated as a * struct device. Instead we simply parse it to read the properties. @@ -636,7 +632,9 @@ static int tps6598x_probe(struct i2c_client *client) * with existing DT files, we work around this by deleting any * fwnode_links to/from this fwnode. */ - fw_devlink_purge_absent_suppliers(fwnode); + fwnode = device_get_named_child_node(&client->dev, "connector"); + if (fwnode) + fw_devlink_purge_absent_suppliers(fwnode); tps->role_sw = fwnode_usb_role_switch_get(fwnode); if (IS_ERR(tps->role_sw)) { From 05300871c0e21c288bd5c30ac6f9b1da6ddeed22 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Sun, 26 Sep 2021 18:14:15 +0800 Subject: [PATCH 060/235] usb: typec: tcpci: don't handle vSafe0V event if it's not enabled USB TCPCI Spec, 4.4.3 Mask Registers: "A masked register will still indicate in the ALERT register, but shall not set the Alert# pin low." Thus, the Extended Status will still indicate in ALERT register if vSafe0V is detected by TCPC even though being masked. In current code, however, this event will not be handled in detection time. Rather it will be handled when next ALERT event coming(CC event, PD event, etc).
Tcpm might transition to a wrong state in this situation. Thus, the vSafe0V event should not be handled when it's masked. Fixes: 766c485b86ef ("usb: typec: tcpci: Add support to report vSafe0V") cc: Reviewed-by: Guenter Roeck Acked-by: Heikki Krogerus Signed-off-by: Xu Yang Link: https://lore.kernel.org/r/20210926101415.3775058-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/tcpm/tcpci.c b/drivers/usb/typec/tcpm/tcpci.c index 9858716698df..c15eec9cc460 100644 --- a/drivers/usb/typec/tcpm/tcpci.c +++ b/drivers/usb/typec/tcpm/tcpci.c @@ -696,7 +696,7 @@ irqreturn_t tcpci_irq(struct tcpci *tcpci) tcpm_pd_receive(tcpci->port, &msg); } - if (status & TCPC_ALERT_EXTENDED_STATUS) { + if (tcpci->data->vbus_vsafe0v && (status & TCPC_ALERT_EXTENDED_STATUS)) { ret = regmap_read(tcpci->regmap, TCPC_EXTENDED_STATUS, &raw); if (!ret && (raw & TCPC_EXTENDED_STATUS_VSAFE0V)) tcpm_vbus_change(tcpci->port); From 6d91017a295e9790eec02c4e43f020cdb55f5d98 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Tue, 28 Sep 2021 19:16:39 +0800 Subject: [PATCH 061/235] usb: typec: tcpm: handle SRC_STARTUP state if cc changes TCPM for DRP should do the same action as SRC_ATTACHED when cc changes in SRC_STARTUP state. Otherwise, TCPM will transition to SRC_UNATTACHED state which is not satisfied with the Type-C spec. Per Type-C spec: DRP port should move to Unattached.SNK instead of Unattached.SRC if sink removed. 
Fixes: 4b4e02c83167 ("typec: tcpm: Move out of staging") cc: Reviewed-by: Guenter Roeck Acked-by: Heikki Krogerus Signed-off-by: Xu Yang Link: https://lore.kernel.org/r/20210928111639.3854174-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index a4d37205df54..7f2f3ff1b391 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -4876,6 +4876,7 @@ static void _tcpm_cc_change(struct tcpm_port *port, enum typec_cc_status cc1, tcpm_set_state(port, SRC_ATTACH_WAIT, 0); break; case SRC_ATTACHED: + case SRC_STARTUP: case SRC_SEND_CAPABILITIES: case SRC_READY: if (tcpm_port_is_disconnected(port) || From 8253a34bfae3278baca52fc1209b7c29270486ca Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 21 Sep 2021 08:37:54 -0300 Subject: [PATCH 062/235] usb: chipidea: ci_hdrc_imx: Also search for 'phys' phandle When passing 'phys' in the devicetree to describe the USB PHY phandle (which is the recommended way according to Documentation/devicetree/bindings/usb/ci-hdrc-usb2.txt) the following NULL pointer dereference is observed on i.MX7 and i.MX8MM: [ 1.489344] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000098 [ 1.498170] Mem abort info: [ 1.500966] ESR = 0x96000044 [ 1.504030] EC = 0x25: DABT (current EL), IL = 32 bits [ 1.509356] SET = 0, FnV = 0 [ 1.512416] EA = 0, S1PTW = 0 [ 1.515569] FSC = 0x04: level 0 translation fault [ 1.520458] Data abort info: [ 1.523349] ISV = 0, ISS = 0x00000044 [ 1.527196] CM = 0, WnR = 1 [ 1.530176] [0000000000000098] user address but active_mm is swapper [ 1.536544] Internal error: Oops: 96000044 [#1] PREEMPT SMP [ 1.542125] Modules linked in: [ 1.545190] CPU: 3 PID: 7 Comm: kworker/u8:0 Not tainted 5.14.0-dirty #3 [ 1.551901] Hardware name: Kontron i.MX8MM N801X S (DT) [ 1.557133] Workqueue: events_unbound deferred_probe_work_func [ 
1.562984] pstate: 80000005 (Nzcv daif -PAN -UAO -TCO BTYPE=--) [ 1.568998] pc : imx7d_charger_detection+0x3f0/0x510 [ 1.573973] lr : imx7d_charger_detection+0x22c/0x510 This happens because the charger functions check for the phy presence inside the imx_usbmisc_data structure (data->usb_phy), but the chipidea core populates the usb_phy passed via 'phys' inside 'struct ci_hdrc' (ci->usb_phy) instead. This causes the NULL pointer dereference inside imx7d_charger_detection(). Fix it by also searching for 'phys' in case 'fsl,usbphy' is not found. Tested on a imx7s-warp board. Fixes: 746f316b753a ("usb: chipidea: introduce imx7d USB charger detection") Cc: stable@vger.kernel.org Reported-by: Heiko Thiery Tested-by: Frieder Schrempf Reviewed-by: Frieder Schrempf Acked-by: Peter Chen Signed-off-by: Fabio Estevam Link: https://lore.kernel.org/r/20210921113754.767631-1-festevam@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/ci_hdrc_imx.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c index 8b7bc10b6e8b..f1d100671ee6 100644 --- a/drivers/usb/chipidea/ci_hdrc_imx.c +++ b/drivers/usb/chipidea/ci_hdrc_imx.c @@ -420,11 +420,16 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) data->phy = devm_usb_get_phy_by_phandle(dev, "fsl,usbphy", 0); if (IS_ERR(data->phy)) { ret = PTR_ERR(data->phy); - /* Return -EINVAL if no usbphy is available */ - if (ret == -ENODEV) - data->phy = NULL; - else - goto err_clk; + if (ret == -ENODEV) { + data->phy = devm_usb_get_phy_by_phandle(dev, "phys", 0); + if (IS_ERR(data->phy)) { + ret = PTR_ERR(data->phy); + if (ret == -ENODEV) + data->phy = NULL; + else + goto err_clk; + } + } } pdata.usb_phy = data->phy; From 04d2b75537085cb0c85d73a2e0e50317bffa883f Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Wed, 29 Sep 2021 21:45:46 +0200 Subject: [PATCH 063/235] usb: cdc-wdm: Fix check for WWAN CONFIG_WWAN_CORE 
was replaced with CONFIG_WWAN in commit 89212e160b81 ("net: wwan: Fix WWAN config symbols"), but did not update all users of it. Change it back to use CONFIG_WWAN instead. Fixes: 89212e160b81 ("net: wwan: Fix WWAN config symbols") Cc: Signed-off-by: Rikard Falkeborn Acked-by: Oliver Neukum Link: https://lore.kernel.org/r/20210929194547.46954-2-rikard.falkeborn@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-wdm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 35d5908b5478..fdf79bcf7eb0 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -824,7 +824,7 @@ static struct usb_class_driver wdm_class = { }; /* --- WWAN framework integration --- */ -#ifdef CONFIG_WWAN_CORE +#ifdef CONFIG_WWAN static int wdm_wwan_port_start(struct wwan_port *port) { struct wdm_device *desc = wwan_port_get_drvdata(port); @@ -963,11 +963,11 @@ static void wdm_wwan_rx(struct wdm_device *desc, int length) /* inbuf has been copied, it is safe to check for outstanding data */ schedule_work(&desc->service_outs_intr); } -#else /* CONFIG_WWAN_CORE */ +#else /* CONFIG_WWAN */ static void wdm_wwan_init(struct wdm_device *desc) {} static void wdm_wwan_deinit(struct wdm_device *desc) {} static void wdm_wwan_rx(struct wdm_device *desc, int length) {} -#endif /* CONFIG_WWAN_CORE */ +#endif /* CONFIG_WWAN */ /* --- error handling --- */ static void wdm_rxwork(struct work_struct *work) From 0560c9c552c1815e7b480bc11fd785fefc82bb27 Mon Sep 17 00:00:00 2001 From: Pavel Hofman Date: Fri, 24 Sep 2021 10:00:27 +0200 Subject: [PATCH 064/235] usb: gadget: f_uac2: fixed EP-IN wMaxPacketSize Async feedback patches broke enumeration on Windows 10 previously fixed by commit 789ea77310f0 ("usb: gadget: f_uac2: always increase endpoint max_packet_size by one audio slot").
While the existing calculation for EP OUT capture for async mode yields size+1 frame due to uac2_opts->fb_max > 0, playback side lost the +1 feature. Therefore the +1 frame addition must be re-introduced for playback. Win10 enumerates the device only when both EP IN and EP OUT max packet sizes are (at least) +1 frame. Fixes: e89bb4288378 ("usb: gadget: u_audio: add real feedback implementation") Cc: stable Tested-by: Henrik Enquist Tested-by: Jack Pham Signed-off-by: Pavel Hofman Link: https://lore.kernel.org/r/20210924080027.5362-1-pavel.hofman@ivitera.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_uac2.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c index be864560bfea..ef55b8bb5870 100644 --- a/drivers/usb/gadget/function/f_uac2.c +++ b/drivers/usb/gadget/function/f_uac2.c @@ -674,11 +674,17 @@ static int set_ep_max_packet_size(const struct f_uac2_opts *uac2_opts, ssize = uac2_opts->c_ssize; } - if (!is_playback && (uac2_opts->c_sync == USB_ENDPOINT_SYNC_ASYNC)) + if (!is_playback && (uac2_opts->c_sync == USB_ENDPOINT_SYNC_ASYNC)) { + // Win10 requires max packet size + 1 frame srate = srate * (1000 + uac2_opts->fb_max) / 1000; - - max_size_bw = num_channels(chmask) * ssize * - DIV_ROUND_UP(srate, factor / (1 << (ep_desc->bInterval - 1))); + // updated srate is always bigger, therefore DIV_ROUND_UP always yields +1 + max_size_bw = num_channels(chmask) * ssize * + (DIV_ROUND_UP(srate, factor / (1 << (ep_desc->bInterval - 1)))); + } else { + // adding 1 frame provision for Win10 + max_size_bw = num_channels(chmask) * ssize * + (DIV_ROUND_UP(srate, factor / (1 << (ep_desc->bInterval - 1))) + 1); + } ep_desc->wMaxPacketSize = cpu_to_le16(min_t(u16, max_size_bw, max_size_ep)); From 65a205e6113506e69a503b61d97efec43fc10fd7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 29 Sep 2021 11:09:36 +0200 Subject: [PATCH 065/235] USB: 
cdc-acm: fix racy tty buffer accesses A recent change that started reporting break events to the line discipline caused the tty-buffer insertions to no longer be serialised by inserting events also from the completion handler for the interrupt endpoint. Completion calls for distinct endpoints are not guaranteed to be serialised. For example, in case a host-controller driver uses bottom-half completion, the interrupt and bulk-in completion handlers can end up running in parallel on two CPUs (high-and low-prio tasklets, respectively) thereby breaking the tty layer's single producer assumption. Fix this by holding the read lock also when inserting characters from the bulk endpoint. Fixes: 08dff274edda ("cdc-acm: fix BREAK rx code path adding necessary calls") Cc: stable@vger.kernel.org Acked-by: Oliver Neukum Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20210929090937.7410-2-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 4e2f1552f4b7..c7a1736720e7 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -475,11 +475,16 @@ static int acm_submit_read_urbs(struct acm *acm, gfp_t mem_flags) static void acm_process_read_urb(struct acm *acm, struct urb *urb) { + unsigned long flags; + if (!urb->actual_length) return; + spin_lock_irqsave(&acm->read_lock, flags); tty_insert_flip_string(&acm->port, urb->transfer_buffer, urb->actual_length); + spin_unlock_irqrestore(&acm->read_lock, flags); + tty_flip_buffer_push(&acm->port); } From 58fc1daa4d2e9789b9ffc880907c961ea7c062cc Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 29 Sep 2021 11:09:37 +0200 Subject: [PATCH 066/235] USB: cdc-acm: fix break reporting A recent change that started reporting break events forgot to push the event to the line discipline, which meant that a detected break would not be reported until further characters 
had been received (the port could even have been closed and reopened in between). Fixes: 08dff274edda ("cdc-acm: fix BREAK rx code path adding necessary calls") Cc: stable@vger.kernel.org Acked-by: Oliver Neukum Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20210929090937.7410-3-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index c7a1736720e7..7b2e2420ecae 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -340,6 +340,9 @@ static void acm_process_notification(struct acm *acm, unsigned char *buf) acm->iocount.overrun++; spin_unlock_irqrestore(&acm->read_lock, flags); + if (newctrl & ACM_CTRL_BRK) + tty_flip_buffer_push(&acm->port); + if (difference) wake_up_all(&acm->wioctl); From 45d65c0f09aaa6cdd21fe0743f317d4bbdfd1466 Mon Sep 17 00:00:00 2001 From: "Liu, Zhan" Date: Thu, 9 Sep 2021 13:26:37 -0400 Subject: [PATCH 067/235] drm/amd/display: Fix B0 USB-C DP Alt mode [Why] Starting from B0, along with RDPCSTX, RDPCSPIPE registers are also used. [How] Make sure RDPCSPIPE registers are programmed correctly.
Reviewed-by: Charlene Liu Acked-by: Rodrigo Siqueira Signed-off-by: Zhan Liu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org (cherry picked from commit bdd1a21b52557ea8f61d0a5dc2f77151b576eb70) --- .../amd/display/dc/dcn10/dcn10_link_encoder.h | 1 + .../display/dc/dcn31/dcn31_dio_link_encoder.c | 33 ++++++++++++++++++- .../display/dc/dcn31/dcn31_dio_link_encoder.h | 11 ++++++- .../include/asic_reg/dpcs/dpcs_4_2_0_offset.h | 27 +++++++++++++++ 4 files changed, 70 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h index d8b22618b79e..c337588231ff 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h @@ -118,6 +118,7 @@ struct dcn10_link_enc_registers { uint32_t RDPCSTX_PHY_CNTL4; uint32_t RDPCSTX_PHY_CNTL5; uint32_t RDPCSTX_PHY_CNTL6; + uint32_t RDPCSPIPE_PHY_CNTL6; uint32_t RDPCSTX_PHY_CNTL7; uint32_t RDPCSTX_PHY_CNTL8; uint32_t RDPCSTX_PHY_CNTL9; diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c index 90127c1f9e35..d1870ac33143 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c @@ -37,6 +37,7 @@ #include "link_enc_cfg.h" #include "dc_dmub_srv.h" +#include "dal_asic_id.h" #define CTX \ enc10->base.ctx @@ -215,7 +216,7 @@ static const struct link_encoder_funcs dcn31_link_enc_funcs = { .fec_is_active = enc2_fec_is_active, .get_dig_frontend = dcn10_get_dig_frontend, .get_dig_mode = dcn10_get_dig_mode, - .is_in_alt_mode = dcn20_link_encoder_is_in_alt_mode, + .is_in_alt_mode = dcn31_link_encoder_is_in_alt_mode, .get_max_link_cap = dcn20_link_encoder_get_max_link_cap, .set_dio_phy_mux = dcn31_link_encoder_set_dio_phy_mux, }; @@ -404,3 +405,33 @@ void dcn31_link_encoder_disable_output( } } +bool 
dcn31_link_encoder_is_in_alt_mode(struct link_encoder *enc) +{ + struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); + uint32_t dp_alt_mode_disable; + bool is_usb_c_alt_mode = false; + + if (enc->features.flags.bits.DP_IS_USB_C) { + if (enc->ctx->asic_id.hw_internal_rev != YELLOW_CARP_B0) { + // [Note] no need to check hw_internal_rev once phy mux selection is ready + REG_GET(RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DISABLE, &dp_alt_mode_disable); + } else { + /* + * B0 phys use a new set of registers to check whether alt mode is disabled. + * if value == 1 alt mode is disabled, otherwise it is enabled. + */ + if ((enc10->base.transmitter == TRANSMITTER_UNIPHY_A) + || (enc10->base.transmitter == TRANSMITTER_UNIPHY_B) + || (enc10->base.transmitter == TRANSMITTER_UNIPHY_E)) { + REG_GET(RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DISABLE, &dp_alt_mode_disable); + } else { + // [Note] need to change TRANSMITTER_UNIPHY_C/D to F/G once phy mux selection is ready + REG_GET(RDPCSPIPE_PHY_CNTL6, RDPCS_PHY_DPALT_DISABLE, &dp_alt_mode_disable); + } + } + + is_usb_c_alt_mode = (dp_alt_mode_disable == 0); + } + + return is_usb_c_alt_mode; +} diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.h index 32d146312838..bec50e4402ff 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.h @@ -69,6 +69,7 @@ SRI(RDPCSTX_PHY_CNTL4, RDPCSTX, id), \ SRI(RDPCSTX_PHY_CNTL5, RDPCSTX, id), \ SRI(RDPCSTX_PHY_CNTL6, RDPCSTX, id), \ + SRI(RDPCSPIPE_PHY_CNTL6, RDPCSPIPE, id), \ SRI(RDPCSTX_PHY_CNTL7, RDPCSTX, id), \ SRI(RDPCSTX_PHY_CNTL8, RDPCSTX, id), \ SRI(RDPCSTX_PHY_CNTL9, RDPCSTX, id), \ @@ -115,7 +116,9 @@ LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL6, RDPCS_PHY_DP_TX2_MPLL_EN, mask_sh),\ LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL6, RDPCS_PHY_DP_TX3_MPLL_EN, mask_sh),\ LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DP4, mask_sh),\ - 
LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DISABLE, mask_sh),\ + LE_SF(RDPCSPIPE0_RDPCSPIPE_PHY_CNTL6, RDPCS_PHY_DPALT_DP4, mask_sh),\ + LE_SF(RDPCSPIPE0_RDPCSPIPE_PHY_CNTL6, RDPCS_PHY_DPALT_DISABLE, mask_sh),\ + LE_SF(RDPCSPIPE0_RDPCSPIPE_PHY_CNTL6, RDPCS_PHY_DPALT_DISABLE_ACK, mask_sh),\ LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL7, RDPCS_PHY_DP_MPLLB_FRACN_QUOT, mask_sh),\ LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL7, RDPCS_PHY_DP_MPLLB_FRACN_DEN, mask_sh),\ LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL8, RDPCS_PHY_DP_MPLLB_SSC_PEAK, mask_sh),\ @@ -243,4 +246,10 @@ void dcn31_link_encoder_disable_output( struct link_encoder *enc, enum signal_type signal); +/* + * Check whether USB-C DP Alt mode is disabled + */ +bool dcn31_link_encoder_is_in_alt_mode( + struct link_encoder *enc); + #endif /* __DC_LINK_ENCODER__DCN31_H__ */ diff --git a/drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_4_2_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_4_2_0_offset.h index 92caf8441d1e..01a56556cde1 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_4_2_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_4_2_0_offset.h @@ -11932,5 +11932,32 @@ #define ixDPCSSYS_CR4_RAWLANEX_DIG_PCS_XF_RX_OVRD_OUT_2 0xe0c7 #define ixDPCSSYS_CR4_RAWLANEX_DIG_PCS_XF_TX_OVRD_IN_2 0xe0c8 +//RDPCSPIPE0_RDPCSPIPE_PHY_CNTL6 +#define RDPCSPIPE0_RDPCSPIPE_PHY_CNTL6__RDPCS_PHY_DPALT_DP4__SHIFT 0x10 +#define RDPCSPIPE0_RDPCSPIPE_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE__SHIFT 0x11 +#define RDPCSPIPE0_RDPCSPIPE_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_ACK__SHIFT 0x12 +#define RDPCSPIPE0_RDPCSPIPE_PHY_CNTL6__RDPCS_PHY_DPALT_DP4_MASK 0x00010000L +#define RDPCSPIPE0_RDPCSPIPE_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_MASK 0x00020000L +#define RDPCSPIPE0_RDPCSPIPE_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_ACK_MASK 0x00040000L + +//RDPCSPIPE1_RDPCSPIPE_PHY_CNTL6 +#define RDPCSPIPE1_RDPCSPIPE_PHY_CNTL6__RDPCS_PHY_DPALT_DP4__SHIFT 0x10 +#define RDPCSPIPE1_RDPCSPIPE_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE__SHIFT 0x11 +#define 
RDPCSPIPE1_RDPCSPIPE_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_ACK__SHIFT 0x12 +#define RDPCSPIPE1_RDPCSPIPE_PHY_CNTL6__RDPCS_PHY_DPALT_DP4_MASK 0x00010000L +#define RDPCSPIPE1_RDPCSPIPE_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_MASK 0x00020000L +#define RDPCSPIPE1_RDPCSPIPE_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_ACK_MASK 0x00040000L + +//[Note] Hack. RDPCSPIPE only has 2 instances. +#define regRDPCSPIPE0_RDPCSPIPE_PHY_CNTL6 0x2d73 +#define regRDPCSPIPE0_RDPCSPIPE_PHY_CNTL6_BASE_IDX 2 +#define regRDPCSPIPE1_RDPCSPIPE_PHY_CNTL6 0x2e4b +#define regRDPCSPIPE1_RDPCSPIPE_PHY_CNTL6_BASE_IDX 2 +#define regRDPCSPIPE2_RDPCSPIPE_PHY_CNTL6 0x2d73 +#define regRDPCSPIPE2_RDPCSPIPE_PHY_CNTL6_BASE_IDX 2 +#define regRDPCSPIPE3_RDPCSPIPE_PHY_CNTL6 0x2e4b +#define regRDPCSPIPE3_RDPCSPIPE_PHY_CNTL6_BASE_IDX 2 +#define regRDPCSPIPE4_RDPCSPIPE_PHY_CNTL6 0x2d73 +#define regRDPCSPIPE4_RDPCSPIPE_PHY_CNTL6_BASE_IDX 2 #endif From 2fe9a0e1173f4805669e7af34ea25af835274426 Mon Sep 17 00:00:00 2001 From: "Liu, Zhan" Date: Thu, 2 Sep 2021 15:08:29 -0400 Subject: [PATCH 068/235] drm/amd/display: Fix DCN3 B0 DP Alt Mapping [Why] DCN3 B0 has a mux, which redirects PHYC and PHYD to PHYF and PHYG. [How] Fix DIG mapping. 
Reviewed-by: Charlene Liu Acked-by: Rodrigo Siqueira Signed-off-by: Zhan Liu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org (cherry picked from commit 4b7786d87fb3adf3e534c4f1e4f824d8700b786b) --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index a7702d3c75cd..cb50e6eda47e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -1284,6 +1284,12 @@ static struct stream_encoder *dcn31_stream_encoder_create( if (!enc1 || !vpg || !afmt) return NULL; + if (ctx->asic_id.chip_family == FAMILY_YELLOW_CARP && + ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0) { + if ((eng_id == ENGINE_ID_DIGC) || (eng_id == ENGINE_ID_DIGD)) + eng_id = eng_id + 3; // For B0 only. C->F, D->G. + } + dcn30_dio_stream_encoder_construct(enc1, ctx, ctx->dc_bios, eng_id, vpg, afmt, &stream_enc_regs[eng_id], From 7ab0965079bbc0e39fe0e1df4dcdf931c9d25372 Mon Sep 17 00:00:00 2001 From: Jude Shih Date: Mon, 4 Oct 2021 22:40:50 +0800 Subject: [PATCH 069/235] drm/amd/display: USB4 bring up set correct address [Why] YELLOW_CARP_B0 address was not correct [How] Set YELLOW_CARP_B0 to 0x1A. 
Reviewed-by: Nicholas Kazlauskas Acked-by: Wayne Lin Signed-off-by: Jude Shih Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/include/dal_asic_id.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h b/drivers/gpu/drm/amd/display/include/dal_asic_id.h index 381c17caace1..5adc471bef57 100644 --- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h +++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h @@ -227,7 +227,7 @@ enum { #define FAMILY_YELLOW_CARP 146 #define YELLOW_CARP_A0 0x01 -#define YELLOW_CARP_B0 0x02 // TODO: DCN31 - update with correct B0 ID +#define YELLOW_CARP_B0 0x1A #define YELLOW_CARP_UNKNOWN 0xFF #ifndef ASICREV_IS_YELLOW_CARP From b072ef1215aca33186e3a10109e872e528a9e516 Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Wed, 29 Sep 2021 14:54:39 +0800 Subject: [PATCH 070/235] drm/amdkfd: fix a potential ttm->sg memory leak Memory is allocated for ttm->sg by kmalloc in kfd_mem_dmamap_userptr, but isn't freed by kfree in kfd_mem_dmaunmap_userptr. Free it! 
Fixes: 264fb4d332f5 ("drm/amdgpu: Add multi-GPU DMA mapping helpers") Signed-off-by: Lang Yu Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 2d6b2d77b738..054c1a224def 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -563,6 +563,7 @@ kfd_mem_dmaunmap_userptr(struct kgd_mem *mem, dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0); sg_free_table(ttm->sg); + kfree(ttm->sg); ttm->sg = NULL; } From d08ce8c6d29f8cc7493b781be282604b10fc6e1e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 29 Sep 2021 13:42:08 -0400 Subject: [PATCH 071/235] Documentation/gpu: remove spurious "+" in amdgpu.rst MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not sure why that was there. Remove it. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- Documentation/gpu/amdgpu.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/gpu/amdgpu.rst b/Documentation/gpu/amdgpu.rst index 364680cdad2e..8ba72e898099 100644 --- a/Documentation/gpu/amdgpu.rst +++ b/Documentation/gpu/amdgpu.rst @@ -300,8 +300,8 @@ pcie_replay_count .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_device.c :doc: pcie_replay_count -+GPU SmartShift Information -============================ +GPU SmartShift Information +========================== GPU SmartShift information via sysfs From 1d617c029fd9c960f8ba7a8d1a10699d820bd6b9 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Fri, 1 Oct 2021 16:49:07 +0800 Subject: [PATCH 072/235] drm/amdgpu: During s0ix don't wait to signal GFXOFF In the rare event when GFX IP suspend coincides with a s0ix entry, don't schedule a delayed work, instead signal PMFW immediately to allow GFXOFF entry. 
GFXOFF is a prerequisite for s0ix entry. PMFW needs to be signaled about GFXOFF status before amd-pmc module passes OS HINT to PMFW telling that everything is ready for a safe s0ix entry. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1712 Signed-off-by: Lijo Lazar Reviewed-by: Alex Deucher Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index e7f06bd0f0cd..1916ec84dd71 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -31,6 +31,8 @@ /* delay 0.1 second to enable gfx off feature */ #define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100) +#define GFX_OFF_NO_DELAY 0 + /* * GPU GFX IP block helpers function. */ @@ -558,6 +560,8 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev) void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) { + unsigned long delay = GFX_OFF_DELAY_ENABLE; + if (!(adev->pm.pp_feature & PP_GFXOFF_MASK)) return; @@ -573,8 +577,14 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) adev->gfx.gfx_off_req_count--; - if (adev->gfx.gfx_off_req_count == 0 && !adev->gfx.gfx_off_state) - schedule_delayed_work(&adev->gfx.gfx_off_delay_work, GFX_OFF_DELAY_ENABLE); + if (adev->gfx.gfx_off_req_count == 0 && + !adev->gfx.gfx_off_state) { + /* If going to s2idle, no need to wait */ + if (adev->in_s0ix) + delay = GFX_OFF_NO_DELAY; + schedule_delayed_work(&adev->gfx.gfx_off_delay_work, + delay); + } } else { if (adev->gfx.gfx_off_req_count == 0) { cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); From 4702b34d1de9582df9dfa0e583ea28fff7de29df Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 1 Oct 2021 15:40:00 -0400 Subject: [PATCH 073/235] drm/amdgpu/display: fix dependencies for DRM_AMD_DC_SI MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Depends on DRM_AMDGPU_SI and DRM_AMD_DC Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index 7dffc04a557e..127667e549c1 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -25,6 +25,8 @@ config DRM_AMD_DC_HDCP config DRM_AMD_DC_SI bool "AMD DC support for Southern Islands ASICs" + depends on DRM_AMDGPU_SI + depends on DRM_AMD_DC default n help Choose this option to enable new AMD DC support for SI asics From 0dd10a961f2aa39d02e9aa2194946713cb36d403 Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Fri, 24 Sep 2021 11:15:35 +0800 Subject: [PATCH 074/235] drm/amdkfd: remove redundant iommu cleanup code kfd_resume doesn't involve iommu operation, remove redundant iommu cleanup code. Signed-off-by: Yifan Zhang Reviewed-by: James Zhu Tested-by: James Zhu Acked-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index c2a4d920da40..4a416231b24c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -1085,18 +1085,12 @@ static int kfd_resume(struct kfd_dev *kfd) int err = 0; err = kfd->dqm->ops.start(kfd->dqm); - if (err) { + if (err) dev_err(kfd_device, "Error starting queue manager for device %x:%x\n", kfd->pdev->vendor, kfd->pdev->device); - goto dqm_start_error; - } return err; - -dqm_start_error: - kfd_iommu_suspend(kfd); - return err; } static inline void kfd_queue_work(struct workqueue_struct *wq, From 714d9e4574d54596973ee3b0624ee4a16264d700 Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Tue, 28 Sep 2021 15:42:35 +0800 Subject: [PATCH 
075/235] drm/amdgpu: init iommu after amdkfd device init This patch is to fix clinfo failure in Raven/Picasso: Number of platforms: 1 Platform Profile: FULL_PROFILE Platform Version: OpenCL 2.2 AMD-APP (3364.0) Platform Name: AMD Accelerated Parallel Processing Platform Vendor: Advanced Micro Devices, Inc. Platform Extensions: cl_khr_icd cl_amd_event_callback Platform Name: AMD Accelerated Parallel Processing Number of devices: 0 Signed-off-by: Yifan Zhang Reviewed-by: James Zhu Tested-by: James Zhu Acked-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index ab3794c42d36..53f3e07f6bd4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2394,10 +2394,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (r) goto init_failed; - r = amdgpu_amdkfd_resume_iommu(adev); - if (r) - goto init_failed; - r = amdgpu_device_ip_hw_init_phase1(adev); if (r) goto init_failed; @@ -2436,6 +2432,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (!adev->gmc.xgmi.pending_reset) amdgpu_amdkfd_device_init(adev); + r = amdgpu_amdkfd_resume_iommu(adev); + if (r) + goto init_failed; + amdgpu_fru_get_product_info(adev); init_failed: From 248b061689a40f4fed05252ee2c89f87cf26d7d8 Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Fri, 1 Oct 2021 09:48:50 +0800 Subject: [PATCH 076/235] drm/amdgpu: handle the case of pci_channel_io_frozen only in amdgpu_pci_resume In current code, when a PCI error state pci_channel_io_normal is detected, it will report PCI_ERS_RESULT_CAN_RECOVER status to PCI driver, and PCI driver will continue the execution of PCI resume callback report_resume by pci_walk_bridge, and the callback will go into amdgpu_pci_resume finally, where write lock is released unconditionally
without acquiring such lock first. In this case, a deadlock will happen when other threads start to acquire the read lock. To fix this, add a member in amdgpu_device structure to cache pci_channel_state, and only continue the execution in amdgpu_pci_resume when it's pci_channel_io_frozen. Fixes: c9a6b82f45e2 ("drm/amdgpu: Implement DPC recovery") Suggested-by: Andrey Grodzovsky Signed-off-by: Guchun Chen Reviewed-by: Andrey Grodzovsky Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index d356e329e6f8..269437b01328 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1087,6 +1087,7 @@ struct amdgpu_device { bool no_hw_access; struct pci_saved_state *pci_state; + pci_channel_state_t pci_channel_state; struct amdgpu_reset_control *reset_cntl; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 53f3e07f6bd4..af9bdf16eefd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5399,6 +5399,8 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta return PCI_ERS_RESULT_DISCONNECT; } + adev->pci_channel_state = state; + switch (state) { case pci_channel_io_normal: return PCI_ERS_RESULT_CAN_RECOVER; @@ -5541,6 +5543,10 @@ void amdgpu_pci_resume(struct pci_dev *pdev) DRM_INFO("PCI error: resume callback!!\n"); + /* Only continue execution for the case of pci_channel_io_frozen */ + if (adev->pci_channel_state != pci_channel_io_frozen) + return; + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; From 4d8b35968bbf9e42b6b202eedb510e2c82ad8b38 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 4 Oct 2021 10:07:50 -0700 Subject: [PATCH 077/235] objtool: Remove reloc
symbol type checks in get_alt_entry() Converting a special section's relocation reference to a symbol is straightforward. No need for objtool to complain that it doesn't know how to handle it. Just handle it. This fixes the following warning: arch/x86/kvm/emulate.o: warning: objtool: __ex_table+0x4: don't know how to handle reloc symbol type: kvm_fastop_exception Fixes: 24ff65257375 ("objtool: Teach get_alt_entry() about more relocation types") Reported-by: Linus Torvalds Signed-off-by: Josh Poimboeuf Link: https://lore.kernel.org/r/feadbc3dfb3440d973580fad8d3db873cbfe1694.1633367242.git.jpoimboe@redhat.com Cc: Peter Zijlstra Cc: x86@kernel.org Cc: Miroslav Benes Cc: linux-kernel@vger.kernel.org --- tools/objtool/special.c | 36 +++++++----------------------------- 1 file changed, 7 insertions(+), 29 deletions(-) diff --git a/tools/objtool/special.c b/tools/objtool/special.c index f1428e32a505..83d5f969bcb0 100644 --- a/tools/objtool/special.c +++ b/tools/objtool/special.c @@ -58,22 +58,11 @@ void __weak arch_handle_alternative(unsigned short feature, struct special_alt * { } -static bool reloc2sec_off(struct reloc *reloc, struct section **sec, unsigned long *off) +static void reloc_to_sec_off(struct reloc *reloc, struct section **sec, + unsigned long *off) { - switch (reloc->sym->type) { - case STT_FUNC: - *sec = reloc->sym->sec; - *off = reloc->sym->offset + reloc->addend; - return true; - - case STT_SECTION: - *sec = reloc->sym->sec; - *off = reloc->addend; - return true; - - default: - return false; - } + *sec = reloc->sym->sec; + *off = reloc->sym->offset + reloc->addend; } static int get_alt_entry(struct elf *elf, struct special_entry *entry, @@ -109,13 +98,8 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry, WARN_FUNC("can't find orig reloc", sec, offset + entry->orig); return -1; } - if (!reloc2sec_off(orig_reloc, &alt->orig_sec, &alt->orig_off)) { - WARN_FUNC("don't know how to handle reloc symbol type %d: %s", - sec, offset + 
entry->orig, - orig_reloc->sym->type, - orig_reloc->sym->name); - return -1; - } + + reloc_to_sec_off(orig_reloc, &alt->orig_sec, &alt->orig_off); if (!entry->group || alt->new_len) { new_reloc = find_reloc_by_dest(elf, sec, offset + entry->new); @@ -133,13 +117,7 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry, if (arch_is_retpoline(new_reloc->sym)) return 1; - if (!reloc2sec_off(new_reloc, &alt->new_sec, &alt->new_off)) { - WARN_FUNC("don't know how to handle reloc symbol type %d: %s", - sec, offset + entry->new, - new_reloc->sym->type, - new_reloc->sym->name); - return -1; - } + reloc_to_sec_off(new_reloc, &alt->new_sec, &alt->new_off); /* _ASM_EXTABLE_EX hack */ if (alt->new_off >= 0x7ffffff0) From dc02368164bd0ec603e3f5b3dd8252744a667b8a Mon Sep 17 00:00:00 2001 From: Joe Lawrence Date: Sun, 22 Aug 2021 18:50:36 -0400 Subject: [PATCH 078/235] objtool: Make .altinstructions section entry size consistent Commit e31694e0a7a7 ("objtool: Don't make .altinstructions writable") aligned objtool-created and kernel-created .altinstructions section flags, but there remains a minor discrepancy in their use of a section entry size: objtool sets one while the kernel build does not. While sh_entsize of sizeof(struct alt_instr) seems intuitive, this small deviation can cause failures with external tooling (kpatch-build). Fix this by creating new .altinstructions sections with sh_entsize of 0 and then later updating sec->sh_size as alternatives are added to the section. An added benefit is avoiding the data descriptor and buffer created by elf_create_section(), but previously unused by elf_add_alternative().
Fixes: 9bc0bb50727c ("objtool/x86: Rewrite retpoline thunk calls") Signed-off-by: Joe Lawrence Reviewed-by: Miroslav Benes Signed-off-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20210822225037.54620-2-joe.lawrence@redhat.com Cc: Andy Lavr Cc: Peter Zijlstra Cc: x86@kernel.org Cc: linux-kernel@vger.kernel.org --- tools/objtool/arch/x86/decode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index bc821056aba9..0893436cc09f 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -684,7 +684,7 @@ static int elf_add_alternative(struct elf *elf, sec = find_section_by_name(elf, ".altinstructions"); if (!sec) { sec = elf_create_section(elf, ".altinstructions", - SHF_ALLOC, size, 0); + SHF_ALLOC, 0, 0); if (!sec) { WARN_ELF("elf_create_section"); From fe255fe6ad97685e5a4be0d871f43288dbc10ad6 Mon Sep 17 00:00:00 2001 From: Joe Lawrence Date: Sun, 22 Aug 2021 18:50:37 -0400 Subject: [PATCH 079/235] objtool: Remove redundant 'len' field from struct section The section structure already contains sh_size, so just remove the extra 'len' member that requires extra mirroring and potential confusion. 
Suggested-by: Josh Poimboeuf Signed-off-by: Joe Lawrence Reviewed-by: Miroslav Benes Signed-off-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20210822225037.54620-3-joe.lawrence@redhat.com Cc: Andy Lavr Cc: Peter Zijlstra Cc: x86@kernel.org Cc: linux-kernel@vger.kernel.org --- tools/objtool/check.c | 16 ++++++++-------- tools/objtool/elf.c | 14 ++++++-------- tools/objtool/include/objtool/elf.h | 1 - tools/objtool/orc_gen.c | 2 +- tools/objtool/special.c | 4 ++-- 5 files changed, 17 insertions(+), 20 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index e5947fbb9e7a..06b5c164ae93 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -292,7 +292,7 @@ static int decode_instructions(struct objtool_file *file) !strcmp(sec->name, ".entry.text")) sec->noinstr = true; - for (offset = 0; offset < sec->len; offset += insn->len) { + for (offset = 0; offset < sec->sh.sh_size; offset += insn->len) { insn = malloc(sizeof(*insn)); if (!insn) { WARN("malloc failed"); @@ -307,7 +307,7 @@ static int decode_instructions(struct objtool_file *file) insn->offset = offset; ret = arch_decode_instruction(file->elf, sec, offset, - sec->len - offset, + sec->sh.sh_size - offset, &insn->len, &insn->type, &insn->immediate, &insn->stack_ops); @@ -349,9 +349,9 @@ static struct instruction *find_last_insn(struct objtool_file *file, { struct instruction *insn = NULL; unsigned int offset; - unsigned int end = (sec->len > 10) ? sec->len - 10 : 0; + unsigned int end = (sec->sh.sh_size > 10) ? 
sec->sh.sh_size - 10 : 0; - for (offset = sec->len - 1; offset >= end && !insn; offset--) + for (offset = sec->sh.sh_size - 1; offset >= end && !insn; offset--) insn = find_insn(file, sec, offset); return insn; @@ -389,7 +389,7 @@ static int add_dead_ends(struct objtool_file *file) insn = find_insn(file, reloc->sym->sec, reloc->addend); if (insn) insn = list_prev_entry(insn, list); - else if (reloc->addend == reloc->sym->sec->len) { + else if (reloc->addend == reloc->sym->sec->sh.sh_size) { insn = find_last_insn(file, reloc->sym->sec); if (!insn) { WARN("can't find unreachable insn at %s+0x%x", @@ -424,7 +424,7 @@ static int add_dead_ends(struct objtool_file *file) insn = find_insn(file, reloc->sym->sec, reloc->addend); if (insn) insn = list_prev_entry(insn, list); - else if (reloc->addend == reloc->sym->sec->len) { + else if (reloc->addend == reloc->sym->sec->sh.sh_size) { insn = find_last_insn(file, reloc->sym->sec); if (!insn) { WARN("can't find reachable insn at %s+0x%x", @@ -1561,14 +1561,14 @@ static int read_unwind_hints(struct objtool_file *file) return -1; } - if (sec->len % sizeof(struct unwind_hint)) { + if (sec->sh.sh_size % sizeof(struct unwind_hint)) { WARN("struct unwind_hint size mismatch"); return -1; } file->hints = true; - for (i = 0; i < sec->len / sizeof(struct unwind_hint); i++) { + for (i = 0; i < sec->sh.sh_size / sizeof(struct unwind_hint); i++) { hint = (struct unwind_hint *)sec->data->d_buf + i; reloc = find_reloc_by_dest(file->elf, sec, i * sizeof(*hint)); diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 8676c7598728..b18f0055b50b 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -286,10 +286,9 @@ static int read_sections(struct elf *elf) return -1; } } - sec->len = sec->sh.sh_size; if (sec->sh.sh_flags & SHF_EXECINSTR) - elf->text_size += sec->len; + elf->text_size += sec->sh.sh_size; list_add_tail(&sec->list, &elf->sections); elf_hash_add(section, &sec->hash, sec->idx); @@ -734,8 +733,8 @@ static int 
elf_add_string(struct elf *elf, struct section *strtab, char *str) data->d_size = strlen(str) + 1; data->d_align = 1; - len = strtab->len; - strtab->len += data->d_size; + len = strtab->sh.sh_size; + strtab->sh.sh_size += data->d_size; strtab->changed = true; return len; @@ -790,9 +789,9 @@ struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name) data->d_align = 1; data->d_type = ELF_T_SYM; - sym->idx = symtab->len / sizeof(sym->sym); + sym->idx = symtab->sh.sh_size / sizeof(sym->sym); - symtab->len += data->d_size; + symtab->sh.sh_size += data->d_size; symtab->changed = true; symtab_shndx = find_section_by_name(elf, ".symtab_shndx"); @@ -814,7 +813,7 @@ struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name) data->d_align = 4; data->d_type = ELF_T_WORD; - symtab_shndx->len += 4; + symtab_shndx->sh.sh_size += 4; symtab_shndx->changed = true; } @@ -855,7 +854,6 @@ struct section *elf_create_section(struct elf *elf, const char *name, } sec->idx = elf_ndxscn(s); - sec->len = size; sec->changed = true; sec->data = elf_newdata(s); diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index e34395047530..075d8291b854 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -38,7 +38,6 @@ struct section { Elf_Data *data; char *name; int idx; - unsigned int len; bool changed, text, rodata, noinstr; }; diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index dc9b7dd314b0..b5865e2450cb 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -204,7 +204,7 @@ int orc_create(struct objtool_file *file) /* Add a section terminator */ if (!empty) { - orc_list_add(&orc_list, &null, sec, sec->len); + orc_list_add(&orc_list, &null, sec, sec->sh.sh_size); nr++; } } diff --git a/tools/objtool/special.c b/tools/objtool/special.c index 83d5f969bcb0..06c3eacab3d5 100644 --- a/tools/objtool/special.c +++ b/tools/objtool/special.c @@ -159,13 +159,13 @@ int 
special_get_alts(struct elf *elf, struct list_head *alts) if (!sec) continue; - if (sec->len % entry->size != 0) { + if (sec->sh.sh_size % entry->size != 0) { WARN("%s size not a multiple of %d", sec->name, entry->size); return -1; } - nr_entries = sec->len / entry->size; + nr_entries = sec->sh.sh_size / entry->size; for (idx = 0; idx < nr_entries; idx++) { alt = malloc(sizeof(*alt)); From f3d7c2cdf6dc0d5402ec29c3673893b3542c5ad1 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Tue, 5 Oct 2021 11:36:01 -0700 Subject: [PATCH 080/235] xtensa: xtfpga: use CONFIG_USE_OF instead of CONFIG_OF Use platform data to initialize xtfpga device drivers when CONFIG_USE_OF is not selected. This fixes xtfpga networking when CONFIG_USE_OF is not selected but CONFIG_OF is. Signed-off-by: Max Filippov --- arch/xtensa/platforms/xtfpga/setup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/xtensa/platforms/xtfpga/setup.c b/arch/xtensa/platforms/xtfpga/setup.c index 4f7d6142d41f..59b7f11f2a3e 100644 --- a/arch/xtensa/platforms/xtfpga/setup.c +++ b/arch/xtensa/platforms/xtfpga/setup.c @@ -66,7 +66,7 @@ void __init platform_calibrate_ccount(void) #endif -#ifdef CONFIG_OF +#ifdef CONFIG_USE_OF static void __init xtfpga_clk_setup(struct device_node *np) { @@ -284,4 +284,4 @@ static int __init xtavnet_init(void) */ arch_initcall(xtavnet_init); -#endif /* CONFIG_OF */ +#endif /* CONFIG_USE_OF */ From 012e974501a270d8dfd4ee2039e1fdf7579c907e Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 1 Aug 2021 10:36:59 -0700 Subject: [PATCH 081/235] xtensa: xtfpga: Try software restart before simulating CPU reset Rebooting xtensa images loaded with the '-kernel' option in qemu does not work. When executing a reboot command, the qemu session either hangs or experiences an endless sequence of error messages. 
Kernel panic - not syncing: Unrecoverable error in exception handler Reset code jumps to the CPU restart address, but Linux can not recover from there because code and data in the kernel init sections have been discarded and overwritten at this point. XTFPGA platforms have a means to reset the CPU by writing 0xdead into a specific FPGA IO address. When used in QEMU the kernel image loaded with the '-kernel' option gets restored to its original state allowing the machine to boot successfully. Use that mechanism to attempt a platform reset. If it does not work, fall back to the existing mechanism. Signed-off-by: Guenter Roeck Signed-off-by: Max Filippov --- arch/xtensa/platforms/xtfpga/setup.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/xtensa/platforms/xtfpga/setup.c b/arch/xtensa/platforms/xtfpga/setup.c index 59b7f11f2a3e..538e6748e85a 100644 --- a/arch/xtensa/platforms/xtfpga/setup.c +++ b/arch/xtensa/platforms/xtfpga/setup.c @@ -51,8 +51,12 @@ void platform_power_off(void) void platform_restart(void) { - /* Flush and reset the mmu, simulate a processor reset, and - * jump to the reset vector. */ + /* Try software reset first. */ + WRITE_ONCE(*(u32 *)XTFPGA_SWRST_VADDR, 0xdead); + + /* If software reset did not work, flush and reset the mmu, + * simulate a processor reset, and jump to the reset vector. + */ cpu_reset(); /* control never gets here */ } From 363999901116ffa9a5462215fef25ea9c7f2823c Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Fri, 24 Sep 2021 09:17:30 +0900 Subject: [PATCH 082/235] ksmbd: add the check to vaildate if stream protocol length exceeds maximum value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch add MAX_STREAM_PROT_LEN macro and check if stream protocol length exceeds maximum value. opencode pdu size check in ksmbd_pdu_size_has_room(). 
Cc: Tom Talpey Cc: Ronnie Sahlberg Cc: Ralph Böhme Acked-by: Hyunchul Lee Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/connection.c | 10 ++++++---- fs/ksmbd/smb_common.c | 6 ------ fs/ksmbd/smb_common.h | 4 ++-- 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/fs/ksmbd/connection.c b/fs/ksmbd/connection.c index af086d35398a..48b18b4ec117 100644 --- a/fs/ksmbd/connection.c +++ b/fs/ksmbd/connection.c @@ -296,10 +296,12 @@ int ksmbd_conn_handler_loop(void *p) pdu_size = get_rfc1002_len(hdr_buf); ksmbd_debug(CONN, "RFC1002 header %u bytes\n", pdu_size); - /* make sure we have enough to get to SMB header end */ - if (!ksmbd_pdu_size_has_room(pdu_size)) { - ksmbd_debug(CONN, "SMB request too short (%u bytes)\n", - pdu_size); + /* + * Check if pdu size is valid (min : smb header size, + * max : 0x00FFFFFF). + */ + if (pdu_size < __SMB2_HEADER_STRUCTURE_SIZE || + pdu_size > MAX_STREAM_PROT_LEN) { continue; } diff --git a/fs/ksmbd/smb_common.c b/fs/ksmbd/smb_common.c index db8042a173d0..b6c4c7e960fa 100644 --- a/fs/ksmbd/smb_common.c +++ b/fs/ksmbd/smb_common.c @@ -21,7 +21,6 @@ static const char basechars[43] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_-!@#$%"; #define MAGIC_CHAR '~' #define PERIOD '.' 
#define mangle(V) ((char)(basechars[(V) % MANGLE_BASE])) -#define KSMBD_MIN_SUPPORTED_HEADER_SIZE (sizeof(struct smb2_hdr)) struct smb_protocol { int index; @@ -294,11 +293,6 @@ int ksmbd_init_smb_server(struct ksmbd_work *work) return 0; } -bool ksmbd_pdu_size_has_room(unsigned int pdu) -{ - return (pdu >= KSMBD_MIN_SUPPORTED_HEADER_SIZE - 4); -} - int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work, int info_level, struct ksmbd_file *dir, struct ksmbd_dir_info *d_info, diff --git a/fs/ksmbd/smb_common.h b/fs/ksmbd/smb_common.h index 994abede27e9..6e79e7577f6b 100644 --- a/fs/ksmbd/smb_common.h +++ b/fs/ksmbd/smb_common.h @@ -48,6 +48,8 @@ #define CIFS_DEFAULT_IOSIZE (64 * 1024) #define MAX_CIFS_SMALL_BUFFER_SIZE 448 /* big enough for most */ +#define MAX_STREAM_PROT_LEN 0x00FFFFFF + /* Responses when opening a file. */ #define F_SUPERSEDED 0 #define F_OPENED 1 @@ -493,8 +495,6 @@ int ksmbd_lookup_dialect_by_id(__le16 *cli_dialects, __le16 dialects_count); int ksmbd_init_smb_server(struct ksmbd_work *work); -bool ksmbd_pdu_size_has_room(unsigned int pdu); - struct ksmbd_kstat; int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work, int info_level, From 319933a80fd4f07122466a77f93e5019d71be74c Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 5 Oct 2021 15:34:33 +0200 Subject: [PATCH 083/235] xen/balloon: fix cancelled balloon action MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In case a ballooning action is cancelled the new kernel thread handling the ballooning might end up in a busy loop. Fix that by handling the cancelled action gracefully. While at it introduce a short wait for the BP_WAIT case. 
Cc: stable@vger.kernel.org Fixes: 8480ed9c2bbd56 ("xen/balloon: use a kernel thread instead a workqueue") Reported-by: Marek Marczykowski-Górecki Signed-off-by: Juergen Gross Tested-by: Jason Andryuk Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20211005133433.32008-1-jgross@suse.com Signed-off-by: Juergen Gross --- drivers/xen/balloon.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 43ebfe36ac27..3a50f097ed3e 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -491,12 +491,12 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) } /* - * Stop waiting if either state is not BP_EAGAIN and ballooning action is - * needed, or if the credit has changed while state is BP_EAGAIN. + * Stop waiting if either state is BP_DONE and ballooning action is + * needed, or if the credit has changed while state is not BP_DONE. */ static bool balloon_thread_cond(enum bp_state state, long credit) { - if (state != BP_EAGAIN) + if (state == BP_DONE) credit = 0; return current_credit() != credit || kthread_should_stop(); @@ -516,10 +516,19 @@ static int balloon_thread(void *unused) set_freezable(); for (;;) { - if (state == BP_EAGAIN) - timeout = balloon_stats.schedule_delay * HZ; - else + switch (state) { + case BP_DONE: + case BP_ECANCELED: timeout = 3600 * HZ; + break; + case BP_EAGAIN: + timeout = balloon_stats.schedule_delay * HZ; + break; + case BP_WAIT: + timeout = HZ; + break; + } + credit = current_credit(); wait_event_freezable_timeout(balloon_thread_wq, From c026565fe9be813fe826f7e5533ed763283af5f0 Mon Sep 17 00:00:00 2001 From: Edmund Dea Date: Fri, 4 Dec 2020 14:34:29 -0800 Subject: [PATCH 084/235] drm/kmb: Enable alpha blended second plane Enable one additional plane that is alpha blended on top of the primary plane. 
This also fixes the below warnings when building with -Warray-bounds: drivers/gpu/drm/kmb/kmb_plane.c:135:20: warning: array subscript 3 is above array bounds of 'struct layer_status[1]' [-Warray-bounds] drivers/gpu/drm/kmb/kmb_plane.c:132:20: warning: array subscript 2 is above array bounds of 'struct layer_status[1]' [-Warray-bounds] drivers/gpu/drm/kmb/kmb_plane.c:129:20: warning: array subscript 1 is above array bounds of 'struct layer_status[1]' [-Warray-bounds] v2: corrected previous patch dependencies so it builds Signed-off-by: Edmund Dea Signed-off-by: Anitha Chrisanthus Acked-by: Sam Ravnborg Link: https://patchwork.kernel.org/project/dri-devel/patch/20210728003126.1425028-13-anitha.chrisanthus@intel.com/ Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/kmb/kmb_drv.c | 8 ++-- drivers/gpu/drm/kmb/kmb_drv.h | 5 ++ drivers/gpu/drm/kmb/kmb_plane.c | 81 +++++++++++++++++++++++++++++---- drivers/gpu/drm/kmb/kmb_plane.h | 5 +- drivers/gpu/drm/kmb/kmb_regs.h | 3 ++ 5 files changed, 87 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/kmb/kmb_drv.c b/drivers/gpu/drm/kmb/kmb_drv.c index 1c2f4799f421..12ce669650cc 100644 --- a/drivers/gpu/drm/kmb/kmb_drv.c +++ b/drivers/gpu/drm/kmb/kmb_drv.c @@ -172,10 +172,10 @@ static int kmb_setup_mode_config(struct drm_device *drm) ret = drmm_mode_config_init(drm); if (ret) return ret; - drm->mode_config.min_width = KMB_MIN_WIDTH; - drm->mode_config.min_height = KMB_MIN_HEIGHT; - drm->mode_config.max_width = KMB_MAX_WIDTH; - drm->mode_config.max_height = KMB_MAX_HEIGHT; + drm->mode_config.min_width = KMB_FB_MIN_WIDTH; + drm->mode_config.min_height = KMB_FB_MIN_HEIGHT; + drm->mode_config.max_width = KMB_FB_MAX_WIDTH; + drm->mode_config.max_height = KMB_FB_MAX_HEIGHT; drm->mode_config.funcs = &kmb_mode_config_funcs; ret = kmb_setup_crtc(drm); diff --git a/drivers/gpu/drm/kmb/kmb_drv.h b/drivers/gpu/drm/kmb/kmb_drv.h index ebbaa5f422d5..69a62e2d03ff 100644 --- a/drivers/gpu/drm/kmb/kmb_drv.h +++
b/drivers/gpu/drm/kmb/kmb_drv.h @@ -20,6 +20,11 @@ #define DRIVER_MAJOR 1 #define DRIVER_MINOR 1 +#define KMB_FB_MAX_WIDTH 1920 +#define KMB_FB_MAX_HEIGHT 1080 +#define KMB_FB_MIN_WIDTH 1 +#define KMB_FB_MIN_HEIGHT 1 + #define KMB_LCD_DEFAULT_CLK 200000000 #define KMB_SYS_CLK_MHZ 500 diff --git a/drivers/gpu/drm/kmb/kmb_plane.c b/drivers/gpu/drm/kmb/kmb_plane.c index ecee6782612d..06b0c42c9e91 100644 --- a/drivers/gpu/drm/kmb/kmb_plane.c +++ b/drivers/gpu/drm/kmb/kmb_plane.c @@ -94,9 +94,10 @@ static int kmb_plane_atomic_check(struct drm_plane *plane, if (ret) return ret; - if (new_plane_state->crtc_w > KMB_MAX_WIDTH || new_plane_state->crtc_h > KMB_MAX_HEIGHT) - return -EINVAL; - if (new_plane_state->crtc_w < KMB_MIN_WIDTH || new_plane_state->crtc_h < KMB_MIN_HEIGHT) + if (new_plane_state->crtc_w > KMB_FB_MAX_WIDTH || + new_plane_state->crtc_h > KMB_FB_MAX_HEIGHT || + new_plane_state->crtc_w < KMB_FB_MIN_WIDTH || + new_plane_state->crtc_h < KMB_FB_MIN_HEIGHT) return -EINVAL; can_position = (plane->type == DRM_PLANE_TYPE_OVERLAY); crtc_state = @@ -277,6 +278,44 @@ static void config_csc(struct kmb_drm_private *kmb, int plane_id) kmb_write_lcd(kmb, LCD_LAYERn_CSC_OFF3(plane_id), csc_coef_lcd[11]); } +static void kmb_plane_set_alpha(struct kmb_drm_private *kmb, + const struct drm_plane_state *state, + unsigned char plane_id, + unsigned int *val) +{ + u16 plane_alpha = state->alpha; + u16 pixel_blend_mode = state->pixel_blend_mode; + int has_alpha = state->fb->format->has_alpha; + + if (plane_alpha != DRM_BLEND_ALPHA_OPAQUE) + *val |= LCD_LAYER_ALPHA_STATIC; + + if (has_alpha) { + switch (pixel_blend_mode) { + case DRM_MODE_BLEND_PIXEL_NONE: + break; + case DRM_MODE_BLEND_PREMULTI: + *val |= LCD_LAYER_ALPHA_EMBED | LCD_LAYER_ALPHA_PREMULT; + break; + case DRM_MODE_BLEND_COVERAGE: + *val |= LCD_LAYER_ALPHA_EMBED; + break; + default: + DRM_DEBUG("Missing pixel blend mode case (%s == %ld)\n", + __stringify(pixel_blend_mode), + (long)pixel_blend_mode); + break; + } + } + 
+ if (plane_alpha == DRM_BLEND_ALPHA_OPAQUE && !has_alpha) { + *val &= LCD_LAYER_ALPHA_DISABLED; + return; + } + + kmb_write_lcd(kmb, LCD_LAYERn_ALPHA(plane_id), plane_alpha); +} + static void kmb_plane_atomic_update(struct drm_plane *plane, struct drm_atomic_state *state) { @@ -303,11 +342,12 @@ static void kmb_plane_atomic_update(struct drm_plane *plane, fb = new_plane_state->fb; if (!fb) return; + num_planes = fb->format->num_planes; kmb_plane = to_kmb_plane(plane); - plane_id = kmb_plane->id; kmb = to_kmb(plane->dev); + plane_id = kmb_plane->id; spin_lock_irq(&kmb->irq_lock); if (kmb->kmb_under_flow || kmb->kmb_flush_done) { @@ -400,20 +440,32 @@ static void kmb_plane_atomic_update(struct drm_plane *plane, config_csc(kmb, plane_id); } + kmb_plane_set_alpha(kmb, plane->state, plane_id, &val); + kmb_write_lcd(kmb, LCD_LAYERn_CFG(plane_id), val); + /* Configure LCD_CONTROL */ + ctrl = kmb_read_lcd(kmb, LCD_CONTROL); + + /* Set layer blending config */ + ctrl &= ~LCD_CTRL_ALPHA_ALL; + ctrl |= LCD_CTRL_ALPHA_BOTTOM_VL1 | + LCD_CTRL_ALPHA_BLEND_VL2; + + ctrl &= ~LCD_CTRL_ALPHA_BLEND_BKGND_DISABLE; + switch (plane_id) { case LAYER_0: - ctrl = LCD_CTRL_VL1_ENABLE; + ctrl |= LCD_CTRL_VL1_ENABLE; break; case LAYER_1: - ctrl = LCD_CTRL_VL2_ENABLE; + ctrl |= LCD_CTRL_VL2_ENABLE; break; case LAYER_2: - ctrl = LCD_CTRL_GL1_ENABLE; + ctrl |= LCD_CTRL_GL1_ENABLE; break; case LAYER_3: - ctrl = LCD_CTRL_GL2_ENABLE; + ctrl |= LCD_CTRL_GL2_ENABLE; break; } @@ -425,7 +477,7 @@ static void kmb_plane_atomic_update(struct drm_plane *plane, */ ctrl |= LCD_CTRL_VHSYNC_IDLE_LVL; - kmb_set_bitmask_lcd(kmb, LCD_CONTROL, ctrl); + kmb_write_lcd(kmb, LCD_CONTROL, ctrl); /* Enable pipeline AXI read transactions for the DMA * after setting graphics layers. 
This must be done @@ -490,6 +542,9 @@ struct kmb_plane *kmb_plane_init(struct drm_device *drm) enum drm_plane_type plane_type; const u32 *plane_formats; int num_plane_formats; + unsigned int blend_caps = BIT(DRM_MODE_BLEND_PIXEL_NONE) | + BIT(DRM_MODE_BLEND_PREMULTI) | + BIT(DRM_MODE_BLEND_COVERAGE); for (i = 0; i < KMB_MAX_PLANES; i++) { plane = drmm_kzalloc(drm, sizeof(*plane), GFP_KERNEL); @@ -521,8 +576,16 @@ struct kmb_plane *kmb_plane_init(struct drm_device *drm) drm_dbg(drm, "%s : %d i=%d type=%d", __func__, __LINE__, i, plane_type); + drm_plane_create_alpha_property(&plane->base_plane); + + drm_plane_create_blend_mode_property(&plane->base_plane, + blend_caps); + + drm_plane_create_zpos_immutable_property(&plane->base_plane, i); + drm_plane_helper_add(&plane->base_plane, &kmb_plane_helper_funcs); + if (plane_type == DRM_PLANE_TYPE_PRIMARY) { primary = plane; kmb->plane = plane; diff --git a/drivers/gpu/drm/kmb/kmb_plane.h b/drivers/gpu/drm/kmb/kmb_plane.h index 486490f7a3ec..6e8d22cf8819 100644 --- a/drivers/gpu/drm/kmb/kmb_plane.h +++ b/drivers/gpu/drm/kmb/kmb_plane.h @@ -35,6 +35,9 @@ #define POSSIBLE_CRTCS 1 #define to_kmb_plane(x) container_of(x, struct kmb_plane, base_plane) +#define POSSIBLE_CRTCS 1 +#define KMB_MAX_PLANES 2 + enum layer_id { LAYER_0, LAYER_1, @@ -43,8 +46,6 @@ enum layer_id { /* KMB_MAX_PLANES */ }; -#define KMB_MAX_PLANES 1 - enum sub_plane_id { Y_PLANE, U_PLANE, diff --git a/drivers/gpu/drm/kmb/kmb_regs.h b/drivers/gpu/drm/kmb/kmb_regs.h index 48150569f702..9756101b0d32 100644 --- a/drivers/gpu/drm/kmb/kmb_regs.h +++ b/drivers/gpu/drm/kmb/kmb_regs.h @@ -43,8 +43,10 @@ #define LCD_CTRL_OUTPUT_ENABLED BIT(19) #define LCD_CTRL_BPORCH_ENABLE BIT(21) #define LCD_CTRL_FPORCH_ENABLE BIT(22) +#define LCD_CTRL_ALPHA_BLEND_BKGND_DISABLE BIT(23) #define LCD_CTRL_PIPELINE_DMA BIT(28) #define LCD_CTRL_VHSYNC_IDLE_LVL BIT(31) +#define LCD_CTRL_ALPHA_ALL (0xff << 6) /* interrupts */ #define LCD_INT_STATUS (0x4 * 0x001) @@ -115,6 +117,7 @@ #define 
LCD_LAYER_ALPHA_EMBED BIT(5) #define LCD_LAYER_ALPHA_COMBI (LCD_LAYER_ALPHA_STATIC | \ LCD_LAYER_ALPHA_EMBED) +#define LCD_LAYER_ALPHA_DISABLED ~(LCD_LAYER_ALPHA_COMBI) /* RGB multiplied with alpha */ #define LCD_LAYER_ALPHA_PREMULT BIT(6) #define LCD_LAYER_INVERT_COL BIT(7) From 5e2e412d47f21c1682e701f946d4114f9885c23f Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 19 Aug 2021 16:07:53 +0200 Subject: [PATCH 085/235] drm/vc4: hdmi: Remove unused struct Commit c7d30623540b ("drm/vc4: hdmi: Remove unused struct") removed the references to the vc4_hdmi_audio_widgets and vc4_hdmi_audio_routes structures, but not the structures themselves resulting in two warnings. Remove them. Fixes: c7d30623540b ("drm/vc4: hdmi: Remove unused struct") Reported-by: kernel test robot Signed-off-by: Maxime Ripard Reviewed-by: Dave Stevenson Link: https://patchwork.freedesktop.org/patch/msgid/20210819140753.930751-1-maxime@cerno.tech Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/vc4/vc4_hdmi.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index b4b4653fe301..ed8a4b7f8b6e 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -1395,14 +1395,6 @@ static int vc4_hdmi_audio_prepare(struct device *dev, void *data, return 0; } -static const struct snd_soc_dapm_widget vc4_hdmi_audio_widgets[] = { - SND_SOC_DAPM_OUTPUT("TX"), -}; - -static const struct snd_soc_dapm_route vc4_hdmi_audio_routes[] = { - { "TX", NULL, "Playback" }, -}; - static const struct snd_soc_component_driver vc4_hdmi_audio_cpu_dai_comp = { .name = "vc4-hdmi-cpu-dai-component", }; From c64c8e04a12ed3e2238761e26cda78e72550dc98 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Wed, 15 Sep 2021 19:58:36 +0200 Subject: [PATCH 086/235] drm/sun4i: dw-hdmi: Fix HDMI PHY clock setup Recent rework, which made HDMI PHY driver a platform device, inadvertently reversed clock setup order. HW is very touchy about it.
Proper way is to handle controllers resets and clocks first and HDMI PHYs second. Currently, without this fix, first mode set completely fails (nothing on HDMI monitor) on H3 era PHYs. On H6, it still somehow work. Move HDMI PHY reset & clocks handling to sun8i_hdmi_phy_init() which will assure that code is executed after controllers reset & clocks are handled. Additionally, add sun8i_hdmi_phy_deinit() which will deinit them at controllers driver unload. Tested on A64, H3, H6 and R40. Fixes: 9bf3797796f5 ("drm/sun4i: dw-hdmi: Make HDMI PHY into a platform device") Signed-off-by: Jernej Skrabec Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20210915175836.3158839-1-jernej.skrabec@gmail.com Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c | 7 +- drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h | 4 +- drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c | 97 ++++++++++++++------------ 3 files changed, 61 insertions(+), 47 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c index f75fb157f2ff..016b877051da 100644 --- a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c +++ b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c @@ -216,11 +216,13 @@ static int sun8i_dw_hdmi_bind(struct device *dev, struct device *master, goto err_disable_clk_tmds; } + ret = sun8i_hdmi_phy_init(hdmi->phy); + if (ret) + goto err_disable_clk_tmds; + drm_encoder_helper_add(encoder, &sun8i_dw_hdmi_encoder_helper_funcs); drm_simple_encoder_init(drm, encoder, DRM_MODE_ENCODER_TMDS); - sun8i_hdmi_phy_init(hdmi->phy); - plat_data->mode_valid = hdmi->quirks->mode_valid; plat_data->use_drm_infoframe = hdmi->quirks->use_drm_infoframe; sun8i_hdmi_phy_set_ops(hdmi->phy, plat_data); @@ -262,6 +264,7 @@ static void sun8i_dw_hdmi_unbind(struct device *dev, struct device *master, struct sun8i_dw_hdmi *hdmi = dev_get_drvdata(dev); dw_hdmi_unbind(hdmi->hdmi); + sun8i_hdmi_phy_deinit(hdmi->phy); clk_disable_unprepare(hdmi->clk_tmds); 
reset_control_assert(hdmi->rst_ctrl); gpiod_set_value(hdmi->ddc_en, 0); diff --git a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h index 74f6ed0e2570..bffe1b9cd3dc 100644 --- a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h +++ b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h @@ -169,6 +169,7 @@ struct sun8i_hdmi_phy { struct clk *clk_phy; struct clk *clk_pll0; struct clk *clk_pll1; + struct device *dev; unsigned int rcal; struct regmap *regs; struct reset_control *rst_phy; @@ -205,7 +206,8 @@ encoder_to_sun8i_dw_hdmi(struct drm_encoder *encoder) int sun8i_hdmi_phy_get(struct sun8i_dw_hdmi *hdmi, struct device_node *node); -void sun8i_hdmi_phy_init(struct sun8i_hdmi_phy *phy); +int sun8i_hdmi_phy_init(struct sun8i_hdmi_phy *phy); +void sun8i_hdmi_phy_deinit(struct sun8i_hdmi_phy *phy); void sun8i_hdmi_phy_set_ops(struct sun8i_hdmi_phy *phy, struct dw_hdmi_plat_data *plat_data); diff --git a/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c b/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c index c9239708d398..b64d93da651d 100644 --- a/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c +++ b/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c @@ -506,9 +506,60 @@ static void sun8i_hdmi_phy_init_h3(struct sun8i_hdmi_phy *phy) phy->rcal = (val & SUN8I_HDMI_PHY_ANA_STS_RCAL_MASK) >> 2; } -void sun8i_hdmi_phy_init(struct sun8i_hdmi_phy *phy) +int sun8i_hdmi_phy_init(struct sun8i_hdmi_phy *phy) { + int ret; + + ret = reset_control_deassert(phy->rst_phy); + if (ret) { + dev_err(phy->dev, "Cannot deassert phy reset control: %d\n", ret); + return ret; + } + + ret = clk_prepare_enable(phy->clk_bus); + if (ret) { + dev_err(phy->dev, "Cannot enable bus clock: %d\n", ret); + goto err_assert_rst_phy; + } + + ret = clk_prepare_enable(phy->clk_mod); + if (ret) { + dev_err(phy->dev, "Cannot enable mod clock: %d\n", ret); + goto err_disable_clk_bus; + } + + if (phy->variant->has_phy_clk) { + ret = sun8i_phy_clk_create(phy, phy->dev, + phy->variant->has_second_pll); + if (ret) { + dev_err(phy->dev, "Couldn't create 
the PHY clock\n"); + goto err_disable_clk_mod; + } + + clk_prepare_enable(phy->clk_phy); + } + phy->variant->phy_init(phy); + + return 0; + +err_disable_clk_mod: + clk_disable_unprepare(phy->clk_mod); +err_disable_clk_bus: + clk_disable_unprepare(phy->clk_bus); +err_assert_rst_phy: + reset_control_assert(phy->rst_phy); + + return ret; +} + +void sun8i_hdmi_phy_deinit(struct sun8i_hdmi_phy *phy) +{ + clk_disable_unprepare(phy->clk_mod); + clk_disable_unprepare(phy->clk_bus); + clk_disable_unprepare(phy->clk_phy); + + reset_control_assert(phy->rst_phy); } void sun8i_hdmi_phy_set_ops(struct sun8i_hdmi_phy *phy, @@ -638,6 +689,7 @@ static int sun8i_hdmi_phy_probe(struct platform_device *pdev) return -ENOMEM; phy->variant = (struct sun8i_hdmi_phy_variant *)match->data; + phy->dev = dev; ret = of_address_to_resource(node, 0, &res); if (ret) { @@ -696,47 +748,10 @@ static int sun8i_hdmi_phy_probe(struct platform_device *pdev) goto err_put_clk_pll1; } - ret = reset_control_deassert(phy->rst_phy); - if (ret) { - dev_err(dev, "Cannot deassert phy reset control: %d\n", ret); - goto err_put_rst_phy; - } - - ret = clk_prepare_enable(phy->clk_bus); - if (ret) { - dev_err(dev, "Cannot enable bus clock: %d\n", ret); - goto err_deassert_rst_phy; - } - - ret = clk_prepare_enable(phy->clk_mod); - if (ret) { - dev_err(dev, "Cannot enable mod clock: %d\n", ret); - goto err_disable_clk_bus; - } - - if (phy->variant->has_phy_clk) { - ret = sun8i_phy_clk_create(phy, dev, - phy->variant->has_second_pll); - if (ret) { - dev_err(dev, "Couldn't create the PHY clock\n"); - goto err_disable_clk_mod; - } - - clk_prepare_enable(phy->clk_phy); - } - platform_set_drvdata(pdev, phy); return 0; -err_disable_clk_mod: - clk_disable_unprepare(phy->clk_mod); -err_disable_clk_bus: - clk_disable_unprepare(phy->clk_bus); -err_deassert_rst_phy: - reset_control_assert(phy->rst_phy); -err_put_rst_phy: - reset_control_put(phy->rst_phy); err_put_clk_pll1: clk_put(phy->clk_pll1); err_put_clk_pll0: @@ -753,12 
+768,6 @@ static int sun8i_hdmi_phy_remove(struct platform_device *pdev) { struct sun8i_hdmi_phy *phy = platform_get_drvdata(pdev); - clk_disable_unprepare(phy->clk_mod); - clk_disable_unprepare(phy->clk_bus); - clk_disable_unprepare(phy->clk_phy); - - reset_control_assert(phy->rst_phy); - reset_control_put(phy->rst_phy); clk_put(phy->clk_pll0); From f732e2e34aa08493fdd762f3daa4e5f16bbf1e45 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 6 Sep 2021 10:56:28 +1000 Subject: [PATCH 087/235] drm/nouveau/kms/tu102-: delay enabling cursor until after assign_windows Prevent NVD core channel error code 67 occurring and hanging display, managed to reproduce on GA102 while testing suspend/resume scenarios. Required extension of earlier commit to fix interactions with EFI. Fixes: e78b1b545c6c ("drm/nouveau/kms/nv50: workaround EFI GOP window channel format differences") Signed-off-by: Ben Skeggs Cc: Lyude Paul Cc: Karol Herbst Cc: # v5.12+ Reviewed-by: Karol Herbst Signed-off-by: Karol Herbst Link: https://patchwork.freedesktop.org/patch/msgid/20210906005628.11499-2-skeggsb@gmail.com Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/nouveau/dispnv50/head.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/dispnv50/head.c b/drivers/gpu/drm/nouveau/dispnv50/head.c index d66f97280282..72099d1e4816 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/head.c +++ b/drivers/gpu/drm/nouveau/dispnv50/head.c @@ -52,6 +52,7 @@ nv50_head_flush_clr(struct nv50_head *head, void nv50_head_flush_set_wndw(struct nv50_head *head, struct nv50_head_atom *asyh) { + if (asyh->set.curs ) head->func->curs_set(head, asyh); if (asyh->set.olut ) { asyh->olut.offset = nv50_lut_load(&head->olut, asyh->olut.buffer, @@ -67,7 +68,6 @@ nv50_head_flush_set(struct nv50_head *head, struct nv50_head_atom *asyh) if (asyh->set.view ) head->func->view (head, asyh); if (asyh->set.mode ) head->func->mode (head, asyh); if (asyh->set.core ) head->func->core_set(head, asyh); - if
(asyh->set.curs ) head->func->curs_set(head, asyh); if (asyh->set.base ) head->func->base (head, asyh); if (asyh->set.ovly ) head->func->ovly (head, asyh); if (asyh->set.dither ) head->func->dither (head, asyh); From 49b2dfc081826874705b27f7970631319628ee7f Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Fri, 17 Sep 2021 08:04:06 +1000 Subject: [PATCH 088/235] drm/nouveau/ga102-: support ttm buffer moves via copy engine We don't currently have any kind of real acceleration on Ampere GPUs, but the TTM memcpy() fallback paths aren't really designed to handle copies between different devices, such as on Optimus systems, and result in a kernel OOPS. A few options were investigated to try and fix this, but didn't work out, and likely would have resulted in a very unpleasant experience for users anyway. This commit adds just enough support for setting up a single channel connected to a copy engine, which the kernel can use to accelerate the buffer copies between devices. Userspace has no access to this incomplete channel support, but it's suitable for TTM's needs. A more complete implementation of host(fifo) for Ampere GPUs is in the works, but the required changes are far too invasive that they would be unsuitable to backport to fix this issue on current kernels. 
v2: fix GPFIFO length in RAMFC (reported by Karol) Signed-off-by: Ben Skeggs Cc: Lyude Paul Cc: Karol Herbst Cc: # v5.12+ Reviewed-by: Karol Herbst Tested-by: Karol Herbst Signed-off-by: Karol Herbst Link: https://patchwork.freedesktop.org/patch/msgid/20210916220406.666454-1-skeggsb@gmail.com Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/nouveau/include/nvif/class.h | 2 + .../drm/nouveau/include/nvkm/engine/fifo.h | 1 + drivers/gpu/drm/nouveau/nouveau_bo.c | 1 + drivers/gpu/drm/nouveau/nouveau_chan.c | 6 +- drivers/gpu/drm/nouveau/nouveau_drm.c | 4 + drivers/gpu/drm/nouveau/nv84_fence.c | 2 +- .../gpu/drm/nouveau/nvkm/engine/device/base.c | 3 + .../gpu/drm/nouveau/nvkm/engine/fifo/Kbuild | 1 + .../gpu/drm/nouveau/nvkm/engine/fifo/ga102.c | 308 ++++++++++++++++++ .../gpu/drm/nouveau/nvkm/subdev/top/ga100.c | 7 +- 10 files changed, 329 insertions(+), 6 deletions(-) create mode 100644 drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c diff --git a/drivers/gpu/drm/nouveau/include/nvif/class.h b/drivers/gpu/drm/nouveau/include/nvif/class.h index c68cc957248e..a582c0cb0cb0 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/class.h +++ b/drivers/gpu/drm/nouveau/include/nvif/class.h @@ -71,6 +71,7 @@ #define PASCAL_CHANNEL_GPFIFO_A /* cla06f.h */ 0x0000c06f #define VOLTA_CHANNEL_GPFIFO_A /* clc36f.h */ 0x0000c36f #define TURING_CHANNEL_GPFIFO_A /* clc36f.h */ 0x0000c46f +#define AMPERE_CHANNEL_GPFIFO_B /* clc36f.h */ 0x0000c76f #define NV50_DISP /* cl5070.h */ 0x00005070 #define G82_DISP /* cl5070.h */ 0x00008270 @@ -200,6 +201,7 @@ #define PASCAL_DMA_COPY_B 0x0000c1b5 #define VOLTA_DMA_COPY_A 0x0000c3b5 #define TURING_DMA_COPY_A 0x0000c5b5 +#define AMPERE_DMA_COPY_B 0x0000c7b5 #define FERMI_DECOMPRESS 0x000090b8 diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h index 54fab7cc36c1..64ee82c7c1be 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h +++ 
b/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h @@ -77,4 +77,5 @@ int gp100_fifo_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct int gp10b_fifo_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_fifo **); int gv100_fifo_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_fifo **); int tu102_fifo_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_fifo **); +int ga102_fifo_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_fifo **); #endif diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 6d07e653f82d..c58bcdba2c7a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -844,6 +844,7 @@ nouveau_bo_move_init(struct nouveau_drm *drm) struct ttm_resource *, struct ttm_resource *); int (*init)(struct nouveau_channel *, u32 handle); } _methods[] = { + { "COPY", 4, 0xc7b5, nve0_bo_move_copy, nve0_bo_move_init }, { "COPY", 4, 0xc5b5, nve0_bo_move_copy, nve0_bo_move_init }, { "GRCE", 0, 0xc5b5, nve0_bo_move_copy, nvc0_bo_move_init }, { "COPY", 4, 0xc3b5, nve0_bo_move_copy, nve0_bo_move_init }, diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c index 80099ef75702..ea7769135b0d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_chan.c +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c @@ -250,7 +250,8 @@ static int nouveau_channel_ind(struct nouveau_drm *drm, struct nvif_device *device, u64 runlist, bool priv, struct nouveau_channel **pchan) { - static const u16 oclasses[] = { TURING_CHANNEL_GPFIFO_A, + static const u16 oclasses[] = { AMPERE_CHANNEL_GPFIFO_B, + TURING_CHANNEL_GPFIFO_A, VOLTA_CHANNEL_GPFIFO_A, PASCAL_CHANNEL_GPFIFO_A, MAXWELL_CHANNEL_GPFIFO_A, @@ -386,7 +387,8 @@ nouveau_channel_init(struct nouveau_channel *chan, u32 vram, u32 gart) nvif_object_map(&chan->user, NULL, 0); - if (chan->user.oclass >= FERMI_CHANNEL_GPFIFO) { + if (chan->user.oclass >= 
FERMI_CHANNEL_GPFIFO && + chan->user.oclass < AMPERE_CHANNEL_GPFIFO_B) { ret = nvif_notify_ctor(&chan->user, "abi16ChanKilled", nouveau_channel_killed, true, NV906F_V0_NTFY_KILLED, diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 1f828c9f691c..6109cd9e3399 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -345,6 +345,9 @@ nouveau_accel_gr_init(struct nouveau_drm *drm) u32 arg0, arg1; int ret; + if (device->info.family >= NV_DEVICE_INFO_V0_AMPERE) + return; + /* Allocate channel that has access to the graphics engine. */ if (device->info.family >= NV_DEVICE_INFO_V0_KEPLER) { arg0 = nvif_fifo_runlist(device, NV_DEVICE_HOST_RUNLIST_ENGINES_GR); @@ -469,6 +472,7 @@ nouveau_accel_init(struct nouveau_drm *drm) case PASCAL_CHANNEL_GPFIFO_A: case VOLTA_CHANNEL_GPFIFO_A: case TURING_CHANNEL_GPFIFO_A: + case AMPERE_CHANNEL_GPFIFO_B: ret = nvc0_fence_create(drm); break; default: diff --git a/drivers/gpu/drm/nouveau/nv84_fence.c b/drivers/gpu/drm/nouveau/nv84_fence.c index 7c9c928c3196..c3526a8622e3 100644 --- a/drivers/gpu/drm/nouveau/nv84_fence.c +++ b/drivers/gpu/drm/nouveau/nv84_fence.c @@ -204,7 +204,7 @@ nv84_fence_create(struct nouveau_drm *drm) priv->base.context_new = nv84_fence_context_new; priv->base.context_del = nv84_fence_context_del; - priv->base.uevent = true; + priv->base.uevent = drm->client.device.info.family < NV_DEVICE_INFO_V0_AMPERE; mutex_init(&priv->mutex); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 93ddf63d1114..ca75c5f6ecaf 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2602,6 +2602,7 @@ nv172_chipset = { .top = { 0x00000001, ga100_top_new }, .disp = { 0x00000001, ga102_disp_new }, .dma = { 0x00000001, gv100_dma_new }, + .fifo = { 0x00000001, ga102_fifo_new }, }; static const struct nvkm_device_chip @@ 
-2622,6 +2623,7 @@ nv174_chipset = { .top = { 0x00000001, ga100_top_new }, .disp = { 0x00000001, ga102_disp_new }, .dma = { 0x00000001, gv100_dma_new }, + .fifo = { 0x00000001, ga102_fifo_new }, }; static const struct nvkm_device_chip @@ -2642,6 +2644,7 @@ nv177_chipset = { .top = { 0x00000001, ga100_top_new }, .disp = { 0x00000001, ga102_disp_new }, .dma = { 0x00000001, gv100_dma_new }, + .fifo = { 0x00000001, ga102_fifo_new }, }; static int diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild index 3209eb7af65f..5e831d347a95 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild @@ -18,6 +18,7 @@ nvkm-y += nvkm/engine/fifo/gp100.o nvkm-y += nvkm/engine/fifo/gp10b.o nvkm-y += nvkm/engine/fifo/gv100.o nvkm-y += nvkm/engine/fifo/tu102.o +nvkm-y += nvkm/engine/fifo/ga102.o nvkm-y += nvkm/engine/fifo/chan.o nvkm-y += nvkm/engine/fifo/channv50.o diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c new file mode 100644 index 000000000000..f897bef13acf --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c @@ -0,0 +1,308 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#define ga102_fifo(p) container_of((p), struct ga102_fifo, base.engine) +#define ga102_chan(p) container_of((p), struct ga102_chan, object) +#include +#include "user.h" + +#include +#include +#include +#include + +#include +#include +#include + +struct ga102_fifo { + struct nvkm_fifo base; +}; + +struct ga102_chan { + struct nvkm_object object; + + struct { + u32 runl; + u32 chan; + } ctrl; + + struct nvkm_memory *mthd; + struct nvkm_memory *inst; + struct nvkm_memory *user; + struct nvkm_memory *runl; + + struct nvkm_vmm *vmm; +}; + +static int +ga102_chan_sclass(struct nvkm_object *object, int index, struct nvkm_oclass *oclass) +{ + if (index == 0) { + oclass->ctor = nvkm_object_new; + oclass->base = (struct nvkm_sclass) { -1, -1, AMPERE_DMA_COPY_B }; + return 0; + } + + return -EINVAL; +} + +static int +ga102_chan_map(struct nvkm_object *object, void *argv, u32 argc, + enum nvkm_object_map *type, u64 *addr, u64 *size) +{ + struct ga102_chan *chan = ga102_chan(object); + struct nvkm_device *device = chan->object.engine->subdev.device; + u64 bar2 = nvkm_memory_bar2(chan->user); + + if (bar2 == ~0ULL) + return -EFAULT; + + *type = NVKM_OBJECT_MAP_IO; + *addr = device->func->resource_addr(device, 3) + bar2; + *size = 0x1000; + return 0; +} + +static int +ga102_chan_fini(struct nvkm_object *object, bool suspend) +{ + struct ga102_chan *chan = ga102_chan(object); + struct nvkm_device *device = chan->object.engine->subdev.device; + + nvkm_wr32(device, chan->ctrl.chan, 
0x00000003); + + nvkm_wr32(device, chan->ctrl.runl + 0x098, 0x01000000); + nvkm_msec(device, 2000, + if (!(nvkm_rd32(device, chan->ctrl.runl + 0x098) & 0x00100000)) + break; + ); + + nvkm_wr32(device, chan->ctrl.runl + 0x088, 0); + + nvkm_wr32(device, chan->ctrl.chan, 0xffffffff); + return 0; +} + +static int +ga102_chan_init(struct nvkm_object *object) +{ + struct ga102_chan *chan = ga102_chan(object); + struct nvkm_device *device = chan->object.engine->subdev.device; + + nvkm_mask(device, chan->ctrl.runl + 0x300, 0x80000000, 0x80000000); + + nvkm_wr32(device, chan->ctrl.runl + 0x080, lower_32_bits(nvkm_memory_addr(chan->runl))); + nvkm_wr32(device, chan->ctrl.runl + 0x084, upper_32_bits(nvkm_memory_addr(chan->runl))); + nvkm_wr32(device, chan->ctrl.runl + 0x088, 2); + + nvkm_wr32(device, chan->ctrl.chan, 0x00000002); + nvkm_wr32(device, chan->ctrl.runl + 0x0090, 0); + return 0; +} + +static void * +ga102_chan_dtor(struct nvkm_object *object) +{ + struct ga102_chan *chan = ga102_chan(object); + + if (chan->vmm) { + nvkm_vmm_part(chan->vmm, chan->inst); + nvkm_vmm_unref(&chan->vmm); + } + + nvkm_memory_unref(&chan->runl); + nvkm_memory_unref(&chan->user); + nvkm_memory_unref(&chan->inst); + nvkm_memory_unref(&chan->mthd); + return chan; +} + +static const struct nvkm_object_func +ga102_chan = { + .dtor = ga102_chan_dtor, + .init = ga102_chan_init, + .fini = ga102_chan_fini, + .map = ga102_chan_map, + .sclass = ga102_chan_sclass, +}; + +static int +ga102_chan_new(struct nvkm_device *device, + const struct nvkm_oclass *oclass, void *argv, u32 argc, struct nvkm_object **pobject) +{ + struct volta_channel_gpfifo_a_v0 *args = argv; + struct nvkm_top_device *tdev; + struct nvkm_vmm *vmm; + struct ga102_chan *chan; + int ret; + + if (argc != sizeof(*args)) + return -ENOSYS; + + vmm = nvkm_uvmm_search(oclass->client, args->vmm); + if (IS_ERR(vmm)) + return PTR_ERR(vmm); + + if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL))) + return -ENOMEM; + + 
nvkm_object_ctor(&ga102_chan, oclass, &chan->object); + *pobject = &chan->object; + + list_for_each_entry(tdev, &device->top->device, head) { + if (tdev->type == NVKM_ENGINE_CE) { + chan->ctrl.runl = tdev->runlist; + break; + } + } + + if (!chan->ctrl.runl) + return -ENODEV; + + chan->ctrl.chan = nvkm_rd32(device, chan->ctrl.runl + 0x004) & 0xfffffff0; + args->token = nvkm_rd32(device, chan->ctrl.runl + 0x008) & 0xffff0000; + + ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x1000, true, &chan->mthd); + if (ret) + return ret; + + ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x1000, true, &chan->inst); + if (ret) + return ret; + + nvkm_kmap(chan->inst); + nvkm_wo32(chan->inst, 0x010, 0x0000face); + nvkm_wo32(chan->inst, 0x030, 0x7ffff902); + nvkm_wo32(chan->inst, 0x048, lower_32_bits(args->ioffset)); + nvkm_wo32(chan->inst, 0x04c, upper_32_bits(args->ioffset) | + (order_base_2(args->ilength / 8) << 16)); + nvkm_wo32(chan->inst, 0x084, 0x20400000); + nvkm_wo32(chan->inst, 0x094, 0x30000001); + nvkm_wo32(chan->inst, 0x0ac, 0x00020000); + nvkm_wo32(chan->inst, 0x0e4, 0x00000000); + nvkm_wo32(chan->inst, 0x0e8, 0); + nvkm_wo32(chan->inst, 0x0f4, 0x00001000); + nvkm_wo32(chan->inst, 0x0f8, 0x10003080); + nvkm_mo32(chan->inst, 0x218, 0x00000000, 0x00000000); + nvkm_wo32(chan->inst, 0x220, lower_32_bits(nvkm_memory_bar2(chan->mthd))); + nvkm_wo32(chan->inst, 0x224, upper_32_bits(nvkm_memory_bar2(chan->mthd))); + nvkm_done(chan->inst); + + ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x1000, true, &chan->user); + if (ret) + return ret; + + ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x1000, true, &chan->runl); + if (ret) + return ret; + + nvkm_kmap(chan->runl); + nvkm_wo32(chan->runl, 0x00, 0x80030001); + nvkm_wo32(chan->runl, 0x04, 1); + nvkm_wo32(chan->runl, 0x08, 0); + nvkm_wo32(chan->runl, 0x0c, 0x00000000); + nvkm_wo32(chan->runl, 0x10, lower_32_bits(nvkm_memory_addr(chan->user))); + nvkm_wo32(chan->runl, 0x14, 
upper_32_bits(nvkm_memory_addr(chan->user))); + nvkm_wo32(chan->runl, 0x18, lower_32_bits(nvkm_memory_addr(chan->inst))); + nvkm_wo32(chan->runl, 0x1c, upper_32_bits(nvkm_memory_addr(chan->inst))); + nvkm_done(chan->runl); + + ret = nvkm_vmm_join(vmm, chan->inst); + if (ret) + return ret; + + chan->vmm = nvkm_vmm_ref(vmm); + return 0; +} + +static const struct nvkm_device_oclass +ga102_chan_oclass = { + .ctor = ga102_chan_new, +}; + +static int +ga102_user_new(struct nvkm_device *device, + const struct nvkm_oclass *oclass, void *argv, u32 argc, struct nvkm_object **pobject) +{ + return tu102_fifo_user_new(oclass, argv, argc, pobject); +} + +static const struct nvkm_device_oclass +ga102_user_oclass = { + .ctor = ga102_user_new, +}; + +static int +ga102_fifo_sclass(struct nvkm_oclass *oclass, int index, const struct nvkm_device_oclass **class) +{ + if (index == 0) { + oclass->base = (struct nvkm_sclass) { -1, -1, VOLTA_USERMODE_A }; + *class = &ga102_user_oclass; + return 0; + } else + if (index == 1) { + oclass->base = (struct nvkm_sclass) { 0, 0, AMPERE_CHANNEL_GPFIFO_B }; + *class = &ga102_chan_oclass; + return 0; + } + + return 2; +} + +static int +ga102_fifo_info(struct nvkm_engine *engine, u64 mthd, u64 *data) +{ + switch (mthd) { + case NV_DEVICE_HOST_CHANNELS: *data = 1; return 0; + default: + break; + } + + return -ENOSYS; +} + +static void * +ga102_fifo_dtor(struct nvkm_engine *engine) +{ + return ga102_fifo(engine); +} + +static const struct nvkm_engine_func +ga102_fifo = { + .dtor = ga102_fifo_dtor, + .info = ga102_fifo_info, + .base.sclass = ga102_fifo_sclass, +}; + +int +ga102_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, + struct nvkm_fifo **pfifo) +{ + struct ga102_fifo *fifo; + + if (!(fifo = kzalloc(sizeof(*fifo), GFP_KERNEL))) + return -ENOMEM; + + nvkm_engine_ctor(&ga102_fifo, device, type, inst, true, &fifo->base.engine); + *pfifo = &fifo->base; + return 0; +} diff --git 
a/drivers/gpu/drm/nouveau/nvkm/subdev/top/ga100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/top/ga100.c index 31933f3e5a07..c982d834c8d9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/top/ga100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/top/ga100.c @@ -54,7 +54,7 @@ ga100_top_oneinit(struct nvkm_top *top) info->reset = (data & 0x0000001f); break; case 2: - info->runlist = (data & 0x0000fc00) >> 10; + info->runlist = (data & 0x00fffc00); info->engine = (data & 0x00000003); break; default: @@ -85,9 +85,10 @@ ga100_top_oneinit(struct nvkm_top *top) } nvkm_debug(subdev, "%02x.%d (%8s): addr %06x fault %2d " - "runlist %2d engine %2d reset %2d\n", type, inst, + "runlist %6x engine %2d reset %2d\n", type, inst, info->type == NVKM_SUBDEV_NR ? "????????" : nvkm_subdev_type[info->type], - info->addr, info->fault, info->runlist, info->engine, info->reset); + info->addr, info->fault, info->runlist < 0 ? 0 : info->runlist, + info->engine, info->reset); info = NULL; } From 64ec4912c51ad782067e56b106735eaf62ea035c Mon Sep 17 00:00:00 2001 From: Chris Morgan Date: Thu, 16 Sep 2021 15:29:07 -0500 Subject: [PATCH 089/235] drm/rockchip: Update crtc fixup to account for fractional clk change After commit 928f9e268611 ("clk: fractional-divider: Hide clk_fractional_divider_ops from wide audience") was merged it appears that the DSI panel on my Odroid Go Advance stopped working. Upon closer examination of the problem, it looks like it was the fixup in the rockchip_drm_vop.c file was causing the issue. The changes made to the clk driver appear to change some assumptions made in the fixup. After debugging the working 5.14 kernel and the no-longer working 5.15 kernel, it looks like this was broken all along but still worked, whereas after the fractional clock change it stopped working despite the issue (it went from sort-of broken to very broken). In the 5.14 kernel the dclk_vopb_frac was being requested to be set to 17000999 on my board. 
The clock driver was taking the value of the parent clock and attempting to divide it by the requested value (17000000/17000999 = 0), then subtracting 1 from it (making it -1), and running it through fls_long to get 64. It would then subtract the value of fd->mwidth from it to get 48, and then bit shift 17000999 to the left by 48, coming up with a very large number of 7649082492112076800. This resulted in a numerator of 65535 and a denominator of 1 from the clk driver. The driver seemingly would try again and get a correct 1:1 value later, and then move on. Output from my 5.14 kernel (with some printfs for good measure): [ 2.830066] rockchip-drm display-subsystem: bound ff460000.vop (ops vop_component_ops) [ 2.839431] rockchip-drm display-subsystem: bound ff450000.dsi (ops dw_mipi_dsi_rockchip_ops) [ 2.855980] Clock is dclk_vopb_frac [ 2.856004] Scale 64, Rate 7649082492112076800, Oldrate 17000999, Parent Rate 17000000, Best Numerator 65535, Best Denominator 1, fd->mwidth 16 [ 2.903529] Clock is dclk_vopb_frac [ 2.903556] Scale 0, Rate 17000000, Oldrate 17000000, Parent Rate 17000000, Best Numerator 1, Best Denominator 1, fd->mwidth 16 [ 2.903579] Clock is dclk_vopb_frac [ 2.903583] Scale 0, Rate 17000000, Oldrate 17000000, Parent Rate 17000000, Best Numerator 1, Best Denominator 1, fd->mwidth 16 Contrast this with 5.15 after the clk change where the rate of 17000999 was getting passed and resulted in numerators/denominators of 17001/17000.
Output from my 5.15 kernel (with some printfs added for good measure): [ 2.817571] rockchip-drm display-subsystem: bound ff460000.vop (ops vop_component_ops) [ 2.826975] rockchip-drm display-subsystem: bound ff450000.dsi (ops dw_mipi_dsi_rockchip_ops) [ 2.843430] Rate 17000999, Parent Rate 17000000, Best Numerator 17018, Best Denominator 17017 [ 2.891073] Rate 17001000, Parent Rate 17000000, Best Numerator 17001, Best Denominator 17000 [ 2.891269] Rate 17001000, Parent Rate 17000000, Best Numerator 17001, Best Denominator 17000 [ 2.891281] Rate 17001000, Parent Rate 17000000, Best Numerator 17001, Best Denominator 17000 I have tested the change extensively on my Odroid Go Advance (Rockchip RK3326) and it appears to work well. However, this change will affect all Rockchip SoCs that use this driver so I believe further testing is warranted. Please note that without this change I can confirm at least all PX30s with DSI panels will stop working with the 5.15 kernel. Upon advice from Doug Anderson it was decided that we would first check if the clock rate can be set exactly as requested, and only if it could not would we then add 999 to it and attempt the process again. This way we can preserve the behavior for clocks that still need it while resolving the specific issue for the PX30 and DSI panels (since it is using a fractional clock). Changes since v2: - Moved fixes to correct location. Changes since v1: - Made the addition of 999 conditional based on whether the clock subsystem can set the actual clock rate as requested. - Updated the notes in the fixup routine to reflect this new behavior. - Added reference to original commit, as this has technically been broken since then; however, it has only now become an issue due to the clock changes.
Fixes: 4e7cf74fa3b2 ("clk: fractional-divider: Export approximation algorithm to the CCF users") Signed-off-by: Chris Morgan Reviewed-by: Douglas Anderson Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20210916202907.18394-1-macroalpha82@gmail.com Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/rockchip/rockchip_drm_vop.c | 26 ++++++++++----------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c index ba9e14da41b4..a25b98b7f5bd 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c @@ -1174,26 +1174,24 @@ static bool vop_crtc_mode_fixup(struct drm_crtc *crtc, * * Action plan: * - * 1. When DRM gives us a mode, we should add 999 Hz to it. That way - * if the clock we need is 60000001 Hz (~60 MHz) and DRM tells us to - * make 60000 kHz then the clock framework will actually give us - * the right clock. + * 1. Try to set the exact rate first, and confirm the clock framework + * can provide it. * - * NOTE: if the PLL (maybe through a divider) could actually make - * a clock rate 999 Hz higher instead of the one we want then this - * could be a problem. Unfortunately there's not much we can do - * since it's baked into DRM to use kHz. It shouldn't matter in - * practice since Rockchip PLLs are controlled by tables and - * even if there is a divider in the middle I wouldn't expect PLL - * rates in the table that are just a few kHz different. + * 2. If the clock framework cannot provide the exact rate, we should + * add 999 Hz to the requested rate. That way if the clock we need + * is 60000001 Hz (~60 MHz) and DRM tells us to make 60000 kHz then + * the clock framework will actually give us the right clock. * - * 2. Get the clock framework to round the rate for us to tell us + * 3. Get the clock framework to round the rate for us to tell us * what it will actually make. 
* - * 3. Store the rounded up rate so that we don't need to worry about + * 4. Store the rounded up rate so that we don't need to worry about * this in the actual clk_set_rate(). */ - rate = clk_round_rate(vop->dclk, adjusted_mode->clock * 1000 + 999); + rate = clk_round_rate(vop->dclk, adjusted_mode->clock * 1000); + if (rate / 1000 != adjusted_mode->clock) + rate = clk_round_rate(vop->dclk, + adjusted_mode->clock * 1000 + 999); adjusted_mode->clock = DIV_ROUND_UP(rate, 1000); return true; From 0689ea432a85ad1a108f47c3d90b6feae322c7f9 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 21 Sep 2021 19:07:35 +1000 Subject: [PATCH 090/235] drm/nouveau/fifo/ga102: initialise chid on return from channel creation Turns out caller isn't zero-initialised after-all. Fixes: 49b2dfc08182 ("drm/nouveau/ga102-: support ttm buffer moves via copy engine") Reported-by: Karol Herbst Signed-off-by: Ben Skeggs Reviewed-by: Karol Herbst Signed-off-by: Karol Herbst Link: https://patchwork.freedesktop.org/patch/msgid/20210921090735.247236-1-skeggsb@gmail.com Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c index f897bef13acf..c630dbd2911a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c @@ -179,6 +179,9 @@ ga102_chan_new(struct nvkm_device *device, return -ENODEV; chan->ctrl.chan = nvkm_rd32(device, chan->ctrl.runl + 0x004) & 0xfffffff0; + + args->chid = 0; + args->inst = 0; args->token = nvkm_rd32(device, chan->ctrl.runl + 0x008) & 0xffff0000; ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x1000, true, &chan->mthd); From 990a9ff072776908bf0654e23df69c30aa9ff945 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 19 Aug 2021 12:10:19 +0200 Subject: [PATCH 091/235] dt-bindings: panel: ili9341: correct 
indentation Correct indentation warning: ilitek,ili9341.yaml:25:9: [warning] wrong indentation: expected 10 but found 8 (indentation) Signed-off-by: Krzysztof Kozlowski Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20210819101020.26368-1-krzysztof.kozlowski@canonical.com (cherry picked from commit 333ba0d9d5d5a2cf1f6bbb754045e4f2cb3ed22d) Link: https://lore.kernel.org/dri-devel/CAL_JsqKcTfgnXNYzGDSFhKS2udhw2Dvk04ODwTxUdDRQjKdT0Q@mail.gmail.com/ Signed-off-by: Maxime Ripard Signed-off-by: Maarten Lankhorst --- .../devicetree/bindings/display/panel/ilitek,ili9341.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/display/panel/ilitek,ili9341.yaml b/Documentation/devicetree/bindings/display/panel/ilitek,ili9341.yaml index 2ed010f91e2d..20ce88ab4b3a 100644 --- a/Documentation/devicetree/bindings/display/panel/ilitek,ili9341.yaml +++ b/Documentation/devicetree/bindings/display/panel/ilitek,ili9341.yaml @@ -22,7 +22,7 @@ properties: items: - enum: # ili9341 240*320 Color on stm32f429-disco board - - st,sf-tc240t-9370-t + - st,sf-tc240t-9370-t - const: ilitek,ili9341 reg: true From 413e8d06ad896dae9bbc6f97b0abea5eae5495f1 Mon Sep 17 00:00:00 2001 From: Christophe Branchereau Date: Tue, 14 Sep 2021 11:27:16 +0200 Subject: [PATCH 092/235] drm/panel: abt-y030xx067a: yellow tint fix The previous parameters caused an unbalanced yellow tint. 
Fixes: 7467389bdafb ("drm/panel: Add ABT Y030XX067A 3.0" 320x480 panel") Signed-off-by: Christophe Branchereau Acked-by: Sam Ravnborg [Paul: Add Fixes: tag, and fix case and punctuation in commit message] Signed-off-by: Paul Cercueil Link: https://patchwork.freedesktop.org/patch/msgid/20210914092716.2370039-1-cbranchereau@gmail.com Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/panel/panel-abt-y030xx067a.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-abt-y030xx067a.c b/drivers/gpu/drm/panel/panel-abt-y030xx067a.c index 2d8794d495d0..3d8a9ab47cae 100644 --- a/drivers/gpu/drm/panel/panel-abt-y030xx067a.c +++ b/drivers/gpu/drm/panel/panel-abt-y030xx067a.c @@ -146,8 +146,8 @@ static const struct reg_sequence y030xx067a_init_sequence[] = { { 0x09, REG09_SUB_BRIGHT_R(0x20) }, { 0x0a, REG0A_SUB_BRIGHT_B(0x20) }, { 0x0b, REG0B_HD_FREERUN | REG0B_VD_FREERUN }, - { 0x0c, REG0C_CONTRAST_R(0x10) }, - { 0x0d, REG0D_CONTRAST_G(0x10) }, + { 0x0c, REG0C_CONTRAST_R(0x00) }, + { 0x0d, REG0D_CONTRAST_G(0x00) }, { 0x0e, REG0E_CONTRAST_B(0x10) }, { 0x0f, 0 }, { 0x10, REG10_BRIGHT(0x7f) }, From ec7cc3f74b4236860ce612656aa5be7936d1c594 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 Sep 2021 16:52:10 +0200 Subject: [PATCH 093/235] fbdev: simplefb: fix Kconfig dependencies Configurations with both CONFIG_FB_SIMPLE=y and CONFIG_DRM_SIMPLEDRM=m are allowed by Kconfig because the 'depends on !DRM_SIMPLEDRM' dependency does not disallow FB_SIMPLE as long as SIMPLEDRM is not built-in. 
This can however result in a build failure when cfb_fillrect() etc are then also in loadable modules: x86_64-linux-ld: drivers/video/fbdev/simplefb.o:(.rodata+0x1f8): undefined reference to `cfb_fillrect' x86_64-linux-ld: drivers/video/fbdev/simplefb.o:(.rodata+0x200): undefined reference to `cfb_copyarea' x86_64-linux-ld: drivers/video/fbdev/simplefb.o:(.rodata+0x208): undefined reference to `cfb_imageblit' To work around this, change FB_SIMPLE to be a 'tristate' symbol, which still allows both to be =m together, but not one of them to be =y if the other one is =m. If a distro kernel picks this configuration, it can be determined by local policy which of the two modules gets loaded. The 'of_chosen' export is needed as this is the first loadable module referencing it. Alternatively, the Kconfig dependency could be changed to 'depends on DRM_SIMPLEDRM=n', which would forbid the configuration with both drivers. Fixes: 11e8f5fd223b ("drm: Add simpledrm driver") Acked-by: Rob Herring # for drivers/of/ Link: https://lore.kernel.org/all/20210721151839.2484245-1-arnd@kernel.org/ Signed-off-by: Arnd Bergmann Cc: Thomas Zimmermann Cc: Daniel Vetter # fbdev support Cc: Maxime Ripard Cc: Liam Girdwood Cc: Mark Brown Cc: Daniel Vetter Cc: Borislav Petkov Cc: Javier Martinez Canillas Cc: Randy Dunlap Cc: Geert Uytterhoeven Cc: Peter Collingbourne Cc: Andy Shevchenko Cc: dri-devel@lists.freedesktop.org Cc: linux-kernel@vger.kernel.org Cc: # v5.14+ Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20210928145243.1098064-1-arnd@kernel.org Signed-off-by: Maarten Lankhorst --- drivers/of/base.c | 1 + drivers/video/fbdev/Kconfig | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/of/base.c b/drivers/of/base.c index f720c0d246f2..0ac17256258d 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -36,6 +36,7 @@ LIST_HEAD(aliases_lookup); struct device_node *of_root; EXPORT_SYMBOL(of_root); struct device_node *of_chosen; 
+EXPORT_SYMBOL(of_chosen); struct device_node *of_aliases; struct device_node *of_stdout; static const char *of_stdout_options; diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig index b26b79dfcac9..6ed5e608dd04 100644 --- a/drivers/video/fbdev/Kconfig +++ b/drivers/video/fbdev/Kconfig @@ -2193,8 +2193,9 @@ config FB_HYPERV This framebuffer driver supports Microsoft Hyper-V Synthetic Video. config FB_SIMPLE - bool "Simple framebuffer support" - depends on (FB = y) && !DRM_SIMPLEDRM + tristate "Simple framebuffer support" + depends on FB + depends on !DRM_SIMPLEDRM select FB_CFB_FILLRECT select FB_CFB_COPYAREA select FB_CFB_IMAGEBLIT From 11b8e2bb986d23157e82e267fb8cc6b281dfdee9 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 21 Sep 2021 22:21:02 +0100 Subject: [PATCH 094/235] video: fbdev: gbefb: Only instantiate device when built for IP32 The gbefb driver not only registers a driver but also the device for that driver. This is all well and good when run on the IP32 machines that are supported by the driver but since the driver supports building with COMPILE_TEST we might also be building on other platforms which do not have this hardware and will crash instantiating the driver. Add an IS_ENABLED() check so we compile out the device registration if we don't have the Kconfig option for the machine enabled. 
Fixes: 552ccf6b259d290c0c ("video: fbdev: gbefb: add COMPILE_TEST support") Signed-off-by: Mark Brown Cc: Bartlomiej Zolnierkiewicz Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210921212102.30803-1-broonie@kernel.org Signed-off-by: Maarten Lankhorst --- drivers/video/fbdev/gbefb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/gbefb.c b/drivers/video/fbdev/gbefb.c index c5b99a4861e8..6b4d5a7f3e15 100644 --- a/drivers/video/fbdev/gbefb.c +++ b/drivers/video/fbdev/gbefb.c @@ -1267,7 +1267,7 @@ static struct platform_device *gbefb_device; static int __init gbefb_init(void) { int ret = platform_driver_register(&gbefb_driver); - if (!ret) { + if (IS_ENABLED(CONFIG_SGI_IP32) && !ret) { gbefb_device = platform_device_alloc("gbefb", 0); if (gbefb_device) { ret = platform_device_add(gbefb_device); From b67929808fe46d67cc9357b0112e4549076db4c5 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 4 Oct 2021 19:53:12 -0700 Subject: [PATCH 095/235] DRM: delete DRM IRQ legacy midlayer docs Remove documentation associated with the removal of the DRM IRQ legacy midlayer. 
Eliminates these documentation warnings: ../drivers/gpu/drm/drm_irq.c:1: warning: 'irq helpers' not found ../drivers/gpu/drm/drm_irq.c:1: warning: no structured comments found Fixes: c1736b9008cb ("drm: IRQ midlayer is now legacy") Signed-off-by: Randy Dunlap Cc: Thomas Zimmermann Cc: Sam Ravnborg Cc: dri-devel@lists.freedesktop.org Cc: Jonathan Corbet Cc: linux-doc@vger.kernel.org Cc: David Airlie Cc: Daniel Vetter Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20211005025312.20913-1-rdunlap@infradead.org Signed-off-by: Maarten Lankhorst --- Documentation/gpu/drm-internals.rst | 9 --------- 1 file changed, 9 deletions(-) diff --git a/Documentation/gpu/drm-internals.rst b/Documentation/gpu/drm-internals.rst index 06af044c882f..607f78f0f189 100644 --- a/Documentation/gpu/drm-internals.rst +++ b/Documentation/gpu/drm-internals.rst @@ -111,15 +111,6 @@ Component Helper Usage .. kernel-doc:: drivers/gpu/drm/drm_drv.c :doc: component helper usage recommendations -IRQ Helper Library -~~~~~~~~~~~~~~~~~~ - -.. kernel-doc:: drivers/gpu/drm/drm_irq.c - :doc: irq helpers - -.. kernel-doc:: drivers/gpu/drm/drm_irq.c - :export: - Memory Manager Initialization ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From bcf34aa5082ee2343574bc3f4d1c126030913e54 Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Wed, 2 Dec 2020 19:02:20 -0500 Subject: [PATCH 096/235] drm/nouveau: avoid a use-after-free when BO init fails nouveau_bo_init() is backed by ttm_bo_init() and ferries its return code back to the caller. On failures, ttm_bo_init() invokes the provided destructor which should de-initialize and free the memory. Thus, when nouveau_bo_init() returns an error the gem object has already been released and the memory freed by nouveau_bo_del_ttm(). 
Fixes: 019cbd4a4feb ("drm/nouveau: Initialize GEM object before TTM object") Cc: Thierry Reding Signed-off-by: Jeremy Cline Reviewed-by: Lyude Paul Reviewed-by: Karol Herbst Signed-off-by: Karol Herbst Link: https://patchwork.freedesktop.org/patch/msgid/20201203000220.18238-1-jcline@redhat.com Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/nouveau/nouveau_gem.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 5b27845075a1..8c2ecc282723 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -247,10 +247,8 @@ nouveau_gem_new(struct nouveau_cli *cli, u64 size, int align, uint32_t domain, } ret = nouveau_bo_init(nvbo, size, align, domain, NULL, NULL); - if (ret) { - nouveau_bo_ref(NULL, &nvbo); + if (ret) return ret; - } /* we restrict allowed domains on nv50+ to only the types * that were requested at creation time. not possibly on From 0b3d4945cc7e7ea1acd52cb06dfa83bfe265b6d5 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Sat, 11 Sep 2021 15:50:22 +0800 Subject: [PATCH 097/235] drm/nouveau/kms/nv50-: fix file release memory leak When using single_open() for opening, single_release() should be called, otherwise the 'op' allocated in single_open() will be leaked. 
Fixes: 12885ecbfe62 ("drm/nouveau/kms/nvd9-: Add CRC support") Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Reviewed-by: Karol Herbst Signed-off-by: Karol Herbst Link: https://patchwork.freedesktop.org/patch/msgid/20210911075023.3969054-1-yangyingliang@huawei.com Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/nouveau/dispnv50/crc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/nouveau/dispnv50/crc.c b/drivers/gpu/drm/nouveau/dispnv50/crc.c index b8c31b697797..66f32d965c72 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/crc.c +++ b/drivers/gpu/drm/nouveau/dispnv50/crc.c @@ -704,6 +704,7 @@ static const struct file_operations nv50_crc_flip_threshold_fops = { .open = nv50_crc_debugfs_flip_threshold_open, .read = seq_read, .write = nv50_crc_debugfs_flip_threshold_set, + .release = single_release, }; int nv50_head_crc_late_register(struct nv50_head *head) From f5a8703a9c418c6fc54eb772712dfe7641e3991c Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Sat, 11 Sep 2021 15:50:23 +0800 Subject: [PATCH 098/235] drm/nouveau/debugfs: fix file release memory leak When using single_open() for opening, single_release() should be called, otherwise the 'op' allocated in single_open() will be leaked. 
Fixes: 6e9fc177399f ("drm/nouveau/debugfs: add copy of sysfs pstate interface ported to debugfs") Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Reviewed-by: Karol Herbst Signed-off-by: Karol Herbst Link: https://patchwork.freedesktop.org/patch/msgid/20210911075023.3969054-2-yangyingliang@huawei.com Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/nouveau/nouveau_debugfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.c b/drivers/gpu/drm/nouveau/nouveau_debugfs.c index c2bc05eb2e54..1cbe01048b93 100644 --- a/drivers/gpu/drm/nouveau/nouveau_debugfs.c +++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.c @@ -207,6 +207,7 @@ static const struct file_operations nouveau_pstate_fops = { .open = nouveau_debugfs_pstate_open, .read = seq_read, .write = nouveau_debugfs_pstate_set, + .release = single_release, }; static struct drm_info_list nouveau_debugfs_list[] = { From dd6dd6e3c791db7fdbc5433ec7e450717aa3a0ce Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Wed, 6 Oct 2021 15:04:15 +0200 Subject: [PATCH 099/235] ALSA: hda/realtek: Add quirk for TongFang PHxTxX1 This applies a SND_PCI_QUIRK(...) to the TongFang PHxTxX1 barebone. This fixes the issue of the internal microphone not working after booting another OS. When booting a certain other OS this barebone keeps some coeff settings even after a cold shutdown. These coeffs prevent the microphone detection from working in Linux, making the laptop think that there is always an external microphone plugged-in and therefore preventing the use of the internal one. The relevant indexes and values were gathered by naively diff-ing and reading a working and a non-working coeff dump.
Signed-off-by: Werner Sembach Cc: Link: https://lore.kernel.org/r/20211006130415.538243-1-wse@tuxedocomputers.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 0689f43fc7af..73e7a92c3728 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6453,6 +6453,24 @@ static void alc287_fixup_legion_15imhg05_speakers(struct hda_codec *codec, /* for alc285_fixup_ideapad_s740_coef() */ #include "ideapad_s740_helper.c" +static void alc256_fixup_tongfang_reset_persistent_settings(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + /* + * A certain other OS sets these coeffs to different values. On at least one TongFang + * barebone these settings might survive even a cold reboot. So to restore a clean slate the + * values are explicitly reset to default here. Without this, the external microphone is + * always in a plugged-in state, while the internal microphone is always in an unplugged + * state, breaking the ability to use the internal microphone. 
+ */ + alc_write_coef_idx(codec, 0x24, 0x0000); + alc_write_coef_idx(codec, 0x26, 0x0000); + alc_write_coef_idx(codec, 0x29, 0x3000); + alc_write_coef_idx(codec, 0x37, 0xfe05); + alc_write_coef_idx(codec, 0x45, 0x5089); +} + enum { ALC269_FIXUP_GPIO2, ALC269_FIXUP_SONY_VAIO, @@ -6667,7 +6685,8 @@ enum { ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS, ALC287_FIXUP_LEGION_15IMHG05_AUTOMUTE, ALC287_FIXUP_YOGA7_14ITL_SPEAKERS, - ALC287_FIXUP_13S_GEN2_SPEAKERS + ALC287_FIXUP_13S_GEN2_SPEAKERS, + ALC256_FIXUP_TONGFANG_RESET_PERSISTENT_SETTINGS, }; static const struct hda_fixup alc269_fixups[] = { @@ -8365,6 +8384,10 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_HEADSET_MODE, }, + [ALC256_FIXUP_TONGFANG_RESET_PERSISTENT_SETTINGS] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc256_fixup_tongfang_reset_persistent_settings, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -8796,6 +8819,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1b7d, 0xa831, "Ordissimo EVE2 ", ALC269VB_FIXUP_ORDISSIMO_EVE2), /* Also known as Malata PC-B1303 */ SND_PCI_QUIRK(0x1c06, 0x2013, "Lemote A1802", ALC269_FIXUP_LEMOTE_A1802), SND_PCI_QUIRK(0x1c06, 0x2015, "Lemote A190X", ALC269_FIXUP_LEMOTE_A190X), + SND_PCI_QUIRK(0x1d05, 0x1132, "TongFang PHxTxX1", ALC256_FIXUP_TONGFANG_RESET_PERSISTENT_SETTINGS), SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC), From af467fad78f03a42de8b72190f6a595366b870db Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Fri, 24 Sep 2021 11:28:50 +0300 Subject: [PATCH 100/235] mmc: sdhci-of-at91: wait for calibration done before proceed Datasheet specifies that at the end of calibration the SDMMC_CALCR_EN bit will be cleared. No commands should be sent before calibration is done.
Fixes: dbdea70f71d67 ("mmc: sdhci-of-at91: fix CALCR register being rewritten") Fixes: 727d836a375ad ("mmc: sdhci-of-at91: add DT property to enable calibration on full reset") Signed-off-by: Claudiu Beznea Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210924082851.2132068-2-claudiu.beznea@microchip.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-of-at91.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c index 5564d7b23e7c..134ba01d3063 100644 --- a/drivers/mmc/host/sdhci-of-at91.c +++ b/drivers/mmc/host/sdhci-of-at91.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -114,6 +115,7 @@ static void sdhci_at91_reset(struct sdhci_host *host, u8 mask) { struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct sdhci_at91_priv *priv = sdhci_pltfm_priv(pltfm_host); + unsigned int tmp; sdhci_reset(host, mask); @@ -126,6 +128,10 @@ static void sdhci_at91_reset(struct sdhci_host *host, u8 mask) sdhci_writel(host, calcr | SDMMC_CALCR_ALWYSON | SDMMC_CALCR_EN, SDMMC_CALCR); + + if (read_poll_timeout(sdhci_readl, tmp, !(tmp & SDMMC_CALCR_EN), + 10, 20000, false, host, SDMMC_CALCR)) + dev_err(mmc_dev(host->mmc), "Failed to calibrate\n"); } } From 30d4b990ec644e8bd49ef0a2f074fabc0d189e53 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Fri, 24 Sep 2021 11:28:51 +0300 Subject: [PATCH 101/235] mmc: sdhci-of-at91: replace while loop with read_poll_timeout Replace while loop with read_poll_timeout(). 
Signed-off-by: Claudiu Beznea Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210924082851.2132068-3-claudiu.beznea@microchip.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-of-at91.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c index 134ba01d3063..d1a1c548c515 100644 --- a/drivers/mmc/host/sdhci-of-at91.c +++ b/drivers/mmc/host/sdhci-of-at91.c @@ -62,7 +62,6 @@ static void sdhci_at91_set_force_card_detect(struct sdhci_host *host) static void sdhci_at91_set_clock(struct sdhci_host *host, unsigned int clock) { u16 clk; - unsigned long timeout; host->mmc->actual_clock = 0; @@ -87,16 +86,11 @@ static void sdhci_at91_set_clock(struct sdhci_host *host, unsigned int clock) sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL); /* Wait max 20 ms */ - timeout = 20; - while (!((clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL)) - & SDHCI_CLOCK_INT_STABLE)) { - if (timeout == 0) { - pr_err("%s: Internal clock never stabilised.\n", - mmc_hostname(host->mmc)); - return; - } - timeout--; - mdelay(1); + if (read_poll_timeout(sdhci_readw, clk, (clk & SDHCI_CLOCK_INT_STABLE), + 1000, 20000, false, host, SDHCI_CLOCK_CONTROL)) { + pr_err("%s: Internal clock never stabilised.\n", + mmc_hostname(host->mmc)); + return; } clk |= SDHCI_CLOCK_CARD_EN; From 8a38a4d51c5055d0201542e5ea3c0cb287f6e223 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 28 Sep 2021 09:36:52 +0200 Subject: [PATCH 102/235] mmc: meson-gx: do not use memcpy_to/fromio for dram-access-quirk The memory at the end of the controller only accepts 32bit read/write accesses, but the arm64 memcpy_to/fromio implementation only uses 64bit (which will be split into two 32bit access) and 8bit leading to incomplete copies to/from this memory when the buffer is not multiple of 8bytes. Add a local copy using writel/readl accesses to make sure we use the right memory access width. 
The switch to memcpy_to/fromio was done because of 285133040e6c ("arm64: Import latest memcpy()/memmove() implementation"), but using memcpy worked before since it mainly used 32bit memory accesses. Fixes: 103a5348c22c ("mmc: meson-gx: use memcpy_to/fromio for dram-access-quirk") Reported-by: Christian Hewitt Suggested-by: Martin Blumenstingl Signed-off-by: Neil Armstrong Tested-by: Martin Blumenstingl Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210928073652.434690-1-narmstrong@baylibre.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/meson-gx-mmc.c | 73 ++++++++++++++++++++++++++------- 1 file changed, 59 insertions(+), 14 deletions(-) diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c index 3f28eb4d17fe..8f36536cb1b6 100644 --- a/drivers/mmc/host/meson-gx-mmc.c +++ b/drivers/mmc/host/meson-gx-mmc.c @@ -746,7 +746,7 @@ static void meson_mmc_desc_chain_transfer(struct mmc_host *mmc, u32 cmd_cfg) writel(start, host->regs + SD_EMMC_START); } -/* local sg copy to buffer version with _to/fromio usage for dram_access_quirk */ +/* local sg copy for dram_access_quirk */ static void meson_mmc_copy_buffer(struct meson_host *host, struct mmc_data *data, size_t buflen, bool to_buffer) { @@ -764,21 +764,27 @@ static void meson_mmc_copy_buffer(struct meson_host *host, struct mmc_data *data sg_miter_start(&miter, sgl, nents, sg_flags); while ((offset < buflen) && sg_miter_next(&miter)) { - unsigned int len; + unsigned int buf_offset = 0; + unsigned int len, left; + u32 *buf = miter.addr; len = min(miter.length, buflen - offset); + left = len; - /* When dram_access_quirk, the bounce buffer is a iomem mapping */ - if (host->dram_access_quirk) { - if (to_buffer) - memcpy_toio(host->bounce_iomem_buf + offset, miter.addr, len); - else - memcpy_fromio(miter.addr, host->bounce_iomem_buf + offset, len); + if (to_buffer) { + do { + writel(*buf++, host->bounce_iomem_buf + offset + buf_offset); + + buf_offset += 4; + left -= 4; + } while
(left); } else { - if (to_buffer) - memcpy(host->bounce_buf + offset, miter.addr, len); - else - memcpy(miter.addr, host->bounce_buf + offset, len); + do { + *buf++ = readl(host->bounce_iomem_buf + offset + buf_offset); + + buf_offset += 4; + left -= 4; + } while (left); } offset += len; @@ -830,7 +836,11 @@ static void meson_mmc_start_cmd(struct mmc_host *mmc, struct mmc_command *cmd) if (data->flags & MMC_DATA_WRITE) { cmd_cfg |= CMD_CFG_DATA_WR; WARN_ON(xfer_bytes > host->bounce_buf_size); - meson_mmc_copy_buffer(host, data, xfer_bytes, true); + if (host->dram_access_quirk) + meson_mmc_copy_buffer(host, data, xfer_bytes, true); + else + sg_copy_to_buffer(data->sg, data->sg_len, + host->bounce_buf, xfer_bytes); dma_wmb(); } @@ -849,12 +859,43 @@ static void meson_mmc_start_cmd(struct mmc_host *mmc, struct mmc_command *cmd) writel(cmd->arg, host->regs + SD_EMMC_CMD_ARG); } +static int meson_mmc_validate_dram_access(struct mmc_host *mmc, struct mmc_data *data) +{ + struct scatterlist *sg; + int i; + + /* Reject request if any element offset or size is not 32bit aligned */ + for_each_sg(data->sg, sg, data->sg_len, i) { + if (!IS_ALIGNED(sg->offset, sizeof(u32)) || + !IS_ALIGNED(sg->length, sizeof(u32))) { + dev_err(mmc_dev(mmc), "unaligned sg offset %u len %u\n", + data->sg->offset, data->sg->length); + return -EINVAL; + } + } + + return 0; +} + static void meson_mmc_request(struct mmc_host *mmc, struct mmc_request *mrq) { struct meson_host *host = mmc_priv(mmc); bool needs_pre_post_req = mrq->data && !(mrq->data->host_cookie & SD_EMMC_PRE_REQ_DONE); + /* + * The memory at the end of the controller used as bounce buffer for + * the dram_access_quirk only accepts 32bit read/write access, + * check the aligment and length of the data before starting the request. 
+ */ + if (host->dram_access_quirk && mrq->data) { + mrq->cmd->error = meson_mmc_validate_dram_access(mmc, mrq->data); + if (mrq->cmd->error) { + mmc_request_done(mmc, mrq); + return; + } + } + if (needs_pre_post_req) { meson_mmc_get_transfer_mode(mmc, mrq); if (!meson_mmc_desc_chain_mode(mrq->data)) @@ -999,7 +1040,11 @@ static irqreturn_t meson_mmc_irq_thread(int irq, void *dev_id) if (meson_mmc_bounce_buf_read(data)) { xfer_bytes = data->blksz * data->blocks; WARN_ON(xfer_bytes > host->bounce_buf_size); - meson_mmc_copy_buffer(host, data, xfer_bytes, false); + if (host->dram_access_quirk) + meson_mmc_copy_buffer(host, data, xfer_bytes, false); + else + sg_copy_from_buffer(data->sg, data->sg_len, + host->bounce_buf, xfer_bytes); } next_cmd = meson_mmc_get_next_command(cmd); From 64e87d4bd3201bf8a4685083ee4daf5c0d001452 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 17 Sep 2021 16:59:58 +0000 Subject: [PATCH 103/235] x86/resctrl: Free the ctrlval arrays when domain_setup_mon_state() fails domain_add_cpu() is called whenever a CPU is brought online. The earlier call to domain_setup_ctrlval() allocates the control value arrays. If domain_setup_mon_state() fails, the control value arrays are not freed. Add the missing kfree() calls. 
Fixes: 1bd2a63b4f0de ("x86/intel_rdt/mba_sc: Add initialization support") Fixes: edf6fa1c4a951 ("x86/intel_rdt/cqm: Add RMID (Resource monitoring ID) management") Signed-off-by: James Morse Signed-off-by: Borislav Petkov Acked-by: Reinette Chatre Cc: Link: https://lkml.kernel.org/r/20210917165958.28313-1-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 4b8813bafffd..b5de5a6c115c 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -532,6 +532,8 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) } if (r->mon_capable && domain_setup_mon_state(r, d)) { + kfree(hw_dom->ctrl_val); + kfree(hw_dom->mbps_val); kfree(d); return; } From d4ebfca26dfab2803c31d68a30225be31b2f9ecf Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 17 Sep 2021 16:59:24 +0000 Subject: [PATCH 104/235] x86/resctrl: Fix kfree() of the wrong type in domain_add_cpu() Commit in Fixes separated the architecture specific and filesystem parts of the resctrl domain structures. This left the error paths in domain_add_cpu() kfree()ing the memory with the wrong type. This will cause a problem if someone adds a new member to struct rdt_hw_domain meaning d_resctrl is no longer the first member. 
Fixes: 792e0f6f789b ("x86/resctrl: Split struct rdt_domain") Signed-off-by: James Morse Signed-off-by: Borislav Petkov Acked-by: Reinette Chatre Link: https://lkml.kernel.org/r/20210917165924.28254-1-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index b5de5a6c115c..bb1c3f5f60c8 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -527,14 +527,14 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) rdt_domain_reconfigure_cdp(r); if (r->alloc_capable && domain_setup_ctrlval(r, d)) { - kfree(d); + kfree(hw_dom); return; } if (r->mon_capable && domain_setup_mon_state(r, d)) { kfree(hw_dom->ctrl_val); kfree(hw_dom->mbps_val); - kfree(d); + kfree(hw_dom); return; } From 2c861f2b859385e9eaa6e464a8a7435b5a6bf564 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Tue, 3 Aug 2021 13:35:23 +0200 Subject: [PATCH 105/235] x86/entry: Correct reference to intended CONFIG_64_BIT Commit in Fixes adds a condition with IS_ENABLED(CONFIG_64_BIT), but the intended config item is called CONFIG_64BIT, as defined in arch/x86/Kconfig. Fortunately, scripts/checkkconfigsymbols.py warns: 64_BIT Referencing files: arch/x86/include/asm/entry-common.h Correct the reference to the intended config symbol. 
Fixes: 662a0221893a ("x86/entry: Fix AC assertion") Suggested-by: Randy Dunlap Signed-off-by: Lukas Bulwahn Signed-off-by: Borislav Petkov Cc: Link: https://lkml.kernel.org/r/20210803113531.30720-2-lukas.bulwahn@gmail.com --- arch/x86/include/asm/entry-common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h index 14ebd2196569..43184640b579 100644 --- a/arch/x86/include/asm/entry-common.h +++ b/arch/x86/include/asm/entry-common.h @@ -25,7 +25,7 @@ static __always_inline void arch_check_user_regs(struct pt_regs *regs) * For !SMAP hardware we patch out CLAC on entry. */ if (boot_cpu_has(X86_FEATURE_SMAP) || - (IS_ENABLED(CONFIG_64_BIT) && boot_cpu_has(X86_FEATURE_XENPV))) + (IS_ENABLED(CONFIG_64BIT) && boot_cpu_has(X86_FEATURE_XENPV))) mask |= X86_EFLAGS_AC; WARN_ON_ONCE(flags & mask); From 3958b9c34c2729597e182cc606cc43942fd19f7c Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Mon, 4 Oct 2021 00:34:23 +0200 Subject: [PATCH 106/235] x86/entry: Clear X86_FEATURE_SMAP when CONFIG_X86_SMAP=n Commit 3c73b81a9164 ("x86/entry, selftests: Further improve user entry sanity checks") added a warning if AC is set when in the kernel. Commit 662a0221893a3d ("x86/entry: Fix AC assertion") changed the warning to only fire if the CPU supports SMAP. However, the warning can still trigger on a machine that supports SMAP but where it's disabled in the kernel config and when running the syscall_nt selftest, for example: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 49 at irqentry_enter_from_user_mode CPU: 0 PID: 49 Comm: init Tainted: G T 5.15.0-rc4+ #98 e6202628ee053b4f310759978284bd8bb0ce6905 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 RIP: 0010:irqentry_enter_from_user_mode ... Call Trace: ? irqentry_enter ? exc_general_protection ? asm_exc_general_protection ? 
asm_exc_general_protectio IS_ENABLED(CONFIG_X86_SMAP) could be added to the warning condition, but even this would not be enough in case SMAP is disabled at boot time with the "nosmap" parameter. To be consistent with "nosmap" behaviour, clear X86_FEATURE_SMAP when !CONFIG_X86_SMAP. Found using entry-fuzz + satrandconfig. [ bp: Massage commit message. ] Fixes: 3c73b81a9164 ("x86/entry, selftests: Further improve user entry sanity checks") Fixes: 662a0221893a ("x86/entry: Fix AC assertion") Signed-off-by: Vegard Nossum Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20211003223423.8666-1-vegard.nossum@oracle.com --- arch/x86/kernel/cpu/common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 0f8885949e8c..b3410f1ac217 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -326,6 +326,7 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_SMAP cr4_set_bits(X86_CR4_SMAP); #else + clear_cpu_cap(c, X86_FEATURE_SMAP); cr4_clear_bits(X86_CR4_SMAP); #endif } From 4758fd801f919b8b9acad78d2e49a195ec2be46b Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Tue, 3 Aug 2021 13:35:24 +0200 Subject: [PATCH 107/235] x86/platform/olpc: Correct ifdef symbol to intended CONFIG_OLPC_XO15_SCI The refactoring in the commit in Fixes introduced an ifdef CONFIG_OLPC_XO1_5_SCI, however the config symbol is actually called "CONFIG_OLPC_XO15_SCI". Fortunately, ./scripts/checkkconfigsymbols.py warns: OLPC_XO1_5_SCI Referencing files: arch/x86/platform/olpc/olpc.c Correct this ifdef condition to the intended config symbol. 
Fixes: ec9964b48033 ("Platform: OLPC: Move EC-specific functionality out from x86") Suggested-by: Randy Dunlap Signed-off-by: Lukas Bulwahn Signed-off-by: Borislav Petkov Cc: Link: https://lkml.kernel.org/r/20210803113531.30720-3-lukas.bulwahn@gmail.com --- arch/x86/platform/olpc/olpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/platform/olpc/olpc.c b/arch/x86/platform/olpc/olpc.c index ee2beda590d0..1d4a00e767ec 100644 --- a/arch/x86/platform/olpc/olpc.c +++ b/arch/x86/platform/olpc/olpc.c @@ -274,7 +274,7 @@ static struct olpc_ec_driver ec_xo1_driver = { static struct olpc_ec_driver ec_xo1_5_driver = { .ec_cmd = olpc_xo1_ec_cmd, -#ifdef CONFIG_OLPC_XO1_5_SCI +#ifdef CONFIG_OLPC_XO15_SCI /* * XO-1.5 EC wakeups are available when olpc-xo15-sci driver is * compiled in From 225bac2dc5d192e55f2c50123ee539b1edf8a411 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Tue, 3 Aug 2021 13:35:25 +0200 Subject: [PATCH 108/235] x86/Kconfig: Correct reference to MWINCHIP3D Commit in Fixes intended to exclude the Winchip series and referred to CONFIG_WINCHIP3D, but the config symbol is called CONFIG_MWINCHIP3D. Hence, scripts/checkkconfigsymbols.py warns: WINCHIP3D Referencing files: arch/x86/Kconfig Correct the reference to the intended config symbol. 
Fixes: 69b8d3fcabdc ("x86/Kconfig: Exclude i586-class CPUs lacking PAE support from the HIGHMEM64G Kconfig group") Suggested-by: Randy Dunlap Signed-off-by: Lukas Bulwahn Signed-off-by: Borislav Petkov Cc: Link: https://lkml.kernel.org/r/20210803113531.30720-4-lukas.bulwahn@gmail.com --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ab83c22d274e..8055da49f1c0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1405,7 +1405,7 @@ config HIGHMEM4G config HIGHMEM64G bool "64GB" - depends on !M486SX && !M486 && !M586 && !M586TSC && !M586MMX && !MGEODE_LX && !MGEODEGX1 && !MCYRIXIII && !MELAN && !MWINCHIPC6 && !WINCHIP3D && !MK6 + depends on !M486SX && !M486 && !M586 && !M586TSC && !M586MMX && !MGEODE_LX && !MGEODEGX1 && !MCYRIXIII && !MELAN && !MWINCHIPC6 && !MWINCHIP3D && !MK6 select X86_PAE help Select this if you have a 32-bit processor and more than 4 From 2387033ac0db3235f3fa9d9976aeeeb50349550e Mon Sep 17 00:00:00 2001 From: George Shen Date: Fri, 1 Oct 2021 22:36:09 +0800 Subject: [PATCH 109/235] drm/amd/display: Skip override for preferred link settings during link training [Why] Overriding link setting inside override_training_settings result in fallback link settings being ignored. This can potentially cause link training to always fail and consequently result in an infinite loop of link training to occur in dp_verify_link_cap during detection. [How] Since preferred link settings are already considered inside decide_link_settings, skip the check in override_training_settings to avoid infinite link training loops. 
Reviewed-by: Wenjing Liu Acked-by: Solomon Chiu Signed-off-by: George Shen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 05eaec03d9f7..6d655e158267 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -1306,12 +1306,6 @@ static void override_training_settings( { uint32_t lane; - /* Override link settings */ - if (link->preferred_link_setting.link_rate != LINK_RATE_UNKNOWN) - lt_settings->link_settings.link_rate = link->preferred_link_setting.link_rate; - if (link->preferred_link_setting.lane_count != LANE_COUNT_UNKNOWN) - lt_settings->link_settings.lane_count = link->preferred_link_setting.lane_count; - /* Override link spread */ if (!link->dp_ss_off && overrides->downspread != NULL) lt_settings->link_settings.link_spread = *overrides->downspread ? 
From a7e397b7c45377e20542146be10231b8afa948d1 Mon Sep 17 00:00:00 2001 From: Nikola Cornij Date: Fri, 1 Oct 2021 22:36:14 +0800 Subject: [PATCH 110/235] drm/amd/display: Limit display scaling to up to 4k for DCN 3.1 [why] The existing limit was mistakenly bigger than 4k for DCN 3.1 Reviewed-by: Zhan Liu Acked-by: Solomon Chiu Signed-off-by: Nikola Cornij Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index cb50e6eda47e..0006bbac466c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -928,7 +928,7 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_dcc = DCC_ENABLE, .vsr_support = true, .performance_trace = false, - .max_downscale_src_width = 7680,/*upto 8K*/ + .max_downscale_src_width = 3840,/*upto 4K*/ .disable_pplib_wm_range = false, .scl_reset_length10 = true, .sanity_checks = false, From 5a1fef027846e7635b9d320b2cc0b416fd11a3be Mon Sep 17 00:00:00 2001 From: Hansen Date: Fri, 1 Oct 2021 22:36:15 +0800 Subject: [PATCH 111/235] drm/amd/display: Fix detection of 4 lane for DPALT [Why] DPALT detection for B0 PHY has its own set of RDPCSPIPE registers [How] Use RDPCSPIPE registers to detect if DPALT lane is 4 lane Reviewed-by: Charlene Liu Acked-by: Solomon Chiu Signed-off-by: Hansen Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- .../display/dc/dcn31/dcn31_dio_link_encoder.c | 33 ++++++++++++++++++- .../display/dc/dcn31/dcn31_dio_link_encoder.h | 3 ++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c index d1870ac33143..b0892443fbd5 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c @@ -63,6 +63,10 @@ #define AUX_REG_WRITE(reg_name, val) \ dm_write_reg(CTX, AUX_REG(reg_name), val) +#ifndef MIN +#define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) +#endif + void dcn31_link_encoder_set_dio_phy_mux( struct link_encoder *enc, enum encoder_type_select sel, @@ -217,7 +221,7 @@ static const struct link_encoder_funcs dcn31_link_enc_funcs = { .get_dig_frontend = dcn10_get_dig_frontend, .get_dig_mode = dcn10_get_dig_mode, .is_in_alt_mode = dcn31_link_encoder_is_in_alt_mode, - .get_max_link_cap = dcn20_link_encoder_get_max_link_cap, + .get_max_link_cap = dcn31_link_encoder_get_max_link_cap, .set_dio_phy_mux = dcn31_link_encoder_set_dio_phy_mux, }; @@ -435,3 +439,30 @@ bool dcn31_link_encoder_is_in_alt_mode(struct link_encoder *enc) return is_usb_c_alt_mode; } + +void dcn31_link_encoder_get_max_link_cap(struct link_encoder *enc, + struct dc_link_settings *link_settings) +{ + struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); + uint32_t is_in_usb_c_dp4_mode = 0; + + dcn10_link_encoder_get_max_link_cap(enc, link_settings); + + /* in usb c dp2 mode, max lane count is 2 */ + if (enc->funcs->is_in_alt_mode && enc->funcs->is_in_alt_mode(enc)) { + if (enc->ctx->asic_id.hw_internal_rev != YELLOW_CARP_B0) { + // [Note] no need to check hw_internal_rev once phy mux selection is ready + REG_GET(RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DP4, &is_in_usb_c_dp4_mode); + } else { + if ((enc10->base.transmitter == TRANSMITTER_UNIPHY_A) + || (enc10->base.transmitter == TRANSMITTER_UNIPHY_B) + || (enc10->base.transmitter == TRANSMITTER_UNIPHY_E)) { + REG_GET(RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DP4, &is_in_usb_c_dp4_mode); + } else { + REG_GET(RDPCSPIPE_PHY_CNTL6, RDPCS_PHY_DPALT_DP4, &is_in_usb_c_dp4_mode); + } + } + if (!is_in_usb_c_dp4_mode) + link_settings->lane_count = MIN(LANE_COUNT_TWO, link_settings->lane_count); + } +} diff --git 
a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.h index bec50e4402ff..3454f1e7c1f1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.h @@ -252,4 +252,7 @@ void dcn31_link_encoder_disable_output( bool dcn31_link_encoder_is_in_alt_mode( struct link_encoder *enc); +void dcn31_link_encoder_get_max_link_cap(struct link_encoder *enc, + struct dc_link_settings *link_settings); + #endif /* __DC_LINK_ENCODER__DCN31_H__ */ From 554afc3b9797511e3245864e32aebeb6abbab1e3 Mon Sep 17 00:00:00 2001 From: Brendan Higgins Date: Wed, 29 Sep 2021 14:27:09 -0700 Subject: [PATCH 112/235] gcc-plugins/structleak: add makefile var for disabling structleak KUnit and structleak don't play nice, so add a makefile variable for disabling structleak when it complains. Co-developed-by: Kees Cook Signed-off-by: Kees Cook Signed-off-by: Brendan Higgins Reviewed-by: David Gow Signed-off-by: Shuah Khan --- scripts/Makefile.gcc-plugins | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/Makefile.gcc-plugins b/scripts/Makefile.gcc-plugins index 952e46876329..4aad28480035 100644 --- a/scripts/Makefile.gcc-plugins +++ b/scripts/Makefile.gcc-plugins @@ -19,6 +19,10 @@ gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF) \ += -fplugin-arg-structleak_plugin-byref gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL) \ += -fplugin-arg-structleak_plugin-byref-all +ifdef CONFIG_GCC_PLUGIN_STRUCTLEAK + DISABLE_STRUCTLEAK_PLUGIN += -fplugin-arg-structleak_plugin-disable +endif +export DISABLE_STRUCTLEAK_PLUGIN gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_STRUCTLEAK) \ += -DSTRUCTLEAK_PLUGIN From 2326f3cdba1d105b68cc1295e78f17ae8faa5a76 Mon Sep 17 00:00:00 2001 From: Brendan Higgins Date: Wed, 29 Sep 2021 14:27:10 -0700 Subject: [PATCH 113/235] iio/test-format: build kunit tests without structleak plugin MIME-Version: 1.0 Content-Type:
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The structleak plugin causes the stack frame size to grow immensely when used with KUnit: ../drivers/iio/test/iio-test-format.c: In function ‘iio_test_iio_format_value_fixedpoint’: ../drivers/iio/test/iio-test-format.c:98:1: warning: the frame size of 2336 bytes is larger than 2048 bytes [-Wframe-larger-than=] Turn it off in this file. Signed-off-by: Brendan Higgins Suggested-by: Arnd Bergmann Reviewed-by: Kees Cook Acked-by: Jonathan Cameron Signed-off-by: Shuah Khan --- drivers/iio/test/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/test/Makefile b/drivers/iio/test/Makefile index f1099b495301..467519a2027e 100644 --- a/drivers/iio/test/Makefile +++ b/drivers/iio/test/Makefile @@ -5,3 +5,4 @@ # Keep in alphabetical order obj-$(CONFIG_IIO_TEST_FORMAT) += iio-test-format.o +CFLAGS_iio-test-format.o += $(DISABLE_STRUCTLEAK_PLUGIN) From 6a1e2d93d55b000962b82b9a080006446150b022 Mon Sep 17 00:00:00 2001 From: Brendan Higgins Date: Wed, 29 Sep 2021 14:27:11 -0700 Subject: [PATCH 114/235] device property: build kunit tests without structleak plugin The structleak plugin causes the stack frame size to grow immensely when used with KUnit: ../drivers/base/test/property-entry-test.c:492:1: warning: the frame size of 2832 bytes is larger than 2048 bytes [-Wframe-larger-than=] ../drivers/base/test/property-entry-test.c:322:1: warning: the frame size of 2080 bytes is larger than 2048 bytes [-Wframe-larger-than=] ../drivers/base/test/property-entry-test.c:250:1: warning: the frame size of 4976 bytes is larger than 2048 bytes [-Wframe-larger-than=] ../drivers/base/test/property-entry-test.c:115:1: warning: the frame size of 3280 bytes is larger than 2048 bytes [-Wframe-larger-than=] Turn it off in this file. 
Signed-off-by: Brendan Higgins Suggested-by: Arnd Bergmann Reviewed-by: Kees Cook Signed-off-by: Shuah Khan --- drivers/base/test/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/test/Makefile b/drivers/base/test/Makefile index 64b2f3d744d5..7f76fee6f989 100644 --- a/drivers/base/test/Makefile +++ b/drivers/base/test/Makefile @@ -2,4 +2,4 @@ obj-$(CONFIG_TEST_ASYNC_DRIVER_PROBE) += test_async_driver_probe.o obj-$(CONFIG_DRIVER_PE_KUNIT_TEST) += property-entry-test.o -CFLAGS_REMOVE_property-entry-test.o += -fplugin-arg-structleak_plugin-byref -fplugin-arg-structleak_plugin-byref-all +CFLAGS_property-entry-test.o += $(DISABLE_STRUCTLEAK_PLUGIN) From 33d4951e021bb67ebd6bdb01f3d437c0f45b3c0c Mon Sep 17 00:00:00 2001 From: Brendan Higgins Date: Wed, 29 Sep 2021 14:27:12 -0700 Subject: [PATCH 115/235] thunderbolt: build kunit tests without structleak plugin The structleak plugin causes the stack frame size to grow immensely when used with KUnit: drivers/thunderbolt/test.c:1529:1: error: the frame size of 1176 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] Turn it off in this file. Linus already split up tests in this file, so this change *should* be redundant now. 
Signed-off-by: Brendan Higgins Suggested-by: Arnd Bergmann Acked-by: Mika Westerberg Reviewed-by: Kees Cook Signed-off-by: Shuah Khan --- drivers/thunderbolt/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/thunderbolt/Makefile b/drivers/thunderbolt/Makefile index da19d7987d00..78fd365893c1 100644 --- a/drivers/thunderbolt/Makefile +++ b/drivers/thunderbolt/Makefile @@ -7,6 +7,7 @@ thunderbolt-objs += usb4_port.o nvm.o retimer.o quirks.o thunderbolt-${CONFIG_ACPI} += acpi.o thunderbolt-$(CONFIG_DEBUG_FS) += debugfs.o thunderbolt-${CONFIG_USB4_KUNIT_TEST} += test.o +CFLAGS_test.o += $(DISABLE_STRUCTLEAK_PLUGIN) thunderbolt_dma_test-${CONFIG_USB4_DMA_TEST} += dma_test.o obj-$(CONFIG_USB4_DMA_TEST) += thunderbolt_dma_test.o From a8cf90332ae3e2b53813a146a99261b6a5e16a73 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 29 Sep 2021 14:27:13 -0700 Subject: [PATCH 116/235] bitfield: build kunit tests without structleak plugin The structleak plugin causes the stack frame size to grow immensely: lib/bitfield_kunit.c: In function 'test_bitfields_constants': lib/bitfield_kunit.c:93:1: error: the frame size of 7440 bytes is larger than 2048 bytes [-Werror=frame-larger-than=] Turn it off in this file. 
Signed-off-by: Arnd Bergmann Signed-off-by: Brendan Higgins Reviewed-by: Kees Cook Signed-off-by: Shuah Khan --- lib/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Makefile b/lib/Makefile index 5efd1b435a37..a841be5244ac 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -351,7 +351,7 @@ obj-$(CONFIG_OBJAGG) += objagg.o obj-$(CONFIG_PLDMFW) += pldmfw/ # KUnit tests -CFLAGS_bitfield_kunit.o := $(call cc-option,-Wframe-larger-than=10240) +CFLAGS_bitfield_kunit.o := $(DISABLE_STRUCTLEAK_PLUGIN) obj-$(CONFIG_BITFIELD_KUNIT) += bitfield_kunit.o obj-$(CONFIG_LIST_KUNIT_TEST) += list-test.o obj-$(CONFIG_LINEAR_RANGES_TEST) += test_linear_ranges.o From 361b57df62de249dc0b2acbf48823662a5001bcd Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Tue, 5 Oct 2021 13:46:32 -0700 Subject: [PATCH 117/235] kunit: fix kernel-doc warnings due to mismatched arg names Commit 7122debb4367 ("kunit: introduce kunit_kmalloc_array/kunit_kcalloc() helpers") added new functions but called last arg `flags`, unlike the existing code that used `gfp`. This only is an issue in test.h, test.c still used `gfp`. But the documentation was copy-pasted with the old names, leading to kernel-doc warnings. Do s/flags/gfp to make the names consistent and fix the warnings. Fixes: 7122debb4367 ("kunit: introduce kunit_kmalloc_array/kunit_kcalloc() helpers") Reported-by: Randy Dunlap Signed-off-by: Daniel Latypov Reviewed-by: Randy Dunlap Reviewed-by: Brendan Higgins Signed-off-by: Shuah Khan --- include/kunit/test.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/kunit/test.h b/include/kunit/test.h index 24b40e5c160b..018e776a34b9 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -613,7 +613,7 @@ void kunit_remove_resource(struct kunit *test, struct kunit_resource *res); * and is automatically cleaned up after the test case concludes. See &struct * kunit_resource for more information. 
*/ -void *kunit_kmalloc_array(struct kunit *test, size_t n, size_t size, gfp_t flags); +void *kunit_kmalloc_array(struct kunit *test, size_t n, size_t size, gfp_t gfp); /** * kunit_kmalloc() - Like kmalloc() except the allocation is *test managed*. @@ -657,9 +657,9 @@ static inline void *kunit_kzalloc(struct kunit *test, size_t size, gfp_t gfp) * * See kcalloc() and kunit_kmalloc_array() for more information. */ -static inline void *kunit_kcalloc(struct kunit *test, size_t n, size_t size, gfp_t flags) +static inline void *kunit_kcalloc(struct kunit *test, size_t n, size_t size, gfp_t gfp) { - return kunit_kmalloc_array(test, n, size, flags | __GFP_ZERO); + return kunit_kmalloc_array(test, n, size, gfp | __GFP_ZERO); } void kunit_cleanup(struct kunit *test); From c0f1886de7e173865f1a0fa7680a1c07954a987f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 6 Oct 2021 16:19:40 +0200 Subject: [PATCH 118/235] ALSA: hda: intel: Allow repeatedly probing on codec configuration errors It seems that a few recent AMD systems show codec configuration errors at early boot, while loading the driver at a later stage works magically. Although the root cause of the error isn't clear, it's certainly not bad to allow retrying the codec probe in such a case if that helps. This patch adds the capability for retrying the probe upon codec probe errors on certain AMD platforms. The probe_work is changed to a delayed work, and at the second call, it'll jump to the codec probing. Note that, besides adding the re-probing, this includes behavior changes in the codec configuration function. Namely, snd_hda_codec_configure() won't unregister the codec at errors any longer. Instead, its caller, azx_codec_configure() unregisters the codecs with the probe failures *if* any codec has been successfully configured. If all codec probes failed, it doesn't unregister them but lets them be re-probed -- which is the most common case we're seeing and the one this patch tries to improve.
Even if the driver doesn't re-probe or give up, it will go to the "free-all" error path, hence the leftover codecs shall be disabled / deleted in anyway. BugLink: https://bugzilla.suse.com/show_bug.cgi?id=1190801 Link: https://lore.kernel.org/r/20211006141940.2897-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- include/sound/hda_codec.h | 1 + sound/pci/hda/hda_bind.c | 20 +++++++++++--------- sound/pci/hda/hda_codec.c | 1 + sound/pci/hda/hda_controller.c | 24 ++++++++++++++++-------- sound/pci/hda/hda_controller.h | 2 +- sound/pci/hda/hda_intel.c | 29 +++++++++++++++++++++++------ sound/pci/hda/hda_intel.h | 4 +++- 7 files changed, 56 insertions(+), 25 deletions(-) diff --git a/include/sound/hda_codec.h b/include/sound/hda_codec.h index 01570dbda503..0e45963bb767 100644 --- a/include/sound/hda_codec.h +++ b/include/sound/hda_codec.h @@ -224,6 +224,7 @@ struct hda_codec { #endif /* misc flags */ + unsigned int configured:1; /* codec was configured */ unsigned int in_freeing:1; /* being released */ unsigned int registered:1; /* codec was registered */ unsigned int display_power_control:1; /* needs display power */ diff --git a/sound/pci/hda/hda_bind.c b/sound/pci/hda/hda_bind.c index 2523b23389e9..1c8bffc3eec6 100644 --- a/sound/pci/hda/hda_bind.c +++ b/sound/pci/hda/hda_bind.c @@ -298,29 +298,31 @@ int snd_hda_codec_configure(struct hda_codec *codec) { int err; + if (codec->configured) + return 0; + if (is_generic_config(codec)) codec->probe_id = HDA_CODEC_ID_GENERIC; else codec->probe_id = 0; - err = snd_hdac_device_register(&codec->core); - if (err < 0) - return err; + if (!device_is_registered(&codec->core.dev)) { + err = snd_hdac_device_register(&codec->core); + if (err < 0) + return err; + } if (!codec->preset) codec_bind_module(codec); if (!codec->preset) { err = codec_bind_generic(codec); if (err < 0) { - codec_err(codec, "Unable to bind the codec\n"); - goto error; + codec_dbg(codec, "Unable to bind the codec\n"); + return err; } } + codec->configured = 1; 
return 0; - - error: - snd_hdac_device_unregister(&codec->core); - return err; } EXPORT_SYMBOL_GPL(snd_hda_codec_configure); diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index a9ebefd60cf6..0c4a337c9fc0 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -791,6 +791,7 @@ void snd_hda_codec_cleanup_for_unbind(struct hda_codec *codec) snd_array_free(&codec->nids); remove_conn_list(codec); snd_hdac_regmap_exit(&codec->core); + codec->configured = 0; } EXPORT_SYMBOL_GPL(snd_hda_codec_cleanup_for_unbind); diff --git a/sound/pci/hda/hda_controller.c b/sound/pci/hda/hda_controller.c index 7cd452831fd3..930ae4002a81 100644 --- a/sound/pci/hda/hda_controller.c +++ b/sound/pci/hda/hda_controller.c @@ -25,6 +25,7 @@ #include #include #include "hda_controller.h" +#include "hda_local.h" #define CREATE_TRACE_POINTS #include "hda_controller_trace.h" @@ -1248,17 +1249,24 @@ EXPORT_SYMBOL_GPL(azx_probe_codecs); int azx_codec_configure(struct azx *chip) { struct hda_codec *codec, *next; + int success = 0; - /* use _safe version here since snd_hda_codec_configure() deregisters - * the device upon error and deletes itself from the bus list. - */ - list_for_each_codec_safe(codec, next, &chip->bus) { - snd_hda_codec_configure(codec); + list_for_each_codec(codec, &chip->bus) { + if (!snd_hda_codec_configure(codec)) + success++; } - if (!azx_bus(chip)->num_codecs) - return -ENODEV; - return 0; + if (success) { + /* unregister failed codecs if any codec has been probed */ + list_for_each_codec_safe(codec, next, &chip->bus) { + if (!codec->configured) { + codec_err(codec, "Unable to configure, disabling\n"); + snd_hdac_device_unregister(&codec->core); + } + } + } + + return success ? 
0 : -ENODEV; } EXPORT_SYMBOL_GPL(azx_codec_configure); diff --git a/sound/pci/hda/hda_controller.h b/sound/pci/hda/hda_controller.h index 3062f87380b1..f5bf295eb830 100644 --- a/sound/pci/hda/hda_controller.h +++ b/sound/pci/hda/hda_controller.h @@ -41,7 +41,7 @@ /* 24 unused */ #define AZX_DCAPS_COUNT_LPIB_DELAY (1 << 25) /* Take LPIB as delay */ #define AZX_DCAPS_PM_RUNTIME (1 << 26) /* runtime PM support */ -/* 27 unused */ +#define AZX_DCAPS_RETRY_PROBE (1 << 27) /* retry probe if no codec is configured */ #define AZX_DCAPS_CORBRP_SELF_CLEAR (1 << 28) /* CORBRP clears itself after reset */ #define AZX_DCAPS_NO_MSI64 (1 << 29) /* Stick to 32-bit MSIs */ #define AZX_DCAPS_SEPARATE_STREAM_TAG (1 << 30) /* capture and playback use separate stream tag */ diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 47777439961c..4d22e7adeee8 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -307,7 +307,8 @@ enum { /* quirks for AMD SB */ #define AZX_DCAPS_PRESET_AMD_SB \ (AZX_DCAPS_NO_TCSEL | AZX_DCAPS_AMD_WORKAROUND |\ - AZX_DCAPS_SNOOP_TYPE(ATI) | AZX_DCAPS_PM_RUNTIME) + AZX_DCAPS_SNOOP_TYPE(ATI) | AZX_DCAPS_PM_RUNTIME |\ + AZX_DCAPS_RETRY_PROBE) /* quirks for Nvidia */ #define AZX_DCAPS_PRESET_NVIDIA \ @@ -1723,7 +1724,7 @@ static void azx_check_snoop_available(struct azx *chip) static void azx_probe_work(struct work_struct *work) { - struct hda_intel *hda = container_of(work, struct hda_intel, probe_work); + struct hda_intel *hda = container_of(work, struct hda_intel, probe_work.work); azx_probe_continue(&hda->chip); } @@ -1828,7 +1829,7 @@ static int azx_create(struct snd_card *card, struct pci_dev *pci, } /* continue probing in work context as may trigger request module */ - INIT_WORK(&hda->probe_work, azx_probe_work); + INIT_DELAYED_WORK(&hda->probe_work, azx_probe_work); *rchip = chip; @@ -2142,7 +2143,7 @@ static int azx_probe(struct pci_dev *pci, #endif if (schedule_probe) - schedule_work(&hda->probe_work); + 
schedule_delayed_work(&hda->probe_work, 0); dev++; if (chip->disabled) @@ -2228,6 +2229,11 @@ static int azx_probe_continue(struct azx *chip) int dev = chip->dev_index; int err; + if (chip->disabled || hda->init_failed) + return -EIO; + if (hda->probe_retry) + goto probe_retry; + to_hda_bus(bus)->bus_probing = 1; hda->probe_continued = 1; @@ -2289,10 +2295,20 @@ static int azx_probe_continue(struct azx *chip) #endif } #endif + + probe_retry: if (bus->codec_mask && !(probe_only[dev] & 1)) { err = azx_codec_configure(chip); - if (err < 0) + if (err) { + if ((chip->driver_caps & AZX_DCAPS_RETRY_PROBE) && + ++hda->probe_retry < 60) { + schedule_delayed_work(&hda->probe_work, + msecs_to_jiffies(1000)); + return 0; /* keep things up */ + } + dev_err(chip->card->dev, "Cannot probe codecs, giving up\n"); goto out_free; + } } err = snd_card_register(chip->card); @@ -2322,6 +2338,7 @@ static int azx_probe_continue(struct azx *chip) display_power(chip, false); complete_all(&hda->probe_wait); to_hda_bus(bus)->bus_probing = 0; + hda->probe_retry = 0; return 0; } @@ -2347,7 +2364,7 @@ static void azx_remove(struct pci_dev *pci) * device during cancel_work_sync() call. 
*/ device_unlock(&pci->dev); - cancel_work_sync(&hda->probe_work); + cancel_delayed_work_sync(&hda->probe_work); device_lock(&pci->dev); snd_card_free(card); diff --git a/sound/pci/hda/hda_intel.h b/sound/pci/hda/hda_intel.h index 3fb119f09040..0f39418f9328 100644 --- a/sound/pci/hda/hda_intel.h +++ b/sound/pci/hda/hda_intel.h @@ -14,7 +14,7 @@ struct hda_intel { /* sync probing */ struct completion probe_wait; - struct work_struct probe_work; + struct delayed_work probe_work; /* card list (for power_save trigger) */ struct list_head list; @@ -30,6 +30,8 @@ struct hda_intel { unsigned int freed:1; /* resources already released */ bool need_i915_power:1; /* the hda controller needs i915 power */ + + int probe_retry; /* being probe-retry */ }; #endif From 4549c3ea3160fa8b3f37dfe2f957657bb265eda9 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Wed, 6 Oct 2021 01:55:20 +0530 Subject: [PATCH 119/235] powerpc/lib: Add helper to check if offset is within conditional branch range Add a helper to check if a given offset is within the branch range for a powerpc conditional branch instruction, and update some sites to use the new helper. Signed-off-by: Naveen N. 
Rao Reviewed-by: Christophe Leroy Acked-by: Song Liu Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/442b69a34ced32ca346a0d9a855f3f6cfdbbbd41.1633464148.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/include/asm/code-patching.h | 1 + arch/powerpc/lib/code-patching.c | 7 ++++++- arch/powerpc/net/bpf_jit.h | 7 +------ 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index a95f63788c6b..4ba834599c4d 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -23,6 +23,7 @@ #define BRANCH_ABSOLUTE 0x2 bool is_offset_in_branch_range(long offset); +bool is_offset_in_cond_branch_range(long offset); int create_branch(struct ppc_inst *instr, const u32 *addr, unsigned long target, int flags); int create_cond_branch(struct ppc_inst *instr, const u32 *addr, diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index f9a3019e37b4..c5ed98823835 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -228,6 +228,11 @@ bool is_offset_in_branch_range(long offset) return (offset >= -0x2000000 && offset <= 0x1fffffc && !(offset & 0x3)); } +bool is_offset_in_cond_branch_range(long offset) +{ + return offset >= -0x8000 && offset <= 0x7fff && !(offset & 0x3); +} + /* * Helper to check if a given instruction is a conditional branch * Derived from the conditional checks in analyse_instr() @@ -280,7 +285,7 @@ int create_cond_branch(struct ppc_inst *instr, const u32 *addr, offset = offset - (unsigned long)addr; /* Check we can represent the target in the instruction format */ - if (offset < -0x8000 || offset > 0x7FFF || offset & 0x3) + if (!is_offset_in_cond_branch_range(offset)) return 1; /* Mask out the flags and target, so they don't step on each other. 
*/ diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 99fad093f43e..935ea95b6635 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -78,11 +78,6 @@ #define PPC_FUNC_ADDR(d,i) do { PPC_LI32(d, i); } while(0) #endif -static inline bool is_nearbranch(int offset) -{ - return (offset < 32768) && (offset >= -32768); -} - /* * The fly in the ointment of code size changing from pass to pass is * avoided by padding the short branch case with a NOP. If code size differs @@ -91,7 +86,7 @@ static inline bool is_nearbranch(int offset) * state. */ #define PPC_BCC(cond, dest) do { \ - if (is_nearbranch((dest) - (ctx->idx * 4))) { \ + if (is_offset_in_cond_branch_range((long)(dest) - (ctx->idx * 4))) { \ PPC_BCC_SHORT(cond, dest); \ EMIT(PPC_RAW_NOP()); \ } else { \ From 3832ba4e283d7052b783dab8311df7e3590fed93 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Wed, 6 Oct 2021 01:55:21 +0530 Subject: [PATCH 120/235] powerpc/bpf: Validate branch ranges Add checks to ensure that we never emit branch instructions with truncated branch offsets. Suggested-by: Michael Ellerman Signed-off-by: Naveen N. 
Rao Tested-by: Johan Almbladh Reviewed-by: Christophe Leroy Acked-by: Song Liu Acked-by: Johan Almbladh Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/71d33a6b7603ec1013c9734dd8bdd4ff5e929142.1633464148.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit.h | 26 ++++++++++++++++++++------ arch/powerpc/net/bpf_jit_comp.c | 6 +++++- arch/powerpc/net/bpf_jit_comp32.c | 8 ++++++-- arch/powerpc/net/bpf_jit_comp64.c | 8 ++++++-- 4 files changed, 37 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 935ea95b6635..7e9b978b768e 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -24,16 +24,30 @@ #define EMIT(instr) PLANT_INSTR(image, ctx->idx, instr) /* Long jump; (unconditional 'branch') */ -#define PPC_JMP(dest) EMIT(PPC_INST_BRANCH | \ - (((dest) - (ctx->idx * 4)) & 0x03fffffc)) +#define PPC_JMP(dest) \ + do { \ + long offset = (long)(dest) - (ctx->idx * 4); \ + if (!is_offset_in_branch_range(offset)) { \ + pr_err_ratelimited("Branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \ + return -ERANGE; \ + } \ + EMIT(PPC_INST_BRANCH | (offset & 0x03fffffc)); \ + } while (0) + /* blr; (unconditional 'branch' with link) to absolute address */ #define PPC_BL_ABS(dest) EMIT(PPC_INST_BL | \ (((dest) - (unsigned long)(image + ctx->idx)) & 0x03fffffc)) /* "cond" here covers BO:BI fields. 
*/ -#define PPC_BCC_SHORT(cond, dest) EMIT(PPC_INST_BRANCH_COND | \ - (((cond) & 0x3ff) << 16) | \ - (((dest) - (ctx->idx * 4)) & \ - 0xfffc)) +#define PPC_BCC_SHORT(cond, dest) \ + do { \ + long offset = (long)(dest) - (ctx->idx * 4); \ + if (!is_offset_in_cond_branch_range(offset)) { \ + pr_err_ratelimited("Conditional branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \ + return -ERANGE; \ + } \ + EMIT(PPC_INST_BRANCH_COND | (((cond) & 0x3ff) << 16) | (offset & 0xfffc)); \ + } while (0) + /* Sign-extended 32-bit immediate load */ #define PPC_LI32(d, i) do { \ if ((int)(uintptr_t)(i) >= -32768 && \ diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 53aefee3fe70..fcbf7a917c56 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -210,7 +210,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) /* Now build the prologue, body code & epilogue for real. */ cgctx.idx = 0; bpf_jit_build_prologue(code_base, &cgctx); - bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass); + if (bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass)) { + bpf_jit_binary_free(bpf_hdr); + fp = org_fp; + goto out_addrs; + } bpf_jit_build_epilogue(code_base, &cgctx); if (bpf_jit_enable > 1) diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index beb12cbc8c29..a74d52204f8d 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -200,7 +200,7 @@ void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 fun } } -static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) +static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) { /* * By now, the eBPF program has already setup parameters in r3-r6 @@ -261,7 +261,9 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 bpf_jit_emit_common_epilogue(image, ctx); EMIT(PPC_RAW_BCTR()); + /* 
out: */ + return 0; } /* Assemble the body code between the prologue & epilogue */ @@ -1090,7 +1092,9 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * */ case BPF_JMP | BPF_TAIL_CALL: ctx->seen |= SEEN_TAILCALL; - bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]); + ret = bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]); + if (ret < 0) + return ret; break; default: diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index b87a63dba9c8..f06c62089b14 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -206,7 +206,7 @@ void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 fun EMIT(PPC_RAW_BCTRL()); } -static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) +static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) { /* * By now, the eBPF program has already setup parameters in r3, r4 and r5 @@ -267,7 +267,9 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 bpf_jit_emit_common_epilogue(image, ctx); EMIT(PPC_RAW_BCTR()); + /* out: */ + return 0; } /* Assemble the body code between the prologue & epilogue */ @@ -993,7 +995,9 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * */ case BPF_JMP | BPF_TAIL_CALL: ctx->seen |= SEEN_TAILCALL; - bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]); + ret = bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]); + if (ret < 0) + return ret; break; default: From 8bbc9d822421d9ac8ff9ed26a3713c9afc69d6c8 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Wed, 6 Oct 2021 01:55:22 +0530 Subject: [PATCH 121/235] powerpc/bpf: Fix BPF_MOD when imm == 1 Only ignore the operation if dividing by 1. Fixes: 156d0e290e969c ("powerpc/ebpf/jit: Implement JIT compiler for extended BPF") Signed-off-by: Naveen N. 
Rao Tested-by: Johan Almbladh Reviewed-by: Christophe Leroy Acked-by: Song Liu Acked-by: Johan Almbladh Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c674ca18c3046885602caebb326213731c675d06.1633464148.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit_comp64.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index f06c62089b14..d67f6d62e2e1 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -391,8 +391,14 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */ if (imm == 0) return -EINVAL; - else if (imm == 1) - goto bpf_alu32_trunc; + if (imm == 1) { + if (BPF_OP(code) == BPF_DIV) { + goto bpf_alu32_trunc; + } else { + EMIT(PPC_RAW_LI(dst_reg, 0)); + break; + } + } PPC_LI32(b2p[TMP_REG_1], imm); switch (BPF_CLASS(code)) { From 5855c4c1f415ca3ba1046e77c0b3d3dfc96c9025 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Wed, 6 Oct 2021 01:55:23 +0530 Subject: [PATCH 122/235] powerpc/bpf: Fix BPF_SUB when imm == 0x80000000 We aren't handling subtraction involving an immediate value of 0x80000000 properly. Fix the same. Fixes: 156d0e290e969c ("powerpc/ebpf/jit: Implement JIT compiler for extended BPF") Signed-off-by: Naveen N. 
Rao Reviewed-by: Christophe Leroy [mpe: Fold in fix from Naveen to use imm <= 32768] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/fc4b1276eb10761fd7ce0814c8dd089da2815251.1633464148.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit_comp64.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index d67f6d62e2e1..2ea1c3f6e287 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -330,18 +330,25 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * EMIT(PPC_RAW_SUB(dst_reg, dst_reg, src_reg)); goto bpf_alu32_trunc; case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */ - case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */ case BPF_ALU64 | BPF_ADD | BPF_K: /* dst += imm */ + if (!imm) { + goto bpf_alu32_trunc; + } else if (imm >= -32768 && imm < 32768) { + EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(imm))); + } else { + PPC_LI32(b2p[TMP_REG_1], imm); + EMIT(PPC_RAW_ADD(dst_reg, dst_reg, b2p[TMP_REG_1])); + } + goto bpf_alu32_trunc; + case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */ case BPF_ALU64 | BPF_SUB | BPF_K: /* dst -= imm */ - if (BPF_OP(code) == BPF_SUB) - imm = -imm; - if (imm) { - if (imm >= -32768 && imm < 32768) - EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(imm))); - else { - PPC_LI32(b2p[TMP_REG_1], imm); - EMIT(PPC_RAW_ADD(dst_reg, dst_reg, b2p[TMP_REG_1])); - } + if (!imm) { + goto bpf_alu32_trunc; + } else if (imm > -32768 && imm <= 32768) { + EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(-imm))); + } else { + PPC_LI32(b2p[TMP_REG_1], imm); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, b2p[TMP_REG_1])); } goto bpf_alu32_trunc; case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */ From 030905920f32e91a52794937f67434ac0b3ea41a Mon Sep 17 00:00:00 2001 From: "Naveen N. 
Rao" Date: Wed, 6 Oct 2021 01:55:24 +0530 Subject: [PATCH 123/235] powerpc/security: Add a helper to query stf_barrier type Add a helper to return the stf_barrier type for the current processor. Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/3bd5d7f96ea1547991ac2ce3137dc2b220bae285.1633464148.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/include/asm/security_features.h | 5 +++++ arch/powerpc/kernel/security.c | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index 792eefaf230b..27574f218b37 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -39,6 +39,11 @@ static inline bool security_ftr_enabled(u64 feature) return !!(powerpc_security_features & feature); } +#ifdef CONFIG_PPC_BOOK3S_64 +enum stf_barrier_type stf_barrier_type_get(void); +#else +static inline enum stf_barrier_type stf_barrier_type_get(void) { return STF_BARRIER_NONE; } +#endif // Features indicating support for Spectre/Meltdown mitigations diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 1a998490fe60..15fb5ea1b9ea 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -263,6 +263,11 @@ static int __init handle_no_stf_barrier(char *p) early_param("no_stf_barrier", handle_no_stf_barrier); +enum stf_barrier_type stf_barrier_type_get(void) +{ + return stf_enabled_flush_types; +} + /* This is the generic flag used by other architectures */ static int __init handle_ssbd(char *p) { From b7540d62509453263604a155bf2d5f0ed450cba2 Mon Sep 17 00:00:00 2001 From: "Naveen N. 
Rao" Date: Wed, 6 Oct 2021 01:55:25 +0530 Subject: [PATCH 124/235] powerpc/bpf: Emit stf barrier instruction sequences for BPF_NOSPEC Emit similar instruction sequences to commit a048a07d7f4535 ("powerpc/64s: Add support for a store forwarding barrier at kernel entry/exit") when encountering BPF_NOSPEC. Mitigations are enabled depending on what the firmware advertises. In particular, we do not gate these mitigations based on current settings, just like in x86. Due to this, we don't need to take any action if mitigations are enabled or disabled at runtime. Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/956570cbc191cd41f8274bed48ee757a86dac62a.1633464148.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit64.h | 8 ++--- arch/powerpc/net/bpf_jit_comp64.c | 55 ++++++++++++++++++++++++++++--- 2 files changed, 55 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h index 7b713edfa7e2..b63b35e45e55 100644 --- a/arch/powerpc/net/bpf_jit64.h +++ b/arch/powerpc/net/bpf_jit64.h @@ -16,18 +16,18 @@ * with our redzone usage. 
* * [ prev sp ] <------------- - * [ nv gpr save area ] 6*8 | + * [ nv gpr save area ] 5*8 | * [ tail_call_cnt ] 8 | - * [ local_tmp_var ] 8 | + * [ local_tmp_var ] 16 | * fp (r31) --> [ ebpf stack space ] upto 512 | * [ frame header ] 32/112 | * sp (r1) ---> [ stack pointer ] -------------- */ /* for gpr non volatile registers BPG_REG_6 to 10 */ -#define BPF_PPC_STACK_SAVE (6*8) +#define BPF_PPC_STACK_SAVE (5*8) /* for bpf JIT code internal usage */ -#define BPF_PPC_STACK_LOCALS 16 +#define BPF_PPC_STACK_LOCALS 24 /* stack frame excluding BPF stack, ensure this is quadword aligned */ #define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + \ BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE) diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 2ea1c3f6e287..8b5157ccfeba 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "bpf_jit64.h" @@ -35,9 +36,9 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx) * [ prev sp ] <------------- * [ ... 
] | * sp (r1) ---> [ stack pointer ] -------------- - * [ nv gpr save area ] 6*8 + * [ nv gpr save area ] 5*8 * [ tail_call_cnt ] 8 - * [ local_tmp_var ] 8 + * [ local_tmp_var ] 16 * [ unused red zone ] 208 bytes protected */ static int bpf_jit_stack_local(struct codegen_context *ctx) @@ -45,12 +46,12 @@ static int bpf_jit_stack_local(struct codegen_context *ctx) if (bpf_has_stack_frame(ctx)) return STACK_FRAME_MIN_SIZE + ctx->stack_size; else - return -(BPF_PPC_STACK_SAVE + 16); + return -(BPF_PPC_STACK_SAVE + 24); } static int bpf_jit_stack_tailcallcnt(struct codegen_context *ctx) { - return bpf_jit_stack_local(ctx) + 8; + return bpf_jit_stack_local(ctx) + 16; } static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) @@ -272,10 +273,33 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o return 0; } +/* + * We spill into the redzone always, even if the bpf program has its own stackframe. + * Offsets hardcoded based on BPF_PPC_STACK_SAVE -- see bpf_jit_stack_local() + */ +void bpf_stf_barrier(void); + +asm ( +" .global bpf_stf_barrier ;" +" bpf_stf_barrier: ;" +" std 21,-64(1) ;" +" std 22,-56(1) ;" +" sync ;" +" ld 21,-64(1) ;" +" ld 22,-56(1) ;" +" ori 31,31,0 ;" +" .rept 14 ;" +" b 1f ;" +" 1: ;" +" .endr ;" +" blr ;" +); + /* Assemble the body code between the prologue & epilogue */ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, u32 *addrs, bool extra_pass) { + enum stf_barrier_type stf_barrier = stf_barrier_type_get(); const struct bpf_insn *insn = fp->insnsi; int flen = fp->len; int i, ret; @@ -646,6 +670,29 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * * BPF_ST NOSPEC (speculation barrier) */ case BPF_ST | BPF_NOSPEC: + if (!security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) || + !security_ftr_enabled(SEC_FTR_STF_BARRIER)) + break; + + switch (stf_barrier) { + case STF_BARRIER_EIEIO: + EMIT(PPC_RAW_EIEIO() | 0x02000000); + break; + case 
STF_BARRIER_SYNC_ORI: + EMIT(PPC_RAW_SYNC()); + EMIT(PPC_RAW_LD(b2p[TMP_REG_1], _R13, 0)); + EMIT(PPC_RAW_ORI(_R31, _R31, 0)); + break; + case STF_BARRIER_FALLBACK: + EMIT(PPC_RAW_MFLR(b2p[TMP_REG_1])); + PPC_LI64(12, dereference_kernel_function_descriptor(bpf_stf_barrier)); + EMIT(PPC_RAW_MTCTR(12)); + EMIT(PPC_RAW_BCTRL()); + EMIT(PPC_RAW_MTLR(b2p[TMP_REG_1])); + break; + case STF_BARRIER_NONE: + break; + } break; /* From c9b8da77f22d28348d1f89a6c4d3fec102e9b1c4 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Wed, 6 Oct 2021 01:55:26 +0530 Subject: [PATCH 125/235] powerpc/bpf ppc32: Fix ALU32 BPF_ARSH operation Correct the destination register used for ALU32 BPF_ARSH operation. Fixes: 51c66ad849a703 ("powerpc/bpf: Implement extended BPF on PPC32") Signed-off-by: Naveen N. Rao Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/6d24c1f9e79b6f61f5135eaf2ea1e8bcd4dac87b.1633464148.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit_comp32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index a74d52204f8d..519ecb9ab672 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -625,7 +625,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * EMIT(PPC_RAW_LI(dst_reg_h, 0)); break; case BPF_ALU | BPF_ARSH | BPF_X: /* (s32) dst >>= src */ - EMIT(PPC_RAW_SRAW(dst_reg_h, dst_reg, src_reg)); + EMIT(PPC_RAW_SRAW(dst_reg, dst_reg, src_reg)); break; case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */ bpf_set_seen_register(ctx, tmp_reg); From e8278d44443207bb6609c7b064073f353e6f4978 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Wed, 6 Oct 2021 01:55:27 +0530 Subject: [PATCH 126/235] powerpc/bpf ppc32: Fix JMP32_JSET_K 'andi' only takes an unsigned 16-bit value. Correct the imm range used when emitting andi. 
Fixes: 51c66ad849a703 ("powerpc/bpf: Implement extended BPF on PPC32") Signed-off-by: Naveen N. Rao Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b94489f52831305ec15aca4dd04a3527236be7e8.1633464148.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit_comp32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 519ecb9ab672..7c65de9ed4fa 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -1075,7 +1075,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * break; case BPF_JMP32 | BPF_JSET | BPF_K: /* andi does not sign-extend the immediate */ - if (imm >= -32768 && imm < 32768) { + if (imm >= 0 && imm < 32768) { /* PPC_ANDI is _only/always_ dot-form */ EMIT(PPC_RAW_ANDI(_R0, dst_reg, imm)); } else { From 48164fccdff6d5cc11308126c050bd25a329df25 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Wed, 6 Oct 2021 01:55:28 +0530 Subject: [PATCH 127/235] powerpc/bpf ppc32: Do not emit zero extend instruction for 64-bit BPF_END Suppress emitting zero extend instruction for 64-bit BPF_END_FROM_[L|B]E operation. Fixes: 51c66ad849a703 ("powerpc/bpf: Implement extended BPF on PPC32") Signed-off-by: Naveen N. 
Rao Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b4e3c3546121315a8e2059b19a1bda84971816e4.1633464148.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit_comp32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 7c65de9ed4fa..68dc8a8231de 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -1107,7 +1107,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * return -EOPNOTSUPP; } if (BPF_CLASS(code) == BPF_ALU && !fp->aux->verifier_zext && - !insn_is_zext(&insn[i + 1])) + !insn_is_zext(&insn[i + 1]) && !(BPF_OP(code) == BPF_END && imm == 64)) EMIT(PPC_RAW_LI(dst_reg_h, 0)); } From 548b762763b885b81850db676258df47c55dd5f9 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Wed, 6 Oct 2021 01:55:29 +0530 Subject: [PATCH 128/235] powerpc/bpf ppc32: Fix BPF_SUB when imm == 0x80000000 Special case handling of the smallest 32-bit negative number for BPF_SUB. Fixes: 51c66ad849a703 ("powerpc/bpf: Implement extended BPF on PPC32") Signed-off-by: Naveen N. 
Rao Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/7135360a0cdf70adedbccf9863128b8daef18764.1633464148.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit_comp32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 68dc8a8231de..0da31d41d413 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -357,7 +357,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * PPC_LI32(_R0, imm); EMIT(PPC_RAW_ADDC(dst_reg, dst_reg, _R0)); } - if (imm >= 0) + if (imm >= 0 || (BPF_OP(code) == BPF_SUB && imm == 0x80000000)) EMIT(PPC_RAW_ADDZE(dst_reg_h, dst_reg_h)); else EMIT(PPC_RAW_ADDME(dst_reg_h, dst_reg_h)); From 3e607dc4df180b72a38e75030cb0f94d12808712 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 5 Oct 2021 00:56:38 +1000 Subject: [PATCH 129/235] powerpc/64s: fix program check interrupt emergency stack path Emergency stack path was jumping into a 3: label inside the __GEN_COMMON_BODY macro for the normal path after it had finished, rather than jumping over it. By a small miracle this is the correct place to build up a new interrupt frame with the existing stack pointer, so things basically worked okay with an added weird looking 700 trap frame on top (which had the wrong ->nip so it didn't decode bug messages either). Fix this by avoiding using numeric labels when jumping over non-trivial macros. 
Before: LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA PowerNV Modules linked in: CPU: 0 PID: 88 Comm: sh Not tainted 5.15.0-rc2-00034-ge057cdade6e5 #2637 NIP: 7265677368657265 LR: c00000000006c0c8 CTR: c0000000000097f0 REGS: c0000000fffb3a50 TRAP: 0700 Not tainted MSR: 9000000000021031 CR: 00000700 XER: 20040000 CFAR: c0000000000098b0 IRQMASK: 0 GPR00: c00000000006c964 c0000000fffb3cf0 c000000001513800 0000000000000000 GPR04: 0000000048ab0778 0000000042000000 0000000000000000 0000000000001299 GPR08: 000001e447c718ec 0000000022424282 0000000000002710 c00000000006bee8 GPR12: 9000000000009033 c0000000016b0000 00000000000000b0 0000000000000001 GPR16: 0000000000000000 0000000000000002 0000000000000000 0000000000000ff8 GPR20: 0000000000001fff 0000000000000007 0000000000000080 00007fff89d90158 GPR24: 0000000002000000 0000000002000000 0000000000000255 0000000000000300 GPR28: c000000001270000 0000000042000000 0000000048ab0778 c000000080647e80 NIP [7265677368657265] 0x7265677368657265 LR [c00000000006c0c8] ___do_page_fault+0x3f8/0xb10 Call Trace: [c0000000fffb3cf0] [c00000000000bdac] soft_nmi_common+0x13c/0x1d0 (unreliable) --- interrupt: 700 at decrementer_common_virt+0xb8/0x230 NIP: c0000000000098b8 LR: c00000000006c0c8 CTR: c0000000000097f0 REGS: c0000000fffb3d60 TRAP: 0700 Not tainted MSR: 9000000000021031 CR: 22424282 XER: 20040000 CFAR: c0000000000098b0 IRQMASK: 0 GPR00: c00000000006c964 0000000000002400 c000000001513800 0000000000000000 GPR04: 0000000048ab0778 0000000042000000 0000000000000000 0000000000001299 GPR08: 000001e447c718ec 0000000022424282 0000000000002710 c00000000006bee8 GPR12: 9000000000009033 c0000000016b0000 00000000000000b0 0000000000000001 GPR16: 0000000000000000 0000000000000002 0000000000000000 0000000000000ff8 GPR20: 0000000000001fff 0000000000000007 0000000000000080 00007fff89d90158 GPR24: 0000000002000000 0000000002000000 0000000000000255 0000000000000300 GPR28: c000000001270000 0000000042000000 0000000048ab0778 c000000080647e80 NIP 
[c0000000000098b8] decrementer_common_virt+0xb8/0x230 LR [c00000000006c0c8] ___do_page_fault+0x3f8/0xb10 --- interrupt: 700 Instruction dump: XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX ---[ end trace 6d28218e0cc3c949 ]--- After: ------------[ cut here ]------------ kernel BUG at arch/powerpc/kernel/exceptions-64s.S:491! Oops: Exception in kernel mode, sig: 5 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA PowerNV Modules linked in: CPU: 0 PID: 88 Comm: login Not tainted 5.15.0-rc2-00034-ge057cdade6e5-dirty #2638 NIP: c0000000000098b8 LR: c00000000006bf04 CTR: c0000000000097f0 REGS: c0000000fffb3d60 TRAP: 0700 Not tainted MSR: 9000000000021031 CR: 24482227 XER: 00040000 CFAR: c0000000000098b0 IRQMASK: 0 GPR00: c00000000006bf04 0000000000002400 c000000001513800 c000000001271868 GPR04: 00000000100f0d29 0000000042000000 0000000000000007 0000000000000009 GPR08: 00000000100f0d29 0000000024482227 0000000000002710 c000000000181b3c GPR12: 9000000000009033 c0000000016b0000 00000000100f0d29 c000000005b22f00 GPR16: 00000000ffff0000 0000000000000001 0000000000000009 00000000100eed90 GPR20: 00000000100eed90 0000000010000000 000000001000a49c 00000000100f1430 GPR24: c000000001271868 0000000002000000 0000000000000215 0000000000000300 GPR28: c000000001271800 0000000042000000 00000000100f0d29 c000000080647860 NIP [c0000000000098b8] decrementer_common_virt+0xb8/0x230 LR [c00000000006bf04] ___do_page_fault+0x234/0xb10 Call Trace: Instruction dump: 4182000c 39400001 48000008 894d0932 714a0001 39400008 408225fc 718a4000 7c2a0b78 3821fcf0 41c20008 e82d0910 <0981fcf0> f92101a0 f9610170 f9810178 ---[ end trace a5dbd1f5ea4ccc51 ]--- Fixes: 0a882e28468f4 ("powerpc/64s/exception: remove bad stack branch") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211004145642.1331214-2-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 17 
++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 37859e62a8dc..024d9231f88c 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1665,27 +1665,30 @@ EXC_COMMON_BEGIN(program_check_common) */ andi. r10,r12,MSR_PR - bne 2f /* If userspace, go normal path */ + bne .Lnormal_stack /* If userspace, go normal path */ andis. r10,r12,(SRR1_PROGTM)@h - bne 1f /* If TM, emergency */ + bne .Lemergency_stack /* If TM, emergency */ cmpdi r1,-INT_FRAME_SIZE /* check if r1 is in userspace */ - blt 2f /* normal path if not */ + blt .Lnormal_stack /* normal path if not */ /* Use the emergency stack */ -1: andi. r10,r12,MSR_PR /* Set CR0 correctly for label */ +.Lemergency_stack: + andi. r10,r12,MSR_PR /* Set CR0 correctly for label */ /* 3 in EXCEPTION_PROLOG_COMMON */ mr r10,r1 /* Save r1 */ ld r1,PACAEMERGSP(r13) /* Use emergency stack */ subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */ __ISTACK(program_check)=0 __GEN_COMMON_BODY program_check - b 3f -2: + b .Ldo_program_check + +.Lnormal_stack: __ISTACK(program_check)=1 __GEN_COMMON_BODY program_check -3: + +.Ldo_program_check: addi r3,r1,STACK_FRAME_OVERHEAD bl program_check_exception REST_NVGPRS(r1) /* instruction emulation may change GPRs */ From d0afd44c05f8f4e4c91487c02d43c87a31552462 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 5 Oct 2021 00:56:39 +1000 Subject: [PATCH 130/235] powerpc/traps: do not enable irqs in _exception _exception can be called by machine check handlers when the MCE hits user code (e.g., pseries and powernv). This will enable local irqs, which is a dicey thing to do in NMI or hard irq context. This seemed to work out okay because a userspace MCE can basically be treated like a synchronous interrupt (after async / imprecise MCEs are filtered out).
Since NMI and hard irq handlers have started growing nmi_enter / irq_enter, and more irq state sanity checks, this has started to cause problems (or at least trigger warnings). The Fixes tag points to the commit which introduced this, rather than trying to work out exactly which commit was the first that could possibly cause a problem, because that may be difficult to prove. Fixes: 9f2f79e3a3c1 ("powerpc: Disable interrupts in 64-bit kernel FP and vector faults") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211004145642.1331214-3-npiggin@gmail.com --- arch/powerpc/kernel/traps.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index aac8c0412ff9..e453b666613b 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -340,10 +340,16 @@ static bool exception_common(int signr, struct pt_regs *regs, int code, return false; } - show_signal_msg(signr, regs, code, addr); + /* + * Must not enable interrupts even for user-mode exception, because + * this can be called from machine check, which may be a NMI or IRQ + * which don't like interrupts being enabled. Could check for + * in_hardirq || in_nmi perhaps, but there doesn't seem to be a good + * reason why _exception() should enable irqs for an exception handler, + * the handlers themselves do that directly. + */ - if (arch_irqs_disabled()) - interrupt_cond_local_irq_enable(regs); + show_signal_msg(signr, regs, code, addr); current->thread.trap_nr = code; From ff058a8ada5df0d84e5537cfaf89d06d71501580 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 5 Oct 2021 00:56:40 +1000 Subject: [PATCH 131/235] powerpc/64: warn if local irqs are enabled in NMI or hardirq context This can help catch bugs such as the one fixed by the previous change to prevent _exception() from enabling irqs.
ppc32 could have a similar warning but it has no good config option to debug this stuff (the test may be overkill to add for production kernels). Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211004145642.1331214-4-npiggin@gmail.com --- arch/powerpc/kernel/irq.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 551b653228c4..c4f1d6b7d992 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -229,6 +229,9 @@ notrace void arch_local_irq_restore(unsigned long mask) return; } + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(in_nmi() || in_hardirq()); + /* * After the stb, interrupts are unmasked and there are no interrupts * pending replay. The restart sequence makes this atomic with @@ -321,6 +324,9 @@ notrace void arch_local_irq_restore(unsigned long mask) if (mask) return; + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(in_nmi() || in_hardirq()); + /* * From this point onward, we can take interrupts, preempt, * etc... unless we got hard-disabled. We check if an event From 768c47010392ece9766a56479b4e0cf04a536916 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 5 Oct 2021 00:56:41 +1000 Subject: [PATCH 132/235] powerpc/64/interrupt: Reconcile soft-mask state in NMI and fix false BUG If a NMI hits early in an interrupt handler before the irq soft-mask state is reconciled, that can cause a false-positive BUG with a CONFIG_PPC_IRQ_SOFT_MASK_DEBUG assertion. Remove that assertion and instead check the case that if regs->msr has EE clear, then regs->softe should be marked as disabled so the irq state looks correct to NMI handlers, the same as how it's fixed up in the case it was implicit soft-masked. 
This doesn't fix a known problem -- the change that was fixed by commit 4ec5feec1ad02 ("powerpc/64s: Make NMI record implicitly soft-masked code as irqs disabled") was the addition of a warning in the soft-nmi watchdog interrupt which can never actually fire when MSR[EE]=0. However it may be important if NMI handlers grow more code, and it's less surprising to anything using 'regs' - (I tripped over this when working in the area). Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211004145642.1331214-5-npiggin@gmail.com --- arch/powerpc/include/asm/interrupt.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 6b800d3e2681..b894b7169706 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -265,13 +265,16 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct inte local_paca->irq_soft_mask = IRQS_ALL_DISABLED; local_paca->irq_happened |= PACA_IRQ_HARD_DIS; - if (is_implicit_soft_masked(regs)) { - // Adjust regs->softe soft implicit soft-mask, so - // arch_irq_disabled_regs(regs) behaves as expected. + if (!(regs->msr & MSR_EE) || is_implicit_soft_masked(regs)) { + /* + * Adjust regs->softe to be soft-masked if it had not been + * reconcied (e.g., interrupt entry with MSR[EE]=0 but softe + * not yet set disabled), or if it was in an implicit soft + * masked state. This makes arch_irq_disabled_regs(regs) + * behave as expected. 
+ */ regs->softe = IRQS_ALL_DISABLED; } - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) - BUG_ON(!arch_irq_disabled_regs(regs) && !(regs->msr & MSR_EE)); /* Don't do any per-CPU operations until interrupt state is fixed */ From f08fb25bc66986b0952724530a640d9970fa52c1 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 5 Oct 2021 00:56:42 +1000 Subject: [PATCH 133/235] powerpc/64s: Fix unrecoverable MCE calling async handler from NMI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The machine check handler is not considered NMI on 64s. The early handler is the true NMI handler, and then it schedules the machine_check_exception handler to run when interrupts are enabled. This works fine except the case of an unrecoverable MCE, where the true NMI is taken when MSR[RI] is clear, it can not recover, so it calls machine_check_exception directly so something might be done about it. Calling an async handler from NMI context can result in irq state and other things getting corrupted. This can also trigger the BUG at arch/powerpc/include/asm/interrupt.h:168 BUG_ON(!arch_irq_disabled_regs(regs) && !(regs->msr & MSR_EE)); Fix this by making an _async version of the handler which is called in the normal case, and a NMI version that is called for unrecoverable interrupts. 
Fixes: 2b43dd7653cc ("powerpc/64: enable MSR[EE] in irq replay pt_regs") Signed-off-by: Nicholas Piggin Tested-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211004145642.1331214-6-npiggin@gmail.com --- arch/powerpc/include/asm/interrupt.h | 5 ++--- arch/powerpc/kernel/exceptions-64s.S | 8 +++++-- arch/powerpc/kernel/traps.c | 31 ++++++++++++++++------------ 3 files changed, 26 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index b894b7169706..a1d238255f07 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -528,10 +528,9 @@ static __always_inline long ____##func(struct pt_regs *regs) /* kernel/traps.c */ DECLARE_INTERRUPT_HANDLER_NMI(system_reset_exception); #ifdef CONFIG_PPC_BOOK3S_64 -DECLARE_INTERRUPT_HANDLER_ASYNC(machine_check_exception); -#else -DECLARE_INTERRUPT_HANDLER_NMI(machine_check_exception); +DECLARE_INTERRUPT_HANDLER_ASYNC(machine_check_exception_async); #endif +DECLARE_INTERRUPT_HANDLER_NMI(machine_check_exception); DECLARE_INTERRUPT_HANDLER(SMIException); DECLARE_INTERRUPT_HANDLER(handle_hmi_exception); DECLARE_INTERRUPT_HANDLER(unknown_exception); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 024d9231f88c..eaf1f72131a1 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1243,7 +1243,7 @@ EXC_COMMON_BEGIN(machine_check_common) li r10,MSR_RI mtmsrd r10,1 addi r3,r1,STACK_FRAME_OVERHEAD - bl machine_check_exception + bl machine_check_exception_async b interrupt_return_srr @@ -1303,7 +1303,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) subi r12,r12,1 sth r12,PACA_IN_MCE(r13) - /* Invoke machine_check_exception to print MCE event and panic. */ + /* + * Invoke machine_check_exception to print MCE event and panic. 
+ * This is the NMI version of the handler because we are called from + * the early handler which is a true NMI. + */ addi r3,r1,STACK_FRAME_OVERHEAD bl machine_check_exception diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index e453b666613b..11741703d26e 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -796,24 +796,22 @@ void die_mce(const char *str, struct pt_regs *regs, long err) * do_exit() checks for in_interrupt() and panics in that case, so * exit the irq/nmi before calling die. */ - if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) - irq_exit(); - else + if (in_nmi()) nmi_exit(); + else + irq_exit(); die(str, regs, err); } /* - * BOOK3S_64 does not call this handler as a non-maskable interrupt + * BOOK3S_64 does not usually call this handler as a non-maskable interrupt * (it uses its own early real-mode handler to handle the MCE proper * and then raises irq_work to call this handler when interrupts are - * enabled). + * enabled). The only time when this is not true is if the early handler + * is unrecoverable, then it does call this directly to try to get a + * message out. */ -#ifdef CONFIG_PPC_BOOK3S_64 -DEFINE_INTERRUPT_HANDLER_ASYNC(machine_check_exception) -#else -DEFINE_INTERRUPT_HANDLER_NMI(machine_check_exception) -#endif +static void __machine_check_exception(struct pt_regs *regs) { int recover = 0; @@ -847,12 +845,19 @@ DEFINE_INTERRUPT_HANDLER_NMI(machine_check_exception) /* Must die if the interrupt is not recoverable */ if (regs_is_unrecoverable(regs)) die_mce("Unrecoverable Machine check", regs, SIGBUS); +} #ifdef CONFIG_PPC_BOOK3S_64 - return; -#else - return 0; +DEFINE_INTERRUPT_HANDLER_ASYNC(machine_check_exception_async) +{ + __machine_check_exception(regs); +} #endif +DEFINE_INTERRUPT_HANDLER_NMI(machine_check_exception) +{ + __machine_check_exception(regs); + + return 0; } DEFINE_INTERRUPT_HANDLER(SMIException) /* async? 
*/ From 5a4b0320783a19f877dd595813569b3c25f4ff81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Thu, 30 Sep 2021 12:25:35 +0200 Subject: [PATCH 134/235] powerpc/pseries/msi: Add an empty irq_write_msi_msg() handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The IPR driver tests for MSI support at probe time with MSI vector 0 and when done, frees the IRQ with free_irq(). This test was introduced by 95fecd90397e ("ipr: add test for MSI interrupt support") as an improvement of commit 5a9ef25b14d3 ("[SCSI] ipr: add MSI support") because a boot failure was reported on a Bimini PowerPC system: https://lore.kernel.org/r/1242926159.3007.5.camel@localhost.localdomain It was finally decided to remove MSI support on Bimini systems in 6eb0ac03899a ("powerpc/maple: Add a quirk to disable MSI for IPR on Bimini"). Linux 5.15-rc1 added MSI domain support to the pseries machine and when free_irq() is called in the driver, msi_domain_deactivate() is called as well. This resets the MSI table entry of the associated vector by calling __pci_write_msi_msg() with an empty message and breaks any further activation of the same vector. In the case of the IPR driver, it breaks the initialization sequence of the IOA. Introduce an empty irq_write_msi_msg() handler in the MSI domain of the pseries machine to avoid clearing the MSI vector entry. Updating the entry is not strictly necessary since it is initialized by the underlying hypervisor, PowerVM or QEMU/KVM.
Fixes: a5f3d2c17b07 ("powerpc/pseries/pci: Add MSI domains") Signed-off-by: Cédric Le Goater Reported-by: Abdul Haleem Tested-by: Mahesh Salgaonkar [mpe: Tweak comment wording and formatting slightly] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210930102535.1047230-1-clg@kaod.org --- arch/powerpc/platforms/pseries/msi.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 1b305e411862..8627362f613e 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -507,12 +507,27 @@ static void pseries_msi_unmask(struct irq_data *d) irq_chip_unmask_parent(d); } +static void pseries_msi_write_msg(struct irq_data *data, struct msi_msg *msg) +{ + struct msi_desc *entry = irq_data_get_msi_desc(data); + + /* + * Do not update the MSIx vector table. It's not strictly necessary + * because the table is initialized by the underlying hypervisor, PowerVM + * or QEMU/KVM. However, if the MSIx vector entry is cleared, any further + * activation will fail. This can happen in some drivers (eg. IPR) which + * deactivate an IRQ used for testing MSI support. + */ + entry->msg = *msg; +} + static struct irq_chip pseries_pci_msi_irq_chip = { .name = "pSeries-PCI-MSI", .irq_shutdown = pseries_msi_shutdown, .irq_mask = pseries_msi_mask, .irq_unmask = pseries_msi_unmask, .irq_eoi = irq_chip_eoi_parent, + .irq_write_msi_msg = pseries_msi_write_msg, }; static struct msi_domain_info pseries_msi_domain_info = { From d93f9e23744b7bf11a98b2ddb091d129482ae179 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 15 Sep 2021 16:12:24 +0200 Subject: [PATCH 135/235] powerpc/32s: Fix kuap_kernel_restore() At interrupt exit, kuap_kernel_restore() calls kuap_unlock() with the value contained in regs->kuap. 
However, when regs->kuap contains 0xffffffff it means that KUAP was not unlocked so calling kuap_unlock() is irrelevant and results in jeopardising the contents of kernel space segment registers. So check that regs->kuap doesn't contain KUAP_NONE before calling kuap_unlock(). In the meantime it also means that if KUAP has not been correctly locked back at interrupt exit, it must be locked before continuing. This is done by checking the content of current->thread.kuap which was returned by kuap_get_and_assert_locked(). Fixes: 16132529cee5 ("powerpc/32s: Rework Kernel Userspace Access Protection") Reported-by: Stan Johnson Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/0d0c4d0f050a637052287c09ba521bad960a2790.1631715131.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/kup.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index d4b145b279f6..9f38040f0641 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -136,6 +136,14 @@ static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap) if (kuap_is_disabled()) return; + if (unlikely(kuap != KUAP_NONE)) { + current->thread.kuap = KUAP_NONE; + kuap_lock(kuap, false); + } + + if (likely(regs->kuap == KUAP_NONE)) + return; + current->thread.kuap = regs->kuap; kuap_unlock(regs->kuap, false); From eb8257a12192f43ffd41bd90932c39dade958042 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Mon, 20 Sep 2021 22:03:26 +0530 Subject: [PATCH 136/235] pseries/eeh: Fix the kdump kernel crash during eeh_pseries_init On pseries LPAR when an empty slot is assigned to partition OR in single LPAR mode, kdump kernel crashes during issuing PHB reset. In the kdump scenario, we traverse all PHBs and issue reset using the pe_config_addr of the first child device present under each PHB.
However the code assumes that none of the PHB slots can be empty and uses list_first_entry() to get the first child device under the PHB. Since list_first_entry() expects the list to be non-empty, it returns an invalid pci_dn entry and ends up accessing NULL phb pointer under pci_dn->phb causing kdump kernel crash. This patch fixes the below kdump kernel crash by skipping empty slots: audit: initializing netlink subsys (disabled) thermal_sys: Registered thermal governor 'fair_share' thermal_sys: Registered thermal governor 'step_wise' cpuidle: using governor menu pstore: Registered nvram as persistent store backend Issue PHB reset ... audit: type=2000 audit(1631267818.000:1): state=initialized audit_enabled=0 res=1 BUG: Kernel NULL pointer dereference on read at 0x00000268 Faulting instruction address: 0xc000000008101fb0 Oops: Kernel access of bad area, sig: 7 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries Modules linked in: CPU: 7 PID: 1 Comm: swapper/7 Not tainted 5.14.0 #1 NIP: c000000008101fb0 LR: c000000009284ccc CTR: c000000008029d70 REGS: c00000001161b840 TRAP: 0300 Not tainted (5.14.0) MSR: 8000000002009033 CR: 28000224 XER: 20040002 CFAR: c000000008101f0c DAR: 0000000000000268 DSISR: 00080000 IRQMASK: 0 ... 
NIP pseries_eeh_get_pe_config_addr+0x100/0x1b0 LR __machine_initcall_pseries_eeh_pseries_init+0x2cc/0x350 Call Trace: 0xc00000001161bb80 (unreliable) __machine_initcall_pseries_eeh_pseries_init+0x2cc/0x350 do_one_initcall+0x60/0x2d0 kernel_init_freeable+0x350/0x3f8 kernel_init+0x3c/0x17c ret_from_kernel_thread+0x5c/0x64 Fixes: 5a090f7c363fd ("powerpc/pseries: PCIE PHB reset") Signed-off-by: Mahesh Salgaonkar [mpe: Tweak wording and trim oops] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/163215558252.413351.8600189949820258982.stgit@jupiter --- arch/powerpc/platforms/pseries/eeh_pseries.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index bc15200852b7..09fafcf2d3a0 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -867,6 +867,10 @@ static int __init eeh_pseries_init(void) if (is_kdump_kernel() || reset_devices) { pr_info("Issue PHB reset ...\n"); list_for_each_entry(phb, &hose_list, list_node) { + // Skip if the slot is empty + if (list_empty(&PCI_DN(phb->dn)->child_list)) + continue; + pdn = list_first_entry(&PCI_DN(phb->dn)->child_list, struct pci_dn, list); config_addr = pseries_eeh_get_pe_config_addr(pdn); From 951cd3a0866d29cb9c01ebc1d9c17590e598226e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 Sep 2021 09:50:26 +0200 Subject: [PATCH 137/235] firmware: include drivers/firmware/Kconfig unconditionally Compile-testing drivers that require access to a firmware layer fails when that firmware symbol is unavailable. This happened twice this week: - My proposed change to rework the QCOM_SCM firmware symbol broke on ppc64 and others. - The cs_dsp firmware patch added a device-specific firmware loader into drivers/firmware, which broke on the same set of architectures.
We should probably do the same thing for other subsystems as well, but fix this one first as this is a dependency for other patches getting merged. Reviewed-by: Bjorn Andersson Reviewed-by: Charles Keepax Acked-by: Will Deacon Acked-by: Bjorn Andersson Cc: Mark Brown Cc: Liam Girdwood Cc: Charles Keepax Cc: Simon Trimmer Cc: Michael Ellerman Reviewed-by: Mark Brown Signed-off-by: Arnd Bergmann --- arch/arm/Kconfig | 2 -- arch/arm64/Kconfig | 2 -- arch/ia64/Kconfig | 2 -- arch/mips/Kconfig | 2 -- arch/parisc/Kconfig | 2 -- arch/riscv/Kconfig | 2 -- arch/x86/Kconfig | 2 -- drivers/Kconfig | 2 ++ 8 files changed, 2 insertions(+), 14 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index fc196421b2ce..59baf6c132a7 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1989,8 +1989,6 @@ config ARCH_HIBERNATION_POSSIBLE endmenu -source "drivers/firmware/Kconfig" - if CRYPTO source "arch/arm/crypto/Kconfig" endif diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 077f2ec4eeb2..407b4addea36 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1931,8 +1931,6 @@ source "drivers/cpufreq/Kconfig" endmenu -source "drivers/firmware/Kconfig" - source "drivers/acpi/Kconfig" source "arch/arm64/kvm/Kconfig" diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 045792cde481..1e33666fa679 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -388,8 +388,6 @@ config CRASH_DUMP help Generate crash dump after being started by kexec. 
-source "drivers/firmware/Kconfig" - endmenu menu "Power management and ACPI options" diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 771ca53af06d..6b8f591c5054 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -3316,8 +3316,6 @@ source "drivers/cpuidle/Kconfig" endmenu -source "drivers/firmware/Kconfig" - source "arch/mips/kvm/Kconfig" source "arch/mips/vdso/Kconfig" diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 4742b6f169b7..27a8b49af11f 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -384,6 +384,4 @@ config KEXEC_FILE endmenu -source "drivers/firmware/Kconfig" - source "drivers/parisc/Kconfig" diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index c3f3fd583e04..8bc71ab143e3 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -561,5 +561,3 @@ menu "Power management options" source "kernel/power/Kconfig" endmenu - -source "drivers/firmware/Kconfig" diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4e001bbbb425..4dca39744ee9 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2828,8 +2828,6 @@ config HAVE_ATOMIC_IOMAP def_bool y depends on X86_32 -source "drivers/firmware/Kconfig" - source "arch/x86/kvm/Kconfig" source "arch/x86/Kconfig.assembler" diff --git a/drivers/Kconfig b/drivers/Kconfig index 30d2db37cc87..0d399ddaa185 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -17,6 +17,8 @@ source "drivers/bus/Kconfig" source "drivers/connector/Kconfig" +source "drivers/firmware/Kconfig" + source "drivers/gnss/Kconfig" source "drivers/mtd/Kconfig" From 424953cf3c6657f1e67e1a2c5d6e3bb518ea4e9a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 Sep 2021 09:50:27 +0200 Subject: [PATCH 138/235] qcom_scm: hide Kconfig symbol Now that SCM can be a loadable module, we have to add another dependency to avoid link failures when ipa or adreno-gpu are built-in: aarch64-linux-ld: drivers/net/ipa/ipa_main.o: in function `ipa_probe': ipa_main.c:(.text+0xfc4): undefined reference to 
`qcom_scm_is_available' ld.lld: error: undefined symbol: qcom_scm_is_available >>> referenced by adreno_gpu.c >>> gpu/drm/msm/adreno/adreno_gpu.o:(adreno_zap_shader_load) in archive drivers/built-in.a This can happen when CONFIG_ARCH_QCOM is disabled and we don't select QCOM_MDT_LOADER, but some other module selects QCOM_SCM. Ideally we'd use a similar dependency here to what we have for QCOM_RPROC_COMMON, but that causes dependency loops from other things selecting QCOM_SCM. This appears to be an endless problem, so try something different this time: - CONFIG_QCOM_SCM becomes a hidden symbol that nothing 'depends on' but that is simply selected by all of its users - All the stubs in include/linux/qcom_scm.h can go away - arm-smccc.h needs to provide a stub for __arm_smccc_smc() to allow compile-testing QCOM_SCM on all architectures. - To avoid a circular dependency chain involving RESET_CONTROLLER and PINCTRL_SUNXI, drop the 'select RESET_CONTROLLER' statement. According to my testing this still builds fine, and the QCOM platform selects this symbol already. Acked-by: Kalle Valo Acked-by: Alex Elder Signed-off-by: Arnd Bergmann --- drivers/firmware/Kconfig | 5 +- drivers/gpu/drm/msm/Kconfig | 4 +- drivers/iommu/Kconfig | 3 +- drivers/iommu/arm/arm-smmu/Makefile | 3 +- drivers/iommu/arm/arm-smmu/arm-smmu-impl.c | 3 +- drivers/media/platform/Kconfig | 2 +- drivers/mmc/host/Kconfig | 2 +- drivers/net/ipa/Kconfig | 1 + drivers/net/wireless/ath/ath10k/Kconfig | 2 +- drivers/pinctrl/qcom/Kconfig | 3 +- include/linux/arm-smccc.h | 10 +++ include/linux/qcom_scm.h | 71 ---------------------- 12 files changed, 24 insertions(+), 85 deletions(-) diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index 220a58cf0a44..cda7d7162cbb 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -203,10 +203,7 @@ config INTEL_STRATIX10_RSU Say Y here if you want Intel RSU support. 
config QCOM_SCM - tristate "Qcom SCM driver" - depends on ARM || ARM64 - depends on HAVE_ARM_SMCCC - select RESET_CONTROLLER + tristate config QCOM_SCM_DOWNLOAD_MODE_DEFAULT bool "Qualcomm download mode enabled by default" diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig index e9c6af78b1d7..3ddf739a6f9b 100644 --- a/drivers/gpu/drm/msm/Kconfig +++ b/drivers/gpu/drm/msm/Kconfig @@ -17,7 +17,7 @@ config DRM_MSM select DRM_SCHED select SHMEM select TMPFS - select QCOM_SCM if ARCH_QCOM + select QCOM_SCM select WANT_DEV_COREDUMP select SND_SOC_HDMI_CODEC if SND_SOC select SYNC_FILE @@ -55,7 +55,7 @@ config DRM_MSM_GPU_SUDO config DRM_MSM_HDMI_HDCP bool "Enable HDMI HDCP support in MSM DRM driver" - depends on DRM_MSM && QCOM_SCM + depends on DRM_MSM default y help Choose this option to enable HDCP state machine diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 124c41adeca1..c5c71b7ab7e8 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -308,7 +308,6 @@ config APPLE_DART config ARM_SMMU tristate "ARM Ltd. System MMU (SMMU) Support" depends on ARM64 || ARM || (COMPILE_TEST && !GENERIC_ATOMIC64) - depends on QCOM_SCM || !QCOM_SCM #if QCOM_SCM=m this can't be =y select IOMMU_API select IOMMU_IO_PGTABLE_LPAE select ARM_DMA_USE_IOMMU if ARM @@ -438,7 +437,7 @@ config QCOM_IOMMU # Note: iommu drivers cannot (yet?) 
be built as modules bool "Qualcomm IOMMU Support" depends on ARCH_QCOM || (COMPILE_TEST && !GENERIC_ATOMIC64) - depends on QCOM_SCM=y + select QCOM_SCM select IOMMU_API select IOMMU_IO_PGTABLE_LPAE select ARM_DMA_USE_IOMMU diff --git a/drivers/iommu/arm/arm-smmu/Makefile b/drivers/iommu/arm/arm-smmu/Makefile index e240a7bcf310..b0cc01aa20c9 100644 --- a/drivers/iommu/arm/arm-smmu/Makefile +++ b/drivers/iommu/arm/arm-smmu/Makefile @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_QCOM_IOMMU) += qcom_iommu.o obj-$(CONFIG_ARM_SMMU) += arm_smmu.o -arm_smmu-objs += arm-smmu.o arm-smmu-impl.o arm-smmu-nvidia.o arm-smmu-qcom.o +arm_smmu-objs += arm-smmu.o arm-smmu-impl.o arm-smmu-nvidia.o +arm_smmu-$(CONFIG_ARM_SMMU_QCOM) += arm-smmu-qcom.o diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c index 9f465e146799..2c25cce38060 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c @@ -215,7 +215,8 @@ struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu) of_device_is_compatible(np, "nvidia,tegra186-smmu")) return nvidia_smmu_impl_init(smmu); - smmu = qcom_smmu_impl_init(smmu); + if (IS_ENABLED(CONFIG_ARM_SMMU_QCOM)) + smmu = qcom_smmu_impl_init(smmu); if (of_device_is_compatible(np, "marvell,ap806-smmu-500")) smmu->impl = &mrvl_mmu500_impl; diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig index 157c924686e4..80321e03809a 100644 --- a/drivers/media/platform/Kconfig +++ b/drivers/media/platform/Kconfig @@ -565,7 +565,7 @@ config VIDEO_QCOM_VENUS depends on VIDEO_DEV && VIDEO_V4L2 && QCOM_SMEM depends on (ARCH_QCOM && IOMMU_DMA) || COMPILE_TEST select QCOM_MDT_LOADER if ARCH_QCOM - select QCOM_SCM if ARCH_QCOM + select QCOM_SCM select VIDEOBUF2_DMA_CONTIG select V4L2_MEM2MEM_DEV help diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index 71313961cc54..95b3511b0560 100644 --- a/drivers/mmc/host/Kconfig +++ 
b/drivers/mmc/host/Kconfig @@ -547,7 +547,7 @@ config MMC_SDHCI_MSM depends on MMC_SDHCI_PLTFM select MMC_SDHCI_IO_ACCESSORS select MMC_CQHCI - select QCOM_SCM if MMC_CRYPTO && ARCH_QCOM + select QCOM_SCM if MMC_CRYPTO help This selects the Secure Digital Host Controller Interface (SDHCI) support present in Qualcomm SOCs. The controller supports diff --git a/drivers/net/ipa/Kconfig b/drivers/net/ipa/Kconfig index 8f99cfa14680..d037682fb7ad 100644 --- a/drivers/net/ipa/Kconfig +++ b/drivers/net/ipa/Kconfig @@ -4,6 +4,7 @@ config QCOM_IPA depends on ARCH_QCOM || COMPILE_TEST depends on QCOM_RPROC_COMMON || (QCOM_RPROC_COMMON=n && COMPILE_TEST) select QCOM_MDT_LOADER if ARCH_QCOM + select QCOM_SCM select QCOM_QMI_HELPERS help Choose Y or M here to include support for the Qualcomm diff --git a/drivers/net/wireless/ath/ath10k/Kconfig b/drivers/net/wireless/ath/ath10k/Kconfig index 741289e385d5..ca007b800f75 100644 --- a/drivers/net/wireless/ath/ath10k/Kconfig +++ b/drivers/net/wireless/ath/ath10k/Kconfig @@ -44,7 +44,7 @@ config ATH10K_SNOC tristate "Qualcomm ath10k SNOC support" depends on ATH10K depends on ARCH_QCOM || COMPILE_TEST - depends on QCOM_SCM || !QCOM_SCM #if QCOM_SCM=m this can't be =y + select QCOM_SCM select QCOM_QMI_HELPERS help This module adds support for integrated WCN3990 chip connected diff --git a/drivers/pinctrl/qcom/Kconfig b/drivers/pinctrl/qcom/Kconfig index 32ea2a8ec02b..5ff4207df66e 100644 --- a/drivers/pinctrl/qcom/Kconfig +++ b/drivers/pinctrl/qcom/Kconfig @@ -3,7 +3,8 @@ if (ARCH_QCOM || COMPILE_TEST) config PINCTRL_MSM tristate "Qualcomm core pin controller driver" - depends on GPIOLIB && (QCOM_SCM || !QCOM_SCM) #if QCOM_SCM=m this can't be =y + depends on GPIOLIB + select QCOM_SCM select PINMUX select PINCONF select GENERIC_PINCONF diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 7d1cabe15262..63ccb5252190 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -321,10 +321,20 @@ asmlinkage 
unsigned long __arm_smccc_sve_check(unsigned long x0); * from register 0 to 3 on return from the SMC instruction. An optional * quirk structure provides vendor specific behavior. */ +#ifdef CONFIG_HAVE_ARM_SMCCC asmlinkage void __arm_smccc_smc(unsigned long a0, unsigned long a1, unsigned long a2, unsigned long a3, unsigned long a4, unsigned long a5, unsigned long a6, unsigned long a7, struct arm_smccc_res *res, struct arm_smccc_quirk *quirk); +#else +static inline void __arm_smccc_smc(unsigned long a0, unsigned long a1, + unsigned long a2, unsigned long a3, unsigned long a4, + unsigned long a5, unsigned long a6, unsigned long a7, + struct arm_smccc_res *res, struct arm_smccc_quirk *quirk) +{ + *res = (struct arm_smccc_res){}; +} +#endif /** * __arm_smccc_hvc() - make HVC calls diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index c0475d1c9885..81cad9e1e412 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -61,7 +61,6 @@ enum qcom_scm_ice_cipher { #define QCOM_SCM_PERM_RW (QCOM_SCM_PERM_READ | QCOM_SCM_PERM_WRITE) #define QCOM_SCM_PERM_RWX (QCOM_SCM_PERM_RW | QCOM_SCM_PERM_EXEC) -#if IS_ENABLED(CONFIG_QCOM_SCM) extern bool qcom_scm_is_available(void); extern int qcom_scm_set_cold_boot_addr(void *entry, const cpumask_t *cpus); @@ -115,74 +114,4 @@ extern int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val, extern int qcom_scm_lmh_profile_change(u32 profile_id); extern bool qcom_scm_lmh_dcvsh_available(void); -#else - -#include - -static inline bool qcom_scm_is_available(void) { return false; } - -static inline int qcom_scm_set_cold_boot_addr(void *entry, - const cpumask_t *cpus) { return -ENODEV; } -static inline int qcom_scm_set_warm_boot_addr(void *entry, - const cpumask_t *cpus) { return -ENODEV; } -static inline void qcom_scm_cpu_power_down(u32 flags) {} -static inline u32 qcom_scm_set_remote_state(u32 state,u32 id) - { return -ENODEV; } - -static inline int qcom_scm_pas_init_image(u32 peripheral, const 
void *metadata, - size_t size) { return -ENODEV; } -static inline int qcom_scm_pas_mem_setup(u32 peripheral, phys_addr_t addr, - phys_addr_t size) { return -ENODEV; } -static inline int qcom_scm_pas_auth_and_reset(u32 peripheral) - { return -ENODEV; } -static inline int qcom_scm_pas_shutdown(u32 peripheral) { return -ENODEV; } -static inline bool qcom_scm_pas_supported(u32 peripheral) { return false; } - -static inline int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val) - { return -ENODEV; } -static inline int qcom_scm_io_writel(phys_addr_t addr, unsigned int val) - { return -ENODEV; } - -static inline bool qcom_scm_restore_sec_cfg_available(void) { return false; } -static inline int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare) - { return -ENODEV; } -static inline int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size) - { return -ENODEV; } -static inline int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare) - { return -ENODEV; } -extern inline int qcom_scm_mem_protect_video_var(u32 cp_start, u32 cp_size, - u32 cp_nonpixel_start, - u32 cp_nonpixel_size) - { return -ENODEV; } -static inline int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz, - unsigned int *src, const struct qcom_scm_vmperm *newvm, - unsigned int dest_cnt) { return -ENODEV; } - -static inline bool qcom_scm_ocmem_lock_available(void) { return false; } -static inline int qcom_scm_ocmem_lock(enum qcom_scm_ocmem_client id, u32 offset, - u32 size, u32 mode) { return -ENODEV; } -static inline int qcom_scm_ocmem_unlock(enum qcom_scm_ocmem_client id, - u32 offset, u32 size) { return -ENODEV; } - -static inline bool qcom_scm_ice_available(void) { return false; } -static inline int qcom_scm_ice_invalidate_key(u32 index) { return -ENODEV; } -static inline int qcom_scm_ice_set_key(u32 index, const u8 *key, u32 key_size, - enum qcom_scm_ice_cipher cipher, - u32 data_unit_size) { return -ENODEV; } - -static inline bool qcom_scm_hdcp_available(void) { return false; } -static 
inline int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt, - u32 *resp) { return -ENODEV; } - -static inline int qcom_scm_qsmmu500_wait_safe_toggle(bool en) - { return -ENODEV; } - -static inline int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val, - u64 limit_node, u32 node_id, u64 version) - { return -ENODEV; } - -static inline int qcom_scm_lmh_profile_change(u32 profile_id) { return -ENODEV; } - -static inline bool qcom_scm_lmh_dcvsh_available(void) { return -ENODEV; } -#endif #endif From c2e99d47973796c3fafd13079337dcadecd49d8a Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 26 Sep 2021 21:55:02 +0900 Subject: [PATCH 139/235] ksmbd: check strictly data area in ksmbd_smb2_check_message() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When invalid data offset and data length in request, ksmbd_smb2_check_message check strictly and doesn't allow to process such requests. Cc: Tom Talpey Cc: Ronnie Sahlberg Cc: Ralph Böhme Acked-by: Hyunchul Lee Reviewed-by: Ralph Boehme Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/smb2misc.c | 98 ++++++++++++++++++++++----------------------- 1 file changed, 47 insertions(+), 51 deletions(-) diff --git a/fs/ksmbd/smb2misc.c b/fs/ksmbd/smb2misc.c index 9aa46bb3e10d..9edd9c161b27 100644 --- a/fs/ksmbd/smb2misc.c +++ b/fs/ksmbd/smb2misc.c @@ -80,18 +80,21 @@ static const bool has_smb2_data_area[NUMBER_OF_SMB2_COMMANDS] = { }; /* - * Returns the pointer to the beginning of the data area. Length of the data - * area and the offset to it (from the beginning of the smb are also returned. + * Set length of the data area and the offset to arguments. + * if they are invalid, return error. 
*/ -static char *smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr) +static int smb2_get_data_area_len(unsigned int *off, unsigned int *len, + struct smb2_hdr *hdr) { + int ret = 0; + *off = 0; *len = 0; /* error reqeusts do not have data area */ if (hdr->Status && hdr->Status != STATUS_MORE_PROCESSING_REQUIRED && (((struct smb2_err_rsp *)hdr)->StructureSize) == SMB2_ERROR_STRUCTURE_SIZE2_LE) - return NULL; + return ret; /* * Following commands have data areas so we have to get the location @@ -165,69 +168,60 @@ static char *smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr) case SMB2_IOCTL: *off = le32_to_cpu(((struct smb2_ioctl_req *)hdr)->InputOffset); *len = le32_to_cpu(((struct smb2_ioctl_req *)hdr)->InputCount); - break; default: ksmbd_debug(SMB, "no length check for command\n"); break; } - /* - * Invalid length or offset probably means data area is invalid, but - * we have little choice but to ignore the data area in this case. - */ if (*off > 4096) { - ksmbd_debug(SMB, "offset %d too large, data area ignored\n", - *off); - *len = 0; - *off = 0; - } else if (*off < 0) { - ksmbd_debug(SMB, - "negative offset %d to data invalid ignore data area\n", - *off); - *off = 0; - *len = 0; - } else if (*len < 0) { - ksmbd_debug(SMB, - "negative data length %d invalid, data area ignored\n", - *len); - *len = 0; - } else if (*len > 128 * 1024) { - ksmbd_debug(SMB, "data area larger than 128K: %d\n", *len); - *len = 0; + ksmbd_debug(SMB, "offset %d too large\n", *off); + ret = -EINVAL; + } else if ((u64)*off + *len > MAX_STREAM_PROT_LEN) { + ksmbd_debug(SMB, "Request is larger than maximum stream protocol length(%u): %llu\n", + MAX_STREAM_PROT_LEN, (u64)*off + *len); + ret = -EINVAL; } - /* return pointer to beginning of data area, ie offset from SMB start */ - if ((*off != 0) && (*len != 0)) - return (char *)hdr + *off; - else - return NULL; + return ret; } /* * Calculate the size of the SMB message based on the fixed header * portion, the 
number of word parameters and the data portion of the message. */ -static unsigned int smb2_calc_size(void *buf) +static int smb2_calc_size(void *buf, unsigned int *len) { struct smb2_pdu *pdu = (struct smb2_pdu *)buf; struct smb2_hdr *hdr = &pdu->hdr; - int offset; /* the offset from the beginning of SMB to data area */ - int data_length; /* the length of the variable length data area */ + unsigned int offset; /* the offset from the beginning of SMB to data area */ + unsigned int data_length; /* the length of the variable length data area */ + int ret; + /* Structure Size has already been checked to make sure it is 64 */ - int len = le16_to_cpu(hdr->StructureSize); + *len = le16_to_cpu(hdr->StructureSize); /* * StructureSize2, ie length of fixed parameter area has already * been checked to make sure it is the correct length. */ - len += le16_to_cpu(pdu->StructureSize2); + *len += le16_to_cpu(pdu->StructureSize2); + /* + * StructureSize2 of smb2_lock pdu is set to 48, indicating + * the size of smb2 lock request with single smb2_lock_element + * regardless of number of locks. Subtract single + * smb2_lock_element for correct buffer size check. + */ + if (hdr->Command == SMB2_LOCK) + *len -= sizeof(struct smb2_lock_element); if (has_smb2_data_area[le16_to_cpu(hdr->Command)] == false) goto calc_size_exit; - smb2_get_data_area_len(&offset, &data_length, hdr); - ksmbd_debug(SMB, "SMB2 data length %d offset %d\n", data_length, + ret = smb2_get_data_area_len(&offset, &data_length, hdr); + if (ret) + return ret; + ksmbd_debug(SMB, "SMB2 data length %u offset %u\n", data_length, offset); if (data_length > 0) { @@ -237,16 +231,19 @@ static unsigned int smb2_calc_size(void *buf) * for some commands, typically those with odd StructureSize, * so we must add one to the calculation. 
*/ - if (offset + 1 < len) + if (offset + 1 < *len) { ksmbd_debug(SMB, - "data area offset %d overlaps SMB2 header %d\n", - offset + 1, len); - else - len = offset + data_length; + "data area offset %d overlaps SMB2 header %u\n", + offset + 1, *len); + return -EINVAL; + } + + *len = offset + data_length; } + calc_size_exit: - ksmbd_debug(SMB, "SMB2 len %d\n", len); - return len; + ksmbd_debug(SMB, "SMB2 len %u\n", *len); + return 0; } static inline int smb2_query_info_req_len(struct smb2_query_info_req *h) @@ -391,9 +388,11 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work) return 1; } - clc_len = smb2_calc_size(hdr); + if (smb2_calc_size(hdr, &clc_len)) + return 1; + if (len != clc_len) { - /* server can return one byte more due to implied bcc[0] */ + /* client can return one byte more due to implied bcc[0] */ if (clc_len == len + 1) return 0; @@ -418,9 +417,6 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work) return 0; } - if (command == SMB2_LOCK_HE && len == 88) - return 0; - ksmbd_debug(SMB, "cli req too short, len %d not %d. cmd:%d mid:%llu\n", len, clc_len, command, From 51a1387393d98c2ba52d53d089720fa9f1463178 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 29 Sep 2021 13:09:24 +0900 Subject: [PATCH 140/235] ksmbd: remove the leftover of smb2.0 dialect support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Although ksmbd doesn't send SMB2.0 support in supported dialect list of smb negotiate response, There is the leftover of smb2.0 dialect. This patch remove it not to support SMB2.0 in ksmbd. 
Cc: Tom Talpey Cc: Ronnie Sahlberg Cc: Ralph Böhme Cc: Hyunchul Lee Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/smb2ops.c | 5 ----- fs/ksmbd/smb2pdu.c | 34 +++++++++------------------------- fs/ksmbd/smb2pdu.h | 1 - fs/ksmbd/smb_common.c | 6 +++--- 4 files changed, 12 insertions(+), 34 deletions(-) diff --git a/fs/ksmbd/smb2ops.c b/fs/ksmbd/smb2ops.c index 197473871aa4..b06456eb587b 100644 --- a/fs/ksmbd/smb2ops.c +++ b/fs/ksmbd/smb2ops.c @@ -187,11 +187,6 @@ static struct smb_version_cmds smb2_0_server_cmds[NUMBER_OF_SMB2_COMMANDS] = { [SMB2_CHANGE_NOTIFY_HE] = { .proc = smb2_notify}, }; -int init_smb2_0_server(struct ksmbd_conn *conn) -{ - return -EOPNOTSUPP; -} - /** * init_smb2_1_server() - initialize a smb server connection with smb2.1 * command dispatcher diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index dcf907738610..cd2313af811d 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -236,9 +236,6 @@ int init_smb2_neg_rsp(struct ksmbd_work *work) if (conn->need_neg == false) return -EINVAL; - if (!(conn->dialect >= SMB20_PROT_ID && - conn->dialect <= SMB311_PROT_ID)) - return -EINVAL; rsp_hdr = work->response_buf; @@ -1166,13 +1163,6 @@ int smb2_handle_negotiate(struct ksmbd_work *work) case SMB21_PROT_ID: init_smb2_1_server(conn); break; - case SMB20_PROT_ID: - rc = init_smb2_0_server(conn); - if (rc) { - rsp->hdr.Status = STATUS_NOT_SUPPORTED; - goto err_out; - } - break; case SMB2X_PROT_ID: case BAD_PROT_ID: default: @@ -1191,11 +1181,9 @@ int smb2_handle_negotiate(struct ksmbd_work *work) rsp->MaxReadSize = cpu_to_le32(conn->vals->max_read_size); rsp->MaxWriteSize = cpu_to_le32(conn->vals->max_write_size); - if (conn->dialect > SMB20_PROT_ID) { - memcpy(conn->ClientGUID, req->ClientGUID, - SMB2_CLIENT_GUID_SIZE); - conn->cli_sec_mode = le16_to_cpu(req->SecurityMode); - } + memcpy(conn->ClientGUID, req->ClientGUID, + SMB2_CLIENT_GUID_SIZE); + conn->cli_sec_mode = le16_to_cpu(req->SecurityMode); rsp->StructureSize = 
cpu_to_le16(65); rsp->DialectRevision = cpu_to_le16(conn->dialect); @@ -1537,11 +1525,9 @@ static int ntlm_authenticate(struct ksmbd_work *work) } } - if (conn->dialect > SMB20_PROT_ID) { - if (!ksmbd_conn_lookup_dialect(conn)) { - pr_err("fail to verify the dialect\n"); - return -ENOENT; - } + if (!ksmbd_conn_lookup_dialect(conn)) { + pr_err("fail to verify the dialect\n"); + return -ENOENT; } return 0; } @@ -1623,11 +1609,9 @@ static int krb5_authenticate(struct ksmbd_work *work) } } - if (conn->dialect > SMB20_PROT_ID) { - if (!ksmbd_conn_lookup_dialect(conn)) { - pr_err("fail to verify the dialect\n"); - return -ENOENT; - } + if (!ksmbd_conn_lookup_dialect(conn)) { + pr_err("fail to verify the dialect\n"); + return -ENOENT; } return 0; } diff --git a/fs/ksmbd/smb2pdu.h b/fs/ksmbd/smb2pdu.h index 261825d06391..a6dec5ec6a54 100644 --- a/fs/ksmbd/smb2pdu.h +++ b/fs/ksmbd/smb2pdu.h @@ -1637,7 +1637,6 @@ struct smb2_posix_info { } __packed; /* functions */ -int init_smb2_0_server(struct ksmbd_conn *conn); void init_smb2_1_server(struct ksmbd_conn *conn); void init_smb3_0_server(struct ksmbd_conn *conn); void init_smb3_02_server(struct ksmbd_conn *conn); diff --git a/fs/ksmbd/smb_common.c b/fs/ksmbd/smb_common.c index b6c4c7e960fa..707490ab1f4c 100644 --- a/fs/ksmbd/smb_common.c +++ b/fs/ksmbd/smb_common.c @@ -88,7 +88,7 @@ unsigned int ksmbd_server_side_copy_max_total_size(void) inline int ksmbd_min_protocol(void) { - return SMB2_PROT; + return SMB21_PROT; } inline int ksmbd_max_protocol(void) @@ -427,7 +427,7 @@ int ksmbd_extract_shortname(struct ksmbd_conn *conn, const char *longname, static int __smb2_negotiate(struct ksmbd_conn *conn) { - return (conn->dialect >= SMB20_PROT_ID && + return (conn->dialect >= SMB21_PROT_ID && conn->dialect <= SMB311_PROT_ID); } @@ -457,7 +457,7 @@ int ksmbd_smb_negotiate_common(struct ksmbd_work *work, unsigned int command) } } - if (command == SMB2_NEGOTIATE_HE) { + if (command == SMB2_NEGOTIATE_HE && __smb2_negotiate(conn)) { ret 
= smb2_handle_negotiate(work); init_smb2_neg_rsp(work); return ret; From c7705eec78c999485609274c7ac5c27def8b84f1 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Mon, 4 Oct 2021 20:44:52 +0900 Subject: [PATCH 141/235] ksmbd: use buf_data_size instead of recalculation in smb3_decrypt_req() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tom suggested to use buf_data_size that is already calculated, to verify these offsets. Cc: Tom Talpey Cc: Ronnie Sahlberg Cc: Ralph Böhme Suggested-by: Tom Talpey Acked-by: Hyunchul Lee Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/smb2pdu.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index cd2313af811d..721b9a89c2d4 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -8395,20 +8395,18 @@ int smb3_decrypt_req(struct ksmbd_work *work) struct smb2_hdr *hdr; unsigned int pdu_length = get_rfc1002_len(buf); struct kvec iov[2]; - unsigned int buf_data_size = pdu_length + 4 - + int buf_data_size = pdu_length + 4 - sizeof(struct smb2_transform_hdr); struct smb2_transform_hdr *tr_hdr = (struct smb2_transform_hdr *)buf; int rc = 0; - if (pdu_length + 4 < - sizeof(struct smb2_transform_hdr) + sizeof(struct smb2_hdr)) { + if (buf_data_size < sizeof(struct smb2_hdr)) { pr_err("Transform message is too small (%u)\n", pdu_length); return -ECONNABORTED; } - if (pdu_length + 4 < - le32_to_cpu(tr_hdr->OriginalMessageSize) + sizeof(struct smb2_transform_hdr)) { + if (buf_data_size < le32_to_cpu(tr_hdr->OriginalMessageSize)) { pr_err("Transform message is broken\n"); return -ECONNABORTED; } From 2db72604f3eaebd6175548bf64372e163724ebe3 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Fri, 1 Oct 2021 11:53:49 +0900 Subject: [PATCH 142/235] ksmbd: fix version mismatch with out of tree MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix version mismatch with out of tree, 
This updated version will be matched with ksmbd-tools. Cc: Tom Talpey Cc: Ronnie Sahlberg Cc: Ralph Böhme Cc: Steve French Cc: Hyunchul Lee Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/glob.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ksmbd/glob.h b/fs/ksmbd/glob.h index 49a5a3afa118..5b8f3e0ebdb3 100644 --- a/fs/ksmbd/glob.h +++ b/fs/ksmbd/glob.h @@ -12,7 +12,7 @@ #include "unicode.h" #include "vfs_cache.h" -#define KSMBD_VERSION "3.1.9" +#define KSMBD_VERSION "3.4.2" extern int ksmbd_debug_types; From 64e7875560270b8f669fca9fcd6a689fea56fbeb Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 3 Oct 2021 13:19:00 +0900 Subject: [PATCH 143/235] ksmbd: fix oops from fuse driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Marios reported kernel oops from fuse driver when ksmbd calls mark_inode_dirty(). This patch directly updates ->i_ctime after removing mark_inode_dirty() and notify_change will put inode to dirty list. 
Cc: Tom Talpey Cc: Ronnie Sahlberg Cc: Ralph Böhme Cc: Hyunchul Lee Reported-by: Marios Makassikis Tested-by: Marios Makassikis Acked-by: Hyunchul Lee Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/smb2pdu.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 721b9a89c2d4..005aa93a49d6 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -5483,7 +5483,6 @@ static int set_file_basic_info(struct ksmbd_file *fp, struct ksmbd_share_config *share) { struct iattr attrs; - struct timespec64 ctime; struct file *filp; struct inode *inode; struct user_namespace *user_ns; @@ -5505,13 +5504,11 @@ static int set_file_basic_info(struct ksmbd_file *fp, attrs.ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET); } - if (file_info->ChangeTime) { + attrs.ia_valid |= ATTR_CTIME; + if (file_info->ChangeTime) attrs.ia_ctime = ksmbd_NTtimeToUnix(file_info->ChangeTime); - ctime = attrs.ia_ctime; - attrs.ia_valid |= ATTR_CTIME; - } else { - ctime = inode->i_ctime; - } + else + attrs.ia_ctime = inode->i_ctime; if (file_info->LastWriteTime) { attrs.ia_mtime = ksmbd_NTtimeToUnix(file_info->LastWriteTime); @@ -5557,11 +5554,9 @@ static int set_file_basic_info(struct ksmbd_file *fp, return -EACCES; inode_lock(inode); + inode->i_ctime = attrs.ia_ctime; + attrs.ia_valid &= ~ATTR_CTIME; rc = notify_change(user_ns, dentry, &attrs, NULL); - if (!rc) { - inode->i_ctime = ctime; - mark_inode_dirty(inode); - } inode_unlock(inode); } return rc; From 3fb937f441c64af1eec60bfd3732f64001fcc534 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 1 Oct 2021 15:58:10 +0200 Subject: [PATCH 144/235] PCI: ACPI: Check parent pointer in acpi_pci_find_companion() If acpi_pci_find_companion() is called for a device whose parent pointer is NULL, it will crash when attempting to get the ACPI companion of the parent due to a NULL pointer dereference in the ACPI_COMPANION() macro. 
This was not a problem before commit 375553a93201 ("PCI: Setup ACPI fwnode early and at the same time with OF") that made pci_setup_device() call pci_set_acpi_fwnode() and so it allowed devices with NULL parent pointers to be passed to acpi_pci_find_companion() which is the case in pci_iov_add_virtfn(), for instance. Fix this issue by making acpi_pci_find_companion() check the device's parent pointer upfront and bail out if it is NULL. While pci_iov_add_virtfn() can be changed to set the device's parent pointer before calling pci_setup_device() for it, checking pointers against NULL before dereferencing them is prudent anyway and looking for ACPI companions of virtual functions isn't really useful. Fixes: 375553a93201 ("PCI: Setup ACPI fwnode early and at the same time with OF") Link: https://lore.kernel.org/linux-acpi/8e4bbd5c59de31db71f718556654c0aa077df03d.camel@linux.ibm.com/ Reported-by: Niklas Schnelle Tested-by: Niklas Schnelle Signed-off-by: Rafael J. Wysocki Acked-by: Bjorn Helgaas --- drivers/pci/pci-acpi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 0f40943a9a18..260a06fb78a6 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -1249,6 +1249,9 @@ static struct acpi_device *acpi_pci_find_companion(struct device *dev) bool check_children; u64 addr; + if (!dev->parent) + return NULL; + down_read(&pci_acpi_companion_lookup_sem); adev = pci_acpi_find_companion_hook ? From 19ea40dddf1833db868533958ca066f368862211 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 14 Sep 2021 14:57:59 +0800 Subject: [PATCH 145/235] btrfs: unlock newly allocated extent buffer after error [BUG] There is a bug report that injected ENOMEM error could leave a tree block locked while we return to user-space: BTRFS info (device loop0): enabling ssd optimizations FAULT_INJECTION: forcing a failure. 
name failslab, interval 1, probability 0, space 0, times 0 CPU: 0 PID: 7579 Comm: syz-executor Not tainted 5.15.0-rc1 #16 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x8d/0xcf lib/dump_stack.c:106 fail_dump lib/fault-inject.c:52 [inline] should_fail+0x13c/0x160 lib/fault-inject.c:146 should_failslab+0x5/0x10 mm/slab_common.c:1328 slab_pre_alloc_hook.constprop.99+0x4e/0xc0 mm/slab.h:494 slab_alloc_node mm/slub.c:3120 [inline] slab_alloc mm/slub.c:3214 [inline] kmem_cache_alloc+0x44/0x280 mm/slub.c:3219 btrfs_alloc_delayed_extent_op fs/btrfs/delayed-ref.h:299 [inline] btrfs_alloc_tree_block+0x38c/0x670 fs/btrfs/extent-tree.c:4833 __btrfs_cow_block+0x16f/0x7d0 fs/btrfs/ctree.c:415 btrfs_cow_block+0x12a/0x300 fs/btrfs/ctree.c:570 btrfs_search_slot+0x6b0/0xee0 fs/btrfs/ctree.c:1768 btrfs_insert_empty_items+0x80/0xf0 fs/btrfs/ctree.c:3905 btrfs_new_inode+0x311/0xa60 fs/btrfs/inode.c:6530 btrfs_create+0x12b/0x270 fs/btrfs/inode.c:6783 lookup_open+0x660/0x780 fs/namei.c:3282 open_last_lookups fs/namei.c:3352 [inline] path_openat+0x465/0xe20 fs/namei.c:3557 do_filp_open+0xe3/0x170 fs/namei.c:3588 do_sys_openat2+0x357/0x4a0 fs/open.c:1200 do_sys_open+0x87/0xd0 fs/open.c:1216 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x34/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x46ae99 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f46711b9c48 EFLAGS: 00000246 ORIG_RAX: 0000000000000055 RAX: ffffffffffffffda RBX: 000000000078c0a0 RCX: 000000000046ae99 RDX: 0000000000000000 RSI: 00000000000000a1 RDI: 0000000020005800 RBP: 00007f46711b9c80 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 
0000000000000246 R12: 0000000000000017 R13: 0000000000000000 R14: 000000000078c0a0 R15: 00007ffc129da6e0 ================================================ WARNING: lock held when returning to user space! 5.15.0-rc1 #16 Not tainted ------------------------------------------------ syz-executor/7579 is leaving the kernel with locks still held! 1 lock held by syz-executor/7579: #0: ffff888104b73da8 (btrfs-tree-01/1){+.+.}-{3:3}, at: __btrfs_tree_lock+0x2e/0x1a0 fs/btrfs/locking.c:112 [CAUSE] In btrfs_alloc_tree_block(), after btrfs_init_new_buffer(), the new extent buffer @buf is locked, but if later operations like adding delayed tree ref fail, we just free @buf without unlocking it, resulting above warning. [FIX] Unlock @buf in out_free_buf: label. Reported-by: Hao Sun Link: https://lore.kernel.org/linux-btrfs/CACkBjsZ9O6Zr0KK1yGn=1rQi6Crh1yeCRdTSBxx9R99L4xdn-Q@mail.gmail.com/ CC: stable@vger.kernel.org # 5.4+ Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fc3da7585fb7..0ab456cb4bf8 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4859,6 +4859,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, out_free_delayed: btrfs_free_delayed_extent_op(extent_op); out_free_buf: + btrfs_tree_unlock(buf); free_extent_buffer(buf); out_free_reserved: btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 0); From d175209be04d7d263fa1a54cde7608c706c9d0d7 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 1 Oct 2021 13:57:18 -0400 Subject: [PATCH 146/235] btrfs: update refs for any root except tree log roots I hit a stuck relocation on btrfs/061 during my overnight testing. This turned out to be because we had left over extent entries in our extent root for a data reloc inode that no longer existed. 
This happened because in btrfs_drop_extents() we only update refs if we have SHAREABLE set or we are the tree_root. This regression was introduced by aeb935a45581 ("btrfs: don't set SHAREABLE flag for data reloc tree") where we stopped setting SHAREABLE for the data reloc tree. The problem here is we actually do want to update extent references for data extents in the data reloc tree, in fact we only don't want to update extent references if the file extents are in the log tree. Update this check to only skip updating references in the case of the log tree. This is relatively rare, because you have to be running scrub at the same time, which is what btrfs/061 does. The data reloc inode has its extents pre-allocated, and then we copy the extent into the pre-allocated chunks. We theoretically should never be calling btrfs_drop_extents() on a data reloc inode. The exception of course is with scrub, if our pre-allocated extent falls inside of the block group we are scrubbing, then the block group will be marked read only and we will be forced to cow that extent. This means we will call btrfs_drop_extents() on that range when we COW that file extent. This isn't really problematic if we do this, the data reloc inode requires that our extent lengths match exactly with the extent we are copying, thankfully we validate the extent is correct with get_new_location(), so if we happen to COW only part of the extent we won't link it in when we do the relocation, so we are safe from any other shenanigans that arise because of this interaction with scrub. 
Fixes: aeb935a45581 ("btrfs: don't set SHAREABLE flag for data reloc tree") CC: stable@vger.kernel.org # 5.8+ Reviewed-by: Qu Wenruo Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/file.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 7ff577005d0f..a949cb2894f4 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -734,8 +734,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, if (args->start >= inode->disk_i_size && !args->replace_extent) modify_tree = 0; - update_refs = (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) || - root == fs_info->tree_root); + update_refs = (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID); while (1) { recow = 0; ret = btrfs_lookup_file_extent(trans, root, path, ino, From 77a5b9e3d14cbce49ceed2766b2003c034c066dc Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 1 Oct 2021 13:52:30 +0100 Subject: [PATCH 147/235] btrfs: deal with errors when checking if a dir entry exists during log replay Currently inode_in_dir() ignores errors returned from btrfs_lookup_dir_index_item() and from btrfs_lookup_dir_item(), treating any errors as if the directory entry does not exists in the fs/subvolume tree, which is obviously not correct, as we can get errors such as -EIO when reading extent buffers while searching the fs/subvolume's tree. Fix that by making inode_in_dir() return the errors and making its only caller, add_inode_ref(), deal with returned errors as well. 
Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 47 ++++++++++++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index f7efc26aa82a..491d4ba22cfc 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -939,9 +939,11 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, } /* - * helper function to see if a given name and sequence number found - * in an inode back reference are already in a directory and correctly - * point to this inode + * See if a given name and sequence number found in an inode back reference are + * already in a directory and correctly point to this inode. + * + * Returns: < 0 on error, 0 if the directory entry does not exists and 1 if it + * exists. */ static noinline int inode_in_dir(struct btrfs_root *root, struct btrfs_path *path, @@ -950,29 +952,35 @@ static noinline int inode_in_dir(struct btrfs_root *root, { struct btrfs_dir_item *di; struct btrfs_key location; - int match = 0; + int ret = 0; di = btrfs_lookup_dir_index_item(NULL, root, path, dirid, index, name, name_len, 0); - if (di && !IS_ERR(di)) { + if (IS_ERR(di)) { + if (PTR_ERR(di) != -ENOENT) + ret = PTR_ERR(di); + goto out; + } else if (di) { btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location); if (location.objectid != objectid) goto out; - } else + } else { goto out; - btrfs_release_path(path); + } + btrfs_release_path(path); di = btrfs_lookup_dir_item(NULL, root, path, dirid, name, name_len, 0); - if (di && !IS_ERR(di)) { - btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location); - if (location.objectid != objectid) - goto out; - } else + if (IS_ERR(di)) { + ret = PTR_ERR(di); goto out; - match = 1; + } else if (di) { + btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location); + if (location.objectid == objectid) + ret = 1; + } out: btrfs_release_path(path); - return match; + return ret; } /* @@ 
-1517,10 +1525,12 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, if (ret) goto out; - /* if we already have a perfect match, we're done */ - if (!inode_in_dir(root, path, btrfs_ino(BTRFS_I(dir)), - btrfs_ino(BTRFS_I(inode)), ref_index, - name, namelen)) { + ret = inode_in_dir(root, path, btrfs_ino(BTRFS_I(dir)), + btrfs_ino(BTRFS_I(inode)), ref_index, + name, namelen); + if (ret < 0) { + goto out; + } else if (ret == 0) { /* * look for a conflicting back reference in the * metadata. if we find one we have to unlink that name @@ -1580,6 +1590,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, if (ret) goto out; } + /* Else, ret == 1, we already have a perfect match, we're done. */ ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + namelen; kfree(name); From e15ac6413745e3def00e663de00aea5a717311c1 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 1 Oct 2021 13:52:31 +0100 Subject: [PATCH 148/235] btrfs: deal with errors when replaying dir entry during log replay At replay_one_name(), we are treating any error returned from btrfs_lookup_dir_item() or from btrfs_lookup_dir_index_item() as meaning that there is no existing directory entry in the fs/subvolume tree. This is not correct since we can get errors such as, for example, -EIO when reading extent buffers while searching the fs/subvolume's btree. So fix that and return the error to the caller when it is not -ENOENT. 
CC: stable@vger.kernel.org # 4.14+ Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 491d4ba22cfc..007b5051b2f1 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1988,7 +1988,14 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, ret = -EINVAL; goto out; } - if (IS_ERR_OR_NULL(dst_di)) { + + if (dst_di == ERR_PTR(-ENOENT)) + dst_di = NULL; + + if (IS_ERR(dst_di)) { + ret = PTR_ERR(dst_di); + goto out; + } else if (!dst_di) { /* we need a sequence number to insert, so we only * do inserts for the BTRFS_DIR_INDEX_KEY types */ From 52db77791fe24538c8aa2a183248399715f6b380 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 1 Oct 2021 13:52:32 +0100 Subject: [PATCH 149/235] btrfs: deal with errors when adding inode reference during log replay At __add_inode_ref(), we are treating any error returned from btrfs_lookup_dir_item() or from btrfs_lookup_dir_index_item() as meaning that there is no existing directory entry in the fs/subvolume tree. This is not correct since we can get errors such as, for example, -EIO when reading extent buffers while searching the fs/subvolume's btree. So fix that and return the error to the caller when it is not -ENOENT. 
CC: stable@vger.kernel.org # 4.14+ Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 007b5051b2f1..1b6008240ec6 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1190,7 +1190,10 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans, /* look for a conflicting sequence number */ di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir), ref_index, name, namelen, 0); - if (di && !IS_ERR(di)) { + if (IS_ERR(di)) { + if (PTR_ERR(di) != -ENOENT) + return PTR_ERR(di); + } else if (di) { ret = drop_one_dir_item(trans, root, path, dir, di); if (ret) return ret; @@ -1200,7 +1203,9 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans, /* look for a conflicting name */ di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir), name, namelen, 0); - if (di && !IS_ERR(di)) { + if (IS_ERR(di)) { + return PTR_ERR(di); + } else if (di) { ret = drop_one_dir_item(trans, root, path, dir, di); if (ret) return ret; From 8dcbc26194eb872cc3430550fb70bb461424d267 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 1 Oct 2021 13:52:33 +0100 Subject: [PATCH 150/235] btrfs: unify lookup return value when dir entry is missing btrfs_lookup_dir_index_item() and btrfs_lookup_dir_item() lookup for dir entries and both are used during log replay or when updating a log tree during an unlink. However when the dir item does not exists, btrfs_lookup_dir_item() returns NULL while btrfs_lookup_dir_index_item() returns PTR_ERR(-ENOENT), and if the dir item exists but there is no matching entry for a given name or index, both return NULL. This makes the call sites during log replay to be more verbose than necessary and it makes it easy to miss this slight difference. 
Since we don't need to distinguish between those two cases, make btrfs_lookup_dir_index_item() always return NULL when there is no matching directory entry - either because there isn't any dir entry or because there is one but it does not match the given name and index. Also rename the argument 'objectid' of btrfs_lookup_dir_index_item() to 'index' since it is supposed to match an index number, and the name 'objectid' is not very good because it can easily be confused with an inode number (like the inode number a dir entry points to). CC: stable@vger.kernel.org # 4.14+ Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 +- fs/btrfs/dir-item.c | 48 ++++++++++++++++++++++++++++++++++----------- fs/btrfs/tree-log.c | 14 ++++--------- 3 files changed, 42 insertions(+), 22 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f07c82fafa04..852a49dcc22b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3030,7 +3030,7 @@ struct btrfs_dir_item * btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 dir, - u64 objectid, const char *name, int name_len, + u64 index, const char *name, int name_len, int mod); struct btrfs_dir_item * btrfs_search_dir_index_item(struct btrfs_root *root, diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index f1274d5c3805..7721ce0c0604 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -190,9 +190,20 @@ static struct btrfs_dir_item *btrfs_lookup_match_dir( } /* - * lookup a directory item based on name. 'dir' is the objectid - * we're searching in, and 'mod' tells us if you plan on deleting the - * item (use mod < 0) or changing the options (use mod > 0) + * Lookup for a directory item by name. + * + * @trans: The transaction handle to use. Can be NULL if @mod is 0. + * @root: The root of the target tree. + * @path: Path to use for the search. 
+ * @dir: The inode number (objectid) of the directory. + * @name: The name associated to the directory entry we are looking for. + * @name_len: The length of the name. + * @mod: Used to indicate if the tree search is meant for a read only + * lookup, for a modification lookup or for a deletion lookup, so + * its value should be 0, 1 or -1, respectively. + * + * Returns: NULL if the dir item does not exists, an error pointer if an error + * happened, or a pointer to a dir item if a dir item exists for the given name. */ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -273,27 +284,42 @@ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir, } /* - * lookup a directory item based on index. 'dir' is the objectid - * we're searching in, and 'mod' tells us if you plan on deleting the - * item (use mod < 0) or changing the options (use mod > 0) + * Lookup for a directory index item by name and index number. * - * The name is used to make sure the index really points to the name you were - * looking for. + * @trans: The transaction handle to use. Can be NULL if @mod is 0. + * @root: The root of the target tree. + * @path: Path to use for the search. + * @dir: The inode number (objectid) of the directory. + * @index: The index number. + * @name: The name associated to the directory entry we are looking for. + * @name_len: The length of the name. + * @mod: Used to indicate if the tree search is meant for a read only + * lookup, for a modification lookup or for a deletion lookup, so + * its value should be 0, 1 or -1, respectively. + * + * Returns: NULL if the dir index item does not exists, an error pointer if an + * error happened, or a pointer to a dir item if the dir index item exists and + * matches the criteria (name and index number). 
*/ struct btrfs_dir_item * btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 dir, - u64 objectid, const char *name, int name_len, + u64 index, const char *name, int name_len, int mod) { + struct btrfs_dir_item *di; struct btrfs_key key; key.objectid = dir; key.type = BTRFS_DIR_INDEX_KEY; - key.offset = objectid; + key.offset = index; - return btrfs_lookup_match_dir(trans, root, path, &key, name, name_len, mod); + di = btrfs_lookup_match_dir(trans, root, path, &key, name, name_len, mod); + if (di == ERR_PTR(-ENOENT)) + return NULL; + + return di; } struct btrfs_dir_item * diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 1b6008240ec6..1bb5ebf13383 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -957,8 +957,7 @@ static noinline int inode_in_dir(struct btrfs_root *root, di = btrfs_lookup_dir_index_item(NULL, root, path, dirid, index, name, name_len, 0); if (IS_ERR(di)) { - if (PTR_ERR(di) != -ENOENT) - ret = PTR_ERR(di); + ret = PTR_ERR(di); goto out; } else if (di) { btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location); @@ -1191,8 +1190,7 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans, di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir), ref_index, name, namelen, 0); if (IS_ERR(di)) { - if (PTR_ERR(di) != -ENOENT) - return PTR_ERR(di); + return PTR_ERR(di); } else if (di) { ret = drop_one_dir_item(trans, root, path, dir, di); if (ret) @@ -1994,9 +1992,6 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, goto out; } - if (dst_di == ERR_PTR(-ENOENT)) - dst_di = NULL; - if (IS_ERR(dst_di)) { ret = PTR_ERR(dst_di); goto out; @@ -2304,7 +2299,7 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans, dir_key->offset, name, name_len, 0); } - if (!log_di || log_di == ERR_PTR(-ENOENT)) { + if (!log_di) { btrfs_dir_item_key_to_cpu(eb, di, &location); btrfs_release_path(path); btrfs_release_path(log_path); 
@@ -3563,8 +3558,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, if (err == -ENOSPC) { btrfs_set_log_full_commit(trans); err = 0; - } else if (err < 0 && err != -ENOENT) { - /* ENOENT can be returned if the entry hasn't been fsynced yet */ + } else if (err < 0) { btrfs_abort_transaction(trans, err); } From cfd312695b71df04c3a2597859ff12c470d1e2e4 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 1 Oct 2021 13:48:18 +0100 Subject: [PATCH 151/235] btrfs: check for error when looking up inode during dir entry replay At replay_one_name(), we are treating any error from btrfs_lookup_inode() as if the inode does not exists. Fix this by checking for an error and returning it to the caller. CC: stable@vger.kernel.org # 4.14+ Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 1bb5ebf13383..b415c5ec03ea 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1950,8 +1950,8 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, struct btrfs_key log_key; struct inode *dir; u8 log_type; - int exists; - int ret = 0; + bool exists; + int ret; bool update_size = (key->type == BTRFS_DIR_INDEX_KEY); bool name_added = false; @@ -1971,12 +1971,12 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, name_len); btrfs_dir_item_key_to_cpu(eb, di, &log_key); - exists = btrfs_lookup_inode(trans, root, path, &log_key, 0); - if (exists == 0) - exists = 1; - else - exists = 0; + ret = btrfs_lookup_inode(trans, root, path, &log_key, 0); btrfs_release_path(path); + if (ret < 0) + goto out; + exists = (ret == 0); + ret = 0; if (key->type == BTRFS_DIR_ITEM_KEY) { dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid, From 4afb912f439c4bc4e6a4f3e7547f2e69e354108f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 5 Oct 2021 
16:35:27 -0400 Subject: [PATCH 152/235] btrfs: fix abort logic in btrfs_replace_file_extents Error injection testing uncovered a case where we'd end up with a corrupt file system with a missing extent in the middle of a file. This occurs because the if statement to decide if we should abort is wrong. The only way we would abort in this case is if we got a ret != -EOPNOTSUPP and we called from the file clone code. However the prealloc code uses this path too. Instead we need to abort if there is an error, and the only error we _don't_ abort on is -EOPNOTSUPP and only if we came from the clone file code. CC: stable@vger.kernel.org # 5.10+ Reviewed-by: Nikolay Borisov Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/file.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index a949cb2894f4..a1762363f61f 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2703,14 +2703,16 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode, drop_args.bytes_found); if (ret != -ENOSPC) { /* - * When cloning we want to avoid transaction aborts when - * nothing was done and we are attempting to clone parts - * of inline extents, in such cases -EOPNOTSUPP is - * returned by __btrfs_drop_extents() without having - * changed anything in the file. + * The only time we don't want to abort is if we are + * attempting to clone a partial inline extent, in which + * case we'll get EOPNOTSUPP. However if we aren't + * clone we need to abort no matter what, because if we + * got EOPNOTSUPP via prealloc then we messed up and + * need to abort. 
*/ - if (extent_info && !extent_info->is_new_extent && - ret && ret != -EOPNOTSUPP) + if (ret && + (ret != -EOPNOTSUPP || + (extent_info && extent_info->is_new_extent))) btrfs_abort_transaction(trans, ret); break; } From 3ef6ca4f354c53abf263cbeb51e7272523c294d8 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Sat, 2 Oct 2021 16:57:13 -0700 Subject: [PATCH 153/235] checksyscalls: Unconditionally ignore fstat{,at}64 These can be replaced by statx(). Since rv32 has a 64-bit time_t we just never ended up with them in the first place. This is now an error due to -Werror. Suggested-by: Arnd Bergmann Reviewed-by: Arnd Bergmann Signed-off-by: Palmer Dabbelt --- scripts/checksyscalls.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/checksyscalls.sh b/scripts/checksyscalls.sh index fd9777f63f14..9dbab13329fa 100755 --- a/scripts/checksyscalls.sh +++ b/scripts/checksyscalls.sh @@ -82,10 +82,8 @@ cat << EOF #define __IGNORE_truncate64 #define __IGNORE_stat64 #define __IGNORE_lstat64 -#define __IGNORE_fstat64 #define __IGNORE_fcntl64 #define __IGNORE_fadvise64_64 -#define __IGNORE_fstatat64 #define __IGNORE_fstatfs64 #define __IGNORE_statfs64 #define __IGNORE_llseek @@ -253,6 +251,10 @@ cat << EOF #define __IGNORE_getpmsg #define __IGNORE_putpmsg #define __IGNORE_vserver + +/* 64-bit ports never needed these, and new 32-bit ports can use statx */ +#define __IGNORE_fstat64 +#define __IGNORE_fstatat64 EOF } From d298b03506d3e161f7492c440babb0bfae35e650 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 6 Oct 2021 18:33:52 +0200 Subject: [PATCH 154/235] x86/fpu: Restore the masking out of reserved MXCSR bits Ser Olmy reported a boot failure: init[1] bad frame in sigreturn frame:(ptrval) ip:b7c9fbe6 sp:bf933310 orax:ffffffff \ in libc-2.33.so[b7bed000+156000] Kernel panic - not syncing: Attempted to kill init! 
exitcode=0x0000000b CPU: 0 PID: 1 Comm: init Tainted: G W 5.14.9 #1 Hardware name: Hewlett-Packard HP PC/HP Board, BIOS JD.00.06 12/06/2001 Call Trace: dump_stack_lvl dump_stack panic do_exit.cold do_group_exit get_signal arch_do_signal_or_restart ? force_sig_info_to_task ? force_sig exit_to_user_mode_prepare syscall_exit_to_user_mode do_int80_syscall_32 entry_INT80_32 on an old 32-bit Intel CPU: vendor_id : GenuineIntel cpu family : 6 model : 6 model name : Celeron (Mendocino) stepping : 5 microcode : 0x3 Ser bisected the problem to the commit in Fixes. tglx suggested reverting the rejection of invalid MXCSR values which this commit introduced and replacing it with what the old code did - simply masking them out to zero. Further debugging confirmed his suggestion: fpu->state.fxsave.mxcsr: 0xb7be13b4, mxcsr_feature_mask: 0xffbf WARNING: CPU: 0 PID: 1 at arch/x86/kernel/fpu/signal.c:384 __fpu_restore_sig+0x51f/0x540 so restore the original behavior only for 32-bit kernels where you have ancient machines with buggy hardware. For 32-bit programs on 64-bit kernels, user space which supplies wrong MXCSR values is considered malicious so fail the sigframe restoration there. Fixes: 6f9866a166cd ("x86/fpu/signal: Let xrstor handle the features to init") Reported-by: Ser Olmy Signed-off-by: Borislav Petkov Tested-by: Ser Olmy Cc: Link: https://lkml.kernel.org/r/YVtA67jImg3KlBTw@zn.tnic --- arch/x86/kernel/fpu/signal.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 445c57c9c539..fa17a27390ab 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -379,9 +379,14 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx, sizeof(fpu->state.fxsave))) return -EFAULT; - /* Reject invalid MXCSR values. */ - if (fpu->state.fxsave.mxcsr & ~mxcsr_feature_mask) - return -EINVAL; + if (IS_ENABLED(CONFIG_X86_64)) { + /* Reject invalid MXCSR values. 
*/ + if (fpu->state.fxsave.mxcsr & ~mxcsr_feature_mask) + return -EINVAL; + } else { + /* Mask invalid bits out for historical reasons (broken hardware). */ + fpu->state.fxsave.mxcsr &= ~mxcsr_feature_mask; + } /* Enforce XFEATURE_MASK_FPSSE when XSAVE is enabled */ if (use_xsave()) From 2fbc349911e45d4ea5187b608c8d58db66496260 Mon Sep 17 00:00:00 2001 From: Adam Borowski Date: Sun, 12 Sep 2021 23:26:06 +0200 Subject: [PATCH 155/235] asm-generic/io.h: give stub iounmap() on !MMU same prototype as elsewhere It made -Werror sad. Signed-off-by: Adam Borowski Signed-off-by: Arnd Bergmann --- include/asm-generic/io.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index e93375c710b9..dea1d36a6402 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -957,7 +957,7 @@ static inline void __iomem *ioremap(phys_addr_t offset, size_t size) #ifndef iounmap #define iounmap iounmap -static inline void iounmap(void __iomem *addr) +static inline void iounmap(volatile void __iomem *addr) { } #endif From 612f71d7328c14369924384ad2170aae2a6abd92 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 7 Oct 2021 15:05:00 -0700 Subject: [PATCH 156/235] mptcp: fix possible stall on recvmsg() recvmsg() can enter an infinite loop if the caller provides the MSG_WAITALL, the data present in the receive queue is not sufficient to fulfill the request, and no more data is received by the peer. When the above happens, mptcp_wait_data() will always return with no wait, as the MPTCP_DATA_READY flag checked by such function is set and never cleared in such code path. Leveraging the above syzbot was able to trigger an RCU stall: rcu: INFO: rcu_preempt self-detected stall on CPU rcu: 0-...!: (10499 ticks this GP) idle=0af/1/0x4000000000000000 softirq=10678/10678 fqs=1 (t=10500 jiffies g=13089 q=109) rcu: rcu_preempt kthread starved for 10497 jiffies! 
g13089 f0x0 RCU_GP_WAIT_FQS(5) ->state=0x0 ->cpu=1 rcu: Unless rcu_preempt kthread gets sufficient CPU time, OOM is now expected behavior. rcu: RCU grace-period kthread stack dump: task:rcu_preempt state:R running task stack:28696 pid: 14 ppid: 2 flags:0x00004000 Call Trace: context_switch kernel/sched/core.c:4955 [inline] __schedule+0x940/0x26f0 kernel/sched/core.c:6236 schedule+0xd3/0x270 kernel/sched/core.c:6315 schedule_timeout+0x14a/0x2a0 kernel/time/timer.c:1881 rcu_gp_fqs_loop+0x186/0x810 kernel/rcu/tree.c:1955 rcu_gp_kthread+0x1de/0x320 kernel/rcu/tree.c:2128 kthread+0x405/0x4f0 kernel/kthread.c:327 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295 rcu: Stack dump where RCU GP kthread last ran: Sending NMI from CPU 0 to CPUs 1: NMI backtrace for cpu 1 CPU: 1 PID: 8510 Comm: syz-executor827 Not tainted 5.15.0-rc2-next-20210920-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:bytes_is_nonzero mm/kasan/generic.c:84 [inline] RIP: 0010:memory_is_nonzero mm/kasan/generic.c:102 [inline] RIP: 0010:memory_is_poisoned_n mm/kasan/generic.c:128 [inline] RIP: 0010:memory_is_poisoned mm/kasan/generic.c:159 [inline] RIP: 0010:check_region_inline mm/kasan/generic.c:180 [inline] RIP: 0010:kasan_check_range+0xc8/0x180 mm/kasan/generic.c:189 Code: 38 00 74 ed 48 8d 50 08 eb 09 48 83 c0 01 48 39 d0 74 7a 80 38 00 74 f2 48 89 c2 b8 01 00 00 00 48 85 d2 75 56 5b 5d 41 5c c3 <48> 85 d2 74 5e 48 01 ea eb 09 48 83 c0 01 48 39 d0 74 50 80 38 00 RSP: 0018:ffffc9000cd676c8 EFLAGS: 00000283 RAX: ffffed100e9a110e RBX: ffffed100e9a110f RCX: ffffffff88ea062a RDX: 0000000000000001 RSI: 0000000000000008 RDI: ffff888074d08870 RBP: ffffed100e9a110e R08: 0000000000000001 R09: ffff888074d08877 R10: ffffed100e9a110e R11: 0000000000000000 R12: ffff888074d08000 R13: ffff888074d08000 R14: ffff888074d08088 R15: ffff888074d08000 FS: 0000555556d8e300(0000) GS:ffff8880b9d00000(0000) knlGS:0000000000000000 S: 0010 DS: 0000 ES: 0000 CR0: 
0000000080050033 CR2: 0000000020000180 CR3: 0000000068909000 CR4: 00000000001506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: instrument_atomic_read_write include/linux/instrumented.h:101 [inline] test_and_clear_bit include/asm-generic/bitops/instrumented-atomic.h:83 [inline] mptcp_release_cb+0x14a/0x210 net/mptcp/protocol.c:3016 release_sock+0xb4/0x1b0 net/core/sock.c:3204 mptcp_wait_data net/mptcp/protocol.c:1770 [inline] mptcp_recvmsg+0xfd1/0x27b0 net/mptcp/protocol.c:2080 inet6_recvmsg+0x11b/0x5e0 net/ipv6/af_inet6.c:659 sock_recvmsg_nosec net/socket.c:944 [inline] ____sys_recvmsg+0x527/0x600 net/socket.c:2626 ___sys_recvmsg+0x127/0x200 net/socket.c:2670 do_recvmmsg+0x24d/0x6d0 net/socket.c:2764 __sys_recvmmsg net/socket.c:2843 [inline] __do_sys_recvmmsg net/socket.c:2866 [inline] __se_sys_recvmmsg net/socket.c:2859 [inline] __x64_sys_recvmmsg+0x20b/0x260 net/socket.c:2859 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7fc200d2dc39 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 41 15 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 c0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffc5758e5a8 EFLAGS: 00000246 ORIG_RAX: 000000000000012b RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007fc200d2dc39 RDX: 0000000000000002 RSI: 00000000200017c0 RDI: 0000000000000003 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000f0b5ff R10: 0000000000000100 R11: 0000000000000246 R12: 0000000000000003 R13: 00007ffc5758e5d0 R14: 00007ffc5758e5c0 R15: 0000000000000003 Fix the issue by replacing the MPTCP_DATA_READY bit with direct inspection of the msk receive queue. 
Reported-and-tested-by: syzbot+3360da629681aa0d22fe@syzkaller.appspotmail.com Fixes: 7a6a6cbc3e59 ("mptcp: recvmsg() can drain data from multiple subflow") Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 55 ++++++++++++-------------------------------- 1 file changed, 15 insertions(+), 40 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index e5df0b5971c8..d073b2111382 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -528,7 +528,6 @@ static bool mptcp_check_data_fin(struct sock *sk) sk->sk_shutdown |= RCV_SHUTDOWN; smp_mb__before_atomic(); /* SHUTDOWN must be visible first */ - set_bit(MPTCP_DATA_READY, &msk->flags); switch (sk->sk_state) { case TCP_ESTABLISHED: @@ -742,10 +741,9 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk) /* Wake-up the reader only for in-sequence data */ mptcp_data_lock(sk); - if (move_skbs_to_msk(msk, ssk)) { - set_bit(MPTCP_DATA_READY, &msk->flags); + if (move_skbs_to_msk(msk, ssk)) sk->sk_data_ready(sk); - } + mptcp_data_unlock(sk); } @@ -847,7 +845,6 @@ static void mptcp_check_for_eof(struct mptcp_sock *msk) sk->sk_shutdown |= RCV_SHUTDOWN; smp_mb__before_atomic(); /* SHUTDOWN must be visible first */ - set_bit(MPTCP_DATA_READY, &msk->flags); sk->sk_data_ready(sk); } @@ -1759,21 +1756,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) return copied ? 
: ret; } -static void mptcp_wait_data(struct sock *sk, long *timeo) -{ - DEFINE_WAIT_FUNC(wait, woken_wake_function); - struct mptcp_sock *msk = mptcp_sk(sk); - - add_wait_queue(sk_sleep(sk), &wait); - sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); - - sk_wait_event(sk, timeo, - test_bit(MPTCP_DATA_READY, &msk->flags), &wait); - - sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); - remove_wait_queue(sk_sleep(sk), &wait); -} - static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk, struct msghdr *msg, size_t len, int flags, @@ -2077,19 +2059,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, } pr_debug("block timeout %ld", timeo); - mptcp_wait_data(sk, &timeo); - } - - if (skb_queue_empty_lockless(&sk->sk_receive_queue) && - skb_queue_empty(&msk->receive_queue)) { - /* entire backlog drained, clear DATA_READY. */ - clear_bit(MPTCP_DATA_READY, &msk->flags); - - /* .. race-breaker: ssk might have gotten new data - * after last __mptcp_move_skbs() returned false. - */ - if (unlikely(__mptcp_move_skbs(msk))) - set_bit(MPTCP_DATA_READY, &msk->flags); + sk_wait_data(sk, &timeo, NULL); } out_err: @@ -2098,9 +2068,9 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, tcp_recv_timestamp(msg, sk, &tss); } - pr_debug("msk=%p data_ready=%d rx queue empty=%d copied=%d", - msk, test_bit(MPTCP_DATA_READY, &msk->flags), - skb_queue_empty_lockless(&sk->sk_receive_queue), copied); + pr_debug("msk=%p rx queue empty=%d:%d copied=%d", + msk, skb_queue_empty_lockless(&sk->sk_receive_queue), + skb_queue_empty(&msk->receive_queue), copied); if (!(flags & MSG_PEEK)) mptcp_rcv_space_adjust(msk, copied); @@ -2368,7 +2338,6 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk) inet_sk_state_store(sk, TCP_CLOSE); sk->sk_shutdown = SHUTDOWN_MASK; smp_mb__before_atomic(); /* SHUTDOWN must be visible first */ - set_bit(MPTCP_DATA_READY, &msk->flags); set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags); mptcp_close_wake_up(sk); @@ -3385,8 +3354,14 @@ static int 
mptcp_stream_accept(struct socket *sock, struct socket *newsock, static __poll_t mptcp_check_readable(struct mptcp_sock *msk) { - return test_bit(MPTCP_DATA_READY, &msk->flags) ? EPOLLIN | EPOLLRDNORM : - 0; + /* Concurrent splices from sk_receive_queue into receive_queue will + * always show at least one non-empty queue when checked in this order. + */ + if (skb_queue_empty_lockless(&((struct sock *)msk)->sk_receive_queue) && + skb_queue_empty_lockless(&msk->receive_queue)) + return 0; + + return EPOLLIN | EPOLLRDNORM; } static __poll_t mptcp_check_writeable(struct mptcp_sock *msk) @@ -3421,7 +3396,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock, state = inet_sk_state_load(sk); pr_debug("msk=%p state=%d flags=%lx", msk, state, msk->flags); if (state == TCP_LISTEN) - return mptcp_check_readable(msk); + return test_bit(MPTCP_DATA_READY, &msk->flags) ? EPOLLIN | EPOLLRDNORM : 0; if (state != TCP_SYN_SENT && state != TCP_SYN_RECV) { mask |= mptcp_check_readable(msk); From 075da584bae2da6a37428d59a477b6bdad430ac3 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Fri, 8 Oct 2021 12:34:37 +0200 Subject: [PATCH 157/235] net: stmmac: fix get_hw_feature() on old hardware Some old IPs do not provide the hardware feature register. On these IPs, this register is read 0x00000000. In old driver version, this feature was handled but a regression came with the commit f10a6a3541b4 ("stmmac: rework get_hw_feature function"). Indeed, this commit removes the return value in dma->get_hw_feature(). This return value was used to indicate the validity of retrieved information and used later on in stmmac_hw_init() to override priv->plat data if this hardware feature were valid. This patch restores the return code in ->get_hw_feature() in order to indicate the hardware feature validity and override priv->plat data only if this hardware feature is valid. Fixes: f10a6a3541b4 ("stmmac: rework get_hw_feature function") Signed-off-by: Herve Codina Signed-off-by: David S. 
Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c | 13 +++++++++++-- drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c | 6 ++++-- drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c | 6 ++++-- drivers/net/ethernet/stmicro/stmmac/hwif.h | 6 +++--- 4 files changed, 22 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c index 90383abafa66..f5581db0ba9b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c @@ -218,11 +218,18 @@ static void dwmac1000_dump_dma_regs(void __iomem *ioaddr, u32 *reg_space) readl(ioaddr + DMA_BUS_MODE + i * 4); } -static void dwmac1000_get_hw_feature(void __iomem *ioaddr, - struct dma_features *dma_cap) +static int dwmac1000_get_hw_feature(void __iomem *ioaddr, + struct dma_features *dma_cap) { u32 hw_cap = readl(ioaddr + DMA_HW_FEATURE); + if (!hw_cap) { + /* 0x00000000 is the value read on old hardware that does not + * implement this register + */ + return -EOPNOTSUPP; + } + dma_cap->mbps_10_100 = (hw_cap & DMA_HW_FEAT_MIISEL); dma_cap->mbps_1000 = (hw_cap & DMA_HW_FEAT_GMIISEL) >> 1; dma_cap->half_duplex = (hw_cap & DMA_HW_FEAT_HDSEL) >> 2; @@ -252,6 +259,8 @@ static void dwmac1000_get_hw_feature(void __iomem *ioaddr, dma_cap->number_tx_channel = (hw_cap & DMA_HW_FEAT_TXCHCNT) >> 22; /* Alternate (enhanced) DESC mode */ dma_cap->enh_desc = (hw_cap & DMA_HW_FEAT_ENHDESSEL) >> 24; + + return 0; } static void dwmac1000_rx_watchdog(void __iomem *ioaddr, u32 riwt, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c index 5be8e6a631d9..d99fa028c646 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c @@ -347,8 +347,8 @@ static void dwmac4_dma_tx_chan_op_mode(void __iomem *ioaddr, int mode, writel(mtl_tx_op, ioaddr + MTL_CHAN_TX_OP_MODE(channel)); } 
-static void dwmac4_get_hw_feature(void __iomem *ioaddr, - struct dma_features *dma_cap) +static int dwmac4_get_hw_feature(void __iomem *ioaddr, + struct dma_features *dma_cap) { u32 hw_cap = readl(ioaddr + GMAC_HW_FEATURE0); @@ -437,6 +437,8 @@ static void dwmac4_get_hw_feature(void __iomem *ioaddr, dma_cap->frpbs = (hw_cap & GMAC_HW_FEAT_FRPBS) >> 11; dma_cap->frpsel = (hw_cap & GMAC_HW_FEAT_FRPSEL) >> 10; dma_cap->dvlan = (hw_cap & GMAC_HW_FEAT_DVLAN) >> 5; + + return 0; } /* Enable/disable TSO feature and set MSS */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c index 906e985441a9..5e98355f422b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c @@ -371,8 +371,8 @@ static int dwxgmac2_dma_interrupt(void __iomem *ioaddr, return ret; } -static void dwxgmac2_get_hw_feature(void __iomem *ioaddr, - struct dma_features *dma_cap) +static int dwxgmac2_get_hw_feature(void __iomem *ioaddr, + struct dma_features *dma_cap) { u32 hw_cap; @@ -445,6 +445,8 @@ static void dwxgmac2_get_hw_feature(void __iomem *ioaddr, dma_cap->frpes = (hw_cap & XGMAC_HWFEAT_FRPES) >> 11; dma_cap->frpbs = (hw_cap & XGMAC_HWFEAT_FRPPB) >> 9; dma_cap->frpsel = (hw_cap & XGMAC_HWFEAT_FRPSEL) >> 3; + + return 0; } static void dwxgmac2_rx_watchdog(void __iomem *ioaddr, u32 riwt, u32 queue) diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index 6dc1c98ebec8..fe2660d5694d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -203,8 +203,8 @@ struct stmmac_dma_ops { int (*dma_interrupt) (void __iomem *ioaddr, struct stmmac_extra_stats *x, u32 chan, u32 dir); /* If supported then get the optional core features */ - void (*get_hw_feature)(void __iomem *ioaddr, - struct dma_features *dma_cap); + int (*get_hw_feature)(void __iomem *ioaddr, + struct dma_features 
*dma_cap); /* Program the HW RX Watchdog */ void (*rx_watchdog)(void __iomem *ioaddr, u32 riwt, u32 queue); void (*set_tx_ring_len)(void __iomem *ioaddr, u32 len, u32 chan); @@ -255,7 +255,7 @@ struct stmmac_dma_ops { #define stmmac_dma_interrupt_status(__priv, __args...) \ stmmac_do_callback(__priv, dma, dma_interrupt, __args) #define stmmac_get_hw_feature(__priv, __args...) \ - stmmac_do_void_callback(__priv, dma, get_hw_feature, __args) + stmmac_do_callback(__priv, dma, get_hw_feature, __args) #define stmmac_rx_watchdog(__priv, __args...) \ stmmac_do_void_callback(__priv, dma, rx_watchdog, __args) #define stmmac_set_tx_ring_len(__priv, __args...) \ From 3781b6ad2ee1b1c3cf8b6523ac22a58f44c2c337 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Fri, 8 Oct 2021 12:34:38 +0200 Subject: [PATCH 158/235] dt-bindings: net: snps,dwmac: add dwmac 3.40a IP version dwmac 3.40a is an old ip version that can be found on SPEAr3xx soc. Signed-off-by: Herve Codina Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/snps,dwmac.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml index 42689b7d03a2..c115c95ee584 100644 --- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml +++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml @@ -21,6 +21,7 @@ select: contains: enum: - snps,dwmac + - snps,dwmac-3.40a - snps,dwmac-3.50a - snps,dwmac-3.610 - snps,dwmac-3.70a @@ -76,6 +77,7 @@ properties: - rockchip,rk3399-gmac - rockchip,rv1108-gmac - snps,dwmac + - snps,dwmac-3.40a - snps,dwmac-3.50a - snps,dwmac-3.610 - snps,dwmac-3.70a From 9cb1d19f47fafad7dcf7c8564e633440c946cfd7 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Fri, 8 Oct 2021 12:34:39 +0200 Subject: [PATCH 159/235] net: stmmac: add support for dwmac 3.40a dwmac 3.40a is an old ip version that can be found on SPEAr3xx soc. Signed-off-by: Herve Codina Signed-off-by: David S. 
Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c | 1 + drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c index fbfda55b4c52..5e731a72cce8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c @@ -71,6 +71,7 @@ static int dwmac_generic_probe(struct platform_device *pdev) static const struct of_device_id dwmac_generic_match[] = { { .compatible = "st,spear600-gmac"}, + { .compatible = "snps,dwmac-3.40a"}, { .compatible = "snps,dwmac-3.50a"}, { .compatible = "snps,dwmac-3.610"}, { .compatible = "snps,dwmac-3.70a"}, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 62cec9bfcd33..232ac98943cd 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -508,6 +508,14 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) plat->pmt = 1; } + if (of_device_is_compatible(np, "snps,dwmac-3.40a")) { + plat->has_gmac = 1; + plat->enh_desc = 1; + plat->tx_coe = 1; + plat->bugged_jumbo = 1; + plat->pmt = 1; + } + if (of_device_is_compatible(np, "snps,dwmac-4.00") || of_device_is_compatible(np, "snps,dwmac-4.10a") || of_device_is_compatible(np, "snps,dwmac-4.20a") || From 6636fec29cdf6665bd219564609e8651f6ddc142 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Fri, 8 Oct 2021 12:34:40 +0200 Subject: [PATCH 160/235] ARM: dts: spear3xx: Fix gmac node On SPEAr3xx, ethernet driver is not compatible with the SPEAr600 one. Indeed, SPEAr3xx uses an earlier version of this IP (v3.40) and needs some driver tuning compare to SPEAr600. 
The v3.40 IP support was added to stmmac driver and this patch fixes this issue and uses the correct compatible string for SPEAr3xx Signed-off-by: Herve Codina Signed-off-by: David S. Miller --- arch/arm/boot/dts/spear3xx.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/spear3xx.dtsi b/arch/arm/boot/dts/spear3xx.dtsi index f266b7b03482..cc88ebe7a60c 100644 --- a/arch/arm/boot/dts/spear3xx.dtsi +++ b/arch/arm/boot/dts/spear3xx.dtsi @@ -47,7 +47,7 @@ dma@fc400000 { }; gmac: eth@e0800000 { - compatible = "st,spear600-gmac"; + compatible = "snps,dwmac-3.40a"; reg = <0xe0800000 0x8000>; interrupts = <23 22>; interrupt-names = "macirq", "eth_wake_irq"; From 95f7f3e7dc6bd2e735cb5de11734ea2222b1e05a Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Thu, 7 Oct 2021 16:14:40 +0200 Subject: [PATCH 161/235] net/smc: improved fix wait on already cleared link Commit 8f3d65c16679 ("net/smc: fix wait on already cleared link") introduced link refcounting to avoid waits on already cleared links. This patch extends and improves the refcounting to cover all remaining possible cases for this kind of error situation. Fixes: 15e1b99aadfb ("net/smc: no WR buffer wait for terminating link group") Signed-off-by: Karsten Graul Signed-off-by: David S.
Miller --- net/smc/smc_cdc.c | 7 +++++- net/smc/smc_core.c | 20 ++++++++------- net/smc/smc_llc.c | 63 +++++++++++++++++++++++++++++++++++----------- net/smc/smc_tx.c | 22 ++++------------ net/smc/smc_wr.h | 14 +++++++++++ 5 files changed, 85 insertions(+), 41 deletions(-) diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index f23f558054a7..99acd337ba90 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -150,9 +150,11 @@ static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn) again: link = conn->lnk; + if (!smc_wr_tx_link_hold(link)) + return -ENOLINK; rc = smc_cdc_get_free_slot(conn, link, &wr_buf, NULL, &pend); if (rc) - return rc; + goto put_out; spin_lock_bh(&conn->send_lock); if (link != conn->lnk) { @@ -160,6 +162,7 @@ static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn) spin_unlock_bh(&conn->send_lock); smc_wr_tx_put_slot(link, (struct smc_wr_tx_pend_priv *)pend); + smc_wr_tx_link_put(link); if (again) return -ENOLINK; again = true; @@ -167,6 +170,8 @@ static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn) } rc = smc_cdc_msg_send(conn, wr_buf, pend); spin_unlock_bh(&conn->send_lock); +put_out: + smc_wr_tx_link_put(link); return rc; } diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 8280c938be80..d2206743dc71 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -949,7 +949,7 @@ struct smc_link *smc_switch_conns(struct smc_link_group *lgr, to_lnk = &lgr->lnk[i]; break; } - if (!to_lnk) { + if (!to_lnk || !smc_wr_tx_link_hold(to_lnk)) { smc_lgr_terminate_sched(lgr); return NULL; } @@ -981,24 +981,26 @@ struct smc_link *smc_switch_conns(struct smc_link_group *lgr, read_unlock_bh(&lgr->conns_lock); /* pre-fetch buffer outside of send_lock, might sleep */ rc = smc_cdc_get_free_slot(conn, to_lnk, &wr_buf, NULL, &pend); - if (rc) { - smcr_link_down_cond_sched(to_lnk); - return NULL; - } + if (rc) + goto err_out; /* avoid race with smcr_tx_sndbuf_nonempty() */ spin_lock_bh(&conn->send_lock); 
smc_switch_link_and_count(conn, to_lnk); rc = smc_switch_cursor(smc, pend, wr_buf); spin_unlock_bh(&conn->send_lock); sock_put(&smc->sk); - if (rc) { - smcr_link_down_cond_sched(to_lnk); - return NULL; - } + if (rc) + goto err_out; goto again; } read_unlock_bh(&lgr->conns_lock); + smc_wr_tx_link_put(to_lnk); return to_lnk; + +err_out: + smcr_link_down_cond_sched(to_lnk); + smc_wr_tx_link_put(to_lnk); + return NULL; } static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc, diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 2e7560eba981..72f4b72eb175 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -383,9 +383,11 @@ int smc_llc_send_confirm_link(struct smc_link *link, struct smc_wr_buf *wr_buf; int rc; + if (!smc_wr_tx_link_hold(link)) + return -ENOLINK; rc = smc_llc_add_pending_send(link, &wr_buf, &pend); if (rc) - return rc; + goto put_out; confllc = (struct smc_llc_msg_confirm_link *)wr_buf; memset(confllc, 0, sizeof(*confllc)); confllc->hd.common.type = SMC_LLC_CONFIRM_LINK; @@ -402,6 +404,8 @@ int smc_llc_send_confirm_link(struct smc_link *link, confllc->max_links = SMC_LLC_ADD_LNK_MAX_LINKS; /* send llc message */ rc = smc_wr_tx_send(link, pend); +put_out: + smc_wr_tx_link_put(link); return rc; } @@ -415,9 +419,11 @@ static int smc_llc_send_confirm_rkey(struct smc_link *send_link, struct smc_link *link; int i, rc, rtok_ix; + if (!smc_wr_tx_link_hold(send_link)) + return -ENOLINK; rc = smc_llc_add_pending_send(send_link, &wr_buf, &pend); if (rc) - return rc; + goto put_out; rkeyllc = (struct smc_llc_msg_confirm_rkey *)wr_buf; memset(rkeyllc, 0, sizeof(*rkeyllc)); rkeyllc->hd.common.type = SMC_LLC_CONFIRM_RKEY; @@ -444,6 +450,8 @@ static int smc_llc_send_confirm_rkey(struct smc_link *send_link, (u64)sg_dma_address(rmb_desc->sgt[send_link->link_idx].sgl)); /* send llc message */ rc = smc_wr_tx_send(send_link, pend); +put_out: + smc_wr_tx_link_put(send_link); return rc; } @@ -456,9 +464,11 @@ static int smc_llc_send_delete_rkey(struct smc_link 
*link, struct smc_wr_buf *wr_buf; int rc; + if (!smc_wr_tx_link_hold(link)) + return -ENOLINK; rc = smc_llc_add_pending_send(link, &wr_buf, &pend); if (rc) - return rc; + goto put_out; rkeyllc = (struct smc_llc_msg_delete_rkey *)wr_buf; memset(rkeyllc, 0, sizeof(*rkeyllc)); rkeyllc->hd.common.type = SMC_LLC_DELETE_RKEY; @@ -467,6 +477,8 @@ static int smc_llc_send_delete_rkey(struct smc_link *link, rkeyllc->rkey[0] = htonl(rmb_desc->mr_rx[link->link_idx]->rkey); /* send llc message */ rc = smc_wr_tx_send(link, pend); +put_out: + smc_wr_tx_link_put(link); return rc; } @@ -480,9 +492,11 @@ int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[], struct smc_wr_buf *wr_buf; int rc; + if (!smc_wr_tx_link_hold(link)) + return -ENOLINK; rc = smc_llc_add_pending_send(link, &wr_buf, &pend); if (rc) - return rc; + goto put_out; addllc = (struct smc_llc_msg_add_link *)wr_buf; memset(addllc, 0, sizeof(*addllc)); @@ -504,6 +518,8 @@ int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[], } /* send llc message */ rc = smc_wr_tx_send(link, pend); +put_out: + smc_wr_tx_link_put(link); return rc; } @@ -517,9 +533,11 @@ int smc_llc_send_delete_link(struct smc_link *link, u8 link_del_id, struct smc_wr_buf *wr_buf; int rc; + if (!smc_wr_tx_link_hold(link)) + return -ENOLINK; rc = smc_llc_add_pending_send(link, &wr_buf, &pend); if (rc) - return rc; + goto put_out; delllc = (struct smc_llc_msg_del_link *)wr_buf; memset(delllc, 0, sizeof(*delllc)); @@ -536,6 +554,8 @@ int smc_llc_send_delete_link(struct smc_link *link, u8 link_del_id, delllc->reason = htonl(reason); /* send llc message */ rc = smc_wr_tx_send(link, pend); +put_out: + smc_wr_tx_link_put(link); return rc; } @@ -547,9 +567,11 @@ static int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16]) struct smc_wr_buf *wr_buf; int rc; + if (!smc_wr_tx_link_hold(link)) + return -ENOLINK; rc = smc_llc_add_pending_send(link, &wr_buf, &pend); if (rc) - return rc; + goto put_out; testllc = (struct 
smc_llc_msg_test_link *)wr_buf; memset(testllc, 0, sizeof(*testllc)); testllc->hd.common.type = SMC_LLC_TEST_LINK; @@ -557,6 +579,8 @@ static int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16]) memcpy(testllc->user_data, user_data, sizeof(testllc->user_data)); /* send llc message */ rc = smc_wr_tx_send(link, pend); +put_out: + smc_wr_tx_link_put(link); return rc; } @@ -567,13 +591,16 @@ static int smc_llc_send_message(struct smc_link *link, void *llcbuf) struct smc_wr_buf *wr_buf; int rc; - if (!smc_link_usable(link)) + if (!smc_wr_tx_link_hold(link)) return -ENOLINK; rc = smc_llc_add_pending_send(link, &wr_buf, &pend); if (rc) - return rc; + goto put_out; memcpy(wr_buf, llcbuf, sizeof(union smc_llc_msg)); - return smc_wr_tx_send(link, pend); + rc = smc_wr_tx_send(link, pend); +put_out: + smc_wr_tx_link_put(link); + return rc; } /* schedule an llc send on link, may wait for buffers, @@ -586,13 +613,16 @@ static int smc_llc_send_message_wait(struct smc_link *link, void *llcbuf) struct smc_wr_buf *wr_buf; int rc; - if (!smc_link_usable(link)) + if (!smc_wr_tx_link_hold(link)) return -ENOLINK; rc = smc_llc_add_pending_send(link, &wr_buf, &pend); if (rc) - return rc; + goto put_out; memcpy(wr_buf, llcbuf, sizeof(union smc_llc_msg)); - return smc_wr_tx_send_wait(link, pend, SMC_LLC_WAIT_TIME); + rc = smc_wr_tx_send_wait(link, pend, SMC_LLC_WAIT_TIME); +put_out: + smc_wr_tx_link_put(link); + return rc; } /********************************* receive ***********************************/ @@ -672,9 +702,11 @@ static int smc_llc_add_link_cont(struct smc_link *link, struct smc_buf_desc *rmb; u8 n; + if (!smc_wr_tx_link_hold(link)) + return -ENOLINK; rc = smc_llc_add_pending_send(link, &wr_buf, &pend); if (rc) - return rc; + goto put_out; addc_llc = (struct smc_llc_msg_add_link_cont *)wr_buf; memset(addc_llc, 0, sizeof(*addc_llc)); @@ -706,7 +738,10 @@ static int smc_llc_add_link_cont(struct smc_link *link, addc_llc->hd.length = sizeof(struct 
smc_llc_msg_add_link_cont); if (lgr->role == SMC_CLNT) addc_llc->hd.flags |= SMC_LLC_FLAG_RESP; - return smc_wr_tx_send(link, pend); + rc = smc_wr_tx_send(link, pend); +put_out: + smc_wr_tx_link_put(link); + return rc; } static int smc_llc_cli_rkey_exchange(struct smc_link *link, diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index c79361dfcdfb..738a4a99c827 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -496,7 +496,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn, /* Wakeup sndbuf consumers from any context (IRQ or process) * since there is more data to transmit; usable snd_wnd as max transmit */ -static int _smcr_tx_sndbuf_nonempty(struct smc_connection *conn) +static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn) { struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags; struct smc_link *link = conn->lnk; @@ -505,8 +505,11 @@ static int _smcr_tx_sndbuf_nonempty(struct smc_connection *conn) struct smc_wr_buf *wr_buf; int rc; + if (!link || !smc_wr_tx_link_hold(link)) + return -ENOLINK; rc = smc_cdc_get_free_slot(conn, link, &wr_buf, &wr_rdma_buf, &pend); if (rc < 0) { + smc_wr_tx_link_put(link); if (rc == -EBUSY) { struct smc_sock *smc = container_of(conn, struct smc_sock, conn); @@ -547,22 +550,7 @@ static int _smcr_tx_sndbuf_nonempty(struct smc_connection *conn) out_unlock: spin_unlock_bh(&conn->send_lock); - return rc; -} - -static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn) -{ - struct smc_link *link = conn->lnk; - int rc = -ENOLINK; - - if (!link) - return rc; - - atomic_inc(&link->wr_tx_refcnt); - if (smc_link_usable(link)) - rc = _smcr_tx_sndbuf_nonempty(conn); - if (atomic_dec_and_test(&link->wr_tx_refcnt)) - wake_up_all(&link->wr_tx_wait); + smc_wr_tx_link_put(link); return rc; } diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index 423b8709f1c9..2bc626f230a5 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -60,6 +60,20 @@ static inline void smc_wr_tx_set_wr_id(atomic_long_t 
*wr_tx_id, long val) atomic_long_set(wr_tx_id, val); } +static inline bool smc_wr_tx_link_hold(struct smc_link *link) +{ + if (!smc_link_usable(link)) + return false; + atomic_inc(&link->wr_tx_refcnt); + return true; +} + +static inline void smc_wr_tx_link_put(struct smc_link *link) +{ + if (atomic_dec_and_test(&link->wr_tx_refcnt)) + wake_up_all(&link->wr_tx_wait); +} + static inline void smc_wr_wakeup_tx_wait(struct smc_link *lnk) { wake_up_all(&lnk->wr_tx_wait); From 1b1499a817c90fd1ce9453a2c98d2a01cca0e775 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Thu, 7 Oct 2021 19:44:30 +0200 Subject: [PATCH 162/235] nfc: nci: fix the UAF of rf_conn_info object The nci_core_conn_close_rsp_packet() function will release the conn_info with given conn_id. However, it needs to set the rf_conn_info to NULL to prevent other routines like nci_rf_intf_activated_ntf_packet() to trigger the UAF. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Lin Ma Signed-off-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- net/nfc/nci/rsp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c index a2e72c003805..b911ab78bed9 100644 --- a/net/nfc/nci/rsp.c +++ b/net/nfc/nci/rsp.c @@ -334,6 +334,8 @@ static void nci_core_conn_close_rsp_packet(struct nci_dev *ndev, ndev->cur_conn_id); if (conn_info) { list_del(&conn_info->list); + if (conn_info == ndev->rf_conn_info) + ndev->rf_conn_info = NULL; devm_kfree(&ndev->nfc_dev->dev, conn_info); } } From 1bec0f05062cf21e78093b1c4a2ae744e2873b8a Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 7 Oct 2021 19:47:08 +0300 Subject: [PATCH 163/235] net: dsa: fix bridge_num not getting cleared after ports leaving the bridge The dp->bridge_num is zero-based, with -1 being the encoding for an invalid value. But dsa_bridge_num_put used to check for an invalid value by comparing bridge_num with 0, which is of course incorrect. 
The result is that the bridge_num will never get cleared by dsa_bridge_num_put, and further port joins to other bridges will get a bridge_num larger than the previous one, and once all the available bridges with TX forwarding offload supported by the hardware get exhausted, the TX forwarding offload feature is simply disabled. In the case of sja1105, 7 iterations of the loop below are enough to exhaust the TX forwarding offload bits, and further bridge joins operate without that feature. ip link add br0 type bridge vlan_filtering 1 while :; do ip link set sw0p2 master br0 && sleep 1 ip link set sw0p2 nomaster && sleep 1 done This issue is enough of an indication that having the dp->bridge_num invalid encoding be a negative number is prone to bugs, so this will be changed to a one-based value, with the dp->bridge_num of zero being the indication of no bridge. However, that is material for net-next. Fixes: f5e165e72b29 ("net: dsa: track unique bridge numbers across all DSA switch trees") Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- net/dsa/dsa2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index b29262eee00b..6d5cc0217133 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -170,7 +170,7 @@ void dsa_bridge_num_put(const struct net_device *bridge_dev, int bridge_num) /* Check if the bridge is still in use, otherwise it is time * to clean it up so we can reuse this bridge_num later. */ - if (!dsa_bridge_num_find(bridge_dev)) + if (dsa_bridge_num_find(bridge_dev) < 0) clear_bit(bridge_num, &dsa_fwd_offloading_bridges); } From c7709a02c18aabebc3b2988d24661763a0449443 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 7 Oct 2021 19:47:09 +0300 Subject: [PATCH 164/235] net: dsa: tag_dsa: send packets with TX fwd offload from VLAN-unaware bridges using VID 0 The present code is structured this way due to an incomplete thought process. 
In Documentation/networking/switchdev.rst we document that if a bridge is VLAN-unaware, then the presence or lack of a pvid on a bridge port (or on the bridge itself, for that matter) should not affect the ability to receive and transmit tagged or untagged packets. If the bridge on behalf of which we are sending this packet is VLAN-aware, then the TX forwarding offload API ensures that the skb will be VLAN-tagged (if the packet was sent by user space as untagged, it will get transmitted down to the driver as tagged with the bridge device's pvid). But if the bridge is VLAN-unaware, it may or may not be VLAN-tagged. In fact the logic to insert the bridge's PVID came from the idea that we should emulate what is being done in the VLAN-aware case. But we shouldn't. It appears that injecting packets using a VLAN ID of 0 serves the purpose of forwarding the packets to the egress port with no VLAN tag added or stripped by the hardware, and no filtering being performed. So we can simply remove the superfluous logic. One reason why this logic is broken is that when CONFIG_BRIDGE_VLAN_FILTERING=n, we call br_vlan_get_pvid_rcu() but that returns an error and we do error out, dropping all packets on xmit. Not really smart. This is also an issue when the user deletes the bridge pvid: $ bridge vlan del dev br0 vid 1 self As mentioned, in both cases, packets should still flow freely, and they do just that on any net device where the bridge is not offloaded, but on mv88e6xxx they don't.
Fixes: d82f8ab0d874 ("net: dsa: tag_dsa: offload the bridge forwarding process") Reported-by: Andrew Lunn Link: https://patchwork.kernel.org/project/netdevbpf/patch/20211003155141.2241314-1-andrew@lunn.ch/ Link: https://patchwork.kernel.org/project/netdevbpf/patch/20210928233708.1246774-1-vladimir.oltean@nxp.com/ Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- net/dsa/tag_dsa.c | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index e5127b7d1c6a..68d5ddc3ef35 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -129,12 +129,9 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev, u8 tag_dev, tag_port; enum dsa_cmd cmd; u8 *dsa_header; - u16 pvid = 0; - int err; if (skb->offload_fwd_mark) { struct dsa_switch_tree *dst = dp->ds->dst; - struct net_device *br = dp->bridge_dev; cmd = DSA_CMD_FORWARD; @@ -144,19 +141,6 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev, */ tag_dev = dst->last_switch + 1 + dp->bridge_num; tag_port = 0; - - /* If we are offloading forwarding for a VLAN-unaware bridge, - * inject packets to hardware using the bridge's pvid, since - * that's where the packets ingressed from. 
- */ - if (!br_vlan_enabled(br)) { - /* Safe because __dev_queue_xmit() runs under - * rcu_read_lock_bh() - */ - err = br_vlan_get_pvid_rcu(br, &pvid); - if (err) - return NULL; - } } else { cmd = DSA_CMD_FROM_CPU; tag_dev = dp->ds->index; @@ -188,8 +172,8 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev, dsa_header[0] = (cmd << 6) | tag_dev; dsa_header[1] = tag_port << 3; - dsa_header[2] = pvid >> 8; - dsa_header[3] = pvid & 0xff; + dsa_header[2] = 0; + dsa_header[3] = 0; } return skb; From 8b6836d824702cacf68190982181f8ca3aff9c3e Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 7 Oct 2021 19:47:10 +0300 Subject: [PATCH 165/235] net: dsa: mv88e6xxx: keep the pvid at 0 when VLAN-unaware The VLAN support in mv88e6xxx has a loaded history. Commit 2ea7a679ca2a ("net: dsa: Don't add vlans when vlan filtering is disabled") noticed some issues with VLAN and decided the best way to deal with them was to make the DSA core ignore VLANs added by the bridge while VLAN awareness is turned off. Those issues were never explained, just presented as "at least one corner case". That approach had problems of its own, presented by commit 54a0ed0df496 ("net: dsa: provide an option for drivers to always receive bridge VLANs") for the DSA core, followed by commit 1fb74191988f ("net: dsa: mv88e6xxx: fix vlan setup") which applied ds->configure_vlan_while_not_filtering = true for mv88e6xxx in particular. We still don't know what corner case Andrew saw when he wrote commit 2ea7a679ca2a ("net: dsa: Don't add vlans when vlan filtering is disabled"), but Tobias now reports that when we use TX forwarding offload, pinging an external station from the bridge device is broken if the front-facing DSA user port has flooding turned off. The full description is in the link below, but for short, when a mv88e6xxx port is under a VLAN-unaware bridge, it inherits that bridge's pvid. So packets ingressing a user port will be classified to e.g. 
VID 1 (assuming that value for the bridge_default_pvid), whereas when tag_dsa.c xmits towards a user port, it always sends packets using a VID of 0 if that port is standalone or under a VLAN-unaware bridge - or at least it did so prior to commit d82f8ab0d874 ("net: dsa: tag_dsa: offload the bridge forwarding process"). In any case, when there is a conversation between the CPU and a station connected to a user port, the station's MAC address is learned in VID 1 but the CPU tries to transmit through VID 0. The packets reach the intended station, but via flooding and not by virtue of matching the existing ATU entry. DSA has established (and enforced in other drivers: sja1105, felix, mt7530) that a VLAN-unaware port should use a private pvid, and not inherit the one from the bridge. The bridge's pvid should only be inherited when that bridge is VLAN-aware, so all state transitions need to be handled. On the other hand, all bridge VLANs should sit in the VTU starting with the moment when the bridge offloads them via switchdev, they are just not used. This solves the problem that Tobias sees because packets ingressing on VLAN-unaware user ports now get classified to VID 0, which is also the VID used by tag_dsa.c on xmit. 
Fixes: d82f8ab0d874 ("net: dsa: tag_dsa: offload the bridge forwarding process") Link: https://patchwork.kernel.org/project/netdevbpf/patch/20211003222312.284175-2-vladimir.oltean@nxp.com/#24491503 Reported-by: Tobias Waldekranz Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/chip.c | 53 ++++++++++++++++++++++++++++---- drivers/net/dsa/mv88e6xxx/chip.h | 6 ++++ drivers/net/dsa/mv88e6xxx/port.c | 21 +++++++++++++ drivers/net/dsa/mv88e6xxx/port.h | 2 ++ 4 files changed, 76 insertions(+), 6 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 03744d1c43fc..d672112afffd 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1677,6 +1677,26 @@ static int mv88e6xxx_port_check_hw_vlan(struct dsa_switch *ds, int port, return 0; } +static int mv88e6xxx_port_commit_pvid(struct mv88e6xxx_chip *chip, int port) +{ + struct dsa_port *dp = dsa_to_port(chip->ds, port); + struct mv88e6xxx_port *p = &chip->ports[port]; + bool drop_untagged = false; + u16 pvid = 0; + int err; + + if (dp->bridge_dev && br_vlan_enabled(dp->bridge_dev)) { + pvid = p->bridge_pvid.vid; + drop_untagged = !p->bridge_pvid.valid; + } + + err = mv88e6xxx_port_set_pvid(chip, port, pvid); + if (err) + return err; + + return mv88e6xxx_port_drop_untagged(chip, port, drop_untagged); +} + static int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering, struct netlink_ext_ack *extack) @@ -1690,7 +1710,16 @@ static int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, int port, return -EOPNOTSUPP; mv88e6xxx_reg_lock(chip); + err = mv88e6xxx_port_set_8021q_mode(chip, port, mode); + if (err) + goto unlock; + + err = mv88e6xxx_port_commit_pvid(chip, port); + if (err) + goto unlock; + +unlock: mv88e6xxx_reg_unlock(chip); return err; @@ -2123,6 +2152,7 @@ static int mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int port, struct mv88e6xxx_chip *chip = ds->priv; bool 
untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; + struct mv88e6xxx_port *p = &chip->ports[port]; bool warn; u8 member; int err; @@ -2156,13 +2186,21 @@ static int mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int port, } if (pvid) { - err = mv88e6xxx_port_set_pvid(chip, port, vlan->vid); - if (err) { - dev_err(ds->dev, "p%d: failed to set PVID %d\n", - port, vlan->vid); + p->bridge_pvid.vid = vlan->vid; + p->bridge_pvid.valid = true; + + err = mv88e6xxx_port_commit_pvid(chip, port); + if (err) + goto out; + } else if (vlan->vid && p->bridge_pvid.vid == vlan->vid) { + /* The old pvid was reinstalled as a non-pvid VLAN */ + p->bridge_pvid.valid = false; + + err = mv88e6xxx_port_commit_pvid(chip, port); + if (err) goto out; - } } + out: mv88e6xxx_reg_unlock(chip); @@ -2212,6 +2250,7 @@ static int mv88e6xxx_port_vlan_del(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan) { struct mv88e6xxx_chip *chip = ds->priv; + struct mv88e6xxx_port *p = &chip->ports[port]; int err = 0; u16 pvid; @@ -2229,7 +2268,9 @@ static int mv88e6xxx_port_vlan_del(struct dsa_switch *ds, int port, goto unlock; if (vlan->vid == pvid) { - err = mv88e6xxx_port_set_pvid(chip, port, 0); + p->bridge_pvid.valid = false; + + err = mv88e6xxx_port_commit_pvid(chip, port); if (err) goto unlock; } diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h index 59f316cc8583..33d067e8396d 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.h +++ b/drivers/net/dsa/mv88e6xxx/chip.h @@ -246,9 +246,15 @@ struct mv88e6xxx_policy { u16 vid; }; +struct mv88e6xxx_vlan { + u16 vid; + bool valid; +}; + struct mv88e6xxx_port { struct mv88e6xxx_chip *chip; int port; + struct mv88e6xxx_vlan bridge_pvid; u64 serdes_stats[2]; u64 atu_member_violation; u64 atu_miss_violation; diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c index 451028c57af8..d9817b20ea64 100644 --- 
a/drivers/net/dsa/mv88e6xxx/port.c +++ b/drivers/net/dsa/mv88e6xxx/port.c @@ -1257,6 +1257,27 @@ int mv88e6xxx_port_set_8021q_mode(struct mv88e6xxx_chip *chip, int port, return 0; } +int mv88e6xxx_port_drop_untagged(struct mv88e6xxx_chip *chip, int port, + bool drop_untagged) +{ + u16 old, new; + int err; + + err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_CTL2, &old); + if (err) + return err; + + if (drop_untagged) + new = old | MV88E6XXX_PORT_CTL2_DISCARD_UNTAGGED; + else + new = old & ~MV88E6XXX_PORT_CTL2_DISCARD_UNTAGGED; + + if (new == old) + return 0; + + return mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_CTL2, new); +} + int mv88e6xxx_port_set_map_da(struct mv88e6xxx_chip *chip, int port) { u16 reg; diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h index b10e5aebacf6..03382b66f800 100644 --- a/drivers/net/dsa/mv88e6xxx/port.h +++ b/drivers/net/dsa/mv88e6xxx/port.h @@ -423,6 +423,8 @@ int mv88e6393x_port_set_cmode(struct mv88e6xxx_chip *chip, int port, phy_interface_t mode); int mv88e6185_port_get_cmode(struct mv88e6xxx_chip *chip, int port, u8 *cmode); int mv88e6352_port_get_cmode(struct mv88e6xxx_chip *chip, int port, u8 *cmode); +int mv88e6xxx_port_drop_untagged(struct mv88e6xxx_chip *chip, int port, + bool drop_untagged); int mv88e6xxx_port_set_map_da(struct mv88e6xxx_chip *chip, int port); int mv88e6095_port_set_upstream_port(struct mv88e6xxx_chip *chip, int port, int upstream_port); From 5bded8259ee3815a91791462dfb3312480779c3d Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 7 Oct 2021 19:47:11 +0300 Subject: [PATCH 166/235] net: dsa: mv88e6xxx: isolate the ATU databases of standalone and bridged ports Similar to commit 6087175b7991 ("net: dsa: mt7530: use independent VLAN learning on VLAN-unaware bridges"), software forwarding between an unoffloaded LAG port (a bonding interface with an unsupported policy) and a mv88e6xxx user port directly under a bridge is broken. 
We adopt the same strategy, which is to make the standalone ports not find any ATU entry learned on a bridge port. Theory: the mv88e6xxx ATU is looked up by FID and MAC address. There are as many FIDs as VIDs (4096). The FID is derived from the VID when possible (the VTU maps a VID to a FID), with a fallback to the port based default FID value when not (802.1Q Mode is disabled on the port, or the classified VID isn't present in the VTU). The mv88e6xxx driver makes the following use of FIDs and VIDs: - the port's DefaultVID (to which untagged & pvid-tagged packets get classified) is 0 and is absent from the VTU, so this kind of packets is processed in FID 0, the default FID assigned by mv88e6xxx_setup_port. - every time a bridge VLAN is created, mv88e6xxx_port_vlan_join() -> mv88e6xxx_atu_new() associates a FID with that VID which increases linearly starting from 1. Like this: bridge vlan add dev lan0 vid 100 # FID 1 bridge vlan add dev lan1 vid 100 # still FID 1 bridge vlan add dev lan2 vid 1024 # FID 2 The FID allocation made by the driver is sub-optimal for the following reasons: (a) A standalone port has a DefaultPVID of 0 and a default FID of 0 too. A VLAN-unaware bridged port has a DefaultPVID of 0 and a default FID of 0 too. The difference is that the bridged ports may learn ATU entries, while the standalone port has the requirement that it must not, and must not find them either. Standalone ports must not use the same FID as ports belonging to a bridge. All standalone ports can use the same FID, since the ATU will never have an entry in that FID. (b) Multiple VLAN-unaware bridges will all use a DefaultPVID of 0 and a default FID of 0 on all their ports. The FDBs will not be isolated between these bridges. Every VLAN-unaware bridge must use the same FID on all its ports, different from the FID of other bridge ports. 
(c) Each bridge VLAN uses a unique FID which is useful for Independent VLAN Learning, but the same VLAN ID on multiple VLAN-aware bridges will result in the same FID being used by mv88e6xxx_atu_new(). The correct behavior is for VLAN 1 in br0 to have a different FID compared to VLAN 1 in br1. This patch cannot fix all the above. Traditionally the DSA framework did not care about this, and the reality is that DSA core involvement is needed for the aforementioned issues to be solved. The only thing we can solve here is an issue which does not require API changes, and that is issue (a), aka use a different FID for standalone ports vs ports under VLAN-unaware bridges. The first step is deciding what VID and FID to use for standalone ports, and what VID and FID for bridged ports. The 0/0 pair for standalone ports is what they used up till now, let's keep using that. For bridged ports, there are 2 cases: - VLAN-aware ports will never end up using the port default FID, because packets will always be classified to a VID in the VTU or dropped otherwise. The FID is the one associated with the VID in the VTU. - On VLAN-unaware ports, we _could_ leave their DefaultVID (pvid) at zero (just as in the case of standalone ports), and just change the port's default FID from 0 to a different number (say 1). However, Tobias points out that there is one more requirement to cater to: cross-chip bridging. The Marvell DSA header does not carry the FID in it, only the VID. So once a packet crosses a DSA link, if it has a VID of zero it will get classified to the default FID of that cascade port. Relying on a port default FID for upstream cascade ports results in contradictions: a default FID of 0 breaks ATU isolation of bridged ports on the downstream switch, a default FID of 1 breaks standalone ports on the downstream switch. So not only must standalone ports have different FIDs compared to bridged ports, they must also have different DefaultVID values. 
IEEE 802.1Q defines two reserved VID values: 0 and 4095. So we simply choose 4095 as the DefaultVID of ports belonging to VLAN-unaware bridges, and VID 4095 maps to FID 1. For the xmit operation to look up the same ATU database, we need to put VID 4095 in DSA tags sent to ports belonging to VLAN-unaware bridges too. All shared ports are configured to map this VID to the bridging FID, because they are members of that VLAN in the VTU. Shared ports don't need to have 802.1QMode enabled in any way, they always parse the VID from the DSA header, they don't need to look at the 802.1Q header. We install VID 4095 to the VTU in mv88e6xxx_setup_port(), with the mention that mv88e6xxx_vtu_setup() which was located right below that call was flushing the VTU so those entries wouldn't be preserved. So we need to relocate the VTU flushing prior to the port initialization during ->setup(). Also note that this is why it is safe to assume that VID 4095 will get associated with FID 1: the user ports haven't been created, so there is no avenue for the user to create a bridge VLAN which could otherwise race with the creation of another FID which would otherwise use up the non-reserved FID value of 1. [ Currently mv88e6xxx_port_vlan_join() doesn't have the option of specifying a preferred FID, it always calls mv88e6xxx_atu_new(). ] mv88e6xxx_port_db_load_purge() is the function to access the ATU for FDB/MDB entries, and it used to determine the FID to use for VLAN-unaware FDB entries (VID=0) using mv88e6xxx_port_get_fid(). But the driver only called mv88e6xxx_port_set_fid() once, during probe, so no surprises, the port FID was always 0, the call to get_fid() was redundant. As much as I would have wanted to not touch that code, the logic is broken when we add a new FID which is not the port-based default. Now the port-based default FID only corresponds to standalone ports, and FDB/MDB entries belong to the bridging service. 
So while in the future, when the DSA API will support FDB isolation, we will have to figure out the FID based on the bridge number, for now there's a single bridging FID, so hardcode that. Lastly, the tagger needs to check, when it is transmitting a VLAN untagged skb, whether it is sending it towards a bridged or a standalone port. When we see it is bridged we assume the bridge is VLAN-unaware. Not because it cannot be VLAN-aware but: - if we are transmitting from a VLAN-aware bridge we are likely doing so using TX forwarding offload. That code path guarantees that skbs have a vlan hwaccel tag in them, so we would not enter the "else" branch of the "if (skb->protocol == htons(ETH_P_8021Q))" condition. - if we are transmitting on behalf of a VLAN-aware bridge but with no TX forwarding offload (no PVT support, out of space in the PVT, whatever), we would indeed be transmitting with VLAN 4095 instead of the bridge device's pvid. However we would be injecting a "From CPU" frame, and the switch won't learn from that - it only learns from "Forward" frames. So it is inconsequential for address learning. And VLAN 4095 is absolutely enough for the frame to exit the switch, since we never remove that VLAN from any port. 
Fixes: 57e661aae6a8 ("net: dsa: mv88e6xxx: Link aggregation support") Reported-by: Tobias Waldekranz Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 + drivers/net/dsa/mv88e6xxx/chip.c | 67 +++++++++++++++++++++++++------- drivers/net/dsa/mv88e6xxx/chip.h | 3 ++ include/linux/dsa/mv88e6xxx.h | 13 +++++++ net/dsa/tag_dsa.c | 12 ++++-- 5 files changed, 80 insertions(+), 16 deletions(-) create mode 100644 include/linux/dsa/mv88e6xxx.h diff --git a/MAINTAINERS b/MAINTAINERS index a4a0c2baaf27..17f652b2f653 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11153,6 +11153,7 @@ S: Maintained F: Documentation/devicetree/bindings/net/dsa/marvell.txt F: Documentation/networking/devlink/mv88e6xxx.rst F: drivers/net/dsa/mv88e6xxx/ +F: include/linux/dsa/mv88e6xxx.h F: include/linux/platform_data/mv88e6xxx.h MARVELL ARMADA 3700 PHY DRIVERS diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index d672112afffd..d7b29792732b 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -1681,13 +1682,17 @@ static int mv88e6xxx_port_commit_pvid(struct mv88e6xxx_chip *chip, int port) { struct dsa_port *dp = dsa_to_port(chip->ds, port); struct mv88e6xxx_port *p = &chip->ports[port]; + u16 pvid = MV88E6XXX_VID_STANDALONE; bool drop_untagged = false; - u16 pvid = 0; int err; - if (dp->bridge_dev && br_vlan_enabled(dp->bridge_dev)) { - pvid = p->bridge_pvid.vid; - drop_untagged = !p->bridge_pvid.valid; + if (dp->bridge_dev) { + if (br_vlan_enabled(dp->bridge_dev)) { + pvid = p->bridge_pvid.vid; + drop_untagged = !p->bridge_pvid.valid; + } else { + pvid = MV88E6XXX_VID_BRIDGED; + } } err = mv88e6xxx_port_set_pvid(chip, port, pvid); @@ -1754,11 +1759,15 @@ static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, u16 fid; int err; - /* Null VLAN ID corresponds to the port private database */ + /* Ports have two 
private address databases: one for when the port is + * standalone and one for when the port is under a bridge and the + * 802.1Q mode is disabled. When the port is standalone, DSA wants its + * address database to remain 100% empty, so we never load an ATU entry + * into a standalone port's database. Therefore, translate the null + * VLAN ID into the port's database used for VLAN-unaware bridging. + */ if (vid == 0) { - err = mv88e6xxx_port_get_fid(chip, port, &fid); - if (err) - return err; + fid = MV88E6XXX_FID_BRIDGED; } else { err = mv88e6xxx_vtu_get(chip, vid, &vlan); if (err) @@ -2434,7 +2443,16 @@ static int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port, int err; mv88e6xxx_reg_lock(chip); + err = mv88e6xxx_bridge_map(chip, br); + if (err) + goto unlock; + + err = mv88e6xxx_port_commit_pvid(chip, port); + if (err) + goto unlock; + +unlock: mv88e6xxx_reg_unlock(chip); return err; @@ -2444,11 +2462,20 @@ static void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port, struct net_device *br) { struct mv88e6xxx_chip *chip = ds->priv; + int err; mv88e6xxx_reg_lock(chip); + if (mv88e6xxx_bridge_map(chip, br) || mv88e6xxx_port_vlan_map(chip, port)) dev_err(ds->dev, "failed to remap in-chip Port VLAN\n"); + + err = mv88e6xxx_port_commit_pvid(chip, port); + if (err) + dev_err(ds->dev, + "port %d failed to restore standalone pvid: %pe\n", + port, ERR_PTR(err)); + mv88e6xxx_reg_unlock(chip); } @@ -2894,6 +2921,20 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) if (err) return err; + /* Associate MV88E6XXX_VID_BRIDGED with MV88E6XXX_FID_BRIDGED in the + * ATU by virtue of the fact that mv88e6xxx_atu_new() will pick it as + * the first free FID after MV88E6XXX_FID_STANDALONE. This will be used + * as the private PVID on ports under a VLAN-unaware bridge. 
+ * Shared (DSA and CPU) ports must also be members of it, to translate + * the VID from the DSA tag into MV88E6XXX_FID_BRIDGED, instead of + * relying on their port default FID. + */ + err = mv88e6xxx_port_vlan_join(chip, port, MV88E6XXX_VID_BRIDGED, + MV88E6XXX_G1_VTU_DATA_MEMBER_TAG_UNTAGGED, + false); + if (err) + return err; + if (chip->info->ops->port_set_jumbo_size) { err = chip->info->ops->port_set_jumbo_size(chip, port, 10218); if (err) @@ -2966,7 +3007,7 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) * database, and allow bidirectional communication between the * CPU and DSA port(s), and the other ports. */ - err = mv88e6xxx_port_set_fid(chip, port, 0); + err = mv88e6xxx_port_set_fid(chip, port, MV88E6XXX_FID_STANDALONE); if (err) return err; @@ -3156,6 +3197,10 @@ static int mv88e6xxx_setup(struct dsa_switch *ds) } } + err = mv88e6xxx_vtu_setup(chip); + if (err) + goto unlock; + /* Setup Switch Port Registers */ for (i = 0; i < mv88e6xxx_num_ports(chip); i++) { if (dsa_is_unused_port(ds, i)) @@ -3185,10 +3230,6 @@ static int mv88e6xxx_setup(struct dsa_switch *ds) if (err) goto unlock; - err = mv88e6xxx_vtu_setup(chip); - if (err) - goto unlock; - err = mv88e6xxx_pvt_setup(chip); if (err) goto unlock; diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h index 33d067e8396d..8271b8aa7b71 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.h +++ b/drivers/net/dsa/mv88e6xxx/chip.h @@ -21,6 +21,9 @@ #define EDSA_HLEN 8 #define MV88E6XXX_N_FID 4096 +#define MV88E6XXX_FID_STANDALONE 0 +#define MV88E6XXX_FID_BRIDGED 1 + /* PVT limits for 4-bit port and 5-bit switch */ #define MV88E6XXX_MAX_PVT_SWITCHES 32 #define MV88E6XXX_MAX_PVT_PORTS 16 diff --git a/include/linux/dsa/mv88e6xxx.h b/include/linux/dsa/mv88e6xxx.h new file mode 100644 index 000000000000..8c3d45eca46b --- /dev/null +++ b/include/linux/dsa/mv88e6xxx.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Copyright 2021 NXP + */ + +#ifndef 
_NET_DSA_TAG_MV88E6XXX_H +#define _NET_DSA_TAG_MV88E6XXX_H + +#include + +#define MV88E6XXX_VID_STANDALONE 0 +#define MV88E6XXX_VID_BRIDGED (VLAN_N_VID - 1) + +#endif diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index 68d5ddc3ef35..b3da4b2ea11c 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -45,6 +45,7 @@ * 6 6 2 2 4 2 N */ +#include #include #include #include @@ -164,16 +165,21 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev, dsa_header[2] &= ~0x10; } } else { + struct net_device *br = dp->bridge_dev; + u16 vid; + + vid = br ? MV88E6XXX_VID_BRIDGED : MV88E6XXX_VID_STANDALONE; + skb_push(skb, DSA_HLEN + extra); dsa_alloc_etype_header(skb, DSA_HLEN + extra); - /* Construct untagged DSA tag. */ + /* Construct DSA header from untagged frame. */ dsa_header = dsa_etype_header_pos_tx(skb) + extra; dsa_header[0] = (cmd << 6) | tag_dev; dsa_header[1] = tag_port << 3; - dsa_header[2] = 0; - dsa_header[3] = 0; + dsa_header[2] = vid >> 8; + dsa_header[3] = vid & 0xff; } return skb; From 14132690860e4d06aa3e1c4d7d8e9866ba7756dd Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 7 Oct 2021 19:49:57 +0200 Subject: [PATCH 167/235] mqprio: Correct stats in mqprio_dump_class_stats(). Introduction of lockless subqueues broke the class statistics. Before the change stats were accumulated in `bstats' and `qstats' on the stack which was then copied to struct gnet_dump. After the change the `bstats' and `qstats' are initialized to 0 and never updated, yet still fed to gnet_dump. The code updates the global qdisc->cpu_bstats and qdisc->cpu_qstats instead, clobbering them. Most likely a copy-paste error from the code in mqprio_dump(). __gnet_stats_copy_basic() and __gnet_stats_copy_queue() accumulate the values for per-CPU case but for global stats they overwrite the value, so only stats from the last loop iteration / tc end up in sch->[bq]stats. 
Use the on-stack [bq]stats variables again and add the stats manually in the global case. Fixes: ce679e8df7ed2 ("net: sched: add support for TCQ_F_NOLOCK subqueues to sch_mqprio") Cc: John Fastabend Signed-off-by: Sebastian Andrzej Siewior https://lore.kernel.org/all/20211007175000.2334713-2-bigeasy@linutronix.de/ Signed-off-by: Jakub Kicinski --- net/sched/sch_mqprio.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 8766ab5b8788..5eb3b1b7ae5e 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -529,22 +529,28 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, for (i = tc.offset; i < tc.offset + tc.count; i++) { struct netdev_queue *q = netdev_get_tx_queue(dev, i); struct Qdisc *qdisc = rtnl_dereference(q->qdisc); - struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL; - struct gnet_stats_queue __percpu *cpu_qstats = NULL; spin_lock_bh(qdisc_lock(qdisc)); - if (qdisc_is_percpu_stats(qdisc)) { - cpu_bstats = qdisc->cpu_bstats; - cpu_qstats = qdisc->cpu_qstats; - } - qlen = qdisc_qlen_sum(qdisc); - __gnet_stats_copy_basic(NULL, &sch->bstats, - cpu_bstats, &qdisc->bstats); - __gnet_stats_copy_queue(&sch->qstats, - cpu_qstats, - &qdisc->qstats, - qlen); + if (qdisc_is_percpu_stats(qdisc)) { + qlen = qdisc_qlen_sum(qdisc); + + __gnet_stats_copy_basic(NULL, &bstats, + qdisc->cpu_bstats, + &qdisc->bstats); + __gnet_stats_copy_queue(&qstats, + qdisc->cpu_qstats, + &qdisc->qstats, + qlen); + } else { + qlen += qdisc->q.qlen; + bstats.bytes += qdisc->bstats.bytes; + bstats.packets += qdisc->bstats.packets; + qstats.backlog += qdisc->qstats.backlog; + qstats.drops += qdisc->qstats.drops; + qstats.requeues += qdisc->qstats.requeues; + qstats.overlimits += qdisc->qstats.overlimits; + } spin_unlock_bh(qdisc_lock(qdisc)); } From 1f3e2e97c003f80c4b087092b225c8787ff91e4d Mon Sep 17 00:00:00 2001 From: Xiaolong Huang Date: Fri, 8 Oct 
2021 14:58:30 +0800 Subject: [PATCH 168/235] isdn: cpai: check ctr->cnr to avoid array index out of bound The cmtp_add_connection() would add a cmtp session to a controller and run a kernel thread to process cmtp. __module_get(THIS_MODULE); session->task = kthread_run(cmtp_session, session, "kcmtpd_ctr_%d", session->num); During this process, the kernel thread would call detach_capi_ctr() to detach a register controller. if the controller was not attached yet, detach_capi_ctr() would trigger an array-index-out-bounds bug. [ 46.866069][ T6479] UBSAN: array-index-out-of-bounds in drivers/isdn/capi/kcapi.c:483:21 [ 46.867196][ T6479] index -1 is out of range for type 'capi_ctr *[32]' [ 46.867982][ T6479] CPU: 1 PID: 6479 Comm: kcmtpd_ctr_0 Not tainted 5.15.0-rc2+ #8 [ 46.869002][ T6479] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 [ 46.870107][ T6479] Call Trace: [ 46.870473][ T6479] dump_stack_lvl+0x57/0x7d [ 46.870974][ T6479] ubsan_epilogue+0x5/0x40 [ 46.871458][ T6479] __ubsan_handle_out_of_bounds.cold+0x43/0x48 [ 46.872135][ T6479] detach_capi_ctr+0x64/0xc0 [ 46.872639][ T6479] cmtp_session+0x5c8/0x5d0 [ 46.873131][ T6479] ? __init_waitqueue_head+0x60/0x60 [ 46.873712][ T6479] ? cmtp_add_msgpart+0x120/0x120 [ 46.874256][ T6479] kthread+0x147/0x170 [ 46.874709][ T6479] ? 
set_kthread_struct+0x40/0x40 [ 46.875248][ T6479] ret_from_fork+0x1f/0x30 [ 46.875773][ T6479] Signed-off-by: Xiaolong Huang Acked-by: Arnd Bergmann Link: https://lore.kernel.org/r/20211008065830.305057-1-butterflyhuangxx@gmail.com Signed-off-by: Jakub Kicinski --- drivers/isdn/capi/kcapi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c index cb0afe897162..7313454e403a 100644 --- a/drivers/isdn/capi/kcapi.c +++ b/drivers/isdn/capi/kcapi.c @@ -480,6 +480,11 @@ int detach_capi_ctr(struct capi_ctr *ctr) ctr_down(ctr, CAPI_CTR_DETACHED); + if (ctr->cnr < 1 || ctr->cnr - 1 >= CAPI_MAXCONTR) { + err = -EINVAL; + goto unlock_out; + } + if (capi_controller[ctr->cnr - 1] != ctr) { err = -EINVAL; goto unlock_out; From be0499369d6376e70b5b80bbced94c0c32d508b1 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Fri, 8 Oct 2021 06:11:31 -0700 Subject: [PATCH 169/235] net: mana: Fix error handling in mana_create_rxq() Fix error handling in mana_create_rxq() when cq->gdma_id >= gc->max_num_cqs. 
Fixes: ca9c54d2d6a5 ("net: mana: Add a driver for Microsoft Azure Network Adapter (MANA)") Signed-off-by: Haiyang Zhang Link: https://lore.kernel.org/r/1633698691-31721-1-git-send-email-haiyangz@microsoft.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microsoft/mana/mana_en.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 1b21030308e5..030ae89f3a33 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -1477,8 +1477,10 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc, if (err) goto out; - if (cq->gdma_id >= gc->max_num_cqs) + if (WARN_ON(cq->gdma_id >= gc->max_num_cqs)) { + err = -EINVAL; goto out; + } gc->cq_table[cq->gdma_id] = cq->gdma_cq; From 5c976a56570f29aaf4a2f9a1bf99789c252183c9 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Fri, 8 Oct 2021 12:38:01 -0700 Subject: [PATCH 170/235] ionic: don't remove netdev->dev_addr when syncing uc list Bridging, and possibly other upper stack gizmos, adds the lower device's netdev->dev_addr to its own uc list, and then requests it be deleted when the upper bridge device is removed. This delete request also happens when the bridging vlan_filtering is enabled and then disabled. Bonding has a similar behavior with the uc list, but since it also uses set_mac to manage netdev->dev_addr, it doesn't have the same failure case. Because we store our netdev->dev_addr in our uc list, we need to ignore the delete request from dev_uc_sync so as to not lose the address and all hope of communicating. Note that ndo_set_mac_address is expressly changing netdev->dev_addr, so no limitation is set there. Fixes: 2a654540be10 ("ionic: Add Rx filter and rx_mode ndo support") Signed-off-by: Shannon Nelson Signed-off-by: David S.
Miller --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index ccf3ffcd3939..7f3322ce044c 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -1379,6 +1379,10 @@ static int ionic_addr_add(struct net_device *netdev, const u8 *addr) static int ionic_addr_del(struct net_device *netdev, const u8 *addr) { + /* Don't delete our own address from the uc list */ + if (ether_addr_equal(addr, netdev->dev_addr)) + return 0; + return ionic_lif_list_addr(netdev_priv(netdev), addr, DEL_ADDR); } From 6510e80a0b81b5d814e3aea6297ba42f5e76f73c Mon Sep 17 00:00:00 2001 From: Zheyu Ma Date: Sat, 9 Oct 2021 11:33:49 +0000 Subject: [PATCH 171/235] isdn: mISDN: Fix sleeping function called from invalid context The driver can call card->isac.release() function from an atomic context. Fix this by calling this function after releasing the lock. The following log reveals it: [ 44.168226 ] BUG: sleeping function called from invalid context at kernel/workqueue.c:3018 [ 44.168941 ] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 5475, name: modprobe [ 44.169574 ] INFO: lockdep is turned off. [ 44.169899 ] irq event stamp: 0 [ 44.170160 ] hardirqs last enabled at (0): [<0000000000000000>] 0x0 [ 44.170627 ] hardirqs last disabled at (0): [] copy_process+0x132d/0x3e00 [ 44.171240 ] softirqs last enabled at (0): [] copy_process+0x135a/0x3e00 [ 44.171852 ] softirqs last disabled at (0): [<0000000000000000>] 0x0 [ 44.172318 ] Preemption disabled at: [ 44.172320 ] [] nj_release+0x69/0x500 [netjet] [ 44.174441 ] Call Trace: [ 44.174630 ] dump_stack_lvl+0xa8/0xd1 [ 44.174912 ] dump_stack+0x15/0x17 [ 44.175166 ] ___might_sleep+0x3a2/0x510 [ 44.175459 ] ? nj_release+0x69/0x500 [netjet] [ 44.175791 ] __might_sleep+0x82/0xe0 [ 44.176063 ] ? 
start_flush_work+0x20/0x7b0 [ 44.176375 ] start_flush_work+0x33/0x7b0 [ 44.176672 ] ? trace_irq_enable_rcuidle+0x85/0x170 [ 44.177034 ] ? kasan_quarantine_put+0xaa/0x1f0 [ 44.177372 ] ? kasan_quarantine_put+0xaa/0x1f0 [ 44.177711 ] __flush_work+0x11a/0x1a0 [ 44.177991 ] ? flush_work+0x20/0x20 [ 44.178257 ] ? lock_release+0x13c/0x8f0 [ 44.178550 ] ? __kasan_check_write+0x14/0x20 [ 44.178872 ] ? do_raw_spin_lock+0x148/0x360 [ 44.179187 ] ? read_lock_is_recursive+0x20/0x20 [ 44.179530 ] ? __kasan_check_read+0x11/0x20 [ 44.179846 ] ? do_raw_spin_unlock+0x55/0x900 [ 44.180168 ] ? ____kasan_slab_free+0x116/0x140 [ 44.180505 ] ? _raw_spin_unlock_irqrestore+0x41/0x60 [ 44.180878 ] ? skb_queue_purge+0x1a3/0x1c0 [ 44.181189 ] ? kfree+0x13e/0x290 [ 44.181438 ] flush_work+0x17/0x20 [ 44.181695 ] mISDN_freedchannel+0xe8/0x100 [ 44.182006 ] isac_release+0x210/0x260 [mISDNipac] [ 44.182366 ] nj_release+0xf6/0x500 [netjet] [ 44.182685 ] nj_remove+0x48/0x70 [netjet] [ 44.182989 ] pci_device_remove+0xa9/0x250 Signed-off-by: Zheyu Ma Signed-off-by: David S. 
Miller --- drivers/isdn/hardware/mISDN/netjet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/isdn/hardware/mISDN/netjet.c b/drivers/isdn/hardware/mISDN/netjet.c index 2a1ddd47a096..a52f275f8263 100644 --- a/drivers/isdn/hardware/mISDN/netjet.c +++ b/drivers/isdn/hardware/mISDN/netjet.c @@ -949,8 +949,8 @@ nj_release(struct tiger_hw *card) nj_disable_hwirq(card); mode_tiger(&card->bc[0], ISDN_P_NONE); mode_tiger(&card->bc[1], ISDN_P_NONE); - card->isac.release(&card->isac); spin_unlock_irqrestore(&card->lock, flags); + card->isac.release(&card->isac); release_region(card->base, card->base_s); card->base_s = 0; } From 1951b3f19cfe822709c890a337906823c223c7c3 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 9 Oct 2021 15:26:07 +0300 Subject: [PATCH 172/235] net: dsa: hold rtnl_lock in dsa_switch_setup_tag_protocol It was a documented fact that ds->ops->change_tag_protocol() offered rtnetlink mutex protection to the switch driver, since there was an ASSERT_RTNL right before the call in dsa_switch_change_tag_proto() (initiated from sysfs). The blamed commit introduced another call path for ds->ops->change_tag_protocol() which does not hold the rtnl_mutex. This is: dsa_tree_setup -> dsa_tree_setup_switches -> dsa_switch_setup -> dsa_switch_setup_tag_protocol -> ds->ops->change_tag_protocol() -> dsa_port_setup -> dsa_slave_create -> register_netdevice(slave_dev) -> dsa_tree_setup_master -> dsa_master_setup -> dev->dsa_ptr = cpu_dp The reason why the rtnl_mutex is held in the sysfs call path is to ensure that, once the master and all the DSA interfaces are down (which is required so that no packets flow), they remain down during the tagging protocol change. 
The above calling order illustrates the fact that it should not be risky to change the initial tagging protocol to the one specified in the device tree at the given time: - packets cannot enter the dsa_switch_rcv() packet type handler since netdev_uses_dsa() for the master will not yet return true, since dev->dsa_ptr has not yet been populated - packets cannot enter the dsa_slave_xmit() function because no DSA interface has yet been registered So from the DSA core's perspective, holding the rtnl_mutex is indeed not necessary. Yet, drivers may need to do things which need rtnl_mutex protection. For example: felix_set_tag_protocol -> felix_setup_tag_8021q -> dsa_tag_8021q_register -> dsa_tag_8021q_setup -> dsa_tag_8021q_port_setup -> vlan_vid_add -> ASSERT_RTNL These drivers do not really have a choice to take the rtnl_mutex themselves, since in the sysfs case, the rtnl_mutex is already held. Fixes: deff710703d8 ("net: dsa: Allow default tag protocol to be overridden from DT") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- net/dsa/dsa2.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 6d5cc0217133..da18094b5a04 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -811,7 +811,9 @@ static int dsa_switch_setup_tag_protocol(struct dsa_switch *ds) if (!dsa_is_cpu_port(ds, port)) continue; + rtnl_lock(); err = ds->ops->change_tag_protocol(ds, port, tag_ops->proto); + rtnl_unlock(); if (err) { dev_err(ds->dev, "Unable to use tag protocol \"%s\": %pe\n", tag_ops->name, ERR_PTR(err)); From a5a14ea7b4e55604acb0dc9d88fdb4cb6945bc77 Mon Sep 17 00:00:00 2001 From: chongjiapeng Date: Sat, 9 Oct 2021 16:09:26 +0800 Subject: [PATCH 173/235] qed: Fix missing error code in qed_slowpath_start() The error code is missing in this code scenario, add the error code '-EINVAL' to the return value 'rc'. 
Eliminate the following smatch warning: drivers/net/ethernet/qlogic/qed/qed_main.c:1298 qed_slowpath_start() warn: missing error code 'rc'. Reported-by: Abaci Robot Fixes: d51e4af5c209 ("qed: aRFS infrastructure support") Signed-off-by: chongjiapeng Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 15ef59aa34ff..d10e1cd6d2ba 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -1299,6 +1299,7 @@ static int qed_slowpath_start(struct qed_dev *cdev, } else { DP_NOTICE(cdev, "Failed to acquire PTT for aRFS\n"); + rc = -EINVAL; goto err; } } From f49823939e41121fdffada4d583e3e38d28336f9 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 8 Oct 2021 14:42:52 -0700 Subject: [PATCH 174/235] net: phy: Do not shutdown PHYs in READY state In case a PHY device was probed thus in the PHY_READY state, but not configured and with no network device attached yet, we should not be trying to shut it down because it has been brought back into reset by phy_device_reset() towards the end of phy_probe() and anyway we have not configured the PHY yet. Fixes: e2f016cf7751 ("net: phy: add a shutdown procedure") Signed-off-by: Florian Fainelli Signed-off-by: David S.
Miller --- drivers/net/phy/phy_device.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index ba5ad86ec826..4f9990b47a37 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -3125,6 +3125,9 @@ static void phy_shutdown(struct device *dev) { struct phy_device *phydev = to_phy_device(dev); + if (phydev->state == PHY_READY || !phydev->attached_dev) + return; + phy_disable_interrupts(phydev); } From 732b74d647048668f0f8dc0c848f0746c69e2e2f Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Sat, 9 Oct 2021 05:17:53 -0400 Subject: [PATCH 175/235] virtio-net: fix for skb_over_panic inside big mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 126285651b7f ("Merge ra.kernel.org:/pub/scm/linux/kernel/git/netdev/net") accidentally reverted the effect of commit 1a8024239da ("virtio-net: fix for skb_over_panic inside big mode") on drivers/net/virtio_net.c As a result, users of crosvm (which is using large packet mode) are experiencing crashes with 5.14-rc1 and above that do not occur with 5.13. Crash trace: [ 61.346677] skbuff: skb_over_panic: text:ffffffff881ae2c7 len:3762 put:3762 head:ffff8a5ec8c22000 data:ffff8a5ec8c22010 tail:0xec2 end:0xec0 dev: [ 61.369192] kernel BUG at net/core/skbuff.c:111! [ 61.372840] invalid opcode: 0000 [#1] SMP PTI [ 61.374892] CPU: 5 PID: 0 Comm: swapper/5 Not tainted 5.14.0-rc1 linux-v5.14-rc1-for-mesa-ci.tar.bz2 #1 [ 61.376450] Hardware name: ChromiumOS crosvm, BIOS 0 .. [ 61.393635] Call Trace: [ 61.394127] [ 61.394488] skb_put.cold+0x10/0x10 [ 61.395095] page_to_skb+0xf7/0x410 [ 61.395689] receive_buf+0x81/0x1660 [ 61.396228] ? netif_receive_skb_list_internal+0x1ad/0x2b0 [ 61.397180] ? napi_gro_flush+0x97/0xe0 [ 61.397896] ? 
detach_buf_split+0x67/0x120 [ 61.398573] virtnet_poll+0x2cf/0x420 [ 61.399197] __napi_poll+0x25/0x150 [ 61.399764] net_rx_action+0x22f/0x280 [ 61.400394] __do_softirq+0xba/0x257 [ 61.401012] irq_exit_rcu+0x8e/0xb0 [ 61.401618] common_interrupt+0x7b/0xa0 [ 61.402270] See https://lore.kernel.org/r/5edaa2b7c2fe4abd0347b8454b2ac032b6694e2c.camel%40collabora.com for the report. Apply the original 1a8024239da ("virtio-net: fix for skb_over_panic inside big mode") again, the original logic still holds: In virtio-net's large packet mode, there is a hole in the space behind buf. hdr_padded_len - hdr_len We must take this into account when calculating tailroom. Cc: Greg KH Fixes: fb32856b16ad ("virtio-net: page_to_skb() use build_skb when there's sufficient tailroom") Fixes: 126285651b7f ("Merge ra.kernel.org:/pub/scm/linux/kernel/git/netdev/net") Signed-off-by: Xuan Zhuo Reported-by: Corentin Noël Tested-by: Corentin Noël Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 79bd2585ec6b..4ad25a8b0870 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -406,7 +406,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, * add_recvbuf_mergeable() + get_mergeable_buf_len() */ truesize = headroom ? 
PAGE_SIZE : truesize; - tailroom = truesize - len - headroom; + tailroom = truesize - len - headroom - (hdr_padded_len - hdr_len); buf = p - headroom; len -= hdr_len; From 64570fbc14f8d7cb3fe3995f20e26bc25ce4b2cc Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 10 Oct 2021 17:01:59 -0700 Subject: [PATCH 176/235] Linux 5.15-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7b74223d1309..4d0c0ed9236e 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Opossums on Parade # *DOCUMENTATION* From 023a062f238129e8a542b5163c4350ceb076283e Mon Sep 17 00:00:00 2001 From: Cameron Berkenpas Date: Sun, 10 Oct 2021 15:54:11 -0700 Subject: [PATCH 177/235] ALSA: hda/realtek: Fix for quirk to enable speaker output on the Lenovo 13s Gen2 The previous patch's HDA verb initialization for the Lenovo 13s sequence was slightly off. This updated verb sequence has been tested and confirmed working. 
Fixes: ad7cc2d41b7a ("ALSA: hda/realtek: Quirks to enable speaker output for Lenovo Legion 7i 15IMHG05, Yoga 7i 14ITL5/15ITL5, and 13s Gen2 laptops.") BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=208555 Cc: Signed-off-by: Cameron Berkenpas Link: https://lore.kernel.org/r/20211010225410.23423-1-cam@neo-zeon.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 73e7a92c3728..bca5830ff706 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8367,7 +8367,7 @@ static const struct hda_fixup alc269_fixups[] = { .v.verbs = (const struct hda_verb[]) { { 0x20, AC_VERB_SET_COEF_INDEX, 0x24 }, { 0x20, AC_VERB_SET_PROC_COEF, 0x41 }, - { 0x20, AC_VERB_SET_PROC_COEF, 0xb020 }, + { 0x20, AC_VERB_SET_COEF_INDEX, 0x26 }, { 0x20, AC_VERB_SET_PROC_COEF, 0x2 }, { 0x20, AC_VERB_SET_PROC_COEF, 0x0 }, { 0x20, AC_VERB_SET_PROC_COEF, 0x0 }, From 9b024201693e397441668cca0d2df7055fe572eb Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Mon, 27 Sep 2021 17:22:13 +0300 Subject: [PATCH 178/235] platform/mellanox: mlxreg-io: Fix argument base in kstrtou32() call Change kstrtou32() argument 'base' to be zero instead of 'len'. It works by chance for setting one bit value, but it is not supposed to work in case value passed to mlxreg_io_attr_store() is greater than 1. It works for example, for: echo 1 > /sys/devices/platform/mlxplat/mlxreg-io/hwmon/.../jtag_enable But it will fail for: echo n > /sys/devices/platform/mlxplat/mlxreg-io/hwmon/.../jtag_enable, where n > 1. The flow for input buffer conversion is as below: _kstrtoull(const char *s, unsigned int base, unsigned long long *res) calls: rv = _parse_integer(s, base, &_res); For the second case, where n > 1: - _parse_integer() converts 's' to 'val'. 
For n=2, 'len' is set to 2 (string buffer is 0x32 0x0a), for n=3 'len' is set to 3 (string buffer 0x33 0x0a), etcetera. - 'base' is equal to or greater than '2' (length of input buffer). As a result, _parse_integer() exits with result zero (rv): rv = 0; while (1) { ... if (val >= base)-> (2 >= 2) break; ... rv++; ... } And _kstrtoull() in its turn will fail: if (rv == 0) return -EINVAL; Fixes: 5ec4a8ace06c ("platform/mellanox: Introduce support for Mellanox register access driver") Signed-off-by: Vadim Pasternak Link: https://lore.kernel.org/r/20210927142214.2613929-2-vadimp@nvidia.com Signed-off-by: Hans de Goede --- drivers/platform/mellanox/mlxreg-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/mellanox/mlxreg-io.c b/drivers/platform/mellanox/mlxreg-io.c index 7646708d57e4..a023ec02126b 100644 --- a/drivers/platform/mellanox/mlxreg-io.c +++ b/drivers/platform/mellanox/mlxreg-io.c @@ -141,7 +141,7 @@ mlxreg_io_attr_store(struct device *dev, struct device_attribute *attr, return -EINVAL; /* Convert buffer to input value. */ - ret = kstrtou32(buf, len, &input_val); + ret = kstrtou32(buf, 0, &input_val); if (ret) return ret; From db9cc7d6f95e7d89b0ce57e785cfd9d67a7505d8 Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Mon, 27 Sep 2021 17:22:14 +0300 Subject: [PATCH 179/235] platform/mellanox: mlxreg-io: Fix read access of n-bytes size attributes Fix shift argument for function rol32(). It should be provided in bits, while it was provided in bytes.
Fixes: 86148190a7db ("platform/mellanox: mlxreg-io: Add support for complex attributes") Signed-off-by: Vadim Pasternak Link: https://lore.kernel.org/r/20210927142214.2613929-3-vadimp@nvidia.com Signed-off-by: Hans de Goede --- drivers/platform/mellanox/mlxreg-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/mellanox/mlxreg-io.c b/drivers/platform/mellanox/mlxreg-io.c index a023ec02126b..a916cd89cbbe 100644 --- a/drivers/platform/mellanox/mlxreg-io.c +++ b/drivers/platform/mellanox/mlxreg-io.c @@ -98,7 +98,7 @@ mlxreg_io_get_reg(void *regmap, struct mlxreg_core_data *data, u32 in_val, if (ret) goto access_error; - *regval |= rol32(val, regsize * i); + *regval |= rol32(val, regsize * i * 8); } } From 92813dafcd8cae40b6256fd9392a44ecd5c9f505 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 11 Oct 2021 15:23:38 +0200 Subject: [PATCH 180/235] platform/x86: dell: Make DELL_WMI_PRIVACY depend on DELL_WMI DELL_WMI_PRIVACY is a feature toggle for the main dell-wmi driver, so it must depend on the Kconfig option which enables the main dell-wmi driver. Fixes: 8af9fa37b8a3 ("platform/x86: dell-privacy: Add support for Dell hardware privacy") Reported-by: Randy Dunlap Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20211011132338.407571-1-hdegoede@redhat.com --- drivers/platform/x86/dell/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/dell/Kconfig b/drivers/platform/x86/dell/Kconfig index 42513eab1d06..2fffa57e596e 100644 --- a/drivers/platform/x86/dell/Kconfig +++ b/drivers/platform/x86/dell/Kconfig @@ -167,6 +167,7 @@ config DELL_WMI config DELL_WMI_PRIVACY bool "Dell WMI Hardware Privacy Support" depends on LEDS_TRIGGER_AUDIO = y || DELL_WMI = LEDS_TRIGGER_AUDIO + depends on DELL_WMI help This option adds integration with the "Dell Hardware Privacy" feature of Dell laptops to the dell-wmi driver. 
From 41512e4dc0b84525495e784295092592adb87f1b Mon Sep 17 00:00:00 2001 From: Prashant Malani Date: Tue, 28 Sep 2021 03:19:30 -0700 Subject: [PATCH 181/235] platform/x86: intel_scu_ipc: Fix busy loop expiry time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The macro IPC_TIMEOUT is already in jiffies (it is also used like that elsewhere in the file when calling wait_for_completion_timeout()). Don’t convert it using helper functions for the purposes of calculating the busy loop expiry time. Fixes: e7b7ab3847c9 (“platform/x86: intel_scu_ipc: Sleeping is fine when polling”) Signed-off-by: Prashant Malani Cc: Benson Leung Reviewed-by: Mika Westerberg Link: https://lore.kernel.org/r/20210928101932.2543937-2-pmalani@chromium.org Signed-off-by: Hans de Goede --- drivers/platform/x86/intel_scu_ipc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel_scu_ipc.c b/drivers/platform/x86/intel_scu_ipc.c index bfa0cc20750d..cfb249da2a7b 100644 --- a/drivers/platform/x86/intel_scu_ipc.c +++ b/drivers/platform/x86/intel_scu_ipc.c @@ -232,7 +232,7 @@ static inline u32 ipc_data_readl(struct intel_scu_ipc_dev *scu, u32 offset) /* Wait till scu status is busy */ static inline int busy_loop(struct intel_scu_ipc_dev *scu) { - unsigned long end = jiffies + msecs_to_jiffies(IPC_TIMEOUT); + unsigned long end = jiffies + IPC_TIMEOUT; do { u32 status; From 5c02b581ce84eea240d25c8318a1f65133a04415 Mon Sep 17 00:00:00 2001 From: Prashant Malani Date: Tue, 28 Sep 2021 03:19:32 -0700 Subject: [PATCH 182/235] platform/x86: intel_scu_ipc: Increase virtual timeout to 10s Commit a7d53dbbc70a ("platform/x86: intel_scu_ipc: Increase virtual timeout from 3 to 5 seconds") states that the recommended timeout range is 5-10 seconds. Adjust the timeout value to the higher of those i.e 10 seconds, to account for situations where the 5 seconds is insufficient for disconnect command success. 
Signed-off-by: Prashant Malani Cc: Benson Leung Reviewed-by: Mika Westerberg Link: https://lore.kernel.org/r/20210928101932.2543937-3-pmalani@chromium.org Signed-off-by: Hans de Goede --- drivers/platform/x86/intel_scu_ipc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel_scu_ipc.c b/drivers/platform/x86/intel_scu_ipc.c index cfb249da2a7b..d71a1dce781c 100644 --- a/drivers/platform/x86/intel_scu_ipc.c +++ b/drivers/platform/x86/intel_scu_ipc.c @@ -75,7 +75,7 @@ struct intel_scu_ipc_dev { #define IPC_READ_BUFFER 0x90 /* Timeout in jiffies */ -#define IPC_TIMEOUT (5 * HZ) +#define IPC_TIMEOUT (10 * HZ) static struct intel_scu_ipc_dev *ipcdev; /* Only one for now */ static DEFINE_MUTEX(ipclock); /* lock used to prevent multiple call to SCU */ From a0c5814b9933f25ecb6de169483c5b88cf632bca Mon Sep 17 00:00:00 2001 From: Prashant Malani Date: Tue, 28 Sep 2021 03:19:34 -0700 Subject: [PATCH 183/235] platform/x86: intel_scu_ipc: Update timeout value in comment The comment describing the IPC timeout hadn't been updated when the actual timeout was changed from 3 to 5 seconds in commit a7d53dbbc70a ("platform/x86: intel_scu_ipc: Increase virtual timeout from 3 to 5 seconds"). Since the value is anyway updated to 10s now, take this opportunity to update the value in the comment too.
Signed-off-by: Prashant Malani Cc: Benson Leung Reviewed-by: Mika Westerberg Link: https://lore.kernel.org/r/20210928101932.2543937-4-pmalani@chromium.org Signed-off-by: Hans de Goede --- drivers/platform/x86/intel_scu_ipc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel_scu_ipc.c b/drivers/platform/x86/intel_scu_ipc.c index d71a1dce781c..7cc9089d1e14 100644 --- a/drivers/platform/x86/intel_scu_ipc.c +++ b/drivers/platform/x86/intel_scu_ipc.c @@ -247,7 +247,7 @@ static inline int busy_loop(struct intel_scu_ipc_dev *scu) return -ETIMEDOUT; } -/* Wait till ipc ioc interrupt is received or timeout in 3 HZ */ +/* Wait till ipc ioc interrupt is received or timeout in 10 HZ */ static inline int ipc_wait_for_interrupt(struct intel_scu_ipc_dev *scu) { int status; From c0d84d2c7c23e9cf23a5abdda40eeaa79eabfe69 Mon Sep 17 00:00:00 2001 From: Sachi King Date: Sat, 2 Oct 2021 14:18:39 +1000 Subject: [PATCH 184/235] platform/x86: amd-pmc: Add alternative acpi id for PMC controller The Surface Laptop 4 AMD has used the AMD0005 to identify this controller instead of using the appropriate ACPI ID AMDI0005. Include AMD0005 in the acpi id list. 
Link: https://github.com/linux-surface/acpidumps/tree/master/surface_laptop_4_amd Link: https://gist.github.com/nakato/2a1a7df1a45fe680d7a08c583e1bf863 Cc: # 5.14+ Signed-off-by: Sachi King Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20211002041840.2058647-1-nakato@nakato.io Signed-off-by: Hans de Goede --- drivers/platform/x86/amd-pmc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c index d6a7c896ac86..fc95620101e8 100644 --- a/drivers/platform/x86/amd-pmc.c +++ b/drivers/platform/x86/amd-pmc.c @@ -476,6 +476,7 @@ static const struct acpi_device_id amd_pmc_acpi_ids[] = { {"AMDI0006", 0}, {"AMDI0007", 0}, {"AMD0004", 0}, + {"AMD0005", 0}, { } }; MODULE_DEVICE_TABLE(acpi, amd_pmc_acpi_ids); From 0f607d6b227470456a69a37d7c7badea51d52844 Mon Sep 17 00:00:00 2001 From: "Zephaniah E. Loss-Cutler-Hull" Date: Mon, 4 Oct 2021 21:48:55 -0700 Subject: [PATCH 185/235] platform/x86: gigabyte-wmi: add support for B550 AORUS ELITE AX V2 This works just fine on my system. Signed-off-by: Zephaniah E. 
Loss-Cutler-Hull Cc: Link: https://lore.kernel.org/r/20211005044855.1429724-1-zephaniah@gmail.com Signed-off-by: Hans de Goede --- drivers/platform/x86/gigabyte-wmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/gigabyte-wmi.c b/drivers/platform/x86/gigabyte-wmi.c index d53634c8a6e0..658bab4b7964 100644 --- a/drivers/platform/x86/gigabyte-wmi.c +++ b/drivers/platform/x86/gigabyte-wmi.c @@ -141,6 +141,7 @@ static u8 gigabyte_wmi_detect_sensor_usability(struct wmi_device *wdev) static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = { DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B450M S2H V2"), + DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE AX V2"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE V2"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 GAMING X V2"), From c005828744f584bfcd2cf3ed64dfef15a5078960 Mon Sep 17 00:00:00 2001 From: Daniel Scally Date: Fri, 8 Oct 2021 23:46:08 +0100 Subject: [PATCH 186/235] platform/x86: intel_skl_int3472: Correct null check The int3472-discrete driver can enter an error path after initialising int3472->clock.ena_gpio, but before it has registered the clock. This will cause a NULL pointer dereference, because clkdev_drop() is not null aware. Instead of guarding the call to skl_int3472_unregister_clock() by checking for .ena_gpio, check specifically for the presence of the clk_lookup, which will guarantee clkdev_create() has already been called. 
Bug: https://bugzilla.kernel.org/show_bug.cgi?id=214453 Fixes: 7540599a5ef1 ("platform/x86: intel_skl_int3472: Provide skl_int3472_unregister_clock()") Signed-off-by: Daniel Scally Link: https://lore.kernel.org/r/20211008224608.415949-1-djrscally@gmail.com Signed-off-by: Hans de Goede --- drivers/platform/x86/intel/int3472/intel_skl_int3472_discrete.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel/int3472/intel_skl_int3472_discrete.c b/drivers/platform/x86/intel/int3472/intel_skl_int3472_discrete.c index 9fe0a2527e1c..e59d79c7e82f 100644 --- a/drivers/platform/x86/intel/int3472/intel_skl_int3472_discrete.c +++ b/drivers/platform/x86/intel/int3472/intel_skl_int3472_discrete.c @@ -401,7 +401,7 @@ int skl_int3472_discrete_remove(struct platform_device *pdev) gpiod_remove_lookup_table(&int3472->gpios); - if (int3472->clock.ena_gpio) + if (int3472->clock.cl) skl_int3472_unregister_clock(int3472); gpiod_put(int3472->clock.ena_gpio); From 7df227847ab562c42d318bceccebb0c911c87b04 Mon Sep 17 00:00:00 2001 From: Shravan S Date: Wed, 6 Oct 2021 13:05:25 +0530 Subject: [PATCH 187/235] platform/x86: int1092: Fix non sequential device mode handling SAR information from BIOS may come in non sequential pattern. To overcome the issue, a check is made to extract the right SAR information using the device mode which is currently being used. Remove .owner field if calls are used which set it automatically. 
Generated by: scripts/coccinelle/api/platform_no_drv_owner.cocci Signed-off-by: Shravan S Link: https://lore.kernel.org/r/20211006073525.1332925-1-s.shravan@intel.com Signed-off-by: Hans de Goede Reviewed-by: Hans de Goede --- MAINTAINERS | 2 +- .../platform/x86/intel/int1092/intel_sar.c | 21 ++++++++++++------- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index eeb4c70b3d5b..6cfad2bf9d81 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9302,7 +9302,7 @@ S: Maintained F: drivers/platform/x86/intel/atomisp2/led.c INTEL BIOS SAR INT1092 DRIVER -M: Shravan S +M: Shravan Sudhakar M: Intel Corporation L: platform-driver-x86@vger.kernel.org S: Maintained diff --git a/drivers/platform/x86/intel/int1092/intel_sar.c b/drivers/platform/x86/intel/int1092/intel_sar.c index 379560fe5df9..e03943e6380a 100644 --- a/drivers/platform/x86/intel/int1092/intel_sar.c +++ b/drivers/platform/x86/intel/int1092/intel_sar.c @@ -42,12 +42,20 @@ static void update_sar_data(struct wwan_sar_context *context) if (config->device_mode_info && context->sar_data.device_mode < config->total_dev_mode) { - struct wwan_device_mode_info *dev_mode = - &config->device_mode_info[context->sar_data.device_mode]; + int itr = 0; - context->sar_data.antennatable_index = dev_mode->antennatable_index; - context->sar_data.bandtable_index = dev_mode->bandtable_index; - context->sar_data.sartable_index = dev_mode->sartable_index; + for (itr = 0; itr < config->total_dev_mode; itr++) { + if (context->sar_data.device_mode == + config->device_mode_info[itr].device_mode) { + struct wwan_device_mode_info *dev_mode = + &config->device_mode_info[itr]; + + context->sar_data.antennatable_index = dev_mode->antennatable_index; + context->sar_data.bandtable_index = dev_mode->bandtable_index; + context->sar_data.sartable_index = dev_mode->sartable_index; + break; + } + } } } @@ -305,7 +313,6 @@ static struct platform_driver sar_driver = { .remove = sar_remove, .driver = { .name = DRVNAME, 
- .owner = THIS_MODULE, .acpi_match_table = ACPI_PTR(sar_device_ids) } }; @@ -313,4 +320,4 @@ module_platform_driver(sar_driver); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("Platform device driver for INTEL MODEM BIOS SAR"); -MODULE_AUTHOR("Shravan S "); +MODULE_AUTHOR("Shravan Sudhakar "); From 228af5a4fa3a8293bd8b7ac5cf59548ee29627bf Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 10 Oct 2021 09:55:46 +0200 Subject: [PATCH 188/235] ALSA: pcm: Workaround for a wrong offset in SYNC_PTR compat ioctl Michael Forney reported an incorrect padding type that was defined in the commit 80fe7430c708 ("ALSA: add new 32-bit layout for snd_pcm_mmap_status/control") for PCM control mmap data. His analysis is correct, and this caused the misplacements of PCM control data on 32bit arch and 32bit compat mode. The bug is that the __pad2 definition in __snd_pcm_mmap_control64 struct was wrongly with __pad_before_uframe, which should have been __pad_after_uframe instead. This struct is used in SYNC_PTR ioctl and control mmap. Basically this bug leads to two problems: - The offset of avail_min field becomes wrong, it's placed right after appl_ptr without padding on little-endian - When appl_ptr and avail_min are read as 64bit values in kernel side, the values become either zero or corrupted (mixed up) One good news is that, because both user-space and kernel misunderstand the wrong offset, at least, 32bit application running on 32bit kernel works as is. Also, 64bit applications are unaffected because the padding size is zero. The remaining problem is the 32bit compat mode; as mentioned in the above, avail_min is placed right after appl_ptr on little-endian archs, 64bit kernel reads bogus values for appl_ptr updates, which may lead to streaming bugs like jumping, XRUN or whatever unexpected. (However, we haven't heard any serious bug reports due to this over years, so practically seen, it's fairly safe to assume that the impact by this bug is limited.) 
Ideally speaking, we should correct the wrong mmap status control definition. But this would cause again incompatibility with the existing binaries, and fixing it (e.g. by renumbering ioctls) would be really messy. So, as of this patch, we only correct the behavior of 32bit compat mode and keep the rest as is. Namely, the SYNC_PTR ioctl is now handled differently in compat mode to read/write the 32bit values at the right offsets. The control mmap of 32bit apps on 64bit kernels has been already disabled (which is likely rather an overlook, but this worked fine at this time :), so covering SYNC_PTR ioctl should suffice as a fallback. Fixes: 80fe7430c708 ("ALSA: add new 32-bit layout for snd_pcm_mmap_status/control") Reported-by: Michael Forney Reviewed-by: Arnd Bergmann Cc: Cc: Rich Felker Link: https://lore.kernel.org/r/29QBMJU8DE71E.2YZSH8IHT5HMH@mforney.org Link: https://lore.kernel.org/r/20211010075546.23220-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/pcm_compat.c | 72 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 71 insertions(+), 1 deletion(-) diff --git a/sound/core/pcm_compat.c b/sound/core/pcm_compat.c index a59de24695ec..dfe5a64e19d2 100644 --- a/sound/core/pcm_compat.c +++ b/sound/core/pcm_compat.c @@ -468,6 +468,76 @@ static int snd_pcm_ioctl_sync_ptr_x32(struct snd_pcm_substream *substream, } #endif /* CONFIG_X86_X32 */ +#ifdef __BIG_ENDIAN +typedef char __pad_before_u32[4]; +typedef char __pad_after_u32[0]; +#else +typedef char __pad_before_u32[0]; +typedef char __pad_after_u32[4]; +#endif + +/* PCM 2.0.15 API definition had a bug in mmap control; it puts the avail_min + * at the wrong offset due to a typo in padding type. + * The bug hits only 32bit. + * A workaround for incorrect read/write is needed only in 32bit compat mode. + */ +struct __snd_pcm_mmap_control64_buggy { + __pad_before_u32 __pad1; + __u32 appl_ptr; + __pad_before_u32 __pad2; /* SiC! 
here is the bug */ + __pad_before_u32 __pad3; + __u32 avail_min; + __pad_after_uframe __pad4; +}; + +static int snd_pcm_ioctl_sync_ptr_buggy(struct snd_pcm_substream *substream, + struct snd_pcm_sync_ptr __user *_sync_ptr) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + struct snd_pcm_sync_ptr sync_ptr; + struct __snd_pcm_mmap_control64_buggy *sync_cp; + volatile struct snd_pcm_mmap_status *status; + volatile struct snd_pcm_mmap_control *control; + int err; + + memset(&sync_ptr, 0, sizeof(sync_ptr)); + sync_cp = (struct __snd_pcm_mmap_control64_buggy *)&sync_ptr.c.control; + if (get_user(sync_ptr.flags, (unsigned __user *)&(_sync_ptr->flags))) + return -EFAULT; + if (copy_from_user(sync_cp, &(_sync_ptr->c.control), sizeof(*sync_cp))) + return -EFAULT; + status = runtime->status; + control = runtime->control; + if (sync_ptr.flags & SNDRV_PCM_SYNC_PTR_HWSYNC) { + err = snd_pcm_hwsync(substream); + if (err < 0) + return err; + } + snd_pcm_stream_lock_irq(substream); + if (!(sync_ptr.flags & SNDRV_PCM_SYNC_PTR_APPL)) { + err = pcm_lib_apply_appl_ptr(substream, sync_cp->appl_ptr); + if (err < 0) { + snd_pcm_stream_unlock_irq(substream); + return err; + } + } else { + sync_cp->appl_ptr = control->appl_ptr; + } + if (!(sync_ptr.flags & SNDRV_PCM_SYNC_PTR_AVAIL_MIN)) + control->avail_min = sync_cp->avail_min; + else + sync_cp->avail_min = control->avail_min; + sync_ptr.s.status.state = status->state; + sync_ptr.s.status.hw_ptr = status->hw_ptr; + sync_ptr.s.status.tstamp = status->tstamp; + sync_ptr.s.status.suspended_state = status->suspended_state; + sync_ptr.s.status.audio_tstamp = status->audio_tstamp; + snd_pcm_stream_unlock_irq(substream); + if (copy_to_user(_sync_ptr, &sync_ptr, sizeof(sync_ptr))) + return -EFAULT; + return 0; +} + /* */ enum { @@ -537,7 +607,7 @@ static long snd_pcm_ioctl_compat(struct file *file, unsigned int cmd, unsigned l if (in_x32_syscall()) return snd_pcm_ioctl_sync_ptr_x32(substream, argp); #endif /* CONFIG_X86_X32 */ - return 
snd_pcm_common_ioctl(file, substream, cmd, argp); + return snd_pcm_ioctl_sync_ptr_buggy(substream, argp); case SNDRV_PCM_IOCTL_HW_REFINE32: return snd_pcm_ioctl_hw_params_compat(substream, 1, argp); case SNDRV_PCM_IOCTL_HW_PARAMS32: From 57116ce17b04fde2fe30f0859df69d8dbe5809f6 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 6 Oct 2021 13:58:52 +0200 Subject: [PATCH 189/235] workqueue: fix state-dump console deadlock Console drivers often queue work while holding locks also taken in their console write paths, something which can lead to deadlocks on SMP when dumping workqueue state (e.g. sysrq-t or on suspend failures). For serial console drivers this could look like: CPU0 CPU1 ---- ---- show_workqueue_state(); lock(&pool->lock); lock(&port->lock); schedule_work(); lock(&pool->lock); printk(); lock(console_owner); lock(&port->lock); where workqueues are, for example, used to push data to the line discipline, process break signals and handle modem-status changes. Line disciplines and serdev drivers can also queue work on write-wakeup notifications, etc. Reworking every console driver to avoid queuing work while holding locks also taken in their write paths would complicate drivers and is neither desirable nor feasible. Instead use the deferred-printk mechanism to avoid printing while holding pool locks when dumping workqueue state. Note that there are a few WARN_ON() assertions in the workqueue code which could potentially also trigger a deadlock. Hopefully the ongoing printk rework will provide a general solution for this eventually. This was originally reported after a lockdep splat when executing sysrq-t with the imx serial driver. 
Fixes: 3494fc30846d ("workqueue: dump workqueues on sysrq-t") Cc: stable@vger.kernel.org # 4.0 Reported-by: Fabio Estevam Tested-by: Fabio Estevam Signed-off-by: Johan Hovold Reviewed-by: John Ogness Signed-off-by: Tejun Heo --- kernel/workqueue.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 33a6b4a2443d..1b3eb1e9531f 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4830,8 +4830,16 @@ void show_workqueue_state(void) for_each_pwq(pwq, wq) { raw_spin_lock_irqsave(&pwq->pool->lock, flags); - if (pwq->nr_active || !list_empty(&pwq->inactive_works)) + if (pwq->nr_active || !list_empty(&pwq->inactive_works)) { + /* + * Defer printing to avoid deadlocks in console + * drivers that queue work while holding locks + * also taken in their write paths. + */ + printk_deferred_enter(); show_pwq(pwq); + printk_deferred_exit(); + } raw_spin_unlock_irqrestore(&pwq->pool->lock, flags); /* * We could be printing a lot from atomic context, e.g. @@ -4849,7 +4857,12 @@ void show_workqueue_state(void) raw_spin_lock_irqsave(&pool->lock, flags); if (pool->nr_workers == pool->nr_idle) goto next_pool; - + /* + * Defer printing to avoid deadlocks in console drivers that + * queue work while holding locks also taken in their write + * paths. + */ + printk_deferred_enter(); pr_info("pool %d:", pool->id); pr_cont_pool_info(pool); pr_cont(" hung=%us workers=%d", @@ -4864,6 +4877,7 @@ void show_workqueue_state(void) first = false; } pr_cont("\n"); + printk_deferred_exit(); next_pool: raw_spin_unlock_irqrestore(&pool->lock, flags); /* From 2e5809a4ddb15969503e43b06662a9a725f613ea Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Tue, 5 Oct 2021 13:25:29 -0700 Subject: [PATCH 190/235] arm64/hugetlb: fix CMA gigantic page order for non-4K PAGE_SIZE For non-4K PAGE_SIZE configs, the largest gigantic huge page size is CONT_PMD_SHIFT order. On arm64 with 64K PAGE_SIZE, the gigantic page is 16G. 
Therefore, one should be able to specify 'hugetlb_cma=16G' on the kernel command line so that one gigantic page can be allocated from CMA. However, when adding such an option the following message is produced: hugetlb_cma: cma area should be at least 8796093022208 MiB This is because the calculation for non-4K gigantic page order is incorrect in the arm64 specific routine arm64_hugetlb_cma_reserve(). Fixes: abb7962adc80 ("arm64/hugetlb: Reserve CMA areas for gigantic pages on 16K and 64K configs") Cc: # 5.9.x Signed-off-by: Mike Kravetz Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/20211005202529.213812-1-mike.kravetz@oracle.com Signed-off-by: Catalin Marinas --- arch/arm64/mm/hugetlbpage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 23505fc35324..a8158c948966 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -43,7 +43,7 @@ void __init arm64_hugetlb_cma_reserve(void) #ifdef CONFIG_ARM64_4K_PAGES order = PUD_SHIFT - PAGE_SHIFT; #else - order = CONT_PMD_SHIFT + PMD_SHIFT - PAGE_SHIFT; + order = CONT_PMD_SHIFT - PAGE_SHIFT; #endif /* * HugeTLB CMA reservation is required for gigantic From 0edf0824e0dc359ed76bf96af986e6570ca2c0b9 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 8 Oct 2021 14:59:45 -0700 Subject: [PATCH 191/235] af_unix: Rename UNIX-DGRAM to UNIX to maintain backwards compatability The name of this protocol changed in commit 94531cfcbe79 ("af_unix: Add unix_stream_proto for sockmap") because that commit added stream support to the af_unix protocol. Renaming the existing protocol makes a ChromeOS protocol test[1] fail now that the name has changed in /proc/net/protocols from "UNIX" to "UNIX-DGRAM". Let's put the name back to how it was while keeping the stream protocol as "UNIX-STREAM" so that the procfs interface doesn't change. This fixes the test and maintains backwards compatibility in proc. 
Cc: Jiang Wang Cc: Andrii Nakryiko Cc: Cong Wang Cc: Jakub Sitnicki Cc: John Fastabend Cc: Dmitry Osipenko Link: https://source.chromium.org/chromiumos/chromiumos/codesearch/+/main:src/platform/tast-tests/src/chromiumos/tast/local/bundles/cros/network/supported_protocols.go;l=50;drc=e8b1c3f94cb40a054f4aa1ef1aff61e75dc38f18 [1] Fixes: 94531cfcbe79 ("af_unix: Add unix_stream_proto for sockmap") Signed-off-by: Stephen Boyd Signed-off-by: David S. Miller --- net/unix/af_unix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 0878ab86597b..89f9e85ae970 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -828,7 +828,7 @@ static void unix_unhash(struct sock *sk) } struct proto unix_dgram_proto = { - .name = "UNIX-DGRAM", + .name = "UNIX", .owner = THIS_MODULE, .obj_size = sizeof(struct unix_sock), .close = unix_close, From 74a3bc42fe514098030a78c1ad5e6024463dd378 Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Mon, 11 Oct 2021 10:27:41 +0800 Subject: [PATCH 192/235] net: mscc: ocelot: Fix dumplicated argument in ocelot Fix the following coccicheck warning: drivers/net/ethernet/mscc/ocelot.c:474:duplicated argument to & or | drivers/net/ethernet/mscc/ocelot.c:476:duplicated argument to & or | drivers/net/ethernet/mscc/ocelot_net.c:1627:duplicated argument to & or | These DEV_CLOCK_CFG_MAC_TX_RST are duplicate here. Here should be DEV_CLOCK_CFG_MAC_RX_RST. Fixes: e6e12df625f2 ("net: mscc: ocelot: convert to phylink") Signed-off-by: Wan Jiabing Reviewed-by: Vladimir Oltean Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mscc/ocelot.c | 4 ++-- drivers/net/ethernet/mscc/ocelot_net.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 559177e6ded4..4de58321907c 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -472,9 +472,9 @@ void ocelot_phylink_mac_link_down(struct ocelot *ocelot, int port, !(quirks & OCELOT_QUIRK_QSGMII_PORTS_MUST_BE_UP)) ocelot_port_rmwl(ocelot_port, DEV_CLOCK_CFG_MAC_TX_RST | - DEV_CLOCK_CFG_MAC_TX_RST, + DEV_CLOCK_CFG_MAC_RX_RST, DEV_CLOCK_CFG_MAC_TX_RST | - DEV_CLOCK_CFG_MAC_TX_RST, + DEV_CLOCK_CFG_MAC_RX_RST, DEV_CLOCK_CFG); } EXPORT_SYMBOL_GPL(ocelot_phylink_mac_link_down); diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index e54b9fb2a97a..2a85bcb5d0c2 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -1625,7 +1625,7 @@ static int ocelot_port_phylink_create(struct ocelot *ocelot, int port, if (phy_mode == PHY_INTERFACE_MODE_QSGMII) ocelot_port_rmwl(ocelot_port, 0, DEV_CLOCK_CFG_MAC_TX_RST | - DEV_CLOCK_CFG_MAC_TX_RST, + DEV_CLOCK_CFG_MAC_RX_RST, DEV_CLOCK_CFG); ocelot_port->phy_mode = phy_mode; From 4a3e0aeddf091f00974b02627c157843ce382a24 Mon Sep 17 00:00:00 2001 From: Maarten Zanders Date: Mon, 11 Oct 2021 16:27:20 +0200 Subject: [PATCH 193/235] net: dsa: mv88e6xxx: don't use PHY_DETECT on internal PHY's mv88e6xxx_port_ppu_updates() interprets data in the PORT_STS register incorrectly for internal ports (i.e. no PPU). In these cases, the PHY_DETECT bit indicates link status. This results in forcing the MAC state whenever the PHY link goes down which is not intended. As a side effect, LEDs configured to show link status stay lit even though the physical link is down. Add a check in mac_link_down and mac_link_up to see if it concerns an external port and only then, look at PPU status. 
Fixes: 5d5b231da7ac (net: dsa: mv88e6xxx: use PHY_DETECT in mac_link_up/mac_link_down) Reported-by: Maarten Zanders Reviewed-by: Maxime Chevallier Signed-off-by: Maarten Zanders Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index d7b29792732b..8dadcae93c9b 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -750,7 +750,11 @@ static void mv88e6xxx_mac_link_down(struct dsa_switch *ds, int port, ops = chip->info->ops; mv88e6xxx_reg_lock(chip); - if ((!mv88e6xxx_port_ppu_updates(chip, port) || + /* Internal PHYs propagate their configuration directly to the MAC. + * External PHYs depend on whether the PPU is enabled for this port. + */ + if (((!mv88e6xxx_phy_is_internal(ds, port) && + !mv88e6xxx_port_ppu_updates(chip, port)) || mode == MLO_AN_FIXED) && ops->port_sync_link) err = ops->port_sync_link(chip, port, mode, false); mv88e6xxx_reg_unlock(chip); @@ -773,7 +777,12 @@ static void mv88e6xxx_mac_link_up(struct dsa_switch *ds, int port, ops = chip->info->ops; mv88e6xxx_reg_lock(chip); - if (!mv88e6xxx_port_ppu_updates(chip, port) || mode == MLO_AN_FIXED) { + /* Internal PHYs propagate their configuration directly to the MAC. + * External PHYs depend on whether the PPU is enabled for this port. 
+ */ + if ((!mv88e6xxx_phy_is_internal(ds, port) && + !mv88e6xxx_port_ppu_updates(chip, port)) || + mode == MLO_AN_FIXED) { /* FIXME: for an automedia port, should we force the link * down here - what if the link comes up due to "other" media * while we're bringing the port up, how is the exclusivity From 9973a43012b6ad1720dbc4d5faf5302c28635b8c Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Mon, 11 Oct 2021 17:22:49 +0200 Subject: [PATCH 194/235] r8152: select CRC32 and CRYPTO/CRYPTO_HASH/CRYPTO_SHA256 Fix the following build/link errors by adding a dependency on CRYPTO, CRYPTO_HASH, CRYPTO_SHA256 and CRC32: ld: drivers/net/usb/r8152.o: in function `rtl8152_fw_verify_checksum': r8152.c:(.text+0x2b2a): undefined reference to `crypto_alloc_shash' ld: r8152.c:(.text+0x2bed): undefined reference to `crypto_shash_digest' ld: r8152.c:(.text+0x2c50): undefined reference to `crypto_destroy_tfm' ld: drivers/net/usb/r8152.o: in function `_rtl8152_set_rx_mode': r8152.c:(.text+0xdcb0): undefined reference to `crc32_le' Fixes: 9370f2d05a2a1 ("r8152: support request_firmware for RTL8153") Fixes: ac718b69301c7 ("net/usb: new driver for RTL8152") Signed-off-by: Vegard Nossum Signed-off-by: David S. 
Miller --- drivers/net/usb/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig index 4c5d69732a7e..f87f17503373 100644 --- a/drivers/net/usb/Kconfig +++ b/drivers/net/usb/Kconfig @@ -99,6 +99,10 @@ config USB_RTL8150 config USB_RTL8152 tristate "Realtek RTL8152/RTL8153 Based USB Ethernet Adapters" select MII + select CRC32 + select CRYPTO + select CRYPTO_HASH + select CRYPTO_SHA256 help This option adds support for Realtek RTL8152 based USB 2.0 10/100 Ethernet adapters and RTL8153 based USB 3.0 10/100/1000 From ef1100ef20f29aec4e62abeccdb5bdbebba1e378 Mon Sep 17 00:00:00 2001 From: Arun Ramadoss Date: Mon, 11 Oct 2021 21:18:08 +0530 Subject: [PATCH 195/235] net: dsa: microchip: Added the condition for scheduling ksz_mib_read_work When the ksz module is installed and removed using rmmod, the kernel crashes with a null pointer dereference error. During rmmod, ksz_switch_remove function tries to cancel the mib_read_workqueue using cancel_delayed_work_sync routine and unregister switch from dsa. During dsa_unregister_switch it calls ksz_mac_link_down, which in turn reschedules the workqueue since mib_interval is non-zero. As a result, the queued work executes after mib_interval and tries to access dp->slave. But the slave is unregistered in the ksz_switch_remove function. Hence the kernel crashes. To avoid this crash, reset the mib_interval to 0 before canceling the workqueue. v1 -> v2: -Removed the if condition in ksz_mib_read_work Fixes: 469b390e1ba3 ("net: dsa: microchip: use delayed_work instead of timer + work") Signed-off-by: Arun Ramadoss Signed-off-by: David S. 
Miller --- drivers/net/dsa/microchip/ksz_common.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 1542bfb8b5e5..7c2968a639eb 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -449,8 +449,10 @@ EXPORT_SYMBOL(ksz_switch_register); void ksz_switch_remove(struct ksz_device *dev) { /* timer started */ - if (dev->mib_read_interval) + if (dev->mib_read_interval) { + dev->mib_read_interval = 0; cancel_delayed_work_sync(&dev->mib_read); + } dev->dev_ops->exit(dev); dsa_unregister_switch(dev->ds); From 2bbc977ca689e5e18e8cf98f68854f92bd053c97 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Mon, 11 Oct 2021 20:04:11 +0200 Subject: [PATCH 196/235] ipv6: ioam: move the check for undefined bits The check for undefined bits in the trace type is moved from the input side to the output side, while the input side is relaxed and now inserts default empty values when an undefined bit is set. Signed-off-by: Justin Iurman Signed-off-by: David S. 
Miller --- net/ipv6/ioam6.c | 70 ++++++++++++++++++++++++++++++++++----- net/ipv6/ioam6_iptunnel.c | 6 +++- 2 files changed, 67 insertions(+), 9 deletions(-) diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c index 5e8961004832..d128172bb549 100644 --- a/net/ipv6/ioam6.c +++ b/net/ipv6/ioam6.c @@ -770,6 +770,66 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, data += sizeof(__be32); } + /* bit12 undefined: filled with empty value */ + if (trace->type.bit12) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + + /* bit13 undefined: filled with empty value */ + if (trace->type.bit13) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + + /* bit14 undefined: filled with empty value */ + if (trace->type.bit14) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + + /* bit15 undefined: filled with empty value */ + if (trace->type.bit15) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + + /* bit16 undefined: filled with empty value */ + if (trace->type.bit16) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + + /* bit17 undefined: filled with empty value */ + if (trace->type.bit17) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + + /* bit18 undefined: filled with empty value */ + if (trace->type.bit18) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + + /* bit19 undefined: filled with empty value */ + if (trace->type.bit19) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + + /* bit20 undefined: filled with empty value */ + if (trace->type.bit20) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + + /* bit21 undefined: filled with empty value */ + if (trace->type.bit21) { + *(__be32 *)data = 
cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + /* opaque state snapshot */ if (trace->type.bit22) { if (!sc) { @@ -791,16 +851,10 @@ void ioam6_fill_trace_data(struct sk_buff *skb, struct ioam6_schema *sc; u8 sclen = 0; - /* Skip if Overflow flag is set OR - * if an unknown type (bit 12-21) is set + /* Skip if Overflow flag is set */ - if (trace->overflow || - trace->type.bit12 | trace->type.bit13 | trace->type.bit14 | - trace->type.bit15 | trace->type.bit16 | trace->type.bit17 | - trace->type.bit18 | trace->type.bit19 | trace->type.bit20 | - trace->type.bit21) { + if (trace->overflow) return; - } /* NodeLen does not include Opaque State Snapshot length. We need to * take it into account if the corresponding bit is set (bit 22) and diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c index f9ee04541c17..9b7b726f8f45 100644 --- a/net/ipv6/ioam6_iptunnel.c +++ b/net/ipv6/ioam6_iptunnel.c @@ -75,7 +75,11 @@ static bool ioam6_validate_trace_hdr(struct ioam6_trace_hdr *trace) u32 fields; if (!trace->type_be32 || !trace->remlen || - trace->remlen > IOAM6_TRACE_DATA_SIZE_MAX / 4) + trace->remlen > IOAM6_TRACE_DATA_SIZE_MAX / 4 || + trace->type.bit12 | trace->type.bit13 | trace->type.bit14 | + trace->type.bit15 | trace->type.bit16 | trace->type.bit17 | + trace->type.bit18 | trace->type.bit19 | trace->type.bit20 | + trace->type.bit21) return false; trace->nodelen = 0; From 7b1700e009cc17702e8db3af1d983860c0eb7164 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Mon, 11 Oct 2021 20:04:12 +0200 Subject: [PATCH 197/235] selftests: net: modify IOAM tests for undef bits The output behavior for undefined bits is now directly tested inside the bash script. Trying to set an undefined bit should be refused. The input behavior for undefined bits has been removed due to the fact that we would need another sender allowed to set undefined bits. Signed-off-by: Justin Iurman Signed-off-by: David S. 
Miller --- tools/testing/selftests/net/ioam6.sh | 24 ++- tools/testing/selftests/net/ioam6_parser.c | 164 ++++++++------------- 2 files changed, 80 insertions(+), 108 deletions(-) diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh index 3caf72bb9c6a..a2489ec398fe 100755 --- a/tools/testing/selftests/net/ioam6.sh +++ b/tools/testing/selftests/net/ioam6.sh @@ -468,10 +468,26 @@ out_bits() for i in {0..22} do ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace \ - prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} dev veth0 + prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} \ + dev veth0 &>/dev/null - run_test "out_bit$i" "${desc//$i}" ioam-node-alpha ioam-node-beta \ - db01::2 db01::1 veth0 ${bit2type[$i]} 123 + local cmd_res=$? + local descr="${desc//$i}" + + if [[ $i -ge 12 && $i -le 21 ]] + then + if [ $cmd_res != 0 ] + then + npassed=$((npassed+1)) + log_test_passed "$descr" + else + nfailed=$((nfailed+1)) + log_test_failed "$descr" + fi + else + run_test "out_bit$i" "$descr" ioam-node-alpha ioam-node-beta \ + db01::2 db01::1 veth0 ${bit2type[$i]} 123 + fi done bit2size[22]=$tmp @@ -544,7 +560,7 @@ in_bits() local tmp=${bit2size[22]} bit2size[22]=$(( $tmp + ${#BETA[9]} + ((4 - (${#BETA[9]} % 4)) % 4) )) - for i in {0..22} + for i in {0..11} {22..22} do ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace \ prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} dev veth0 diff --git a/tools/testing/selftests/net/ioam6_parser.c b/tools/testing/selftests/net/ioam6_parser.c index d376cb2c383c..8f6997d35816 100644 --- a/tools/testing/selftests/net/ioam6_parser.c +++ b/tools/testing/selftests/net/ioam6_parser.c @@ -94,16 +94,6 @@ enum { TEST_OUT_BIT9, TEST_OUT_BIT10, TEST_OUT_BIT11, - TEST_OUT_BIT12, - TEST_OUT_BIT13, - TEST_OUT_BIT14, - TEST_OUT_BIT15, - TEST_OUT_BIT16, - TEST_OUT_BIT17, - TEST_OUT_BIT18, - TEST_OUT_BIT19, - TEST_OUT_BIT20, - TEST_OUT_BIT21, TEST_OUT_BIT22, 
TEST_OUT_FULL_SUPP_TRACE, @@ -125,16 +115,6 @@ enum { TEST_IN_BIT9, TEST_IN_BIT10, TEST_IN_BIT11, - TEST_IN_BIT12, - TEST_IN_BIT13, - TEST_IN_BIT14, - TEST_IN_BIT15, - TEST_IN_BIT16, - TEST_IN_BIT17, - TEST_IN_BIT18, - TEST_IN_BIT19, - TEST_IN_BIT20, - TEST_IN_BIT21, TEST_IN_BIT22, TEST_IN_FULL_SUPP_TRACE, @@ -199,30 +179,6 @@ static int check_ioam_header(int tid, struct ioam6_trace_hdr *ioam6h, ioam6h->nodelen != 2 || ioam6h->remlen; - case TEST_OUT_BIT12: - case TEST_IN_BIT12: - case TEST_OUT_BIT13: - case TEST_IN_BIT13: - case TEST_OUT_BIT14: - case TEST_IN_BIT14: - case TEST_OUT_BIT15: - case TEST_IN_BIT15: - case TEST_OUT_BIT16: - case TEST_IN_BIT16: - case TEST_OUT_BIT17: - case TEST_IN_BIT17: - case TEST_OUT_BIT18: - case TEST_IN_BIT18: - case TEST_OUT_BIT19: - case TEST_IN_BIT19: - case TEST_OUT_BIT20: - case TEST_IN_BIT20: - case TEST_OUT_BIT21: - case TEST_IN_BIT21: - return ioam6h->overflow || - ioam6h->nodelen || - ioam6h->remlen != 1; - case TEST_OUT_BIT22: case TEST_IN_BIT22: return ioam6h->overflow || @@ -326,6 +282,66 @@ static int check_ioam6_data(__u8 **p, struct ioam6_trace_hdr *ioam6h, *p += sizeof(__u32); } + if (ioam6h->type.bit12) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit13) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit14) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit15) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit16) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit17) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit18) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if 
(ioam6h->type.bit19) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit20) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit21) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + if (ioam6h->type.bit22) { len = cnf.sc_data ? strlen(cnf.sc_data) : 0; aligned = cnf.sc_data ? __ALIGN_KERNEL(len, 4) : 0; @@ -455,26 +471,6 @@ static int str2id(const char *tname) return TEST_OUT_BIT10; if (!strcmp("out_bit11", tname)) return TEST_OUT_BIT11; - if (!strcmp("out_bit12", tname)) - return TEST_OUT_BIT12; - if (!strcmp("out_bit13", tname)) - return TEST_OUT_BIT13; - if (!strcmp("out_bit14", tname)) - return TEST_OUT_BIT14; - if (!strcmp("out_bit15", tname)) - return TEST_OUT_BIT15; - if (!strcmp("out_bit16", tname)) - return TEST_OUT_BIT16; - if (!strcmp("out_bit17", tname)) - return TEST_OUT_BIT17; - if (!strcmp("out_bit18", tname)) - return TEST_OUT_BIT18; - if (!strcmp("out_bit19", tname)) - return TEST_OUT_BIT19; - if (!strcmp("out_bit20", tname)) - return TEST_OUT_BIT20; - if (!strcmp("out_bit21", tname)) - return TEST_OUT_BIT21; if (!strcmp("out_bit22", tname)) return TEST_OUT_BIT22; if (!strcmp("out_full_supp_trace", tname)) @@ -509,26 +505,6 @@ static int str2id(const char *tname) return TEST_IN_BIT10; if (!strcmp("in_bit11", tname)) return TEST_IN_BIT11; - if (!strcmp("in_bit12", tname)) - return TEST_IN_BIT12; - if (!strcmp("in_bit13", tname)) - return TEST_IN_BIT13; - if (!strcmp("in_bit14", tname)) - return TEST_IN_BIT14; - if (!strcmp("in_bit15", tname)) - return TEST_IN_BIT15; - if (!strcmp("in_bit16", tname)) - return TEST_IN_BIT16; - if (!strcmp("in_bit17", tname)) - return TEST_IN_BIT17; - if (!strcmp("in_bit18", tname)) - return TEST_IN_BIT18; - if (!strcmp("in_bit19", tname)) - return TEST_IN_BIT19; - if (!strcmp("in_bit20", tname)) - return TEST_IN_BIT20; - if (!strcmp("in_bit21", 
tname)) - return TEST_IN_BIT21; if (!strcmp("in_bit22", tname)) return TEST_IN_BIT22; if (!strcmp("in_full_supp_trace", tname)) @@ -606,16 +582,6 @@ static int (*func[__TEST_MAX])(int, struct ioam6_trace_hdr *, __u32, __u16) = { [TEST_OUT_BIT9] = check_ioam_header_and_data, [TEST_OUT_BIT10] = check_ioam_header_and_data, [TEST_OUT_BIT11] = check_ioam_header_and_data, - [TEST_OUT_BIT12] = check_ioam_header, - [TEST_OUT_BIT13] = check_ioam_header, - [TEST_OUT_BIT14] = check_ioam_header, - [TEST_OUT_BIT15] = check_ioam_header, - [TEST_OUT_BIT16] = check_ioam_header, - [TEST_OUT_BIT17] = check_ioam_header, - [TEST_OUT_BIT18] = check_ioam_header, - [TEST_OUT_BIT19] = check_ioam_header, - [TEST_OUT_BIT20] = check_ioam_header, - [TEST_OUT_BIT21] = check_ioam_header, [TEST_OUT_BIT22] = check_ioam_header_and_data, [TEST_OUT_FULL_SUPP_TRACE] = check_ioam_header_and_data, [TEST_IN_UNDEF_NS] = check_ioam_header, @@ -633,16 +599,6 @@ static int (*func[__TEST_MAX])(int, struct ioam6_trace_hdr *, __u32, __u16) = { [TEST_IN_BIT9] = check_ioam_header_and_data, [TEST_IN_BIT10] = check_ioam_header_and_data, [TEST_IN_BIT11] = check_ioam_header_and_data, - [TEST_IN_BIT12] = check_ioam_header, - [TEST_IN_BIT13] = check_ioam_header, - [TEST_IN_BIT14] = check_ioam_header, - [TEST_IN_BIT15] = check_ioam_header, - [TEST_IN_BIT16] = check_ioam_header, - [TEST_IN_BIT17] = check_ioam_header, - [TEST_IN_BIT18] = check_ioam_header, - [TEST_IN_BIT19] = check_ioam_header, - [TEST_IN_BIT20] = check_ioam_header, - [TEST_IN_BIT21] = check_ioam_header, [TEST_IN_BIT22] = check_ioam_header_and_data, [TEST_IN_FULL_SUPP_TRACE] = check_ioam_header_and_data, [TEST_FWD_FULL_SUPP_TRACE] = check_ioam_header_and_data, From 4d4a223a86afe658cd878800f09458e8bb54415d Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 11 Oct 2021 13:48:06 -0700 Subject: [PATCH 198/235] ice: fix locking for Tx timestamp tracking flush Commit 4dd0d5c33c3e ("ice: add lock around Tx timestamp tracker flush") added a lock around the 
Tx timestamp tracker flow which is used to cleanup any left over SKBs and prepare for device removal. This lock is problematic because it is being held around a call to ice_clear_phy_tstamp. The clear function takes a mutex to send a PHY write command to firmware. This could lead to a deadlock if the mutex actually sleeps, and causes the following warning on a kernel with preemption debugging enabled: [ 715.419426] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:573 [ 715.427900] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 3100, name: rmmod [ 715.435652] INFO: lockdep is turned off. [ 715.439591] Preemption disabled at: [ 715.439594] [<0000000000000000>] 0x0 [ 715.446678] CPU: 52 PID: 3100 Comm: rmmod Tainted: G W OE 5.15.0-rc4+ #42 bdd7ec3018e725f159ca0d372ce8c2c0e784891c [ 715.458058] Hardware name: Intel Corporation S2600STQ/S2600STQ, BIOS SE5C620.86B.02.01.0010.010620200716 01/06/2020 [ 715.468483] Call Trace: [ 715.470940] dump_stack_lvl+0x6a/0x9a [ 715.474613] ___might_sleep.cold+0x224/0x26a [ 715.478895] __mutex_lock+0xb3/0x1440 [ 715.482569] ? stack_depot_save+0x378/0x500 [ 715.486763] ? ice_sq_send_cmd+0x78/0x14c0 [ice 9a7e1ec00971c89ecd3fe0d4dc7da2b3786a421d] [ 715.494979] ? kfree+0xc1/0x520 [ 715.498128] ? mutex_lock_io_nested+0x12a0/0x12a0 [ 715.502837] ? kasan_set_free_info+0x20/0x30 [ 715.507110] ? __kasan_slab_free+0x10b/0x140 [ 715.511385] ? slab_free_freelist_hook+0xc7/0x220 [ 715.516092] ? kfree+0xc1/0x520 [ 715.519235] ? ice_deinit_lag+0x16c/0x220 [ice 9a7e1ec00971c89ecd3fe0d4dc7da2b3786a421d] [ 715.527359] ? ice_remove+0x1cf/0x6a0 [ice 9a7e1ec00971c89ecd3fe0d4dc7da2b3786a421d] [ 715.535133] ? pci_device_remove+0xab/0x1d0 [ 715.539318] ? __device_release_driver+0x35b/0x690 [ 715.544110] ? driver_detach+0x214/0x2f0 [ 715.548035] ? bus_remove_driver+0x11d/0x2f0 [ 715.552309] ? pci_unregister_driver+0x26/0x250 [ 715.556840] ? 
ice_module_exit+0xc/0x2f [ice 9a7e1ec00971c89ecd3fe0d4dc7da2b3786a421d] [ 715.564799] ? __do_sys_delete_module.constprop.0+0x2d8/0x4e0 [ 715.570554] ? do_syscall_64+0x3b/0x90 [ 715.574303] ? entry_SYSCALL_64_after_hwframe+0x44/0xae [ 715.579529] ? start_flush_work+0x542/0x8f0 [ 715.583719] ? ice_sq_send_cmd+0x78/0x14c0 [ice 9a7e1ec00971c89ecd3fe0d4dc7da2b3786a421d] [ 715.591923] ice_sq_send_cmd+0x78/0x14c0 [ice 9a7e1ec00971c89ecd3fe0d4dc7da2b3786a421d] [ 715.599960] ? wait_for_completion_io+0x250/0x250 [ 715.604662] ? lock_acquire+0x196/0x200 [ 715.608504] ? do_raw_spin_trylock+0xa5/0x160 [ 715.612864] ice_sbq_rw_reg+0x1e6/0x2f0 [ice 9a7e1ec00971c89ecd3fe0d4dc7da2b3786a421d] [ 715.620813] ? ice_reset+0x130/0x130 [ice 9a7e1ec00971c89ecd3fe0d4dc7da2b3786a421d] [ 715.628497] ? __debug_check_no_obj_freed+0x1e8/0x3c0 [ 715.633550] ? trace_hardirqs_on+0x1c/0x130 [ 715.637748] ice_write_phy_reg_e810+0x70/0xf0 [ice 9a7e1ec00971c89ecd3fe0d4dc7da2b3786a421d] [ 715.646220] ? do_raw_spin_trylock+0xa5/0x160 [ 715.650581] ? ice_ptp_release+0x910/0x910 [ice 9a7e1ec00971c89ecd3fe0d4dc7da2b3786a421d] [ 715.658797] ? ice_ptp_release+0x255/0x910 [ice 9a7e1ec00971c89ecd3fe0d4dc7da2b3786a421d] [ 715.667013] ice_clear_phy_tstamp+0x2c/0x110 [ice 9a7e1ec00971c89ecd3fe0d4dc7da2b3786a421d] [ 715.675403] ice_ptp_release+0x408/0x910 [ice 9a7e1ec00971c89ecd3fe0d4dc7da2b3786a421d] [ 715.683440] ice_remove+0x560/0x6a0 [ice 9a7e1ec00971c89ecd3fe0d4dc7da2b3786a421d] [ 715.691037] ? _raw_spin_unlock_irqrestore+0x46/0x73 [ 715.696005] pci_device_remove+0xab/0x1d0 [ 715.700018] __device_release_driver+0x35b/0x690 [ 715.704637] driver_detach+0x214/0x2f0 [ 715.708389] bus_remove_driver+0x11d/0x2f0 [ 715.712489] pci_unregister_driver+0x26/0x250 [ 715.716857] ice_module_exit+0xc/0x2f [ice 9a7e1ec00971c89ecd3fe0d4dc7da2b3786a421d] [ 715.724637] __do_sys_delete_module.constprop.0+0x2d8/0x4e0 [ 715.730210] ? free_module+0x6d0/0x6d0 [ 715.733963] ? task_work_run+0xe1/0x170 [ 715.737803] ? 
exit_to_user_mode_loop+0x17f/0x1d0 [ 715.742509] ? rcu_read_lock_sched_held+0x12/0x80 [ 715.747215] ? trace_hardirqs_on+0x1c/0x130 [ 715.751401] do_syscall_64+0x3b/0x90 [ 715.754981] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 715.760033] RIP: 0033:0x7f4dfe59000b [ 715.763612] Code: 73 01 c3 48 8b 0d 6d 1e 0c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa b8 b0 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 3d 1e 0c 00 f7 d8 64 89 01 48 [ 715.782357] RSP: 002b:00007ffe8c891708 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 [ 715.789923] RAX: ffffffffffffffda RBX: 00005558a20468b0 RCX: 00007f4dfe59000b [ 715.797054] RDX: 000000000000000a RSI: 0000000000000800 RDI: 00005558a2046918 [ 715.804189] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 [ 715.811319] R10: 00007f4dfe603ac0 R11: 0000000000000206 R12: 00007ffe8c891940 [ 715.818455] R13: 00007ffe8c8920a3 R14: 00005558a20462a0 R15: 00005558a20468b0 Notice that this is the only case where we use the lock in this way. In the cleanup kthread and work kthread the lock is only taken around the bit accesses. This was done intentionally to avoid this kind of issue. The way the lock is used, we only protect ordering of bit sets vs bit clears. The Tx writers in the hot path don't need to be protected against the entire kthread loop. The Tx queues threads only need to ensure that they do not re-use an index that is currently in use. The cleanup loop does not need to block all new set bits, since it will re-queue itself if new timestamps are present. Fix the tracker flow so that it uses the same flow as the standard cleanup thread. In addition, ensure the in_use bitmap actually gets cleared properly. This fixes the warning and also avoids the potential deadlock that might have occurred otherwise. Fixes: 4dd0d5c33c3e ("ice: add lock around Tx timestamp tracker flush") Signed-off-by: Jacob Keller Signed-off-by: Tony Nguyen Signed-off-by: David S. 
Miller --- drivers/net/ethernet/intel/ice/ice_ptp.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 05cc5870e4ef..80380aed8882 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -1313,22 +1313,21 @@ ice_ptp_flush_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx) { u8 idx; - spin_lock(&tx->lock); - for (idx = 0; idx < tx->len; idx++) { u8 phy_idx = idx + tx->quad_offset; - /* Clear any potential residual timestamp in the PHY block */ - if (!pf->hw.reset_ongoing) - ice_clear_phy_tstamp(&pf->hw, tx->quad, phy_idx); - + spin_lock(&tx->lock); if (tx->tstamps[idx].skb) { dev_kfree_skb_any(tx->tstamps[idx].skb); tx->tstamps[idx].skb = NULL; } - } + clear_bit(idx, tx->in_use); + spin_unlock(&tx->lock); - spin_unlock(&tx->lock); + /* Clear any potential residual timestamp in the PHY block */ + if (!pf->hw.reset_ongoing) + ice_clear_phy_tstamp(&pf->hw, tx->quad, phy_idx); + } } /** From a3fd1a986e499a06ac5ef95c3a39aa4611e7444c Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Tue, 12 Oct 2021 19:47:48 +0800 Subject: [PATCH 199/235] ALSA: hda/realtek: Fix the mic type detection issue for ASUS G551JW We need to define the codec pin 0x1b to be the mic, but somehow the mic doesn't support hot plugging detection, and Windows also has this issue, so we set it to phantom headset-mic. Also, determine_headset_type() often returns the omtp type by mistake when we plug in a ctia headset, which leaves the mic unable to record sound at all. Because most headsets are ctia type nowadays and some machines have a fixed ctia type audio jack, it is possible this machine has a fixed ctia jack too. Here we set this mic jack to fixed ctia type; this avoids the mic type detection mistake and makes the ctia headset work reliably. 
BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=214537 Reported-and-tested-by: msd Cc: Signed-off-by: Hui Wang Link: https://lore.kernel.org/r/20211012114748.5238-1-hui.wang@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index bca5830ff706..22d27b12c4e7 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10197,6 +10197,9 @@ enum { ALC671_FIXUP_HP_HEADSET_MIC2, ALC662_FIXUP_ACER_X2660G_HEADSET_MODE, ALC662_FIXUP_ACER_NITRO_HEADSET_MODE, + ALC668_FIXUP_ASUS_NO_HEADSET_MIC, + ALC668_FIXUP_HEADSET_MIC, + ALC668_FIXUP_MIC_DET_COEF, }; static const struct hda_fixup alc662_fixups[] = { @@ -10580,6 +10583,29 @@ static const struct hda_fixup alc662_fixups[] = { .chained = true, .chain_id = ALC662_FIXUP_USI_FUNC }, + [ALC668_FIXUP_ASUS_NO_HEADSET_MIC] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x1b, 0x04a1112c }, + { } + }, + .chained = true, + .chain_id = ALC668_FIXUP_HEADSET_MIC + }, + [ALC668_FIXUP_HEADSET_MIC] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc269_fixup_headset_mic, + .chained = true, + .chain_id = ALC668_FIXUP_MIC_DET_COEF + }, + [ALC668_FIXUP_MIC_DET_COEF] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + { 0x20, AC_VERB_SET_COEF_INDEX, 0x15 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x0d60 }, + {} + }, + }, }; static const struct snd_pci_quirk alc662_fixup_tbl[] = { @@ -10615,6 +10641,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x15a7, "ASUS UX51VZH", ALC662_FIXUP_BASS_16), SND_PCI_QUIRK(0x1043, 0x177d, "ASUS N551", ALC668_FIXUP_ASUS_Nx51), SND_PCI_QUIRK(0x1043, 0x17bd, "ASUS N751", ALC668_FIXUP_ASUS_Nx51), + SND_PCI_QUIRK(0x1043, 0x185d, "ASUS G551JW", ALC668_FIXUP_ASUS_NO_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1963, "ASUS X71SL", ALC662_FIXUP_ASUS_MODE8), 
SND_PCI_QUIRK(0x1043, 0x1b73, "ASUS N55SF", ALC662_FIXUP_BASS_16), SND_PCI_QUIRK(0x1043, 0x1bf3, "ASUS N76VZ", ALC662_FIXUP_BASS_MODE4_CHMAP), From b37a15188eae9d4c49c5bb035e0c8d4058e4d9b3 Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Tue, 12 Oct 2021 17:29:35 +0300 Subject: [PATCH 200/235] ALSA: hda: avoid write to STATESTS if controller is in reset The snd_hdac_bus_reset_link() contains logic to clear STATESTS register before performing controller reset. This code dates back to an old bugfix in commit e8a7f136f5ed ("[ALSA] hda-intel - Improve HD-audio codec probing robustness"). Originally the code was added to azx_reset(). The code was moved around in commit a41d122449be ("ALSA: hda - Embed bus into controller object") and ended up to snd_hdac_bus_reset_link() and called primarily via snd_hdac_bus_init_chip(). The logic to clear STATESTS is correct when snd_hdac_bus_init_chip() is called when controller is not in reset. In this case, STATESTS can be cleared. This can be useful e.g. when forcing a controller reset to retry codec probe. A normal non-power-on reset will not clear the bits. However, this old logic is problematic when controller is already in reset. The HDA specification states that controller must be taken out of reset before writing to registers other than GCTL.CRST (1.0a spec, 3.3.7). The write to STATESTS in snd_hdac_bus_reset_link() will be lost if the controller is already in reset per the HDA specification mentioned. This has been harmless on older hardware. On newer generation of Intel PCIe based HDA controllers, if configured to report issues, this write will emit an unsupported request error. If ACPI Platform Error Interface (APEI) is enabled in kernel, this will end up to kernel log. Fix the code in snd_hdac_bus_reset_link() to only clear the STATESTS if the function is called when controller is not in reset. Otherwise clearing the bits is not possible and should be skipped. 
Signed-off-by: Kai Vehmanen Link: https://lore.kernel.org/r/20211012142935.3731820-1-kai.vehmanen@linux.intel.com Signed-off-by: Takashi Iwai --- sound/hda/hdac_controller.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/hda/hdac_controller.c b/sound/hda/hdac_controller.c index 062da7a7a586..f7bd6e2db085 100644 --- a/sound/hda/hdac_controller.c +++ b/sound/hda/hdac_controller.c @@ -421,8 +421,9 @@ int snd_hdac_bus_reset_link(struct hdac_bus *bus, bool full_reset) if (!full_reset) goto skip_reset; - /* clear STATESTS */ - snd_hdac_chip_writew(bus, STATESTS, STATESTS_INT_MASK); + /* clear STATESTS if not in reset */ + if (snd_hdac_chip_readb(bus, GCTL) & AZX_GCTL_RESET) + snd_hdac_chip_writew(bus, STATESTS, STATESTS_INT_MASK); /* reset controller */ snd_hdac_bus_enter_link_reset(bus); From 596143e3aec35c93508d6b7a05ddc999ee209b61 Mon Sep 17 00:00:00 2001 From: Jackie Liu Date: Mon, 23 Aug 2021 17:25:26 +0800 Subject: [PATCH 201/235] acpi/arm64: fix next_platform_timer() section mismatch error Fix modpost Section mismatch error in next_platform_timer(). [...] WARNING: modpost: vmlinux.o(.text.unlikely+0x26e60): Section mismatch in reference from the function next_platform_timer() to the variable .init.data:acpi_gtdt_desc The function next_platform_timer() references the variable __initdata acpi_gtdt_desc. This is often because next_platform_timer lacks a __initdata annotation or the annotation of acpi_gtdt_desc is wrong. WARNING: modpost: vmlinux.o(.text.unlikely+0x26e64): Section mismatch in reference from the function next_platform_timer() to the variable .init.data:acpi_gtdt_desc The function next_platform_timer() references the variable __initdata acpi_gtdt_desc. This is often because next_platform_timer lacks a __initdata annotation or the annotation of acpi_gtdt_desc is wrong. ERROR: modpost: Section mismatches detected. Set CONFIG_SECTION_MISMATCH_WARN_ONLY=y to allow them. 
make[1]: *** [scripts/Makefile.modpost:59: vmlinux.symvers] Error 1 make[1]: *** Deleting file 'vmlinux.symvers' make: *** [Makefile:1176: vmlinux] Error 2 [...] Fixes: a712c3ed9b8a ("acpi/arm64: Add memory-mapped timer support in GTDT driver") Signed-off-by: Jackie Liu Acked-by: Hanjun Guo Link: https://lore.kernel.org/r/20210823092526.2407526-1-liu.yun@linux.dev Signed-off-by: Catalin Marinas --- drivers/acpi/arm64/gtdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/arm64/gtdt.c b/drivers/acpi/arm64/gtdt.c index 0a0a982f9c28..c0e77c1c8e09 100644 --- a/drivers/acpi/arm64/gtdt.c +++ b/drivers/acpi/arm64/gtdt.c @@ -36,7 +36,7 @@ struct acpi_gtdt_descriptor { static struct acpi_gtdt_descriptor acpi_gtdt_desc __initdata; -static inline void *next_platform_timer(void *platform_timer) +static inline __init void *next_platform_timer(void *platform_timer) { struct acpi_gtdt_header *gh = platform_timer; From 48827e1d6af58f219e89c7ec08dccbca28c7694e Mon Sep 17 00:00:00 2001 From: Jonas Hahnfeld Date: Tue, 12 Oct 2021 22:09:07 +0200 Subject: [PATCH 202/235] ALSA: usb-audio: Add quirk for VF0770 The device advertises 8 formats, but only a rate of 48kHz is honored by the hardware and 24 bits give chopped audio, so only report the one working combination. This fixes out-of-the-box audio experience with PipeWire which otherwise attempts to choose S24_3LE (while PulseAudio defaulted to S16_LE). Signed-off-by: Jonas Hahnfeld Cc: Link: https://lore.kernel.org/r/20211012200906.3492-1-hahnjo@hahnjo.de Signed-off-by: Takashi Iwai --- sound/usb/quirks-table.h | 42 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index e03043f7dad3..de18fff69280 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -77,6 +77,48 @@ /* E-Mu 0204 USB */ { USB_DEVICE_VENDOR_SPEC(0x041e, 0x3f19) }, +/* + * Creative Technology, Ltd Live! 
Cam Sync HD [VF0770] + * The device advertises 8 formats, but only a rate of 48kHz is honored by the + * hardware and 24 bits give chopped audio, so only report the one working + * combination. + */ +{ + USB_DEVICE(0x041e, 0x4095), + .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_COMPOSITE, + .data = &(const struct snd_usb_audio_quirk[]) { + { + .ifnum = 2, + .type = QUIRK_AUDIO_STANDARD_MIXER, + }, + { + .ifnum = 3, + .type = QUIRK_AUDIO_FIXED_ENDPOINT, + .data = &(const struct audioformat) { + .formats = SNDRV_PCM_FMTBIT_S16_LE, + .channels = 2, + .fmt_bits = 16, + .iface = 3, + .altsetting = 4, + .altset_idx = 4, + .endpoint = 0x82, + .ep_attr = 0x05, + .rates = SNDRV_PCM_RATE_48000, + .rate_min = 48000, + .rate_max = 48000, + .nr_rates = 1, + .rate_table = (unsigned int[]) { 48000 }, + }, + }, + { + .ifnum = -1 + }, + }, + }, +}, + /* * HP Wireless Audio * When not ignored, causes instability issues for some users, forcing them to From 2266bb1e122a4f7cdf3427defcfb48e5c6a8f205 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Mon, 13 Sep 2021 15:48:11 +0300 Subject: [PATCH 203/235] net/mlx5: Fix cleanup of bridge delayed work Currently, bridge cleanup calls cancel_delayed_work(). When this function returns, there is a chance that the delayed work is still running. Also, the delayed work re-queues itself. As a result, we might execute the delayed work after the bridge cleanup has finished and hit a null-ptr oops[1]. Fix it by using cancel_delayed_work_sync(), which waits until the work is done and cancels the queued work. 
[1] [ 8202.143043 ] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 8202.144438 ] #PF: supervisor write access in kernel mode [ 8202.145476 ] #PF: error_code(0x0002) - not-present page [ 8202.146520 ] PGD 0 P4D 0 [ 8202.147126 ] Oops: 0002 [#1] SMP NOPTI [ 8202.147899 ] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.14.0-rc6_for_upstream_min_debug_2021_08_25_16_06 #1 [ 8202.149741 ] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [ 8202.151908 ] RIP: 0010:_raw_spin_lock+0xc/0x20 [ 8202.156234 ] RSP: 0018:ffff88846f885ea0 EFLAGS: 00010046 [ 8202.157289 ] RAX: 0000000000000000 RBX: ffff88846f880000 RCX: 0000000000000000 [ 8202.158731 ] RDX: 0000000000000001 RSI: ffff8881004000c8 RDI: 0000000000000000 [ 8202.160177 ] RBP: ffff8881fe684978 R08: ffff888100140000 R09: ffffffff824455b8 [ 8202.161569 ] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000001 [ 8202.163004 ] R13: 0000000000000012 R14: 0000000000000200 R15: ffff88812992d000 [ 8202.164018 ] FS: 0000000000000000(0000) GS:ffff88846f880000(0000) knlGS:0000000000000000 [ 8202.164960 ] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 8202.165634 ] CR2: 0000000000000000 CR3: 0000000108cac004 CR4: 0000000000370ea0 [ 8202.166450 ] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 8202.167807 ] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 8202.168852 ] Call Trace: [ 8202.169421 ] [ 8202.169792 ] __queue_work+0xf2/0x3d0 [ 8202.170481 ] ? queue_work_node+0x40/0x40 [ 8202.171270 ] call_timer_fn+0x2b/0x100 [ 8202.171932 ] __run_timers.part.0+0x152/0x220 [ 8202.172717 ] ? __hrtimer_run_queues+0x171/0x290 [ 8202.173526 ] ? kvm_clock_get_cycles+0xd/0x10 [ 8202.174232 ] ? 
ktime_get+0x35/0x90 [ 8202.174943 ] run_timer_softirq+0x26/0x50 [ 8202.175745 ] __do_softirq+0xc7/0x271 [ 8202.176373 ] irq_exit_rcu+0x93/0xb0 [ 8202.176983 ] sysvec_apic_timer_interrupt+0x72/0x90 [ 8202.177755 ] [ 8202.178245 ] asm_sysvec_apic_timer_interrupt+0x12/0x20 Fixes: c636a0f0f3f0 ("net/mlx5: Bridge, dynamic entry ageing") Signed-off-by: Shay Drory Reviewed-by: Vlad Buslov Reviewed-by: Leon Romanovsky Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c index b5ddaa82755f..c6d2f8c78db7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c @@ -475,9 +475,6 @@ void mlx5e_rep_bridge_init(struct mlx5e_priv *priv) esw_warn(mdev, "Failed to allocate bridge offloads workqueue\n"); goto err_alloc_wq; } - INIT_DELAYED_WORK(&br_offloads->update_work, mlx5_esw_bridge_update_work); - queue_delayed_work(br_offloads->wq, &br_offloads->update_work, - msecs_to_jiffies(MLX5_ESW_BRIDGE_UPDATE_INTERVAL)); br_offloads->nb.notifier_call = mlx5_esw_bridge_switchdev_event; err = register_switchdev_notifier(&br_offloads->nb); @@ -500,6 +497,9 @@ void mlx5e_rep_bridge_init(struct mlx5e_priv *priv) err); goto err_register_netdev; } + INIT_DELAYED_WORK(&br_offloads->update_work, mlx5_esw_bridge_update_work); + queue_delayed_work(br_offloads->wq, &br_offloads->update_work, + msecs_to_jiffies(MLX5_ESW_BRIDGE_UPDATE_INTERVAL)); return; err_register_netdev: @@ -523,10 +523,10 @@ void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv) if (!br_offloads) return; + cancel_delayed_work_sync(&br_offloads->update_work); unregister_netdevice_notifier(&br_offloads->netdev_nb); unregister_switchdev_blocking_notifier(&br_offloads->nb_blk); 
unregister_switchdev_notifier(&br_offloads->nb); - cancel_delayed_work(&br_offloads->update_work); destroy_workqueue(br_offloads->wq); rtnl_lock(); mlx5_esw_bridge_cleanup(esw); From ca20dfda05ae0531c8f5117b6ac989816f6cf658 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 4 Oct 2021 15:04:25 +0300 Subject: [PATCH 204/235] net/mlx5e: Allow only complete TXQs partition in MQPRIO channel mode Do not allow configurations of MQPRIO channel mode that do not fully define and utilize the channels txqs. Fixes: ec60c4581bd9 ("net/mlx5e: Support MQPRIO channel mode") Signed-off-by: Tariq Toukan Reviewed-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 0c5197f9cea3..336aa07313da 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2981,8 +2981,8 @@ static int mlx5e_mqprio_channel_validate(struct mlx5e_priv *priv, agg_count += mqprio->qopt.count[i]; } - if (priv->channels.params.num_channels < agg_count) { - netdev_err(netdev, "Num of queues (%d) exceeds available (%d)\n", + if (priv->channels.params.num_channels != agg_count) { + netdev_err(netdev, "Num of queues (%d) does not match available (%d)\n", agg_count, priv->channels.params.num_channels); return -EINVAL; } From 94b960b9deffc02fc0747afc01f72cc62ab099e3 Mon Sep 17 00:00:00 2001 From: Valentine Fatiev Date: Sun, 15 Aug 2021 17:43:19 +0300 Subject: [PATCH 205/235] net/mlx5e: Fix memory leak in mlx5_core_destroy_cq() error path Prior to this patch, if mlx5_core_destroy_cq() failed, it returned without completing all destroy operations, which led to a memory leak. Instead, complete the destroy flow before returning the error. 
Also move mlx5_debug_cq_remove() to the beginning of mlx5_core_destroy_cq() to be symmetrical with mlx5_core_create_cq(). kmemleak complains on: unreferenced object 0xc000000038625100 (size 64): comm "ethtool", pid 28301, jiffies 4298062946 (age 785.380s) hex dump (first 32 bytes): 60 01 48 94 00 00 00 c0 b8 05 34 c3 00 00 00 c0 `.H.......4..... 02 00 00 00 00 00 00 00 00 db 7d c1 00 00 00 c0 ..........}..... backtrace: [<000000009e8643cb>] add_res_tree+0xd0/0x270 [mlx5_core] [<00000000e7cb8e6c>] mlx5_debug_cq_add+0x5c/0xc0 [mlx5_core] [<000000002a12918f>] mlx5_core_create_cq+0x1d0/0x2d0 [mlx5_core] [<00000000cef0a696>] mlx5e_create_cq+0x210/0x3f0 [mlx5_core] [<000000009c642c26>] mlx5e_open_cq+0xb4/0x130 [mlx5_core] [<0000000058dfa578>] mlx5e_ptp_open+0x7f4/0xe10 [mlx5_core] [<0000000081839561>] mlx5e_open_channels+0x9cc/0x13e0 [mlx5_core] [<0000000009cf05d4>] mlx5e_switch_priv_channels+0xa4/0x230 [mlx5_core] [<0000000042bbedd8>] mlx5e_safe_switch_params+0x14c/0x300 [mlx5_core] [<0000000004bc9db8>] set_pflag_tx_port_ts+0x9c/0x160 [mlx5_core] [<00000000a0553443>] mlx5e_set_priv_flags+0xd0/0x1b0 [mlx5_core] [<00000000a8f3d84b>] ethnl_set_privflags+0x234/0x2d0 [<00000000fd27f27c>] genl_family_rcv_msg_doit+0x108/0x1d0 [<00000000f495e2bb>] genl_family_rcv_msg+0xe4/0x1f0 [<00000000646c5c2c>] genl_rcv_msg+0x78/0x120 [<00000000d53e384e>] netlink_rcv_skb+0x74/0x1a0 Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Valentine Fatiev Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cq.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index cf97985628ab..02e77ffe5c3e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -155,6 +155,8 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) u32 
in[MLX5_ST_SZ_DW(destroy_cq_in)] = {}; int err; + mlx5_debug_cq_remove(dev, cq); + mlx5_eq_del_cq(mlx5_get_async_eq(dev), cq); mlx5_eq_del_cq(&cq->eq->core, cq); @@ -162,16 +164,13 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) MLX5_SET(destroy_cq_in, in, cqn, cq->cqn); MLX5_SET(destroy_cq_in, in, uid, cq->uid); err = mlx5_cmd_exec_in(dev, destroy_cq, in); - if (err) - return err; synchronize_irq(cq->irqn); - mlx5_debug_cq_remove(dev, cq); mlx5_cq_put(cq); wait_for_completion(&cq->free); - return 0; + return err; } EXPORT_SYMBOL(mlx5_core_destroy_cq); From b2107cdc43d8601f2cadfba990ae844cc1f44e68 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 4 Oct 2021 21:20:25 -0700 Subject: [PATCH 206/235] net/mlx5e: Switchdev representors are not vlan challenged Before this patch, mlx5 representors advertised the NETIF_F_VLAN_CHALLENGED bit, this could lead to missing features when using reps with vxlan/bridge and maybe other virtual interfaces, when such interfaces inherit this bit and block vlan usage in their topology. Example: $ip link add dev bridge type bridge # add representor interface to the bridge $ip link set dev pf0hpf master $ip link add link bridge name vlan10 type vlan id 10 protocol 802.1q Error: 8021q: VLANs not supported on device. Reps are perfectly capable of handling vlan traffic, although they don't implement vlan_{add,kill}_vid ndos, hence, remove NETIF_F_VLAN_CHALLENGED advertisement. 
Fixes: cb67b832921c ("net/mlx5e: Introduce SRIOV VF representors") Reported-by: Roopa Prabhu Signed-off-by: Saeed Mahameed Reviewed-by: Roi Dayan --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 3dd1101cc693..0439203fc7d9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -643,7 +643,6 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev, netdev->hw_features |= NETIF_F_RXCSUM; netdev->features |= netdev->hw_features; - netdev->features |= NETIF_F_VLAN_CHALLENGED; netdev->features |= NETIF_F_NETNS_LOCAL; } From 0bc73ad46a76ed6ece4dcacb28858e7b38561e1c Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Sun, 26 Sep 2021 17:55:41 +0300 Subject: [PATCH 207/235] net/mlx5e: Mutually exclude RX-FCS and RX-port-timestamp Due to current HW arch limitations, RX-FCS (scattering FCS frame field to software) and RX-port-timestamp (improved timestamp accuracy on the receive side) can't work together. RX-port-timestamp is not controlled by the user and it is enabled by default when supported by the HW/FW. This patch sets RX-port-timestamp opposite to RX-FCS configuration. 
Fixes: 102722fc6832 ("net/mlx5e: Add support for RXFCS feature flag") Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 57 +++++++++++++++++-- include/linux/mlx5/mlx5_ifc.h | 10 +++- 2 files changed, 60 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 336aa07313da..09c8b71b186c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3325,20 +3325,67 @@ static int set_feature_rx_all(struct net_device *netdev, bool enable) return mlx5_set_port_fcs(mdev, !enable); } +static int mlx5e_set_rx_port_ts(struct mlx5_core_dev *mdev, bool enable) +{ + u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {}; + bool supported, curr_state; + int err; + + if (!MLX5_CAP_GEN(mdev, ports_check)) + return 0; + + err = mlx5_query_ports_check(mdev, in, sizeof(in)); + if (err) + return err; + + supported = MLX5_GET(pcmr_reg, in, rx_ts_over_crc_cap); + curr_state = MLX5_GET(pcmr_reg, in, rx_ts_over_crc); + + if (!supported || enable == curr_state) + return 0; + + MLX5_SET(pcmr_reg, in, local_port, 1); + MLX5_SET(pcmr_reg, in, rx_ts_over_crc, enable); + + return mlx5_set_ports_check(mdev, in, sizeof(in)); +} + static int set_feature_rx_fcs(struct net_device *netdev, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_channels *chs = &priv->channels; + struct mlx5_core_dev *mdev = priv->mdev; int err; mutex_lock(&priv->state_lock); - priv->channels.params.scatter_fcs_en = enable; - err = mlx5e_modify_channels_scatter_fcs(&priv->channels, enable); - if (err) - priv->channels.params.scatter_fcs_en = !enable; + if (enable) { + err = mlx5e_set_rx_port_ts(mdev, false); + if (err) + goto out; + chs->params.scatter_fcs_en = true; + err = mlx5e_modify_channels_scatter_fcs(chs, true); + if (err) { + 
chs->params.scatter_fcs_en = false; + mlx5e_set_rx_port_ts(mdev, true); + } + } else { + chs->params.scatter_fcs_en = false; + err = mlx5e_modify_channels_scatter_fcs(chs, false); + if (err) { + chs->params.scatter_fcs_en = true; + goto out; + } + err = mlx5e_set_rx_port_ts(mdev, true); + if (err) { + mlx5_core_warn(mdev, "Failed to set RX port timestamp %d\n", err); + err = 0; + } + } + +out: mutex_unlock(&priv->state_lock); - return err; } diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index f3638d09ba77..993204a6c1a1 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -9475,16 +9475,22 @@ struct mlx5_ifc_pcmr_reg_bits { u8 reserved_at_0[0x8]; u8 local_port[0x8]; u8 reserved_at_10[0x10]; + u8 entropy_force_cap[0x1]; u8 entropy_calc_cap[0x1]; u8 entropy_gre_calc_cap[0x1]; - u8 reserved_at_23[0x1b]; + u8 reserved_at_23[0xf]; + u8 rx_ts_over_crc_cap[0x1]; + u8 reserved_at_33[0xb]; u8 fcs_cap[0x1]; u8 reserved_at_3f[0x1]; + u8 entropy_force[0x1]; u8 entropy_calc[0x1]; u8 entropy_gre_calc[0x1]; - u8 reserved_at_43[0x1b]; + u8 reserved_at_43[0xf]; + u8 rx_ts_over_crc[0x1]; + u8 reserved_at_53[0xb]; u8 fcs_chk[0x1]; u8 reserved_at_5f[0x1]; }; From 84c8a87402cf073ba7948dd62d4815a3f4a224c8 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Mon, 11 Oct 2021 18:39:35 +0300 Subject: [PATCH 208/235] net/mlx5e: Fix division by 0 in mlx5e_select_queue for representors Commit 846d6da1fcdb ("net/mlx5e: Fix division by 0 in mlx5e_select_queue") makes mlx5e_build_nic_params assign a non-zero initial value to priv->num_tc_x_num_ch, so that mlx5e_select_queue doesn't fail with division by 0 if called before the first activation of channels. However, the initialization flow of representors doesn't call mlx5e_build_nic_params, so this bug can still happen with representors. This commit fixes the bug by adding the missing assignment to mlx5e_build_rep_params. 
Fixes: 846d6da1fcdb ("net/mlx5e: Fix division by 0 in mlx5e_select_queue") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 0439203fc7d9..0684ac6699b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -618,6 +618,11 @@ static void mlx5e_build_rep_params(struct net_device *netdev) params->mqprio.num_tc = 1; params->tunneled_offload_en = false; + /* Set an initial non-zero value, so that mlx5e_select_queue won't + * divide by zero if called before first activating channels. + */ + priv->num_tc_x_num_ch = params->num_channels * params->mqprio.num_tc; + mlx5_query_min_inline(mdev, &params->tx_min_inline_mode); } From 60d950f443a52d950126ad664fbd4a1eb8353dc9 Mon Sep 17 00:00:00 2001 From: Baowen Zheng Date: Tue, 12 Oct 2021 14:48:50 +0200 Subject: [PATCH 209/235] nfp: flow_offload: move flow_indr_dev_register from app init to app start Commit 74fc4f828769 ("net: Fix offloading indirect devices dependency on qdisc order creation") adds a process to trigger the callback to set up the bo callback when the driver registers a callback.
In our current implementation, we are not ready to run the callback when nfp calls the function flow_indr_dev_register, so there will be an error message such as: kernel: Oops: 0000 [#1] SMP PTI kernel: CPU: 0 PID: 14119 Comm: kworker/0:0 Tainted: G kernel: Workqueue: events work_for_cpu_fn kernel: RIP: 0010:nfp_flower_indr_setup_tc_cb+0x258/0x410 kernel: RSP: 0018:ffffbc1e02c57bf8 EFLAGS: 00010286 kernel: RAX: 0000000000000000 RBX: ffff9c761fabc000 RCX: 0000000000000001 kernel: RDX: 0000000000000001 RSI: fffffffffffffff0 RDI: ffffffffc0be9ef1 kernel: RBP: ffffbc1e02c57c58 R08: ffffffffc08f33aa R09: ffff9c6db7478800 kernel: R10: 0000009c003f6e00 R11: ffffbc1e02800000 R12: ffffbc1e000d9000 kernel: R13: ffffbc1e000db428 R14: ffff9c6db7478800 R15: ffff9c761e884e80 kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 kernel: CR2: fffffffffffffff0 CR3: 00000009e260a004 CR4: 00000000007706f0 kernel: DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 kernel: DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 kernel: PKRU: 55555554 kernel: Call Trace: kernel: ? flow_indr_dev_register+0xab/0x210 kernel: ? __cond_resched+0x15/0x30 kernel: ? kmem_cache_alloc_trace+0x44/0x4b0 kernel: ? nfp_flower_setup_tc+0x1d0/0x1d0 [nfp] kernel: flow_indr_dev_register+0x158/0x210 kernel: ? tcf_block_unbind+0xe0/0xe0 kernel: nfp_flower_init+0x40b/0x650 [nfp] kernel: nfp_net_pci_probe+0x25f/0x960 [nfp] kernel: ? nfp_rtsym_read_le+0x76/0x130 [nfp] kernel: nfp_pci_probe+0x6a9/0x820 [nfp] kernel: local_pci_probe+0x45/0x80 So we need to call flow_indr_dev_register in the app start process instead of the init stage.
Fixes: 74fc4f828769 ("net: Fix offloading indirect devices dependency on qdisc order creation") Signed-off-by: Baowen Zheng Signed-off-by: Simon Horman Signed-off-by: Louis Peens Link: https://lore.kernel.org/r/20211012124850.13025-1-louis.peens@corigine.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/netronome/nfp/flower/main.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c index c029950a81e2..ac1dcfa1d179 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.c +++ b/drivers/net/ethernet/netronome/nfp/flower/main.c @@ -830,10 +830,6 @@ static int nfp_flower_init(struct nfp_app *app) if (err) goto err_cleanup; - err = flow_indr_dev_register(nfp_flower_indr_setup_tc_cb, app); - if (err) - goto err_cleanup; - if (app_priv->flower_ext_feats & NFP_FL_FEATS_VF_RLIM) nfp_flower_qos_init(app); @@ -942,7 +938,20 @@ static int nfp_flower_start(struct nfp_app *app) return err; } - return nfp_tunnel_config_start(app); + err = flow_indr_dev_register(nfp_flower_indr_setup_tc_cb, app); + if (err) + return err; + + err = nfp_tunnel_config_start(app); + if (err) + goto err_tunnel_config; + + return 0; + +err_tunnel_config: + flow_indr_dev_unregister(nfp_flower_indr_setup_tc_cb, app, + nfp_flower_setup_indr_tc_release); + return err; } static void nfp_flower_stop(struct nfp_app *app) From 43a4b4dbd48c9006ef64df3a12acf33bdfe11c61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alvin=20=C5=A0ipraga?= Date: Tue, 12 Oct 2021 13:27:31 +0200 Subject: [PATCH 210/235] net: dsa: fix spurious error message when unoffloaded port leaves bridge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Flip the sign of a return value check, thereby suppressing the following spurious error: port 2 failed to notify DSA_NOTIFIER_BRIDGE_LEAVE: -EOPNOTSUPP ... 
which is emitted when removing an unoffloaded DSA switch port from a bridge. Fixes: d371b7c92d19 ("net: dsa: Unset vlan_filtering when ports leave the bridge") Signed-off-by: Alvin Šipraga Reviewed-by: Vladimir Oltean Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20211012112730.3429157-1-alvin@pqrs.dk Signed-off-by: Jakub Kicinski --- net/dsa/switch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dsa/switch.c b/net/dsa/switch.c index 1c797ec8e2c2..6466d0539af9 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -168,7 +168,7 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds, if (extack._msg) dev_err(ds->dev, "port %d: %s\n", info->port, extack._msg); - if (err && err != EOPNOTSUPP) + if (err && err != -EOPNOTSUPP) return err; } From 28da0555c3b542d605e4ca26eea6a740cf2c9174 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 22 Sep 2021 17:37:25 +0300 Subject: [PATCH 211/235] net: dsa: move sja1110_process_meta_tstamp inside the tagging protocol driver The problem is that DSA tagging protocols really must not depend on the switch driver, because this creates a circular dependency at insmod time, and the switch driver will effectively not load when the tagging protocol driver is missing. The code was structured in the way it was for a reason, though. The DSA driver-facing API for PTP timestamping relies on the assumption that two-step TX timestamps are provided by the hardware in an out-of-band manner, typically by raising an interrupt and making that timestamp available inside some sort of FIFO which is to be accessed over SPI/MDIO/etc. So the API puts .port_txtstamp into dsa_switch_ops, because it is expected that the switch driver needs to save some state (like put the skb into a queue until its TX timestamp arrives). On SJA1110, TX timestamps are provided by the switch as Ethernet packets, so this makes them be received and processed by the tagging protocol driver. 
This in itself is great, because the timestamps are full 64-bit and do not require reconstruction, and since Ethernet is the fastest I/O method available to/from the switch, PTP timestamps arrive very quickly, no matter how bottlenecked the SPI connection is, because SPI interaction is not needed at all. DSA's code structure and strict isolation between the tagging protocol driver and the switch driver break the natural code organization. When the tagging protocol driver receives a packet which is classified as a metadata packet containing timestamps, it passes those timestamps one by one to the switch driver, which then proceeds to compare them based on the recorded timestamp ID that was generated in .port_txtstamp. The communication between the tagging protocol and the switch driver is done through a method exported by the switch driver, sja1110_process_meta_tstamp. To satisfy build requirements, we force a dependency to build the tagging protocol driver as a module when the switch driver is a module. However, as explained in the first paragraph, that causes the circular dependency. To solve this, move the skb queue from struct sja1105_private :: struct sja1105_ptp_data to struct sja1105_private :: struct sja1105_tagger_data. The latter is a data structure for which hacks have already been put into place to be able to create persistent storage per switch that is accessible from the tagging protocol driver (see sja1105_setup_ports). With the skb queue directly accessible from the tagging protocol driver, we can now move sja1110_process_meta_tstamp into the tagging driver itself, and avoid exporting a symbol. 
Fixes: 566b18c8b752 ("net: dsa: sja1105: implement TX timestamping for SJA1110") Link: https://lore.kernel.org/netdev/20210908220834.d7gmtnwrorhharna@skbuf/ Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/dsa/sja1105/sja1105_ptp.c | 45 ++++----------------------- drivers/net/dsa/sja1105/sja1105_ptp.h | 19 ----------- include/linux/dsa/sja1105.h | 29 +++++++++-------- net/dsa/tag_sja1105.c | 43 +++++++++++++++++++++++++ 4 files changed, 63 insertions(+), 73 deletions(-) diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.c b/drivers/net/dsa/sja1105/sja1105_ptp.c index 691f6dd7e669..54396992a919 100644 --- a/drivers/net/dsa/sja1105/sja1105_ptp.c +++ b/drivers/net/dsa/sja1105/sja1105_ptp.c @@ -64,6 +64,7 @@ enum sja1105_ptp_clk_mode { static int sja1105_change_rxtstamping(struct sja1105_private *priv, bool on) { + struct sja1105_tagger_data *tagger_data = &priv->tagger_data; struct sja1105_ptp_data *ptp_data = &priv->ptp_data; struct sja1105_general_params_entry *general_params; struct sja1105_table *table; @@ -79,7 +80,7 @@ static int sja1105_change_rxtstamping(struct sja1105_private *priv, priv->tagger_data.stampable_skb = NULL; } ptp_cancel_worker_sync(ptp_data->clock); - skb_queue_purge(&ptp_data->skb_txtstamp_queue); + skb_queue_purge(&tagger_data->skb_txtstamp_queue); skb_queue_purge(&ptp_data->skb_rxtstamp_queue); return sja1105_static_config_reload(priv, SJA1105_RX_HWTSTAMPING); @@ -452,40 +453,6 @@ bool sja1105_port_rxtstamp(struct dsa_switch *ds, int port, return priv->info->rxtstamp(ds, port, skb); } -void sja1110_process_meta_tstamp(struct dsa_switch *ds, int port, u8 ts_id, - enum sja1110_meta_tstamp dir, u64 tstamp) -{ - struct sja1105_private *priv = ds->priv; - struct sja1105_ptp_data *ptp_data = &priv->ptp_data; - struct sk_buff *skb, *skb_tmp, *skb_match = NULL; - struct skb_shared_hwtstamps shwt = {0}; - - /* We don't care about RX timestamps on the CPU port */ - if (dir == SJA1110_META_TSTAMP_RX) - return; - - 
spin_lock(&ptp_data->skb_txtstamp_queue.lock); - - skb_queue_walk_safe(&ptp_data->skb_txtstamp_queue, skb, skb_tmp) { - if (SJA1105_SKB_CB(skb)->ts_id != ts_id) - continue; - - __skb_unlink(skb, &ptp_data->skb_txtstamp_queue); - skb_match = skb; - - break; - } - - spin_unlock(&ptp_data->skb_txtstamp_queue.lock); - - if (WARN_ON(!skb_match)) - return; - - shwt.hwtstamp = ns_to_ktime(sja1105_ticks_to_ns(tstamp)); - skb_complete_tx_timestamp(skb_match, &shwt); -} -EXPORT_SYMBOL_GPL(sja1110_process_meta_tstamp); - /* In addition to cloning the skb which is done by the common * sja1105_port_txtstamp, we need to generate a timestamp ID and save the * packet to the TX timestamping queue. @@ -494,7 +461,6 @@ void sja1110_txtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb) { struct sk_buff *clone = SJA1105_SKB_CB(skb)->clone; struct sja1105_private *priv = ds->priv; - struct sja1105_ptp_data *ptp_data = &priv->ptp_data; struct sja1105_port *sp = &priv->ports[port]; u8 ts_id; @@ -510,7 +476,7 @@ void sja1110_txtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb) spin_unlock(&sp->data->meta_lock); - skb_queue_tail(&ptp_data->skb_txtstamp_queue, clone); + skb_queue_tail(&sp->data->skb_txtstamp_queue, clone); } /* Called from dsa_skb_tx_timestamp. 
This callback is just to clone @@ -953,7 +919,7 @@ int sja1105_ptp_clock_register(struct dsa_switch *ds) /* Only used on SJA1105 */ skb_queue_head_init(&ptp_data->skb_rxtstamp_queue); /* Only used on SJA1110 */ - skb_queue_head_init(&ptp_data->skb_txtstamp_queue); + skb_queue_head_init(&tagger_data->skb_txtstamp_queue); spin_lock_init(&tagger_data->meta_lock); ptp_data->clock = ptp_clock_register(&ptp_data->caps, ds->dev); @@ -971,6 +937,7 @@ int sja1105_ptp_clock_register(struct dsa_switch *ds) void sja1105_ptp_clock_unregister(struct dsa_switch *ds) { struct sja1105_private *priv = ds->priv; + struct sja1105_tagger_data *tagger_data = &priv->tagger_data; struct sja1105_ptp_data *ptp_data = &priv->ptp_data; if (IS_ERR_OR_NULL(ptp_data->clock)) @@ -978,7 +945,7 @@ void sja1105_ptp_clock_unregister(struct dsa_switch *ds) del_timer_sync(&ptp_data->extts_timer); ptp_cancel_worker_sync(ptp_data->clock); - skb_queue_purge(&ptp_data->skb_txtstamp_queue); + skb_queue_purge(&tagger_data->skb_txtstamp_queue); skb_queue_purge(&ptp_data->skb_rxtstamp_queue); ptp_clock_unregister(ptp_data->clock); ptp_data->clock = NULL; diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.h b/drivers/net/dsa/sja1105/sja1105_ptp.h index 3c874bb4c17b..3ae6b9fdd492 100644 --- a/drivers/net/dsa/sja1105/sja1105_ptp.h +++ b/drivers/net/dsa/sja1105/sja1105_ptp.h @@ -8,21 +8,6 @@ #if IS_ENABLED(CONFIG_NET_DSA_SJA1105_PTP) -/* Timestamps are in units of 8 ns clock ticks (equivalent to - * a fixed 125 MHz clock). 
- */ -#define SJA1105_TICK_NS 8 - -static inline s64 ns_to_sja1105_ticks(s64 ns) -{ - return ns / SJA1105_TICK_NS; -} - -static inline s64 sja1105_ticks_to_ns(s64 ticks) -{ - return ticks * SJA1105_TICK_NS; -} - /* Calculate the first base_time in the future that satisfies this * relationship: * @@ -77,10 +62,6 @@ struct sja1105_ptp_data { struct timer_list extts_timer; /* Used only on SJA1105 to reconstruct partial timestamps */ struct sk_buff_head skb_rxtstamp_queue; - /* Used on SJA1110 where meta frames are generated only for - * 2-step TX timestamps - */ - struct sk_buff_head skb_txtstamp_queue; struct ptp_clock_info caps; struct ptp_clock *clock; struct sja1105_ptp_cmd cmd; diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h index 171106202fe5..0485ab2fcc46 100644 --- a/include/linux/dsa/sja1105.h +++ b/include/linux/dsa/sja1105.h @@ -48,6 +48,10 @@ struct sja1105_tagger_data { spinlock_t meta_lock; unsigned long state; u8 ts_id; + /* Used on SJA1110 where meta frames are generated only for + * 2-step TX timestamps + */ + struct sk_buff_head skb_txtstamp_queue; }; struct sja1105_skb_cb { @@ -69,25 +73,20 @@ struct sja1105_port { bool hwts_tx_en; }; -enum sja1110_meta_tstamp { - SJA1110_META_TSTAMP_TX = 0, - SJA1110_META_TSTAMP_RX = 1, -}; +/* Timestamps are in units of 8 ns clock ticks (equivalent to + * a fixed 125 MHz clock). 
+ */ +#define SJA1105_TICK_NS 8 -#if IS_ENABLED(CONFIG_NET_DSA_SJA1105_PTP) - -void sja1110_process_meta_tstamp(struct dsa_switch *ds, int port, u8 ts_id, - enum sja1110_meta_tstamp dir, u64 tstamp); - -#else - -static inline void sja1110_process_meta_tstamp(struct dsa_switch *ds, int port, - u8 ts_id, enum sja1110_meta_tstamp dir, - u64 tstamp) +static inline s64 ns_to_sja1105_ticks(s64 ns) { + return ns / SJA1105_TICK_NS; } -#endif /* IS_ENABLED(CONFIG_NET_DSA_SJA1105_PTP) */ +static inline s64 sja1105_ticks_to_ns(s64 ticks) +{ + return ticks * SJA1105_TICK_NS; +} #if IS_ENABLED(CONFIG_NET_DSA_SJA1105) diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index c054f48541c8..2edede9ddac9 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include "dsa_priv.h" @@ -53,6 +54,11 @@ #define SJA1110_TX_TRAILER_LEN 4 #define SJA1110_MAX_PADDING_LEN 15 +enum sja1110_meta_tstamp { + SJA1110_META_TSTAMP_TX = 0, + SJA1110_META_TSTAMP_RX = 1, +}; + /* Similar to is_link_local_ether_addr(hdr->h_dest) but also covers PTP */ static inline bool sja1105_is_link_local(const struct sk_buff *skb) { @@ -520,6 +526,43 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb, is_meta); } +static void sja1110_process_meta_tstamp(struct dsa_switch *ds, int port, + u8 ts_id, enum sja1110_meta_tstamp dir, + u64 tstamp) +{ + struct sk_buff *skb, *skb_tmp, *skb_match = NULL; + struct dsa_port *dp = dsa_to_port(ds, port); + struct skb_shared_hwtstamps shwt = {0}; + struct sja1105_port *sp = dp->priv; + + if (!dsa_port_is_sja1105(dp)) + return; + + /* We don't care about RX timestamps on the CPU port */ + if (dir == SJA1110_META_TSTAMP_RX) + return; + + spin_lock(&sp->data->skb_txtstamp_queue.lock); + + skb_queue_walk_safe(&sp->data->skb_txtstamp_queue, skb, skb_tmp) { + if (SJA1105_SKB_CB(skb)->ts_id != ts_id) + continue; + + __skb_unlink(skb, &sp->data->skb_txtstamp_queue); + skb_match = skb; + + break; + } + + 
spin_unlock(&sp->data->skb_txtstamp_queue.lock); + + if (WARN_ON(!skb_match)) + return; + + shwt.hwtstamp = ns_to_ktime(sja1105_ticks_to_ns(tstamp)); + skb_complete_tx_timestamp(skb_match, &shwt); +} + static struct sk_buff *sja1110_rcv_meta(struct sk_buff *skb, u16 rx_header) { u8 *buf = dsa_etype_header_pos_rx(skb) + SJA1110_HEADER_LEN; From 4ac0567e40b334b54988e3c28a2425ff9c8bdd35 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 22 Sep 2021 17:37:26 +0300 Subject: [PATCH 212/235] net: dsa: sja1105: break dependency between dsa_port_is_sja1105 and switch driver It's nice to be able to test a tagging protocol with dsa_loop, but not at the cost of losing the ability of building the tagging protocol and switch driver as modules, because as things stand, there is a circular dependency between the two. Tagging protocol drivers cannot depend on switch drivers, that is a hard fact. The reasoning behind the blamed patch was that accessing dp->priv should first make sure that the structure behind that pointer is what we really think it is. Currently the "sja1105" and "sja1110" tagging protocols only operate with the sja1105 switch driver, just like any other tagging protocol and switch combination. The only way to mix and match them is by modifying the code, and this applies to dsa_loop as well (by default that uses DSA_TAG_PROTO_NONE). So while in principle there is an issue, in practice there isn't one. Until we extend dsa_loop to allow user space configuration, treat the problem as a non-issue and just say that DSA ports found by tag_sja1105 are always sja1105 ports, which is in fact true. But keep the dsa_port_is_sja1105 function so that it's easy to patch it during testing, and rely on dead code elimination. 
Fixes: 994d2cbb08ca ("net: dsa: tag_sja1105: be dsa_loop-safe") Link: https://lore.kernel.org/netdev/20210908220834.d7gmtnwrorhharna@skbuf/ Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/dsa/sja1105/sja1105_main.c | 3 +-- include/linux/dsa/sja1105.h | 15 +-------------- net/dsa/Kconfig | 1 - 3 files changed, 2 insertions(+), 17 deletions(-) diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 7c0db80eff00..924c3f129992 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -3117,7 +3117,7 @@ static void sja1105_teardown(struct dsa_switch *ds) sja1105_static_config_free(&priv->static_config); } -const struct dsa_switch_ops sja1105_switch_ops = { +static const struct dsa_switch_ops sja1105_switch_ops = { .get_tag_protocol = sja1105_get_tag_protocol, .setup = sja1105_setup, .teardown = sja1105_teardown, @@ -3166,7 +3166,6 @@ const struct dsa_switch_ops sja1105_switch_ops = { .port_bridge_tx_fwd_offload = dsa_tag_8021q_bridge_tx_fwd_offload, .port_bridge_tx_fwd_unoffload = dsa_tag_8021q_bridge_tx_fwd_unoffload, }; -EXPORT_SYMBOL_GPL(sja1105_switch_ops); static const struct of_device_id sja1105_dt_ids[]; diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h index 0485ab2fcc46..9e07079528a5 100644 --- a/include/linux/dsa/sja1105.h +++ b/include/linux/dsa/sja1105.h @@ -88,22 +88,9 @@ static inline s64 sja1105_ticks_to_ns(s64 ticks) return ticks * SJA1105_TICK_NS; } -#if IS_ENABLED(CONFIG_NET_DSA_SJA1105) - -extern const struct dsa_switch_ops sja1105_switch_ops; - static inline bool dsa_port_is_sja1105(struct dsa_port *dp) { - return dp->ds->ops == &sja1105_switch_ops; + return true; } -#else - -static inline bool dsa_port_is_sja1105(struct dsa_port *dp) -{ - return false; -} - -#endif - #endif /* _NET_DSA_SJA1105_H */ diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 548285539752..bca1b5d66df2 100644 --- a/net/dsa/Kconfig +++ 
b/net/dsa/Kconfig @@ -138,7 +138,6 @@ config NET_DSA_TAG_LAN9303 config NET_DSA_TAG_SJA1105 tristate "Tag driver for NXP SJA1105 switches" - depends on NET_DSA_SJA1105 || !NET_DSA_SJA1105 select PACKING help Say Y or M if you want to enable support for tagging frames with the From c57fe0037a4e3863d9b740f8c14df9c51ac31aa1 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 12 Oct 2021 14:40:35 +0300 Subject: [PATCH 213/235] net: mscc: ocelot: make use of all 63 PTP timestamp identifiers At present, there is a problem when user space bombards a port with PTP event frames which have TX timestamping requests (or when a tc-taprio offload is installed on a port, which delays the TX timestamps by a significant amount of time). The driver will happily roll over the 2-bit timestamp ID and this will cause incorrect matches between an skb and the TX timestamp collected from the FIFO. The Ocelot switches have a 6-bit PTP timestamp identifier, and the value 63 is reserved, so that leaves identifiers 0-62 to be used. The timestamp identifiers are selected by the REW_OP packet field, and are actually shared between CPU-injected frames and frames which match a VCAP IS2 rule that modifies the REW_OP. The hardware supports partitioning between the two uses of the REW_OP field through the PTP_ID_LOW and PTP_ID_HIGH registers, and by default reserves the PTP IDs 0-3 for CPU-injected traffic and the rest for VCAP IS2. The driver does not use VCAP IS2 to set REW_OP for 2-step timestamping, and it also writes 0xffffffff to both PTP_ID_HIGH and PTP_ID_LOW in ocelot_init_timestamp() which makes all timestamp identifiers available to CPU injection. Therefore, we can make use of all 63 timestamp identifiers, which should allow more timestampable packets to be in flight on each port. This is only part of the solution, more issues will be addressed in future changes. 
Fixes: 4e3b0468e6d7 ("net: mscc: PTP Hardware Clock (PHC) support") Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot.c | 4 +++- include/soc/mscc/ocelot_ptp.h | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 4de58321907c..c43c8f53faaf 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -579,7 +579,9 @@ static void ocelot_port_add_txtstamp_skb(struct ocelot *ocelot, int port, skb_shinfo(clone)->tx_flags |= SKBTX_IN_PROGRESS; /* Store timestamp ID in OCELOT_SKB_CB(clone)->ts_id */ OCELOT_SKB_CB(clone)->ts_id = ocelot_port->ts_id; - ocelot_port->ts_id = (ocelot_port->ts_id + 1) % 4; + ocelot_port->ts_id++; + if (ocelot_port->ts_id == OCELOT_MAX_PTP_ID) + ocelot_port->ts_id = 0; skb_queue_tail(&ocelot_port->tx_skbs, clone); spin_unlock(&ocelot_port->ts_id_lock); diff --git a/include/soc/mscc/ocelot_ptp.h b/include/soc/mscc/ocelot_ptp.h index ded497d72bdb..6e54442b49ad 100644 --- a/include/soc/mscc/ocelot_ptp.h +++ b/include/soc/mscc/ocelot_ptp.h @@ -13,6 +13,8 @@ #include #include +#define OCELOT_MAX_PTP_ID 63 + #define PTP_PIN_CFG_RSZ 0x20 #define PTP_PIN_TOD_SEC_MSB_RSZ PTP_PIN_CFG_RSZ #define PTP_PIN_TOD_SEC_LSB_RSZ PTP_PIN_CFG_RSZ From 52849bcf0029ccc553be304e4f804938a39112e2 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 12 Oct 2021 14:40:36 +0300 Subject: [PATCH 214/235] net: mscc: ocelot: avoid overflowing the PTP timestamp FIFO PTP packets with 2-step TX timestamp requests are matched to packets based on the egress port number and a 6-bit timestamp identifier. All PTP timestamps are held in a common FIFO that is 128 entry deep. This patch ensures that back-to-back timestamping requests cannot exceed the hardware FIFO capacity. 
If that happens, simply send the packets without requesting a TX timestamp to be taken (in the case of felix, since the DSA API has a void return code in ds->ops->port_txtstamp) or drop them (in the case of ocelot). I've moved the ts_id_lock from a per-port basis to a per-switch basis, because we need separate accounting for both numbers of PTP frames in flight. And since we need locking to inc/dec the per-switch counter, that also offers protection for the per-port counter and hence there is no reason to have a per-port counter anymore. Fixes: 4e3b0468e6d7 ("net: mscc: PTP Hardware Clock (PHC) support") Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/dsa/ocelot/felix.c | 6 ++++- drivers/net/ethernet/mscc/ocelot.c | 37 ++++++++++++++++++++++++------ include/soc/mscc/ocelot.h | 5 +++- include/soc/mscc/ocelot_ptp.h | 1 + 4 files changed, 40 insertions(+), 9 deletions(-) diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index a3a9636430d6..50ef20724958 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -1291,8 +1291,12 @@ static void felix_txtstamp(struct dsa_switch *ds, int port, if (!ocelot->ptp) return; - if (ocelot_port_txtstamp_request(ocelot, port, skb, &clone)) + if (ocelot_port_txtstamp_request(ocelot, port, skb, &clone)) { + dev_err_ratelimited(ds->dev, + "port %d delivering skb without TX timestamp\n", + port); return; + } if (clone) OCELOT_SKB_CB(skb)->clone = clone; diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index c43c8f53faaf..9c62f1d13adc 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -569,22 +569,36 @@ void ocelot_phylink_mac_link_up(struct ocelot *ocelot, int port, } EXPORT_SYMBOL_GPL(ocelot_phylink_mac_link_up); -static void ocelot_port_add_txtstamp_skb(struct ocelot *ocelot, int port, - struct sk_buff *clone) +static int ocelot_port_add_txtstamp_skb(struct ocelot *ocelot, int 
port, + struct sk_buff *clone) { struct ocelot_port *ocelot_port = ocelot->ports[port]; + unsigned long flags; - spin_lock(&ocelot_port->ts_id_lock); + spin_lock_irqsave(&ocelot->ts_id_lock, flags); + + if (ocelot_port->ptp_skbs_in_flight == OCELOT_MAX_PTP_ID || + ocelot->ptp_skbs_in_flight == OCELOT_PTP_FIFO_SIZE) { + spin_unlock_irqrestore(&ocelot->ts_id_lock, flags); + return -EBUSY; + } skb_shinfo(clone)->tx_flags |= SKBTX_IN_PROGRESS; /* Store timestamp ID in OCELOT_SKB_CB(clone)->ts_id */ OCELOT_SKB_CB(clone)->ts_id = ocelot_port->ts_id; + ocelot_port->ts_id++; if (ocelot_port->ts_id == OCELOT_MAX_PTP_ID) ocelot_port->ts_id = 0; + + ocelot_port->ptp_skbs_in_flight++; + ocelot->ptp_skbs_in_flight++; + skb_queue_tail(&ocelot_port->tx_skbs, clone); - spin_unlock(&ocelot_port->ts_id_lock); + spin_unlock_irqrestore(&ocelot->ts_id_lock, flags); + + return 0; } u32 ocelot_ptp_rew_op(struct sk_buff *skb) @@ -633,6 +647,7 @@ int ocelot_port_txtstamp_request(struct ocelot *ocelot, int port, { struct ocelot_port *ocelot_port = ocelot->ports[port]; u8 ptp_cmd = ocelot_port->ptp_cmd; + int err; /* Store ptp_cmd in OCELOT_SKB_CB(skb)->ptp_cmd */ if (ptp_cmd == IFH_REW_OP_ORIGIN_PTP) { @@ -650,7 +665,10 @@ int ocelot_port_txtstamp_request(struct ocelot *ocelot, int port, if (!(*clone)) return -ENOMEM; - ocelot_port_add_txtstamp_skb(ocelot, port, *clone); + err = ocelot_port_add_txtstamp_skb(ocelot, port, *clone); + if (err) + return err; + OCELOT_SKB_CB(skb)->ptp_cmd = ptp_cmd; } @@ -709,9 +727,14 @@ void ocelot_get_txtstamp(struct ocelot *ocelot) id = SYS_PTP_STATUS_PTP_MESS_ID_X(val); txport = SYS_PTP_STATUS_PTP_MESS_TXPORT_X(val); - /* Retrieve its associated skb */ port = ocelot->ports[txport]; + spin_lock(&ocelot->ts_id_lock); + port->ptp_skbs_in_flight--; + ocelot->ptp_skbs_in_flight--; + spin_unlock(&ocelot->ts_id_lock); + + /* Retrieve its associated skb */ spin_lock_irqsave(&port->tx_skbs.lock, flags); skb_queue_walk_safe(&port->tx_skbs, skb, skb_tmp) { @@ -1950,7 
+1973,6 @@ void ocelot_init_port(struct ocelot *ocelot, int port) struct ocelot_port *ocelot_port = ocelot->ports[port]; skb_queue_head_init(&ocelot_port->tx_skbs); - spin_lock_init(&ocelot_port->ts_id_lock); /* Basic L2 initialization */ @@ -2083,6 +2105,7 @@ int ocelot_init(struct ocelot *ocelot) mutex_init(&ocelot->stats_lock); mutex_init(&ocelot->ptp_lock); spin_lock_init(&ocelot->ptp_clock_lock); + spin_lock_init(&ocelot->ts_id_lock); snprintf(queue_name, sizeof(queue_name), "%s-stats", dev_name(ocelot->dev)); ocelot->stats_queue = create_singlethread_workqueue(queue_name); diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 06706a9fd5b1..b0ece85d9a76 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -603,10 +603,10 @@ struct ocelot_port { /* The VLAN ID that will be transmitted as untagged, on egress */ struct ocelot_vlan native_vlan; + unsigned int ptp_skbs_in_flight; u8 ptp_cmd; struct sk_buff_head tx_skbs; u8 ts_id; - spinlock_t ts_id_lock; phy_interface_t phy_mode; @@ -680,6 +680,9 @@ struct ocelot { struct ptp_clock *ptp_clock; struct ptp_clock_info ptp_info; struct hwtstamp_config hwtstamp_config; + unsigned int ptp_skbs_in_flight; + /* Protects the 2-step TX timestamp ID logic */ + spinlock_t ts_id_lock; /* Protects the PTP interface state */ struct mutex ptp_lock; /* Protects the PTP clock */ diff --git a/include/soc/mscc/ocelot_ptp.h b/include/soc/mscc/ocelot_ptp.h index 6e54442b49ad..f085884b1fa2 100644 --- a/include/soc/mscc/ocelot_ptp.h +++ b/include/soc/mscc/ocelot_ptp.h @@ -14,6 +14,7 @@ #include #define OCELOT_MAX_PTP_ID 63 +#define OCELOT_PTP_FIFO_SIZE 128 #define PTP_PIN_CFG_RSZ 0x20 #define PTP_PIN_TOD_SEC_MSB_RSZ PTP_PIN_CFG_RSZ From 9fde506e0c53b8309f69b18b4b8144c544b4b3b1 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 12 Oct 2021 14:40:37 +0300 Subject: [PATCH 215/235] net: mscc: ocelot: warn when a PTP IRQ is raised for an unknown skb When skb_match is NULL, it means we received a 
PTP IRQ for a timestamp ID that the kernel has no idea about, since there is no skb in the timestamping queue with that timestamp ID. This is a grave error and not something to just "continue" over. So print a big warning in case this happens. Also, move the check above ocelot_get_hwtimestamp(): there is no point in reading the full 64-bit current PTP time if we're not going to do anything with it anyway for this skb. Fixes: 4e3b0468e6d7 ("net: mscc: PTP Hardware Clock (PHC) support") Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 9c62f1d13adc..687c07c338cd 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -747,12 +747,12 @@ void ocelot_get_txtstamp(struct ocelot *ocelot) spin_unlock_irqrestore(&port->tx_skbs.lock, flags); + if (WARN_ON(!skb_match)) + continue; + /* Get the h/w timestamp */ ocelot_get_hwtimestamp(ocelot, &ts); - if (unlikely(!skb_match)) - continue; - /* Set the timestamp into the skb */ memset(&shhwtstamps, 0, sizeof(shhwtstamps)); shhwtstamps.hwtstamp = ktime_set(ts.tv_sec, ts.tv_nsec); From fba01283d85a09e0e2ef552c6e764b903111d90a Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 12 Oct 2021 14:40:38 +0300 Subject: [PATCH 216/235] net: mscc: ocelot: deny TX timestamping of non-PTP packets It appears that Ocelot switches cannot timestamp non-PTP frames. I tested this using the isochron program at: https://github.com/vladimiroltean/tsn-scripts with the result that the driver increments the ocelot_port->ts_id counter as expected, puts it in the REW_OP, but the hardware seems to not timestamp these packets at all, since no IRQ is emitted. Therefore check whether we are sending PTP frames, and refuse to populate REW_OP otherwise.
Fixes: 4e3b0468e6d7 ("net: mscc: PTP Hardware Clock (PHC) support") Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 687c07c338cd..3b1f0bb6a414 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -618,16 +618,12 @@ u32 ocelot_ptp_rew_op(struct sk_buff *skb) } EXPORT_SYMBOL(ocelot_ptp_rew_op); -static bool ocelot_ptp_is_onestep_sync(struct sk_buff *skb) +static bool ocelot_ptp_is_onestep_sync(struct sk_buff *skb, + unsigned int ptp_class) { struct ptp_header *hdr; - unsigned int ptp_class; u8 msgtype, twostep; - ptp_class = ptp_classify_raw(skb); - if (ptp_class == PTP_CLASS_NONE) - return false; - hdr = ptp_parse_header(skb, ptp_class); if (!hdr) return false; @@ -647,11 +643,20 @@ int ocelot_port_txtstamp_request(struct ocelot *ocelot, int port, { struct ocelot_port *ocelot_port = ocelot->ports[port]; u8 ptp_cmd = ocelot_port->ptp_cmd; + unsigned int ptp_class; int err; + /* Don't do anything if PTP timestamping not enabled */ + if (!ptp_cmd) + return 0; + + ptp_class = ptp_classify_raw(skb); + if (ptp_class == PTP_CLASS_NONE) + return -EINVAL; + /* Store ptp_cmd in OCELOT_SKB_CB(skb)->ptp_cmd */ if (ptp_cmd == IFH_REW_OP_ORIGIN_PTP) { - if (ocelot_ptp_is_onestep_sync(skb)) { + if (ocelot_ptp_is_onestep_sync(skb, ptp_class)) { OCELOT_SKB_CB(skb)->ptp_cmd = ptp_cmd; return 0; } From ebb4c6a990f786d7e0e4618a0d3766cd660125d8 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 12 Oct 2021 14:40:39 +0300 Subject: [PATCH 217/235] net: mscc: ocelot: cross-check the sequence id from the timestamp FIFO with the skb PTP header The sad reality is that when a PTP frame with a TX timestamping request is transmitted, it isn't guaranteed that it will make it all the way to the wire (due to congestion inside the switch), 
and that a timestamp will be taken by the hardware and placed in the timestamp FIFO where an IRQ will be raised for it. The implication is that if enough PTP frames are silently dropped by the hardware such that the timestamp ID has rolled over, it is possible to match a timestamp to an old skb. Furthermore, nobody will match on the real skb corresponding to this timestamp, since we stupidly matched on a previous one that was stale in the queue, and stopped there. So PTP timestamping will be broken and there will be no way to recover. It looks like the hardware parses the sequenceID from the PTP header, and also provides that metadata for each timestamp. The driver currently ignores this, but it shouldn't. As an extra resiliency measure, do the following: - check whether the PTP sequenceID also matches between the skb and the timestamp, treat the skb as stale otherwise and free it - if we see a stale skb, don't stop there and try to match an skb one more time, chances are there's one more skb in the queue with the same timestamp ID, otherwise we wouldn't have ever found the stale one (it is by timestamp ID that we matched it). While this does not prevent PTP packet drops, it at least prevents the catastrophic consequences of incorrect timestamp matching. Since we already call ptp_classify_raw in the TX path, save the result in the skb->cb of the clone, and just use that result in the interrupt code path. 
Fixes: 4e3b0468e6d7 ("net: mscc: PTP Hardware Clock (PHC) support") Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot.c | 24 +++++++++++++++++++++++- include/soc/mscc/ocelot.h | 1 + 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 3b1f0bb6a414..f0044329e3d7 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -675,6 +675,7 @@ int ocelot_port_txtstamp_request(struct ocelot *ocelot, int port, return err; OCELOT_SKB_CB(skb)->ptp_cmd = ptp_cmd; + OCELOT_SKB_CB(*clone)->ptp_class = ptp_class; } return 0; @@ -708,6 +709,17 @@ static void ocelot_get_hwtimestamp(struct ocelot *ocelot, spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags); } +static bool ocelot_validate_ptp_skb(struct sk_buff *clone, u16 seqid) +{ + struct ptp_header *hdr; + + hdr = ptp_parse_header(clone, OCELOT_SKB_CB(clone)->ptp_class); + if (WARN_ON(!hdr)) + return false; + + return seqid == ntohs(hdr->sequence_id); +} + void ocelot_get_txtstamp(struct ocelot *ocelot) { int budget = OCELOT_PTP_QUEUE_SZ; @@ -715,10 +727,10 @@ void ocelot_get_txtstamp(struct ocelot *ocelot) while (budget--) { struct sk_buff *skb, *skb_tmp, *skb_match = NULL; struct skb_shared_hwtstamps shhwtstamps; + u32 val, id, seqid, txport; struct ocelot_port *port; struct timespec64 ts; unsigned long flags; - u32 val, id, txport; val = ocelot_read(ocelot, SYS_PTP_STATUS); @@ -731,6 +743,7 @@ void ocelot_get_txtstamp(struct ocelot *ocelot) /* Retrieve the ts ID and Tx port */ id = SYS_PTP_STATUS_PTP_MESS_ID_X(val); txport = SYS_PTP_STATUS_PTP_MESS_TXPORT_X(val); + seqid = SYS_PTP_STATUS_PTP_MESS_SEQ_ID(val); port = ocelot->ports[txport]; @@ -740,6 +753,7 @@ void ocelot_get_txtstamp(struct ocelot *ocelot) spin_unlock(&ocelot->ts_id_lock); /* Retrieve its associated skb */ +try_again: spin_lock_irqsave(&port->tx_skbs.lock, flags); 
skb_queue_walk_safe(&port->tx_skbs, skb, skb_tmp) { @@ -755,6 +769,14 @@ void ocelot_get_txtstamp(struct ocelot *ocelot) if (WARN_ON(!skb_match)) continue; + if (!ocelot_validate_ptp_skb(skb_match, seqid)) { + dev_err_ratelimited(ocelot->dev, + "port %d received stale TX timestamp for seqid %d, discarding\n", + txport, seqid); + dev_kfree_skb_any(skb); + goto try_again; + } + /* Get the h/w timestamp */ ocelot_get_hwtimestamp(ocelot, &ts); diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index b0ece85d9a76..cabacef8731c 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -697,6 +697,7 @@ struct ocelot_policer { struct ocelot_skb_cb { struct sk_buff *clone; + unsigned int ptp_class; /* valid only for clones */ u8 ptp_cmd; u8 ts_id; }; From deab6b1cd9789bb9bd466d5e76aecb8b336259b4 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 12 Oct 2021 14:40:40 +0300 Subject: [PATCH 218/235] net: dsa: tag_ocelot: break circular dependency with ocelot switch lib driver As explained here: https://lore.kernel.org/netdev/20210908220834.d7gmtnwrorhharna@skbuf/ DSA tagging protocol drivers cannot depend on symbols exported by switch drivers, because this creates a circular dependency that breaks module autoloading. The tag_ocelot.c file depends on the ocelot_ptp_rew_op() function exported by the common ocelot switch lib. This function looks at OCELOT_SKB_CB(skb) and computes how to populate the REW_OP field of the DSA tag, for PTP timestamping (the command: one-step/two-step, and the TX timestamp identifier). None of that requires deep insight into the driver, it is quite stateless, as it only depends upon the skb->cb. So let's make it a static inline function and put it in include/linux/dsa/ocelot.h, a file that despite its name is used by the ocelot switch driver for populating the injection header too - since commit 40d3f295b5fe ("net: mscc: ocelot: use common tag parsing code with DSA"). 
With that function declared as static inline, its body is expanded inside each call site, so the dependency is broken and the DSA tagger can be built without the switch library, upon which the felix driver depends. Fixes: 39e5308b3250 ("net: mscc: ocelot: support PTP Sync one-step timestamping") Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot.c | 17 ------------ drivers/net/ethernet/mscc/ocelot_net.c | 1 + include/linux/dsa/ocelot.h | 37 ++++++++++++++++++++++++++ include/soc/mscc/ocelot.h | 24 ----------------- net/dsa/Kconfig | 2 -- net/dsa/tag_ocelot.c | 1 - net/dsa/tag_ocelot_8021q.c | 1 + 7 files changed, 39 insertions(+), 44 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index f0044329e3d7..a08e4f530c1c 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -601,23 +601,6 @@ static int ocelot_port_add_txtstamp_skb(struct ocelot *ocelot, int port, return 0; } -u32 ocelot_ptp_rew_op(struct sk_buff *skb) -{ - struct sk_buff *clone = OCELOT_SKB_CB(skb)->clone; - u8 ptp_cmd = OCELOT_SKB_CB(skb)->ptp_cmd; - u32 rew_op = 0; - - if (ptp_cmd == IFH_REW_OP_TWO_STEP_PTP && clone) { - rew_op = ptp_cmd; - rew_op |= OCELOT_SKB_CB(clone)->ts_id << 3; - } else if (ptp_cmd == IFH_REW_OP_ORIGIN_PTP) { - rew_op = ptp_cmd; - } - - return rew_op; -} -EXPORT_SYMBOL(ocelot_ptp_rew_op); - static bool ocelot_ptp_is_onestep_sync(struct sk_buff *skb, unsigned int ptp_class) { diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index 2a85bcb5d0c2..2545727fd5b2 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -8,6 +8,7 @@ * Copyright 2020-2021 NXP */ +#include #include #include #include diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h index 435777a0073c..50641a7529ad 100644 --- a/include/linux/dsa/ocelot.h +++ 
b/include/linux/dsa/ocelot.h @@ -6,6 +6,26 @@ #define _NET_DSA_TAG_OCELOT_H #include +#include + +struct ocelot_skb_cb { + struct sk_buff *clone; + unsigned int ptp_class; /* valid only for clones */ + u8 ptp_cmd; + u8 ts_id; +}; + +#define OCELOT_SKB_CB(skb) \ + ((struct ocelot_skb_cb *)((skb)->cb)) + +#define IFH_TAG_TYPE_C 0 +#define IFH_TAG_TYPE_S 1 + +#define IFH_REW_OP_NOOP 0x0 +#define IFH_REW_OP_DSCP 0x1 +#define IFH_REW_OP_ONE_STEP_PTP 0x2 +#define IFH_REW_OP_TWO_STEP_PTP 0x3 +#define IFH_REW_OP_ORIGIN_PTP 0x5 #define OCELOT_TAG_LEN 16 #define OCELOT_SHORT_PREFIX_LEN 4 @@ -215,4 +235,21 @@ static inline void ocelot_ifh_set_vid(void *injection, u64 vid) packing(injection, &vid, 11, 0, OCELOT_TAG_LEN, PACK, 0); } +/* Determine the PTP REW_OP to use for injecting the given skb */ +static inline u32 ocelot_ptp_rew_op(struct sk_buff *skb) +{ + struct sk_buff *clone = OCELOT_SKB_CB(skb)->clone; + u8 ptp_cmd = OCELOT_SKB_CB(skb)->ptp_cmd; + u32 rew_op = 0; + + if (ptp_cmd == IFH_REW_OP_TWO_STEP_PTP && clone) { + rew_op = ptp_cmd; + rew_op |= OCELOT_SKB_CB(clone)->ts_id << 3; + } else if (ptp_cmd == IFH_REW_OP_ORIGIN_PTP) { + rew_op = ptp_cmd; + } + + return rew_op; +} + #endif diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index cabacef8731c..66b2e65c1179 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -89,15 +89,6 @@ /* Source PGIDs, one per physical port */ #define PGID_SRC 80 -#define IFH_TAG_TYPE_C 0 -#define IFH_TAG_TYPE_S 1 - -#define IFH_REW_OP_NOOP 0x0 -#define IFH_REW_OP_DSCP 0x1 -#define IFH_REW_OP_ONE_STEP_PTP 0x2 -#define IFH_REW_OP_TWO_STEP_PTP 0x3 -#define IFH_REW_OP_ORIGIN_PTP 0x5 - #define OCELOT_NUM_TC 8 #define OCELOT_SPEED_2500 0 @@ -695,16 +686,6 @@ struct ocelot_policer { u32 burst; /* bytes */ }; -struct ocelot_skb_cb { - struct sk_buff *clone; - unsigned int ptp_class; /* valid only for clones */ - u8 ptp_cmd; - u8 ts_id; -}; - -#define OCELOT_SKB_CB(skb) \ - ((struct ocelot_skb_cb 
*)((skb)->cb)) - #define ocelot_read_ix(ocelot, reg, gi, ri) __ocelot_read_ix(ocelot, reg, reg##_GSZ * (gi) + reg##_RSZ * (ri)) #define ocelot_read_gix(ocelot, reg, gi) __ocelot_read_ix(ocelot, reg, reg##_GSZ * (gi)) #define ocelot_read_rix(ocelot, reg, ri) __ocelot_read_ix(ocelot, reg, reg##_RSZ * (ri)) @@ -765,7 +746,6 @@ void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp, int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **skb); void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp); -u32 ocelot_ptp_rew_op(struct sk_buff *skb); #else static inline bool ocelot_can_inject(struct ocelot *ocelot, int grp) @@ -789,10 +769,6 @@ static inline void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp) { } -static inline u32 ocelot_ptp_rew_op(struct sk_buff *skb) -{ - return 0; -} #endif /* Hardware initialization */ diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index bca1b5d66df2..d166377d7085 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -101,8 +101,6 @@ config NET_DSA_TAG_RTL4_A config NET_DSA_TAG_OCELOT tristate "Tag driver for Ocelot family of switches, using NPI port" - depends on MSCC_OCELOT_SWITCH_LIB || \ - (MSCC_OCELOT_SWITCH_LIB=n && COMPILE_TEST) select PACKING help Say Y or M if you want to enable NPI tagging for the Ocelot switches diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c index 8025ed778d33..605b51ca6921 100644 --- a/net/dsa/tag_ocelot.c +++ b/net/dsa/tag_ocelot.c @@ -2,7 +2,6 @@ /* Copyright 2019 NXP */ #include -#include #include "dsa_priv.h" static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev, diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c index 59072930cb02..1e4e66ea6796 100644 --- a/net/dsa/tag_ocelot_8021q.c +++ b/net/dsa/tag_ocelot_8021q.c @@ -9,6 +9,7 @@ * that on egress */ #include +#include #include #include #include "dsa_priv.h" From 49f885b2d97093451410e7279aa29d81e094e108 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: 
Tue, 12 Oct 2021 14:40:41 +0300 Subject: [PATCH 219/235] net: dsa: tag_ocelot_8021q: break circular dependency with ocelot switch lib Michael reported that when using the "ocelot-8021q" tagging protocol, the switch driver module must be manually loaded before the tagging protocol can be loaded/is available. This appears to be the same problem described here: https://lore.kernel.org/netdev/20210908220834.d7gmtnwrorhharna@skbuf/ where due to the fact that DSA tagging protocols make use of symbols exported by the switch drivers, circular dependencies appear and this breaks module autoloading. The ocelot_8021q driver needs the ocelot_can_inject() and ocelot_port_inject_frame() functions from the switch library. Previously the wrong approach was taken to solve that dependency: shims were provided for the case where the ocelot switch library was compiled out, but that turns out to be insufficient, because the dependency when the switch lib _is_ compiled is problematic too. We cannot declare ocelot_can_inject() and ocelot_port_inject_frame() as static inline functions, because these access I/O functions like __ocelot_write_ix() which is called by ocelot_write_rix(). Making those static inline basically means exposing the whole guts of the ocelot switch library, not ideal... We already have one tagging protocol driver which calls into the switch driver during xmit but not using any exported symbol: sja1105_defer_xmit. We can do the same thing here: create a kthread worker and one work item per skb, and let the switch driver itself do the register accesses to send the skb, and then consume it. 
Fixes: 0a6f17c6ae21 ("net: dsa: tag_ocelot_8021q: add support for PTP timestamping") Reported-by: Michael Walle Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/dsa/ocelot/felix.c | 96 ++++++++++++++++++++++++++++++++-- drivers/net/dsa/ocelot/felix.h | 1 + include/linux/dsa/ocelot.h | 12 +++++ include/soc/mscc/ocelot.h | 27 ---------- net/dsa/Kconfig | 2 - net/dsa/tag_ocelot_8021q.c | 38 +++++++++----- 6 files changed, 130 insertions(+), 46 deletions(-) diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 50ef20724958..f8603e068e7c 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -1074,6 +1074,73 @@ static int felix_init_structs(struct felix *felix, int num_phys_ports) return 0; } +#define work_to_xmit_work(w) \ + container_of((w), struct felix_deferred_xmit_work, work) + +static void felix_port_deferred_xmit(struct kthread_work *work) +{ + struct felix_deferred_xmit_work *xmit_work = work_to_xmit_work(work); + struct dsa_switch *ds = xmit_work->dp->ds; + struct sk_buff *skb = xmit_work->skb; + u32 rew_op = ocelot_ptp_rew_op(skb); + struct ocelot *ocelot = ds->priv; + int port = xmit_work->dp->index; + int retries = 10; + + do { + if (ocelot_can_inject(ocelot, 0)) + break; + + cpu_relax(); + } while (--retries); + + if (!retries) { + dev_err(ocelot->dev, "port %d failed to inject skb\n", + port); + kfree_skb(skb); + return; + } + + ocelot_port_inject_frame(ocelot, port, 0, rew_op, skb); + + consume_skb(skb); + kfree(xmit_work); +} + +static int felix_port_setup_tagger_data(struct dsa_switch *ds, int port) +{ + struct dsa_port *dp = dsa_to_port(ds, port); + struct ocelot *ocelot = ds->priv; + struct felix *felix = ocelot_to_felix(ocelot); + struct felix_port *felix_port; + + if (!dsa_port_is_user(dp)) + return 0; + + felix_port = kzalloc(sizeof(*felix_port), GFP_KERNEL); + if (!felix_port) + return -ENOMEM; + + felix_port->xmit_worker = felix->xmit_worker; + 
felix_port->xmit_work_fn = felix_port_deferred_xmit; + + dp->priv = felix_port; + + return 0; +} + +static void felix_port_teardown_tagger_data(struct dsa_switch *ds, int port) +{ + struct dsa_port *dp = dsa_to_port(ds, port); + struct felix_port *felix_port = dp->priv; + + if (!felix_port) + return; + + dp->priv = NULL; + kfree(felix_port); +} + /* Hardware initialization done here so that we can allocate structures with * devm without fear of dsa_register_switch returning -EPROBE_DEFER and causing * us to allocate structures twice (leak memory) and map PCI memory twice @@ -1102,6 +1169,12 @@ static int felix_setup(struct dsa_switch *ds) } } + felix->xmit_worker = kthread_create_worker(0, "felix_xmit"); + if (IS_ERR(felix->xmit_worker)) { + err = PTR_ERR(felix->xmit_worker); + goto out_deinit_timestamp; + } + for (port = 0; port < ds->num_ports; port++) { if (dsa_is_unused_port(ds, port)) continue; @@ -1112,6 +1185,14 @@ static int felix_setup(struct dsa_switch *ds) * bits of vlan tag. 
*/ felix_port_qos_map_init(ocelot, port); + + err = felix_port_setup_tagger_data(ds, port); + if (err) { + dev_err(ds->dev, + "port %d failed to set up tagger data: %pe\n", + port, ERR_PTR(err)); + goto out_deinit_ports; + } } err = ocelot_devlink_sb_register(ocelot); @@ -1138,9 +1219,13 @@ static int felix_setup(struct dsa_switch *ds) if (dsa_is_unused_port(ds, port)) continue; + felix_port_teardown_tagger_data(ds, port); ocelot_deinit_port(ocelot, port); } + kthread_destroy_worker(felix->xmit_worker); + +out_deinit_timestamp: ocelot_deinit_timestamp(ocelot); ocelot_deinit(ocelot); @@ -1164,17 +1249,20 @@ static void felix_teardown(struct dsa_switch *ds) felix_del_tag_protocol(ds, port, felix->tag_proto); } - ocelot_devlink_sb_unregister(ocelot); - ocelot_deinit_timestamp(ocelot); - ocelot_deinit(ocelot); - for (port = 0; port < ocelot->num_phys_ports; port++) { if (dsa_is_unused_port(ds, port)) continue; + felix_port_teardown_tagger_data(ds, port); ocelot_deinit_port(ocelot, port); } + kthread_destroy_worker(felix->xmit_worker); + + ocelot_devlink_sb_unregister(ocelot); + ocelot_deinit_timestamp(ocelot); + ocelot_deinit(ocelot); + if (felix->info->mdio_bus_free) felix->info->mdio_bus_free(ocelot); } diff --git a/drivers/net/dsa/ocelot/felix.h b/drivers/net/dsa/ocelot/felix.h index 54024b6f9498..be3e42e135c0 100644 --- a/drivers/net/dsa/ocelot/felix.h +++ b/drivers/net/dsa/ocelot/felix.h @@ -62,6 +62,7 @@ struct felix { resource_size_t switch_base; resource_size_t imdio_base; enum dsa_tag_protocol tag_proto; + struct kthread_worker *xmit_worker; }; struct net_device *felix_port_to_netdev(struct ocelot *ocelot, int port); diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h index 50641a7529ad..8ae999f587c4 100644 --- a/include/linux/dsa/ocelot.h +++ b/include/linux/dsa/ocelot.h @@ -5,6 +5,7 @@ #ifndef _NET_DSA_TAG_OCELOT_H #define _NET_DSA_TAG_OCELOT_H +#include #include #include @@ -160,6 +161,17 @@ struct ocelot_skb_cb { * 
+------+------+------+------+------+------+------+------+ */ +struct felix_deferred_xmit_work { + struct dsa_port *dp; + struct sk_buff *skb; + struct kthread_work work; +}; + +struct felix_port { + void (*xmit_work_fn)(struct kthread_work *work); + struct kthread_worker *xmit_worker; +}; + static inline void ocelot_xfh_get_rew_val(void *extraction, u64 *rew_val) { packing(extraction, rew_val, 116, 85, OCELOT_TAG_LEN, UNPACK, 0); diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 66b2e65c1179..d7055b41982d 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -737,8 +737,6 @@ u32 __ocelot_target_read_ix(struct ocelot *ocelot, enum ocelot_target target, void __ocelot_target_write_ix(struct ocelot *ocelot, enum ocelot_target target, u32 val, u32 reg, u32 offset); -#if IS_ENABLED(CONFIG_MSCC_OCELOT_SWITCH_LIB) - /* Packet I/O */ bool ocelot_can_inject(struct ocelot *ocelot, int grp); void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp, @@ -746,31 +744,6 @@ void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp, int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **skb); void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp); -#else - -static inline bool ocelot_can_inject(struct ocelot *ocelot, int grp) -{ - return false; -} - -static inline void ocelot_port_inject_frame(struct ocelot *ocelot, int port, - int grp, u32 rew_op, - struct sk_buff *skb) -{ -} - -static inline int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, - struct sk_buff **skb) -{ - return -EIO; -} - -static inline void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp) -{ -} - -#endif - /* Hardware initialization */ int ocelot_regfields_init(struct ocelot *ocelot, const struct reg_field *const regfields); diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index d166377d7085..d8ee15f1c7a9 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -112,8 +112,6 @@ config NET_DSA_TAG_OCELOT config 
NET_DSA_TAG_OCELOT_8021Q tristate "Tag driver for Ocelot family of switches, using VLAN" - depends on MSCC_OCELOT_SWITCH_LIB || \ - (MSCC_OCELOT_SWITCH_LIB=n && COMPILE_TEST) help Say Y or M if you want to enable support for tagging frames with a custom VLAN-based header. Frames that require timestamping, such as diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c index 1e4e66ea6796..d05c352f96e5 100644 --- a/net/dsa/tag_ocelot_8021q.c +++ b/net/dsa/tag_ocelot_8021q.c @@ -10,10 +10,31 @@ */ #include #include -#include -#include #include "dsa_priv.h" +static struct sk_buff *ocelot_defer_xmit(struct dsa_port *dp, + struct sk_buff *skb) +{ + struct felix_deferred_xmit_work *xmit_work; + struct felix_port *felix_port = dp->priv; + + xmit_work = kzalloc(sizeof(*xmit_work), GFP_ATOMIC); + if (!xmit_work) + return NULL; + + /* Calls felix_port_deferred_xmit in felix.c */ + kthread_init_work(&xmit_work->work, felix_port->xmit_work_fn); + /* Increase refcount so the kfree_skb in dsa_slave_xmit + * won't really free the packet. 
+ */ + xmit_work->dp = dp; + xmit_work->skb = skb_get(skb); + + kthread_queue_work(felix_port->xmit_worker, &xmit_work->work); + + return NULL; +} + static struct sk_buff *ocelot_xmit(struct sk_buff *skb, struct net_device *netdev) { @@ -21,18 +42,9 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb, u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index); u16 queue_mapping = skb_get_queue_mapping(skb); u8 pcp = netdev_txq_to_tc(netdev, queue_mapping); - struct ocelot *ocelot = dp->ds->priv; - int port = dp->index; - u32 rew_op = 0; - rew_op = ocelot_ptp_rew_op(skb); - if (rew_op) { - if (!ocelot_can_inject(ocelot, 0)) - return NULL; - - ocelot_port_inject_frame(ocelot, port, 0, rew_op, skb); - return NULL; - } + if (ocelot_ptp_rew_op(skb)) + return ocelot_defer_xmit(dp, skb); return dsa_8021q_xmit(skb, netdev, ETH_P_8021Q, ((pcp << VLAN_PRIO_SHIFT) | tx_vid)); From 1328a883258b4507909090ed0a9ad63771f9f780 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 12 Oct 2021 14:40:42 +0300 Subject: [PATCH 220/235] net: dsa: felix: purge skb from TX timestamping queue if it cannot be sent At present, when a PTP packet which requires TX timestamping gets dropped under congestion by the switch, things go downhill very fast. The driver keeps a clone of that skb in a queue of packets awaiting TX timestamp interrupts, but interrupts will never be raised for the dropped packets. Moreover, matching timestamped packets to timestamps is done by a small per-port timestamp ID (it wraps around at OCELOT_MAX_PTP_ID, i.e. 63), and this can wrap around and we can match on the wrong skb. Since with the default NPI-based tagging protocol, we get no notification about packet drops, the best we can do is eventually recover from the drop of a PTP frame: its skb will be dead memory until another skb which was assigned the same timestamp ID happens to find it.
However, with the ocelot-8021q tagger which injects packets using the manual register interface, it appears that we can check for more information, such as: - whether the input queue has reached the high watermark or not - whether the injection group's FIFO can accept additional data or not so we know that a PTP frame is likely to get dropped before actually sending it, and drop it ourselves (because DSA uses NETIF_F_LLTX, so it can't return NETDEV_TX_BUSY to ask the qdisc to requeue the packet). But when we do that, we can also remove the skb from the timestamping queue, because there surely won't be any timestamp that matches it. Fixes: 0a6f17c6ae21 ("net: dsa: tag_ocelot_8021q: add support for PTP timestamping") Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/dsa/ocelot/felix.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index f8603e068e7c..9af8f900aa56 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -1074,6 +1074,33 @@ static int felix_init_structs(struct felix *felix, int num_phys_ports) return 0; } +static void ocelot_port_purge_txtstamp_skb(struct ocelot *ocelot, int port, + struct sk_buff *skb) +{ + struct ocelot_port *ocelot_port = ocelot->ports[port]; + struct sk_buff *clone = OCELOT_SKB_CB(skb)->clone; + struct sk_buff *skb_match = NULL, *skb_tmp; + unsigned long flags; + + if (!clone) + return; + + spin_lock_irqsave(&ocelot_port->tx_skbs.lock, flags); + + skb_queue_walk_safe(&ocelot_port->tx_skbs, skb, skb_tmp) { + if (skb != clone) + continue; + __skb_unlink(skb, &ocelot_port->tx_skbs); + skb_match = skb; + break; + } + + spin_unlock_irqrestore(&ocelot_port->tx_skbs.lock, flags); + + WARN_ONCE(!skb_match, + "Could not find skb clone in TX timestamping list\n"); +} + #define work_to_xmit_work(w) \ container_of((w), struct felix_deferred_xmit_work, work) @@ -1097,6 +1124,7 @@ static void 
felix_port_deferred_xmit(struct kthread_work *work) if (!retries) { dev_err(ocelot->dev, "port %d failed to inject skb\n", port); + ocelot_port_purge_txtstamp_skb(ocelot, port, skb); kfree_skb(skb); return; } From 43ba33b4f143965a451cfdc1e826b61f6933c887 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 12 Oct 2021 14:40:43 +0300 Subject: [PATCH 221/235] net: dsa: tag_ocelot_8021q: fix inability to inject STP BPDUs into BLOCKING ports When setting up a bridge with stp_state 1, topology changes are not detected and loops are not blocked. This is because the standard way of transmitting a packet, based on VLAN IDs redirected by VCAP IS2 to the right egress port, does not override the port STP state (in the case of Ocelot switches, that's really the PGID_SRC masks). To force a packet to be injected into a port that's BLOCKING, we must send it as a control packet, which means in the case of this tagger to send it using the manual register injection method. We already do this for PTP frames, extend the logic to apply to any link-local MAC DA. 
Fixes: 7c83a7c539ab ("net: dsa: add a second tagger for Ocelot switches based on tag_8021q") Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- net/dsa/tag_ocelot_8021q.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c index d05c352f96e5..3412051981d7 100644 --- a/net/dsa/tag_ocelot_8021q.c +++ b/net/dsa/tag_ocelot_8021q.c @@ -42,8 +42,9 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb, u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index); u16 queue_mapping = skb_get_queue_mapping(skb); u8 pcp = netdev_txq_to_tc(netdev, queue_mapping); + struct ethhdr *hdr = eth_hdr(skb); - if (ocelot_ptp_rew_op(skb)) + if (ocelot_ptp_rew_op(skb) || is_link_local_ether_addr(hdr->h_dest)) return ocelot_defer_xmit(dp, skb); return dsa_8021q_xmit(skb, netdev, ETH_P_8021Q, From 8d5f7954b7c8de54902a8beda141064a7e2e6ee0 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 12 Oct 2021 14:40:44 +0300 Subject: [PATCH 222/235] net: dsa: felix: break at first CPU port during init and teardown The NXP LS1028A switch has two Ethernet ports towards the CPU, but only one of them is capable of acting as an NPI port at a time (inject and extract packets using DSA tags). However, using the alternative ocelot-8021q tagging protocol, it should be possible to use both CPU ports symmetrically, but for that we need to mark both ports in the device tree as DSA masters. In the process of doing that, it can be seen that traffic to/from the network stack gets broken, and this is because the Felix driver iterates through all DSA CPU ports and configures them as NPI ports. But since there can only be a single NPI port, we effectively end up in a situation where DSA thinks the default CPU port is the first one, but the hardware port configured to be an NPI is the last one. 
I would like to treat this as a bug, because if the updated device trees are going to start circulating, it would be really good for existing kernels to support them, too. Fixes: adb3dccf090b ("net: dsa: felix: convert to the new .change_tag_protocol DSA API") Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/dsa/ocelot/felix.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 9af8f900aa56..341236dcbdb4 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -266,12 +266,12 @@ static void felix_8021q_cpu_port_deinit(struct ocelot *ocelot, int port) */ static int felix_setup_mmio_filtering(struct felix *felix) { - unsigned long user_ports = 0, cpu_ports = 0; + unsigned long user_ports = dsa_user_ports(felix->ds); struct ocelot_vcap_filter *redirect_rule; struct ocelot_vcap_filter *tagging_rule; struct ocelot *ocelot = &felix->ocelot; struct dsa_switch *ds = felix->ds; - int port, ret; + int cpu = -1, port, ret; tagging_rule = kzalloc(sizeof(struct ocelot_vcap_filter), GFP_KERNEL); if (!tagging_rule) @@ -284,12 +284,15 @@ static int felix_setup_mmio_filtering(struct felix *felix) } for (port = 0; port < ocelot->num_phys_ports; port++) { - if (dsa_is_user_port(ds, port)) - user_ports |= BIT(port); - if (dsa_is_cpu_port(ds, port)) - cpu_ports |= BIT(port); + if (dsa_is_cpu_port(ds, port)) { + cpu = port; + break; + } } + if (cpu < 0) + return -EINVAL; + tagging_rule->key_type = OCELOT_VCAP_KEY_ETYPE; *(__be16 *)tagging_rule->key.etype.etype.value = htons(ETH_P_1588); *(__be16 *)tagging_rule->key.etype.etype.mask = htons(0xffff); @@ -325,7 +328,7 @@ static int felix_setup_mmio_filtering(struct felix *felix) * the CPU port module */ redirect_rule->action.mask_mode = OCELOT_MASK_MODE_REDIRECT; - redirect_rule->action.port_mask = cpu_ports; + redirect_rule->action.port_mask = 
BIT(cpu); } else { /* Trap PTP packets only to the CPU port module (which is * redirected to the NPI port) @@ -1235,6 +1238,7 @@ static int felix_setup(struct dsa_switch *ds) * there's no real point in checking for errors. */ felix_set_tag_protocol(ds, port, felix->tag_proto); + break; } ds->mtu_enforcement_ingress = true; @@ -1275,6 +1279,7 @@ static void felix_teardown(struct dsa_switch *ds) continue; felix_del_tag_protocol(ds, port, felix->tag_proto); + break; } for (port = 0; port < ocelot->num_phys_ports; port++) { From e599ee234ad4fdfe241d937bbabd96e0d8f9d868 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Tue, 12 Oct 2021 11:34:46 +0200 Subject: [PATCH 223/235] net: arc: select CRC32 Fix the following build/link error by adding a dependency on the CRC32 routines: ld: drivers/net/ethernet/arc/emac_main.o: in function `arc_emac_set_rx_mode': emac_main.c:(.text+0xb11): undefined reference to `crc32_le' The crc32_le() call comes through the ether_crc_le() call in arc_emac_set_rx_mode(). [v2: moved the select to ARC_EMAC_CORE; the Makefile is a bit confusing, but the error comes from emac_main.o, which is part of the arc_emac module, which in turn is enabled by CONFIG_ARC_EMAC_CORE. Note that arc_emac is different from emac_arc...] 
Fixes: 775dd682e2b0ec ("arc_emac: implement promiscuous mode and multicast filtering") Cc: Arnd Bergmann Signed-off-by: Vegard Nossum Link: https://lore.kernel.org/r/20211012093446.1575-1-vegard.nossum@oracle.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/arc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/arc/Kconfig b/drivers/net/ethernet/arc/Kconfig index 37a41773dd43..92a79c4ffa2c 100644 --- a/drivers/net/ethernet/arc/Kconfig +++ b/drivers/net/ethernet/arc/Kconfig @@ -21,6 +21,7 @@ config ARC_EMAC_CORE depends on ARC || ARCH_ROCKCHIP || COMPILE_TEST select MII select PHYLIB + select CRC32 config ARC_EMAC tristate "ARC EMAC support" From 427f974d9727ca681085ddcd0530c97ab5811ae0 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Tue, 12 Oct 2021 17:25:09 +0200 Subject: [PATCH 224/235] net: korina: select CRC32 Fix the following build/link error by adding a dependency on the CRC32 routines: ld: drivers/net/ethernet/korina.o: in function `korina_multicast_list': korina.c:(.text+0x1af): undefined reference to `crc32_le' Fixes: ef11291bcd5f9 ("Add support the Korina (IDT RC32434) Ethernet MAC") Cc: Arnd Bergmann Signed-off-by: Vegard Nossum Acked-by: Florian fainelli Link: https://lore.kernel.org/r/20211012152509.21771-1-vegard.nossum@oracle.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index d796684ec9ca..412ae3e43ffb 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -100,6 +100,7 @@ config JME config KORINA tristate "Korina (IDT RC32434) Ethernet support" depends on MIKROTIK_RB532 || COMPILE_TEST + select CRC32 select MII help If you have a Mikrotik RouterBoard 500 or IDT RC32434 From f03dca0c9e2297c84a018e306f8a9cd534ee4287 Mon Sep 17 00:00:00 2001 From: Nanyong Sun Date: Tue, 12 Oct 2021 20:59:01 +0800 Subject: [PATCH 225/235] net: encx24j600: check error in 
devm_regmap_init_encx24j600 devm_regmap_init may return error which caused by like out of memory, this will results in null pointer dereference later when reading or writing register: general protection fault in encx24j600_spi_probe KASAN: null-ptr-deref in range [0x0000000000000090-0x0000000000000097] CPU: 0 PID: 286 Comm: spi-encx24j600- Not tainted 5.15.0-rc2-00142-g9978db750e31-dirty #11 9c53a778c1306b1b02359f3c2bbedc0222cba652 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 RIP: 0010:regcache_cache_bypass drivers/base/regmap/regcache.c:540 Code: 54 41 89 f4 55 53 48 89 fb 48 83 ec 08 e8 26 94 a8 fe 48 8d bb a0 00 00 00 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <80> 3c 02 00 0f 85 4a 03 00 00 4c 8d ab b0 00 00 00 48 8b ab a0 00 RSP: 0018:ffffc900010476b8 EFLAGS: 00010207 RAX: dffffc0000000000 RBX: fffffffffffffff4 RCX: 0000000000000000 RDX: 0000000000000012 RSI: ffff888002de0000 RDI: 0000000000000094 RBP: ffff888013c9a000 R08: 0000000000000000 R09: fffffbfff3f9cc6a R10: ffffc900010476e8 R11: fffffbfff3f9cc69 R12: 0000000000000001 R13: 000000000000000a R14: ffff888013c9af54 R15: ffff888013c9ad08 FS: 00007ffa984ab580(0000) GS:ffff88801fe00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055a6384136c8 CR3: 000000003bbe6003 CR4: 0000000000770ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: encx24j600_spi_probe drivers/net/ethernet/microchip/encx24j600.c:459 spi_probe drivers/spi/spi.c:397 really_probe drivers/base/dd.c:517 __driver_probe_device drivers/base/dd.c:751 driver_probe_device drivers/base/dd.c:782 __device_attach_driver drivers/base/dd.c:899 bus_for_each_drv drivers/base/bus.c:427 __device_attach drivers/base/dd.c:971 bus_probe_device drivers/base/bus.c:487 device_add drivers/base/core.c:3364 __spi_add_device drivers/spi/spi.c:599 spi_add_device 
drivers/spi/spi.c:641 spi_new_device drivers/spi/spi.c:717 new_device_store+0x18c/0x1f1 [spi_stub 4e02719357f1ff33f5a43d00630982840568e85e] dev_attr_store drivers/base/core.c:2074 sysfs_kf_write fs/sysfs/file.c:139 kernfs_fop_write_iter fs/kernfs/file.c:300 new_sync_write fs/read_write.c:508 (discriminator 4) vfs_write fs/read_write.c:594 ksys_write fs/read_write.c:648 do_syscall_64 arch/x86/entry/common.c:50 entry_SYSCALL_64_after_hwframe arch/x86/entry/entry_64.S:113 Add error check in devm_regmap_init_encx24j600 to avoid this situation. Fixes: 04fbfce7a222 ("net: Microchip encx24j600 driver") Reported-by: Hulk Robot Signed-off-by: Nanyong Sun Link: https://lore.kernel.org/r/20211012125901.3623144-1-sunnanyong@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/encx24j600-regmap.c | 10 ++++++++-- drivers/net/ethernet/microchip/encx24j600.c | 5 ++++- drivers/net/ethernet/microchip/encx24j600_hw.h | 4 ++-- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/microchip/encx24j600-regmap.c b/drivers/net/ethernet/microchip/encx24j600-regmap.c index 796e46a53926..81a8ccca7e5e 100644 --- a/drivers/net/ethernet/microchip/encx24j600-regmap.c +++ b/drivers/net/ethernet/microchip/encx24j600-regmap.c @@ -497,13 +497,19 @@ static struct regmap_bus phymap_encx24j600 = { .reg_read = regmap_encx24j600_phy_reg_read, }; -void devm_regmap_init_encx24j600(struct device *dev, - struct encx24j600_context *ctx) +int devm_regmap_init_encx24j600(struct device *dev, + struct encx24j600_context *ctx) { mutex_init(&ctx->mutex); regcfg.lock_arg = ctx; ctx->regmap = devm_regmap_init(dev, &regmap_encx24j600, ctx, &regcfg); + if (IS_ERR(ctx->regmap)) + return PTR_ERR(ctx->regmap); ctx->phymap = devm_regmap_init(dev, &phymap_encx24j600, ctx, &phycfg); + if (IS_ERR(ctx->phymap)) + return PTR_ERR(ctx->phymap); + + return 0; } EXPORT_SYMBOL_GPL(devm_regmap_init_encx24j600); diff --git a/drivers/net/ethernet/microchip/encx24j600.c
b/drivers/net/ethernet/microchip/encx24j600.c index ee921a99e439..0bc6b3176fbf 100644 --- a/drivers/net/ethernet/microchip/encx24j600.c +++ b/drivers/net/ethernet/microchip/encx24j600.c @@ -1023,10 +1023,13 @@ static int encx24j600_spi_probe(struct spi_device *spi) priv->speed = SPEED_100; priv->ctx.spi = spi; - devm_regmap_init_encx24j600(&spi->dev, &priv->ctx); ndev->irq = spi->irq; ndev->netdev_ops = &encx24j600_netdev_ops; + ret = devm_regmap_init_encx24j600(&spi->dev, &priv->ctx); + if (ret) + goto out_free; + mutex_init(&priv->lock); /* Reset device and check if it is connected */ diff --git a/drivers/net/ethernet/microchip/encx24j600_hw.h b/drivers/net/ethernet/microchip/encx24j600_hw.h index fac61a8fbd02..34c5a289898c 100644 --- a/drivers/net/ethernet/microchip/encx24j600_hw.h +++ b/drivers/net/ethernet/microchip/encx24j600_hw.h @@ -15,8 +15,8 @@ struct encx24j600_context { int bank; }; -void devm_regmap_init_encx24j600(struct device *dev, - struct encx24j600_context *ctx); +int devm_regmap_init_encx24j600(struct device *dev, + struct encx24j600_context *ctx); /* Single-byte instructions */ #define BANK_SELECT(bank) (0xC0 | ((bank & (BANK_MASK >> BANK_SHIFT)) << 1)) From 1f922d9e374f0ca7d1b241666c792f403c6120aa Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 13 Oct 2021 03:19:09 +0300 Subject: [PATCH 226/235] Revert "net: procfs: add seq_puts() statement for dev_mcast" This reverts commit ec18e8455484370d633a718c6456ddbf6eceef21. It turns out that there are user space programs which got broken by that change. One example is the "ifstat" program shipped by Debian: https://packages.debian.org/source/bullseye/ifstat which, confusingly enough, seems to not have anything in common with the much more familiar (at least to me) ifstat program from iproute2: https://git.kernel.org/pub/scm/network/iproute2/iproute2.git/tree/misc/ifstat.c root@debian:~# ifstat ifstat: /proc/net/dev: unsupported format. 
This change modified the header (first two lines of text) in /proc/net/dev so that it looks like this: root@debian:~# cat /proc/net/dev Interface| Receive | Transmit | bytes packets errs drop fifo frame compressed multicast| bytes packets errs drop fifo colls carrier compressed lo: 97400 1204 0 0 0 0 0 0 97400 1204 0 0 0 0 0 0 bond0: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 sit0: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 eno2: 5002206 6651 0 0 0 0 0 0 105518642 1465023 0 0 0 0 0 0 swp0: 134531 2448 0 0 0 0 0 0 99599598 1464381 0 0 0 0 0 0 swp1: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 swp2: 4867675 4203 0 0 0 0 0 0 58134 631 0 0 0 0 0 0 sw0p0: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 sw0p1: 124739 2448 0 1422 0 0 0 0 93741184 1464369 0 0 0 0 0 0 sw0p2: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 sw2p0: 4850863 4203 0 0 0 0 0 0 54722 619 0 0 0 0 0 0 sw2p1: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 sw2p2: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 sw2p3: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 br0: 10508 212 0 212 0 0 0 212 61369558 958857 0 0 0 0 0 0 whereas before it looked like this: root@debian:~# cat /proc/net/dev Inter-| Receive | Transmit face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed lo: 13160 164 0 0 0 0 0 0 13160 164 0 0 0 0 0 0 bond0: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 sit0: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 eno2: 30824 268 0 0 0 0 0 0 3332 37 0 0 0 0 0 0 swp0: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 swp1: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 swp2: 30824 268 0 0 0 0 0 0 2428 27 0 0 0 0 0 0 sw0p0: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 sw0p1: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 sw0p2: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 sw2p0: 29752 268 0 0 0 0 0 0 1564 17 0 0 0 0 0 0 sw2p1: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 sw2p2: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 sw2p3: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 The reason why the ifstat shipped by Debian (v1.1, with a Debian patch upgrading it to 1.1-8.1 at the time of writing) is broken is because its "proc" driver/backend parses the header very literally: main/drivers.c#L825 
if (!data->checked && strncmp(buf, "Inter-|", 7)) goto badproc; and there's no way in which the header can be changed such that programs parsing like that would not get broken. Even if we fix this ancient and very "lightly" maintained program to parse the text output of /proc/net/dev in a more sensible way, this story seems bound to repeat again with other programs, and modifying them all could cause more trouble than it's worth. On the other hand, the reverted patch had no other reason than an aesthetic one, so reverting it is the simplest way out. I don't know what other distributions would be affected; the fact that Debian doesn't ship the iproute2 version of the program (a different code base altogether, which uses netlink and not /proc/net/dev) is surprising in itself. Fixes: ec18e8455484 ("net: procfs: add seq_puts() statement for dev_mcast") Link: https://lore.kernel.org/netdev/20211009163511.vayjvtn3rrteglsu@skbuf/ Cc: Yajun Deng Cc: Matthieu Baerts Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20211013001909.3164185-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- net/core/net-procfs.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index eab5fc88a002..d8b9dbabd4a4 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -77,8 +77,8 @@ static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) struct rtnl_link_stats64 temp; const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp); - seq_printf(seq, "%9s: %16llu %12llu %4llu %6llu %4llu %5llu %10llu %9llu " - "%16llu %12llu %4llu %6llu %4llu %5llu %7llu %10llu\n", + seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu " + "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n", dev->name, stats->rx_bytes, stats->rx_packets, stats->rx_errors, stats->rx_dropped + stats->rx_missed_errors, @@ -103,11 +103,11 @@ static void 
dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) static int dev_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) - seq_puts(seq, "Interface| Receive " - " | Transmit\n" - " | bytes packets errs drop fifo frame " - "compressed multicast| bytes packets errs " - " drop fifo colls carrier compressed\n"); + seq_puts(seq, "Inter-| Receive " + " | Transmit\n" + " face |bytes packets errs drop fifo frame " + "compressed multicast|bytes packets errs " + "drop fifo colls carrier compressed\n"); else dev_seq_printf_stats(seq, v); return 0; @@ -259,14 +259,14 @@ static int ptype_seq_show(struct seq_file *seq, void *v) struct packet_type *pt = v; if (v == SEQ_START_TOKEN) - seq_puts(seq, "Type Device Function\n"); + seq_puts(seq, "Type Device Function\n"); else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) { if (pt->type == htons(ETH_P_ALL)) seq_puts(seq, "ALL "); else seq_printf(seq, "%04x", ntohs(pt->type)); - seq_printf(seq, " %-9s %ps\n", + seq_printf(seq, " %-8s %ps\n", pt->dev ? pt->dev->name : "", pt->func); } @@ -327,14 +327,12 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v) struct netdev_hw_addr *ha; struct net_device *dev = v; - if (v == SEQ_START_TOKEN) { - seq_puts(seq, "Ifindex Interface Refcount Global_use Address\n"); + if (v == SEQ_START_TOKEN) return 0; - } netif_addr_lock_bh(dev); netdev_for_each_mc_addr(ha, dev) { - seq_printf(seq, "%-7d %-9s %-8d %-10d %*phN\n", + seq_printf(seq, "%-4d %-15s %-5d %-5d %*phN\n", dev->ifindex, dev->name, ha->refcount, ha->global_use, (int)dev->addr_len, ha->addr); From 0911ab31896f0e908540746414a77dd63912748d Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Wed, 13 Oct 2021 11:49:32 +0800 Subject: [PATCH 227/235] nfc: fix error handling of nfc_proto_register() When nfc proto id is using, nfc_proto_register() return -EBUSY error code, but forgot to unregister proto. Fix it by adding proto_unregister() in the error handling case. 
Fixes: c7fe3b52c128 ("NFC: add NFC socket family") Signed-off-by: Ziyang Xuan Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20211013034932.2833737-1-william.xuanziyang@huawei.com Signed-off-by: Jakub Kicinski --- net/nfc/af_nfc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/nfc/af_nfc.c b/net/nfc/af_nfc.c index 6024fad905ff..dda323e0a473 100644 --- a/net/nfc/af_nfc.c +++ b/net/nfc/af_nfc.c @@ -60,6 +60,9 @@ int nfc_proto_register(const struct nfc_protocol *nfc_proto) proto_tab[nfc_proto->id] = nfc_proto; write_unlock(&proto_tab_lock); + if (rc) + proto_unregister(nfc_proto->proto); + return rc; } EXPORT_SYMBOL(nfc_proto_register); From 58e7dcc9ca29c14e44267a4d0ea61e3229124907 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Wed, 13 Oct 2021 15:50:12 +0800 Subject: [PATCH 228/235] NFC: digital: fix possible memory leak in digital_tg_listen_mdaa() 'params' is allocated in digital_tg_listen_mdaa(), but is not freed when digital_send_cmd() fails, which causes a memory leak. Fix it by freeing 'params' if digital_send_cmd() returns an error.
Fixes: 1c7a4c24fbfd ("NFC Digital: Add target NFC-DEP support") Signed-off-by: Ziyang Xuan Reviewed-by: Krzysztof Kozlowski Signed-off-by: Jakub Kicinski --- net/nfc/digital_core.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c index fefc03674f4f..d63d2e5dc60c 100644 --- a/net/nfc/digital_core.c +++ b/net/nfc/digital_core.c @@ -277,6 +277,7 @@ int digital_tg_configure_hw(struct nfc_digital_dev *ddev, int type, int param) static int digital_tg_listen_mdaa(struct nfc_digital_dev *ddev, u8 rf_tech) { struct digital_tg_mdaa_params *params; + int rc; params = kzalloc(sizeof(*params), GFP_KERNEL); if (!params) @@ -291,8 +292,12 @@ static int digital_tg_listen_mdaa(struct nfc_digital_dev *ddev, u8 rf_tech) get_random_bytes(params->nfcid2 + 2, NFC_NFCID2_MAXSIZE - 2); params->sc = DIGITAL_SENSF_FELICA_SC; - return digital_send_cmd(ddev, DIGITAL_CMD_TG_LISTEN_MDAA, NULL, params, - 500, digital_tg_recv_atr_req, NULL); + rc = digital_send_cmd(ddev, DIGITAL_CMD_TG_LISTEN_MDAA, NULL, params, + 500, digital_tg_recv_atr_req, NULL); + if (rc) + kfree(params); + + return rc; } static int digital_tg_listen_md(struct nfc_digital_dev *ddev, u8 rf_tech) From 291c932fc3692e4d211a445ba8aa35663831bac7 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Wed, 13 Oct 2021 15:50:32 +0800 Subject: [PATCH 229/235] NFC: digital: fix possible memory leak in digital_in_send_sdd_req() 'skb' is allocated in digital_in_send_sdd_req(), but is not freed when digital_in_send_cmd() fails, which causes a memory leak. Fix it by freeing 'skb' if digital_in_send_cmd() returns an error.
Fixes: 2c66daecc409 ("NFC Digital: Add NFC-A technology support") Signed-off-by: Ziyang Xuan Reviewed-by: Krzysztof Kozlowski Signed-off-by: Jakub Kicinski --- net/nfc/digital_technology.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/nfc/digital_technology.c b/net/nfc/digital_technology.c index 84d2345c75a3..3adf4589852a 100644 --- a/net/nfc/digital_technology.c +++ b/net/nfc/digital_technology.c @@ -465,8 +465,12 @@ static int digital_in_send_sdd_req(struct nfc_digital_dev *ddev, skb_put_u8(skb, sel_cmd); skb_put_u8(skb, DIGITAL_SDD_REQ_SEL_PAR); - return digital_in_send_cmd(ddev, skb, 30, digital_in_recv_sdd_res, - target); + rc = digital_in_send_cmd(ddev, skb, 30, digital_in_recv_sdd_res, + target); + if (rc) + kfree_skb(skb); + + return rc; } static void digital_in_recv_sens_res(struct nfc_digital_dev *ddev, void *arg, From 26d6574109838b8fa40a8261421693015bab0579 Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Tue, 12 Oct 2021 22:40:56 -0500 Subject: [PATCH 230/235] MAINTAINERS: Update entry for the Stratix10 firmware Richard Gong is no longer at Intel, so update the MAINTAINER's entry for the Stratix10 firmware drivers. 
Signed-off-by: Dinh Nguyen Signed-off-by: Linus Torvalds --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index e0bca0de0df7..6b6f98483deb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9629,7 +9629,7 @@ F: include/uapi/linux/isst_if.h F: tools/power/x86/intel-speed-select/ INTEL STRATIX10 FIRMWARE DRIVERS -M: Richard Gong +M: Dinh Nguyen L: linux-kernel@vger.kernel.org S: Maintained F: Documentation/ABI/testing/sysfs-devices-platform-stratix10-rsu From 40507e7aada8422c38aafa0c8a1a09e4623c712a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 Oct 2021 16:35:49 +0200 Subject: [PATCH 231/235] ethernet: s2io: fix setting mac address during resume After recent cleanups, gcc started warning about a suspicious memcpy() call during the s2io_io_resume() function: In function '__dev_addr_set', inlined from 'eth_hw_addr_set' at include/linux/etherdevice.h:318:2, inlined from 's2io_set_mac_addr' at drivers/net/ethernet/neterion/s2io.c:5205:2, inlined from 's2io_io_resume' at drivers/net/ethernet/neterion/s2io.c:8569:7: arch/x86/include/asm/string_32.h:182:25: error: '__builtin_memcpy' accessing 6 bytes at offsets 0 and 2 overlaps 4 bytes at offset 2 [-Werror=restrict] 182 | #define memcpy(t, f, n) __builtin_memcpy(t, f, n) | ^~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/netdevice.h:4648:9: note: in expansion of macro 'memcpy' 4648 | memcpy(dev->dev_addr, addr, len); | ^~~~~~ What apparently happened is that an old cleanup changed the calling conventions for s2io_set_mac_addr() from taking an ethernet address as a character array to taking a struct sockaddr, but one of the callers was not changed at the same time. Change it to instead call the low-level do_s2io_prog_unicast() function that still takes the old argument type. 
Fixes: 2fd376884558 ("S2io: Added support set_mac_address driver entry point") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20211013143613.2049096-1-arnd@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/neterion/s2io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index 09c0e839cca5..3b6b2e61139e 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -8566,7 +8566,7 @@ static void s2io_io_resume(struct pci_dev *pdev) return; } - if (s2io_set_mac_addr(netdev, netdev->dev_addr) == FAILURE) { + if (do_s2io_prog_unicast(netdev, netdev->dev_addr) == FAILURE) { s2io_card_down(sp); pr_err("Can't restore mac addr after reset.\n"); return; From 332fdf951df8b870e3da86b122ae304e2aabe88c Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 12 Oct 2021 20:49:55 +0300 Subject: [PATCH 232/235] mlxsw: thermal: Fix out-of-bounds memory accesses Currently, mlxsw allows cooling states to be set above the maximum cooling state supported by the driver: # cat /sys/class/thermal/thermal_zone2/cdev0/type mlxsw_fan # cat /sys/class/thermal/thermal_zone2/cdev0/max_state 10 # echo 18 > /sys/class/thermal/thermal_zone2/cdev0/cur_state # echo $? 0 This results in out-of-bounds memory accesses when thermal state transition statistics are enabled (CONFIG_THERMAL_STATISTICS=y), as the transition table is accessed with a too large index (state) [1]. According to the thermal maintainer, it is the responsibility of the driver to reject such operations [2]. Therefore, return an error when the state to be set exceeds the maximum cooling state supported by the driver. To avoid dead code, as suggested by the thermal maintainer [3], partially revert commit a421ce088ac8 ("mlxsw: core: Extend cooling device with cooling levels") that tried to interpret these invalid cooling states (above the maximum) in a special way. 
The cooling levels array is not removed in order to prevent the fans going below 20% PWM, which would cause them to get stuck at 0% PWM. [1] BUG: KASAN: slab-out-of-bounds in thermal_cooling_device_stats_update+0x271/0x290 Read of size 4 at addr ffff8881052f7bf8 by task kworker/0:0/5 CPU: 0 PID: 5 Comm: kworker/0:0 Not tainted 5.15.0-rc3-custom-45935-gce1adf704b14 #122 Hardware name: Mellanox Technologies Ltd. "MSN2410-CB2FO"/"SA000874", BIOS 4.6.5 03/08/2016 Workqueue: events_freezable_power_ thermal_zone_device_check Call Trace: dump_stack_lvl+0x8b/0xb3 print_address_description.constprop.0+0x1f/0x140 kasan_report.cold+0x7f/0x11b thermal_cooling_device_stats_update+0x271/0x290 __thermal_cdev_update+0x15e/0x4e0 thermal_cdev_update+0x9f/0xe0 step_wise_throttle+0x770/0xee0 thermal_zone_device_update+0x3f6/0xdf0 process_one_work+0xa42/0x1770 worker_thread+0x62f/0x13e0 kthread+0x3ee/0x4e0 ret_from_fork+0x1f/0x30 Allocated by task 1: kasan_save_stack+0x1b/0x40 __kasan_kmalloc+0x7c/0x90 thermal_cooling_device_setup_sysfs+0x153/0x2c0 __thermal_cooling_device_register.part.0+0x25b/0x9c0 thermal_cooling_device_register+0xb3/0x100 mlxsw_thermal_init+0x5c5/0x7e0 __mlxsw_core_bus_device_register+0xcb3/0x19c0 mlxsw_core_bus_device_register+0x56/0xb0 mlxsw_pci_probe+0x54f/0x710 local_pci_probe+0xc6/0x170 pci_device_probe+0x2b2/0x4d0 really_probe+0x293/0xd10 __driver_probe_device+0x2af/0x440 driver_probe_device+0x51/0x1e0 __driver_attach+0x21b/0x530 bus_for_each_dev+0x14c/0x1d0 bus_add_driver+0x3ac/0x650 driver_register+0x241/0x3d0 mlxsw_sp_module_init+0xa2/0x174 do_one_initcall+0xee/0x5f0 kernel_init_freeable+0x45a/0x4de kernel_init+0x1f/0x210 ret_from_fork+0x1f/0x30 The buggy address belongs to the object at ffff8881052f7800 which belongs to the cache kmalloc-1k of size 1024 The buggy address is located 1016 bytes inside of 1024-byte region [ffff8881052f7800, ffff8881052f7c00) The buggy address belongs to the page: page:0000000052355272 refcount:1 mapcount:0 
mapping:0000000000000000 index:0x0 pfn:0x1052f0 head:0000000052355272 order:3 compound_mapcount:0 compound_pincount:0 flags: 0x200000000010200(slab|head|node=0|zone=2) raw: 0200000000010200 ffffea0005034800 0000000300000003 ffff888100041dc0 raw: 0000000000000000 0000000000100010 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8881052f7a80: 00 00 00 00 00 00 04 fc fc fc fc fc fc fc fc fc ffff8881052f7b00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff8881052f7b80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ^ ffff8881052f7c00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff8881052f7c80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [2] https://lore.kernel.org/linux-pm/9aca37cb-1629-5c67-1895-1fdc45c0244e@linaro.org/ [3] https://lore.kernel.org/linux-pm/af9857f2-578e-de3a-e62b-6baff7e69fd4@linaro.org/ CC: Daniel Lezcano Fixes: a50c1e35650b ("mlxsw: core: Implement thermal zone") Fixes: a421ce088ac8 ("mlxsw: core: Extend cooling device with cooling levels") Signed-off-by: Ido Schimmel Tested-by: Vadim Pasternak Link: https://lore.kernel.org/r/20211012174955.472928-1-idosch@idosch.org Signed-off-by: Jakub Kicinski --- .../ethernet/mellanox/mlxsw/core_thermal.c | 52 ++----------------- 1 file changed, 5 insertions(+), 47 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index 0998dcc9cac0..b29824448aa8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -24,16 +24,8 @@ #define MLXSW_THERMAL_ZONE_MAX_NAME 16 #define MLXSW_THERMAL_TEMP_SCORE_MAX GENMASK(31, 0) #define MLXSW_THERMAL_MAX_STATE 10 +#define MLXSW_THERMAL_MIN_STATE 2 #define MLXSW_THERMAL_MAX_DUTY 255 -/* Minimum and maximum fan allowed speed in percent: from 20% to 100%. 
Values - * MLXSW_THERMAL_MAX_STATE + x, where x is between 2 and 10 are used for - * setting fan speed dynamic minimum. For example, if value is set to 14 (40%) - * cooling levels vector will be set to 4, 4, 4, 4, 4, 5, 6, 7, 8, 9, 10 to - * introduce PWM speed in percent: 40, 40, 40, 40, 40, 50, 60. 70, 80, 90, 100. - */ -#define MLXSW_THERMAL_SPEED_MIN (MLXSW_THERMAL_MAX_STATE + 2) -#define MLXSW_THERMAL_SPEED_MAX (MLXSW_THERMAL_MAX_STATE * 2) -#define MLXSW_THERMAL_SPEED_MIN_LEVEL 2 /* 20% */ /* External cooling devices, allowed for binding to mlxsw thermal zones. */ static char * const mlxsw_thermal_external_allowed_cdev[] = { @@ -646,49 +638,16 @@ static int mlxsw_thermal_set_cur_state(struct thermal_cooling_device *cdev, struct mlxsw_thermal *thermal = cdev->devdata; struct device *dev = thermal->bus_info->dev; char mfsc_pl[MLXSW_REG_MFSC_LEN]; - unsigned long cur_state, i; int idx; - u8 duty; int err; + if (state > MLXSW_THERMAL_MAX_STATE) + return -EINVAL; + idx = mlxsw_get_cooling_device_idx(thermal, cdev); if (idx < 0) return idx; - /* Verify if this request is for changing allowed fan dynamical - * minimum. If it is - update cooling levels accordingly and update - * state, if current state is below the newly requested minimum state. - * For example, if current state is 5, and minimal state is to be - * changed from 4 to 6, thermal->cooling_levels[0 to 5] will be changed - * all from 4 to 6. And state 5 (thermal->cooling_levels[4]) should be - * overwritten. 
- */ - if (state >= MLXSW_THERMAL_SPEED_MIN && - state <= MLXSW_THERMAL_SPEED_MAX) { - state -= MLXSW_THERMAL_MAX_STATE; - for (i = 0; i <= MLXSW_THERMAL_MAX_STATE; i++) - thermal->cooling_levels[i] = max(state, i); - - mlxsw_reg_mfsc_pack(mfsc_pl, idx, 0); - err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsc), mfsc_pl); - if (err) - return err; - - duty = mlxsw_reg_mfsc_pwm_duty_cycle_get(mfsc_pl); - cur_state = mlxsw_duty_to_state(duty); - - /* If current fan state is lower than requested dynamical - * minimum, increase fan speed up to dynamical minimum. - */ - if (state < cur_state) - return 0; - - state = cur_state; - } - - if (state > MLXSW_THERMAL_MAX_STATE) - return -EINVAL; - /* Normalize the state to the valid speed range. */ state = thermal->cooling_levels[state]; mlxsw_reg_mfsc_pack(mfsc_pl, idx, mlxsw_state_to_duty(state)); @@ -998,8 +957,7 @@ int mlxsw_thermal_init(struct mlxsw_core *core, /* Initialize cooling levels per PWM state. */ for (i = 0; i < MLXSW_THERMAL_MAX_STATE; i++) - thermal->cooling_levels[i] = max(MLXSW_THERMAL_SPEED_MIN_LEVEL, - i); + thermal->cooling_levels[i] = max(MLXSW_THERMAL_MIN_STATE, i); thermal->polling_delay = bus_info->low_frequency ? MLXSW_THERMAL_SLOW_POLL_INT : From a2d859e3fc97e79d907761550dbc03ff1b36479c Mon Sep 17 00:00:00 2001 From: Eiichi Tsukata Date: Wed, 13 Oct 2021 17:27:29 -0300 Subject: [PATCH 233/235] sctp: account stream padding length for reconf chunk sctp_make_strreset_req() makes repeated calls to sctp_addto_chunk() which will automatically account for padding on each call. inreq and outreq are already 4 bytes aligned, but the payload is not and doing SCTP_PAD4(a + b) (which _sctp_make_chunk() did implicitly here) is different from SCTP_PAD4(a) + SCTP_PAD4(b) and not enough. It led to possible attempt to use more buffer than it was allocated and triggered a BUG_ON. 
Cc: Vlad Yasevich Cc: Neil Horman Cc: Greg KH Fixes: cc16f00f6529 ("sctp: add support for generating stream reconf ssn reset request chunk") Reported-by: Eiichi Tsukata Signed-off-by: Eiichi Tsukata Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: Marcelo Ricardo Leitner Reviewed-by: Xin Long Link: https://lore.kernel.org/r/b97c1f8b0c7ff79ac4ed206fc2c49d3612e0850c.1634156849.git.mleitner@redhat.com Signed-off-by: Jakub Kicinski --- net/sctp/sm_make_chunk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index b8fa8f1a7277..c7503fd64915 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -3697,7 +3697,7 @@ struct sctp_chunk *sctp_make_strreset_req( outlen = (sizeof(outreq) + stream_len) * out; inlen = (sizeof(inreq) + stream_len) * in; - retval = sctp_make_reconf(asoc, outlen + inlen); + retval = sctp_make_reconf(asoc, SCTP_PAD4(outlen) + SCTP_PAD4(inlen)); if (!retval) return NULL; From ea142b09a6399fb2c35c07198e3f0e2423273540 Mon Sep 17 00:00:00 2001 From: Cai Huoqing Date: Thu, 14 Oct 2021 19:02:14 +0800 Subject: [PATCH 234/235] MAINTAINERS: Update the devicetree documentation path of imx fec driver Change the devicetree documentation path to "Documentation/devicetree/bindings/net/fsl,fec.yaml" since 'fsl-fec.txt' has been converted to 'fsl,fec.yaml' already. 
Signed-off-by: Cai Huoqing Link: https://lore.kernel.org/r/20211014110214.3254-1-caihuoqing@baidu.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 17f652b2f653..c4f2f3382e26 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7440,7 +7440,7 @@ FREESCALE IMX / MXC FEC DRIVER M: Joakim Zhang L: netdev@vger.kernel.org S: Maintained -F: Documentation/devicetree/bindings/net/fsl-fec.txt +F: Documentation/devicetree/bindings/net/fsl,fec.yaml F: drivers/net/ethernet/freescale/fec.h F: drivers/net/ethernet/freescale/fec_main.c F: drivers/net/ethernet/freescale/fec_ptp.c From 1fcd794518b7644169595c66b1bfe726d1f498ab Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 14 Oct 2021 05:50:50 -0400 Subject: [PATCH 235/235] icmp: fix icmp_ext_echo_iio parsing in icmp_build_probe In icmp_build_probe(), the icmp_ext_echo_iio parsing should be done step by step and skb_header_pointer() return value should always be checked. This patch fixes 3 places in there: - On case ICMP_EXT_ECHO_CTYPE_NAME, it should only copy ident.name from skb by skb_header_pointer(), its len is ident_len. Besides, the return value of skb_header_pointer() should always be checked. - On case ICMP_EXT_ECHO_CTYPE_INDEX, move ident_len check ahead of skb_header_pointer(), and also do the return value check for skb_header_pointer(). - On case ICMP_EXT_ECHO_CTYPE_ADDR, before accessing iio->ident.addr.ctype3_hdr.addrlen, skb_header_pointer() should be called first, then check its return value and ident_len. On subcases ICMP_AFI_IP and ICMP_AFI_IP6, also do check for ident.addr.ctype3_hdr.addrlen and skb_header_pointer()'s return value. On subcase ICMP_AFI_IP, the len for skb_header_pointer() should be "sizeof(iio->extobj_hdr) + sizeof(iio->ident.addr.ctype3_hdr) + sizeof(struct in_addr)" or "ident_len". v1->v2: - To make it clearer, call skb_header_pointer() only once for iio->ident's parsing as Jakub suggested.
v2->v3: - The extobj_hdr.length check against sizeof(_iio) should be done before calling skb_header_pointer(), as Eric noticed. Fixes: d329ea5bd884 ("icmp: add response to RFC 8335 PROBE messages") Reported-by: Dan Carpenter Signed-off-by: Xin Long Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/31628dd76657ea62f5cf78bb55da6b35240831f1.1634205050.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/icmp.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 8b30cadff708..b7e277d8a84d 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1054,14 +1054,19 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr) iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(iio->extobj_hdr), &_iio); if (!ext_hdr || !iio) goto send_mal_query; - if (ntohs(iio->extobj_hdr.length) <= sizeof(iio->extobj_hdr)) + if (ntohs(iio->extobj_hdr.length) <= sizeof(iio->extobj_hdr) || + ntohs(iio->extobj_hdr.length) > sizeof(_iio)) goto send_mal_query; ident_len = ntohs(iio->extobj_hdr.length) - sizeof(iio->extobj_hdr); + iio = skb_header_pointer(skb, sizeof(_ext_hdr), + sizeof(iio->extobj_hdr) + ident_len, &_iio); + if (!iio) + goto send_mal_query; + status = 0; dev = NULL; switch (iio->extobj_hdr.class_type) { case ICMP_EXT_ECHO_CTYPE_NAME: - iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(_iio), &_iio); if (ident_len >= IFNAMSIZ) goto send_mal_query; memset(buff, 0, sizeof(buff)); @@ -1069,30 +1074,24 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr) dev = dev_get_by_name(net, buff); break; case ICMP_EXT_ECHO_CTYPE_INDEX: - iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(iio->extobj_hdr) + - sizeof(iio->ident.ifindex), &_iio); if (ident_len != sizeof(iio->ident.ifindex)) goto send_mal_query; dev = dev_get_by_index(net, ntohl(iio->ident.ifindex)); break; case ICMP_EXT_ECHO_CTYPE_ADDR: - if (ident_len != sizeof(iio->ident.addr.ctype3_hdr) 
+ + if (ident_len < sizeof(iio->ident.addr.ctype3_hdr) || + ident_len != sizeof(iio->ident.addr.ctype3_hdr) + iio->ident.addr.ctype3_hdr.addrlen) goto send_mal_query; switch (ntohs(iio->ident.addr.ctype3_hdr.afi)) { case ICMP_AFI_IP: - iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(iio->extobj_hdr) + - sizeof(struct in_addr), &_iio); - if (ident_len != sizeof(iio->ident.addr.ctype3_hdr) + - sizeof(struct in_addr)) + if (iio->ident.addr.ctype3_hdr.addrlen != sizeof(struct in_addr)) goto send_mal_query; dev = ip_dev_find(net, iio->ident.addr.ip_addr.ipv4_addr); break; #if IS_ENABLED(CONFIG_IPV6) case ICMP_AFI_IP6: - iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(_iio), &_iio); - if (ident_len != sizeof(iio->ident.addr.ctype3_hdr) + - sizeof(struct in6_addr)) + if (iio->ident.addr.ctype3_hdr.addrlen != sizeof(struct in6_addr)) goto send_mal_query; dev = ipv6_stub->ipv6_dev_find(net, &iio->ident.addr.ip_addr.ipv6_addr, dev); dev_hold(dev);