From 9c426b770bd088f18899f836093d810a83b59b98 Mon Sep 17 00:00:00 2001 From: Talel Shenhar Date: Mon, 9 Sep 2019 11:39:18 +0300 Subject: [PATCH 001/572] irqchip/al-fic: Add support for irq retrigger Introduce interrupts retrigger support for Amazon's Annapurna Labs Fabric Interrupt Controller. Signed-off-by: Talel Shenhar Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/1568018358-18985-1-git-send-email-talel@amazon.com --- drivers/irqchip/irq-al-fic.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/irqchip/irq-al-fic.c b/drivers/irqchip/irq-al-fic.c index 1a57cee3efab..0b0a73739756 100644 --- a/drivers/irqchip/irq-al-fic.c +++ b/drivers/irqchip/irq-al-fic.c @@ -15,6 +15,7 @@ /* FIC Registers */ #define AL_FIC_CAUSE 0x00 +#define AL_FIC_SET_CAUSE 0x08 #define AL_FIC_MASK 0x10 #define AL_FIC_CONTROL 0x28 @@ -126,6 +127,16 @@ static void al_fic_irq_handler(struct irq_desc *desc) chained_irq_exit(irqchip, desc); } +static int al_fic_irq_retrigger(struct irq_data *data) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data); + struct al_fic *fic = gc->private; + + writel_relaxed(BIT(data->hwirq), fic->base + AL_FIC_SET_CAUSE); + + return 1; +} + static int al_fic_register(struct device_node *node, struct al_fic *fic) { @@ -159,6 +170,7 @@ static int al_fic_register(struct device_node *node, gc->chip_types->chip.irq_unmask = irq_gc_mask_clr_bit; gc->chip_types->chip.irq_ack = irq_gc_ack_clr_bit; gc->chip_types->chip.irq_set_type = al_fic_irq_set_type; + gc->chip_types->chip.irq_retrigger = al_fic_irq_retrigger; gc->chip_types->chip.flags = IRQCHIP_SKIP_SET_WAKE; gc->private = fic; From 212fbf2c9e84ceb267cadd8342156b69b54b8135 Mon Sep 17 00:00:00 2001 From: Sandeep Sheriker Mallikarjun Date: Mon, 9 Sep 2019 14:00:35 +0300 Subject: [PATCH 002/572] irqchip/atmel-aic5: Add support for sam9x60 irqchip Add support for SAM9X60 irqchip. Signed-off-by: Sandeep Sheriker Mallikarjun Signed-off-by: Claudiu Beznea Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/1568026835-6646-1-git-send-email-claudiu.beznea@microchip.com [claudiu.beznea@microchip.com: update aic5_irq_fixups[], update documentation] --- .../bindings/interrupt-controller/atmel,aic.txt | 7 +++++-- drivers/irqchip/irq-atmel-aic5.c | 10 ++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/interrupt-controller/atmel,aic.txt b/Documentation/devicetree/bindings/interrupt-controller/atmel,aic.txt index f4c5d34c4111..7079d44bf3ba 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/atmel,aic.txt +++ b/Documentation/devicetree/bindings/interrupt-controller/atmel,aic.txt @@ -1,8 +1,11 @@ * Advanced Interrupt Controller (AIC) Required properties: -- compatible: Should be "atmel,-aic" - can be "at91rm9200", "sama5d2", "sama5d3" or "sama5d4" +- compatible: Should be: + - "atmel,-aic" where can be "at91rm9200", "sama5d2", + "sama5d3" or "sama5d4" + - "microchip,-aic" where can be "sam9x60" + - interrupt-controller: Identifies the node as an interrupt controller. - #interrupt-cells: The number of cells to define the interrupts. It should be 3. The first cell is the IRQ number (aka "Peripheral IDentifier" on datasheet). diff --git a/drivers/irqchip/irq-atmel-aic5.c b/drivers/irqchip/irq-atmel-aic5.c index 6acad2ea0fb3..29333497ba10 100644 --- a/drivers/irqchip/irq-atmel-aic5.c +++ b/drivers/irqchip/irq-atmel-aic5.c @@ -313,6 +313,7 @@ static void __init sama5d3_aic_irq_fixup(void) static const struct of_device_id aic5_irq_fixups[] __initconst = { { .compatible = "atmel,sama5d3", .data = sama5d3_aic_irq_fixup }, { .compatible = "atmel,sama5d4", .data = sama5d3_aic_irq_fixup }, + { .compatible = "microchip,sam9x60", .data = sama5d3_aic_irq_fixup }, { /* sentinel */ }, }; @@ -390,3 +391,12 @@ static int __init sama5d4_aic5_of_init(struct device_node *node, return aic5_of_init(node, parent, NR_SAMA5D4_IRQS); } IRQCHIP_DECLARE(sama5d4_aic5, "atmel,sama5d4-aic", sama5d4_aic5_of_init); + +#define NR_SAM9X60_IRQS 50 + +static int __init sam9x60_aic5_of_init(struct device_node *node, + struct device_node *parent) +{ + return aic5_of_init(node, parent, NR_SAM9X60_IRQS); +} +IRQCHIP_DECLARE(sam9x60_aic5, "microchip,sam9x60-aic", sam9x60_aic5_of_init); From 131cb1210d4b58acb0695707dad2eb90dcb50a2a Mon Sep 17 00:00:00 2001 From: Marco Felsch Date: Tue, 17 Sep 2019 17:40:20 +0200 Subject: [PATCH 003/572] regulator: of: fix suspend-min/max-voltage parsing Currently the regulator-suspend-min/max-microvolt must be within the root regulator node but the dt-bindings specifies it as subnode properties for the regulator-state-[mem/disk/standby] node. The only DT using this bindings currently is the at91-sama5d2_xplained.dts and this DT uses it correctly. I don't know if it isn't tested but it can't work without this fix. Fixes: f7efad10b5c4 ("regulator: add PM suspend and resume hooks") Signed-off-by: Marco Felsch Link: https://lore.kernel.org/r/20190917154021.14693-3-m.felsch@pengutronix.de Signed-off-by: Mark Brown --- drivers/regulator/of_regulator.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/regulator/of_regulator.c b/drivers/regulator/of_regulator.c index 9112faa6a9a0..38dd06fbab38 100644 --- a/drivers/regulator/of_regulator.c +++ b/drivers/regulator/of_regulator.c @@ -231,12 +231,12 @@ static int of_get_regulation_constraints(struct device *dev, "regulator-off-in-suspend")) suspend_state->enabled = DISABLE_IN_SUSPEND; - if (!of_property_read_u32(np, "regulator-suspend-min-microvolt", - &pval)) + if (!of_property_read_u32(suspend_np, + "regulator-suspend-min-microvolt", &pval)) suspend_state->min_uV = pval; - if (!of_property_read_u32(np, "regulator-suspend-max-microvolt", - &pval)) + if (!of_property_read_u32(suspend_np, + "regulator-suspend-max-microvolt", &pval)) suspend_state->max_uV = pval; if (!of_property_read_u32(suspend_np, From f8970d341eec73c976a3462b9ecdb02b60b84dd6 Mon Sep 17 00:00:00 2001 From: Marco Felsch Date: Tue, 17 Sep 2019 17:40:21 +0200 Subject: [PATCH 004/572] regulator: core: make regulator_register() EPROBE_DEFER aware Sometimes it can happen that the regulator_of_get_init_data() can't retrieve the config due to a not probed device the regulator depends on. Fix that by checking the return value of of_parse_cb() and return EPROBE_DEFER in such cases. Signed-off-by: Marco Felsch Link: https://lore.kernel.org/r/20190917154021.14693-4-m.felsch@pengutronix.de Signed-off-by: Mark Brown --- drivers/regulator/core.c | 13 +++++++++++++ drivers/regulator/of_regulator.c | 19 ++++++++++++++----- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index afe94470b67f..a46be221dbdc 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -5053,6 +5053,19 @@ regulator_register(const struct regulator_desc *regulator_desc, init_data = regulator_of_get_init_data(dev, regulator_desc, config, &rdev->dev.of_node); + + /* + * Sometimes not all resources are probed already so we need to take + * that into account. This happens most the time if the ena_gpiod comes + * from a gpio extender or something else. + */ + if (PTR_ERR(init_data) == -EPROBE_DEFER) { + kfree(config); + kfree(rdev); + ret = -EPROBE_DEFER; + goto rinse; + } + /* * We need to keep track of any GPIO descriptor coming from the * device tree until we have handled it over to the core. If the diff --git a/drivers/regulator/of_regulator.c b/drivers/regulator/of_regulator.c index 38dd06fbab38..ef7198b76e50 100644 --- a/drivers/regulator/of_regulator.c +++ b/drivers/regulator/of_regulator.c @@ -445,11 +445,20 @@ struct regulator_init_data *regulator_of_get_init_data(struct device *dev, goto error; } - if (desc->of_parse_cb && desc->of_parse_cb(child, desc, config)) { - dev_err(dev, - "driver callback failed to parse DT for regulator %pOFn\n", - child); - goto error; + if (desc->of_parse_cb) { + int ret; + + ret = desc->of_parse_cb(child, desc, config); + if (ret) { + if (ret == -EPROBE_DEFER) { + of_node_put(child); + return ERR_PTR(-EPROBE_DEFER); + } + dev_err(dev, + "driver callback failed to parse DT for regulator %pOFn\n", + child); + goto error; + } } *node = child; From c107d613f9204ff9c7624c229938153d7492c56e Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Wed, 18 Sep 2019 06:57:30 +0000 Subject: [PATCH 005/572] irqchip/gic-v3: Fix GIC_LINE_NR accessor As per GIC spec, ITLinesNumber indicates the maximum SPI INTID that the GIC implementation supports. And the maximum SPI INTID an implementation might support is 1019 (field value 11111). max(GICD_TYPER_SPIS(...), 1020) is not what we actually want for GIC_LINE_NR. Fix it to min(GICD_TYPER_SPIS(...), 1020). Signed-off-by: Zenghui Yu Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/1568789850-14080-1-git-send-email-yuzenghui@huawei.com --- drivers/irqchip/irq-gic-v3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 422664ac5f53..1edc99335a94 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -59,7 +59,7 @@ static struct gic_chip_data gic_data __read_mostly; static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key); #define GIC_ID_NR (1U << GICD_TYPER_ID_BITS(gic_data.rdists.gicd_typer)) -#define GIC_LINE_NR max(GICD_TYPER_SPIS(gic_data.rdists.gicd_typer), 1020U) +#define GIC_LINE_NR min(GICD_TYPER_SPIS(gic_data.rdists.gicd_typer), 1020U) #define GIC_ESPI_NR GICD_TYPER_ESPIS(gic_data.rdists.gicd_typer) /* From bb0fed1c60cccbe4063b455a7228818395dac86e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 15 Sep 2019 15:17:45 +0100 Subject: [PATCH 006/572] irqchip/sifive-plic: Switch to fasteoi flow The SiFive PLIC interrupt controller seems to have all the HW features to support the fasteoi flow, but the driver seems to be stuck in a distant past. Bring it into the 21st century. Signed-off-by: Marc Zyngier Tested-by: Palmer Dabbelt (QEMU Boot) Tested-by: Darius Rad (on 2 HW PLIC implementations) Tested-by: Paul Walmsley (HiFive Unleashed) Reviewed-by: Palmer Dabbelt Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/8636gxskmj.wl-maz@kernel.org --- drivers/irqchip/irq-sifive-plic.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c index cf755964f2f8..3e51deedbcc8 100644 --- a/drivers/irqchip/irq-sifive-plic.c +++ b/drivers/irqchip/irq-sifive-plic.c @@ -97,7 +97,7 @@ static inline void plic_irq_toggle(const struct cpumask *mask, } } -static void plic_irq_enable(struct irq_data *d) +static void plic_irq_unmask(struct irq_data *d) { unsigned int cpu = cpumask_any_and(irq_data_get_affinity_mask(d), cpu_online_mask); @@ -106,7 +106,7 @@ static void plic_irq_enable(struct irq_data *d) plic_irq_toggle(cpumask_of(cpu), d->hwirq, 1); } -static void plic_irq_disable(struct irq_data *d) +static void plic_irq_mask(struct irq_data *d) { plic_irq_toggle(cpu_possible_mask, d->hwirq, 0); } @@ -125,10 +125,8 @@ static int plic_set_affinity(struct irq_data *d, if (cpu >= nr_cpu_ids) return -EINVAL; - if (!irqd_irq_disabled(d)) { - plic_irq_toggle(cpu_possible_mask, d->hwirq, 0); - plic_irq_toggle(cpumask_of(cpu), d->hwirq, 1); - } + plic_irq_toggle(cpu_possible_mask, d->hwirq, 0); + plic_irq_toggle(cpumask_of(cpu), d->hwirq, 1); irq_data_update_effective_affinity(d, cpumask_of(cpu)); @@ -136,14 +134,18 @@ static int plic_set_affinity(struct irq_data *d, } #endif +static void plic_irq_eoi(struct irq_data *d) +{ + struct plic_handler *handler = this_cpu_ptr(&plic_handlers); + + writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM); +} + static struct irq_chip plic_chip = { .name = "SiFive PLIC", - /* - * There is no need to mask/unmask PLIC interrupts. They are "masked" - * by reading claim and "unmasked" when writing it back. - */ - .irq_enable = plic_irq_enable, - .irq_disable = plic_irq_disable, + .irq_mask = plic_irq_mask, + .irq_unmask = plic_irq_unmask, + .irq_eoi = plic_irq_eoi, #ifdef CONFIG_SMP .irq_set_affinity = plic_set_affinity, #endif @@ -152,7 +154,7 @@ static struct irq_chip plic_chip = { static int plic_irqdomain_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hwirq) { - irq_set_chip_and_handler(irq, &plic_chip, handle_simple_irq); + irq_set_chip_and_handler(irq, &plic_chip, handle_fasteoi_irq); irq_set_chip_data(irq, NULL); irq_set_noprobe(irq); return 0; @@ -188,7 +190,6 @@ static void plic_handle_irq(struct pt_regs *regs) hwirq); else generic_handle_irq(irq); - writel(hwirq, claim); } csr_set(sie, SIE_SEIE); } From fb629fa2587d0c150792d87e3053664bfc8dc78c Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Fri, 20 Sep 2019 15:02:11 +0200 Subject: [PATCH 007/572] ASoC: samsung: arndale: Add missing OF node dereferencing Ensure there is no OF node references kept when the driver is removed/unbound. Reviewed-by: Charles Keepax Signed-off-by: Sylwester Nawrocki Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20190920130218.32690-3-s.nawrocki@samsung.com Signed-off-by: Mark Brown --- sound/soc/samsung/arndale_rt5631.c | 34 ++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/sound/soc/samsung/arndale_rt5631.c b/sound/soc/samsung/arndale_rt5631.c index c213913eb984..fd8c6642fb0d 100644 --- a/sound/soc/samsung/arndale_rt5631.c +++ b/sound/soc/samsung/arndale_rt5631.c @@ -5,6 +5,7 @@ // Author: Claude #include +#include #include #include @@ -74,6 +75,17 @@ static struct snd_soc_card arndale_rt5631 = { .num_links = ARRAY_SIZE(arndale_rt5631_dai), }; +static void arndale_put_of_nodes(struct snd_soc_card *card) +{ + struct snd_soc_dai_link *dai_link; + int i; + + for_each_card_prelinks(card, i, dai_link) { + of_node_put(dai_link->cpus->of_node); + of_node_put(dai_link->codecs->of_node); + } +} + static int arndale_audio_probe(struct platform_device *pdev) { int n, ret; @@ -103,18 +115,31 @@ static int arndale_audio_probe(struct platform_device *pdev) if (!arndale_rt5631_dai[0].codecs->of_node) { dev_err(&pdev->dev, "Property 'samsung,audio-codec' missing or invalid\n"); - return -EINVAL; + ret = -EINVAL; + goto err_put_of_nodes; } } ret = devm_snd_soc_register_card(card->dev, card); + if (ret) { + dev_err(&pdev->dev, "snd_soc_register_card() failed: %d\n", ret); + goto err_put_of_nodes; + } + return 0; - if (ret) - dev_err(&pdev->dev, "snd_soc_register_card() failed:%d\n", ret); - +err_put_of_nodes: + arndale_put_of_nodes(card); return ret; } +static int arndale_audio_remove(struct platform_device *pdev) +{ + struct snd_soc_card *card = platform_get_drvdata(pdev); + + arndale_put_of_nodes(card); + return 0; +} + static const struct of_device_id samsung_arndale_rt5631_of_match[] __maybe_unused = { { .compatible = "samsung,arndale-rt5631", }, { .compatible = "samsung,arndale-alc5631", }, @@ -129,6 +154,7 @@ static struct platform_driver arndale_audio_driver = { .of_match_table = of_match_ptr(samsung_arndale_rt5631_of_match), }, .probe = arndale_audio_probe, + .remove = arndale_audio_remove, }; module_platform_driver(arndale_audio_driver); From ca2347190adb5e4eece73a2b16e96e651c46246b Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Fri, 20 Sep 2019 15:02:10 +0200 Subject: [PATCH 008/572] ASoC: wm8994: Do not register inapplicable controls for WM1811 In case of WM1811 device there are currently being registered controls referring to registers not existing on that device. It has been noticed when getting values of "AIF1ADC2 Volume", "AIF1DAC2 Volume" controls was failing during ALSA state restoring at boot time: "amixer: Mixer hw:0 load error: Device or resource busy" Reading some registers through I2C was failing with EBUSY error and indeed these registers were not available according to the datasheet. To fix this controls not available on WM1811 are moved to a separate array and registered only for WM8994 and WM8958. There are some further differences between WM8994 and WM1811, e.g. registers 603h, 604h, 605h, which are not covered in this patch. Acked-by: Charles Keepax Acked-by: Krzysztof Kozlowski Signed-off-by: Sylwester Nawrocki Link: https://lore.kernel.org/r/20190920130218.32690-2-s.nawrocki@samsung.com Signed-off-by: Mark Brown --- sound/soc/codecs/wm8994.c | 43 +++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c index c3d06e8bc54f..d5fb7f5dd551 100644 --- a/sound/soc/codecs/wm8994.c +++ b/sound/soc/codecs/wm8994.c @@ -533,13 +533,10 @@ static SOC_ENUM_SINGLE_DECL(dac_osr, static SOC_ENUM_SINGLE_DECL(adc_osr, WM8994_OVERSAMPLING, 1, osr_text); -static const struct snd_kcontrol_new wm8994_snd_controls[] = { +static const struct snd_kcontrol_new wm8994_common_snd_controls[] = { SOC_DOUBLE_R_TLV("AIF1ADC1 Volume", WM8994_AIF1_ADC1_LEFT_VOLUME, WM8994_AIF1_ADC1_RIGHT_VOLUME, 1, 119, 0, digital_tlv), -SOC_DOUBLE_R_TLV("AIF1ADC2 Volume", WM8994_AIF1_ADC2_LEFT_VOLUME, - WM8994_AIF1_ADC2_RIGHT_VOLUME, - 1, 119, 0, digital_tlv), SOC_DOUBLE_R_TLV("AIF2ADC Volume", WM8994_AIF2_ADC_LEFT_VOLUME, WM8994_AIF2_ADC_RIGHT_VOLUME, 1, 119, 0, digital_tlv), @@ -556,8 +553,6 @@ SOC_ENUM("AIF2DACR Source", aif2dacr_src), SOC_DOUBLE_R_TLV("AIF1DAC1 Volume", WM8994_AIF1_DAC1_LEFT_VOLUME, WM8994_AIF1_DAC1_RIGHT_VOLUME, 1, 96, 0, digital_tlv), -SOC_DOUBLE_R_TLV("AIF1DAC2 Volume", WM8994_AIF1_DAC2_LEFT_VOLUME, - WM8994_AIF1_DAC2_RIGHT_VOLUME, 1, 96, 0, digital_tlv), SOC_DOUBLE_R_TLV("AIF2DAC Volume", WM8994_AIF2_DAC_LEFT_VOLUME, WM8994_AIF2_DAC_RIGHT_VOLUME, 1, 96, 0, digital_tlv), @@ -565,17 +560,12 @@ SOC_SINGLE_TLV("AIF1 Boost Volume", WM8994_AIF1_CONTROL_2, 10, 3, 0, aif_tlv), SOC_SINGLE_TLV("AIF2 Boost Volume", WM8994_AIF2_CONTROL_2, 10, 3, 0, aif_tlv), SOC_SINGLE("AIF1DAC1 EQ Switch", WM8994_AIF1_DAC1_EQ_GAINS_1, 0, 1, 0), -SOC_SINGLE("AIF1DAC2 EQ Switch", WM8994_AIF1_DAC2_EQ_GAINS_1, 0, 1, 0), SOC_SINGLE("AIF2 EQ Switch", WM8994_AIF2_EQ_GAINS_1, 0, 1, 0), WM8994_DRC_SWITCH("AIF1DAC1 DRC Switch", WM8994_AIF1_DRC1_1, 2), WM8994_DRC_SWITCH("AIF1ADC1L DRC Switch", WM8994_AIF1_DRC1_1, 1), WM8994_DRC_SWITCH("AIF1ADC1R DRC Switch", WM8994_AIF1_DRC1_1, 0), -WM8994_DRC_SWITCH("AIF1DAC2 DRC Switch", WM8994_AIF1_DRC2_1, 2), -WM8994_DRC_SWITCH("AIF1ADC2L DRC Switch", WM8994_AIF1_DRC2_1, 1), -WM8994_DRC_SWITCH("AIF1ADC2R DRC Switch", WM8994_AIF1_DRC2_1, 0), - WM8994_DRC_SWITCH("AIF2DAC DRC Switch", WM8994_AIF2_DRC_1, 2), WM8994_DRC_SWITCH("AIF2ADCL DRC Switch", WM8994_AIF2_DRC_1, 1), WM8994_DRC_SWITCH("AIF2ADCR DRC Switch", WM8994_AIF2_DRC_1, 0), @@ -594,9 +584,6 @@ SOC_SINGLE("Sidetone HPF Switch", WM8994_SIDETONE, 6, 1, 0), SOC_ENUM("AIF1ADC1 HPF Mode", aif1adc1_hpf), SOC_DOUBLE("AIF1ADC1 HPF Switch", WM8994_AIF1_ADC1_FILTERS, 12, 11, 1, 0), -SOC_ENUM("AIF1ADC2 HPF Mode", aif1adc2_hpf), -SOC_DOUBLE("AIF1ADC2 HPF Switch", WM8994_AIF1_ADC2_FILTERS, 12, 11, 1, 0), - SOC_ENUM("AIF2ADC HPF Mode", aif2adc_hpf), SOC_DOUBLE("AIF2ADC HPF Switch", WM8994_AIF2_ADC_FILTERS, 12, 11, 1, 0), @@ -637,6 +624,24 @@ SOC_SINGLE("AIF2DAC 3D Stereo Switch", WM8994_AIF2_DAC_FILTERS_2, 8, 1, 0), }; +/* Controls not available on WM1811 */ +static const struct snd_kcontrol_new wm8994_snd_controls[] = { +SOC_DOUBLE_R_TLV("AIF1ADC2 Volume", WM8994_AIF1_ADC2_LEFT_VOLUME, + WM8994_AIF1_ADC2_RIGHT_VOLUME, + 1, 119, 0, digital_tlv), +SOC_DOUBLE_R_TLV("AIF1DAC2 Volume", WM8994_AIF1_DAC2_LEFT_VOLUME, + WM8994_AIF1_DAC2_RIGHT_VOLUME, 1, 96, 0, digital_tlv), + +SOC_SINGLE("AIF1DAC2 EQ Switch", WM8994_AIF1_DAC2_EQ_GAINS_1, 0, 1, 0), + +WM8994_DRC_SWITCH("AIF1DAC2 DRC Switch", WM8994_AIF1_DRC2_1, 2), +WM8994_DRC_SWITCH("AIF1ADC2L DRC Switch", WM8994_AIF1_DRC2_1, 1), +WM8994_DRC_SWITCH("AIF1ADC2R DRC Switch", WM8994_AIF1_DRC2_1, 0), + +SOC_ENUM("AIF1ADC2 HPF Mode", aif1adc2_hpf), +SOC_DOUBLE("AIF1ADC2 HPF Switch", WM8994_AIF1_ADC2_FILTERS, 12, 11, 1, 0), +}; + static const struct snd_kcontrol_new wm8994_eq_controls[] = { SOC_SINGLE_TLV("AIF1DAC1 EQ1 Volume", WM8994_AIF1_DAC1_EQ_GAINS_1, 11, 31, 0, eq_tlv), @@ -4258,13 +4263,15 @@ static int wm8994_component_probe(struct snd_soc_component *component) wm8994_handle_pdata(wm8994); wm_hubs_add_analogue_controls(component); - snd_soc_add_component_controls(component, wm8994_snd_controls, - ARRAY_SIZE(wm8994_snd_controls)); + snd_soc_add_component_controls(component, wm8994_common_snd_controls, + ARRAY_SIZE(wm8994_common_snd_controls)); snd_soc_dapm_new_controls(dapm, wm8994_dapm_widgets, ARRAY_SIZE(wm8994_dapm_widgets)); switch (control->type) { case WM8994: + snd_soc_add_component_controls(component, wm8994_snd_controls, + ARRAY_SIZE(wm8994_snd_controls)); snd_soc_dapm_new_controls(dapm, wm8994_specific_dapm_widgets, ARRAY_SIZE(wm8994_specific_dapm_widgets)); if (control->revision < 4) { @@ -4284,8 +4291,10 @@ static int wm8994_component_probe(struct snd_soc_component *component) } break; case WM8958: + snd_soc_add_component_controls(component, wm8994_snd_controls, + ARRAY_SIZE(wm8994_snd_controls)); snd_soc_add_component_controls(component, wm8958_snd_controls, - ARRAY_SIZE(wm8958_snd_controls)); + ARRAY_SIZE(wm8958_snd_controls)); snd_soc_dapm_new_controls(dapm, wm8958_dapm_widgets, ARRAY_SIZE(wm8958_dapm_widgets)); if (control->revision < 1) { From 901e822b2e365dac4727e0ddffb444a2554b0a89 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 23 Sep 2019 17:22:57 +0300 Subject: [PATCH 009/572] ASoC: soc-component: fix a couple missing error assignments There were a couple places where the return value wasn't assigned so the error handling wouldn't trigger. Fixes: 5c0769af4caf ("ASoC: soc-dai: add snd_soc_dai_bespoke_trigger()") Fixes: 95aef3553384 ("ASoC: soc-dai: add snd_soc_dai_trigger()") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20190923142257.GB31251@mwanda Signed-off-by: Mark Brown --- sound/soc/soc-pcm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index e163dde5eab1..a1b99ac57d9e 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -1070,7 +1070,7 @@ static int soc_pcm_trigger(struct snd_pcm_substream *substream, int cmd) return ret; } - snd_soc_dai_trigger(cpu_dai, substream, cmd); + ret = snd_soc_dai_trigger(cpu_dai, substream, cmd); if (ret < 0) return ret; @@ -1097,7 +1097,7 @@ static int soc_pcm_bespoke_trigger(struct snd_pcm_substream *substream, return ret; } - snd_soc_dai_bespoke_trigger(cpu_dai, substream, cmd); + ret = snd_soc_dai_bespoke_trigger(cpu_dai, substream, cmd); if (ret < 0) return ret; From 1d6db22ff7d67a17c571543c69c63b1d261249b0 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sun, 22 Sep 2019 10:29:28 +0800 Subject: [PATCH 010/572] regulator: fixed: Prevent NULL pointer dereference when !CONFIG_OF Use of_device_get_match_data which has NULL test for match before dereference match->data. Add NULL test for drvtype so it still works for fixed_voltage_ops when !CONFIG_OF. Signed-off-by: Axel Lin Reviewed-by: Philippe Schenker Link: https://lore.kernel.org/r/20190922022928.28355-1-axel.lin@ingics.com Signed-off-by: Mark Brown --- drivers/regulator/fixed.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/regulator/fixed.c b/drivers/regulator/fixed.c index d90a6fd8cbc7..f81533070058 100644 --- a/drivers/regulator/fixed.c +++ b/drivers/regulator/fixed.c @@ -144,8 +144,7 @@ static int reg_fixed_voltage_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct fixed_voltage_config *config; struct fixed_voltage_data *drvdata; - const struct fixed_dev_type *drvtype = - of_match_device(dev->driver->of_match_table, dev)->data; + const struct fixed_dev_type *drvtype = of_device_get_match_data(dev); struct regulator_config cfg = { }; enum gpiod_flags gflags; int ret; @@ -177,7 +176,7 @@ static int reg_fixed_voltage_probe(struct platform_device *pdev) drvdata->desc.type = REGULATOR_VOLTAGE; drvdata->desc.owner = THIS_MODULE; - if (drvtype->has_enable_clock) { + if (drvtype && drvtype->has_enable_clock) { drvdata->desc.ops = &fixed_voltage_clkenabled_ops; drvdata->enable_clock = devm_clk_get(dev, NULL); From 58283636a5a03e64ad5d03bd282e3b66dcfa2c49 Mon Sep 17 00:00:00 2001 From: Philippe Schenker Date: Mon, 23 Sep 2019 08:18:49 +0000 Subject: [PATCH 011/572] dt-bindings: fixed-regulator: fix compatible enum Remove 'const:' in the compatible enum. This was breaking make dt_binding_check since it has more than one compatible string. Fixes: 9c86d003d620 ("dt-bindings: regulator: add regulator-fixed-clock binding") Signed-off-by: Philippe Schenker Link: https://lore.kernel.org/r/20190923081840.23391-1-philippe.schenker@toradex.com Signed-off-by: Mark Brown --- .../devicetree/bindings/regulator/fixed-regulator.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml b/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml index a78150c47aa2..f32416968197 100644 --- a/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml +++ b/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml @@ -30,8 +30,8 @@ if: properties: compatible: enum: - - const: regulator-fixed - - const: regulator-fixed-clock + - regulator-fixed + - regulator-fixed-clock regulator-name: true From a72865f057820ea9f57597915da4b651d65eb92f Mon Sep 17 00:00:00 2001 From: Marco Felsch Date: Tue, 17 Sep 2019 14:42:42 +0200 Subject: [PATCH 012/572] regulator: da9062: fix suspend_enable/disable preparation Currently the suspend reg_field maps to the pmic voltage selection bits and is used during suspend_enabe/disable() and during get_mode(). This seems to be wrong for both use cases. Use case one (suspend_enabe/disable): Those callbacks are used to mark a regulator device as enabled/disabled during suspend. Marking the regulator enabled during suspend is done by the LDOx_CONF/BUCKx_CONF bit within the LDOx_CONT/BUCKx_CONT registers. Setting this bit tells the DA9062 PMIC state machine to keep the regulator on in POWERDOWN mode and switch to suspend voltage. Use case two (get_mode): The get_mode callback is used to retrieve the active mode state. Since the regulator-setting-A is used for the active state and regulator-setting-B for the suspend state there is no need to check which regulator setting is active. Fixes: 4068e5182ada ("regulator: da9062: DA9062 regulator driver") Signed-off-by: Marco Felsch Reviewed-by: Adam Thomson Link: https://lore.kernel.org/r/20190917124246.11732-2-m.felsch@pengutronix.de Signed-off-by: Mark Brown --- drivers/regulator/da9062-regulator.c | 118 +++++++++++---------------- 1 file changed, 47 insertions(+), 71 deletions(-) diff --git a/drivers/regulator/da9062-regulator.c b/drivers/regulator/da9062-regulator.c index 56f3f72d7707..710e67081d53 100644 --- a/drivers/regulator/da9062-regulator.c +++ b/drivers/regulator/da9062-regulator.c @@ -136,7 +136,6 @@ static int da9062_buck_set_mode(struct regulator_dev *rdev, unsigned mode) static unsigned da9062_buck_get_mode(struct regulator_dev *rdev) { struct da9062_regulator *regl = rdev_get_drvdata(rdev); - struct regmap_field *field; unsigned int val, mode = 0; int ret; @@ -158,18 +157,7 @@ static unsigned da9062_buck_get_mode(struct regulator_dev *rdev) return REGULATOR_MODE_NORMAL; } - /* Detect current regulator state */ - ret = regmap_field_read(regl->suspend, &val); - if (ret < 0) - return 0; - - /* Read regulator mode from proper register, depending on state */ - if (val) - field = regl->suspend_sleep; - else - field = regl->sleep; - - ret = regmap_field_read(field, &val); + ret = regmap_field_read(regl->sleep, &val); if (ret < 0) return 0; @@ -208,21 +196,9 @@ static int da9062_ldo_set_mode(struct regulator_dev *rdev, unsigned mode) static unsigned da9062_ldo_get_mode(struct regulator_dev *rdev) { struct da9062_regulator *regl = rdev_get_drvdata(rdev); - struct regmap_field *field; int ret, val; - /* Detect current regulator state */ - ret = regmap_field_read(regl->suspend, &val); - if (ret < 0) - return 0; - - /* Read regulator mode from proper register, depending on state */ - if (val) - field = regl->suspend_sleep; - else - field = regl->sleep; - - ret = regmap_field_read(field, &val); + ret = regmap_field_read(regl->sleep, &val); if (ret < 0) return 0; @@ -408,10 +384,10 @@ static const struct da9062_regulator_info local_da9061_regulator_info[] = { __builtin_ffs((int)DA9062AA_BUCK1_MODE_MASK) - 1, sizeof(unsigned int) * 8 - __builtin_clz((DA9062AA_BUCK1_MODE_MASK)) - 1), - .suspend = REG_FIELD(DA9062AA_DVC_1, - __builtin_ffs((int)DA9062AA_VBUCK1_SEL_MASK) - 1, + .suspend = REG_FIELD(DA9062AA_BUCK1_CONT, + __builtin_ffs((int)DA9062AA_BUCK1_CONF_MASK) - 1, sizeof(unsigned int) * 8 - - __builtin_clz((DA9062AA_VBUCK1_SEL_MASK)) - 1), + __builtin_clz(DA9062AA_BUCK1_CONF_MASK) - 1), }, { .desc.id = DA9061_ID_BUCK2, @@ -444,10 +420,10 @@ static const struct da9062_regulator_info local_da9061_regulator_info[] = { __builtin_ffs((int)DA9062AA_BUCK3_MODE_MASK) - 1, sizeof(unsigned int) * 8 - __builtin_clz((DA9062AA_BUCK3_MODE_MASK)) - 1), - .suspend = REG_FIELD(DA9062AA_DVC_1, - __builtin_ffs((int)DA9062AA_VBUCK3_SEL_MASK) - 1, + .suspend = REG_FIELD(DA9062AA_BUCK3_CONT, + __builtin_ffs((int)DA9062AA_BUCK3_CONF_MASK) - 1, sizeof(unsigned int) * 8 - - __builtin_clz((DA9062AA_VBUCK3_SEL_MASK)) - 1), + __builtin_clz(DA9062AA_BUCK3_CONF_MASK) - 1), }, { .desc.id = DA9061_ID_BUCK3, @@ -480,10 +456,10 @@ static const struct da9062_regulator_info local_da9061_regulator_info[] = { __builtin_ffs((int)DA9062AA_BUCK4_MODE_MASK) - 1, sizeof(unsigned int) * 8 - __builtin_clz((DA9062AA_BUCK4_MODE_MASK)) - 1), - .suspend = REG_FIELD(DA9062AA_DVC_1, - __builtin_ffs((int)DA9062AA_VBUCK4_SEL_MASK) - 1, + .suspend = REG_FIELD(DA9062AA_BUCK4_CONT, + __builtin_ffs((int)DA9062AA_BUCK4_CONF_MASK) - 1, sizeof(unsigned int) * 8 - - __builtin_clz((DA9062AA_VBUCK4_SEL_MASK)) - 1), + __builtin_clz(DA9062AA_BUCK4_CONF_MASK) - 1), }, { .desc.id = DA9061_ID_LDO1, @@ -509,10 +485,10 @@ static const struct da9062_regulator_info local_da9061_regulator_info[] = { sizeof(unsigned int) * 8 - __builtin_clz((DA9062AA_LDO1_SL_B_MASK)) - 1), .suspend_vsel_reg = DA9062AA_VLDO1_B, - .suspend = REG_FIELD(DA9062AA_DVC_1, - __builtin_ffs((int)DA9062AA_VLDO1_SEL_MASK) - 1, + .suspend = REG_FIELD(DA9062AA_LDO1_CONT, + __builtin_ffs((int)DA9062AA_LDO1_CONF_MASK) - 1, sizeof(unsigned int) * 8 - - __builtin_clz((DA9062AA_VLDO1_SEL_MASK)) - 1), + __builtin_clz(DA9062AA_LDO1_CONF_MASK) - 1), .oc_event = REG_FIELD(DA9062AA_STATUS_D, __builtin_ffs((int)DA9062AA_LDO1_ILIM_MASK) - 1, sizeof(unsigned int) * 8 - @@ -542,10 +518,10 @@ static const struct da9062_regulator_info local_da9061_regulator_info[] = { sizeof(unsigned int) * 8 - __builtin_clz((DA9062AA_LDO2_SL_B_MASK)) - 1), .suspend_vsel_reg = DA9062AA_VLDO2_B, - .suspend = REG_FIELD(DA9062AA_DVC_1, - __builtin_ffs((int)DA9062AA_VLDO2_SEL_MASK) - 1, + .suspend = REG_FIELD(DA9062AA_LDO2_CONT, + __builtin_ffs((int)DA9062AA_LDO2_CONF_MASK) - 1, sizeof(unsigned int) * 8 - - __builtin_clz((DA9062AA_VLDO2_SEL_MASK)) - 1), + __builtin_clz(DA9062AA_LDO2_CONF_MASK) - 1), .oc_event = REG_FIELD(DA9062AA_STATUS_D, __builtin_ffs((int)DA9062AA_LDO2_ILIM_MASK) - 1, sizeof(unsigned int) * 8 - @@ -575,10 +551,10 @@ static const struct da9062_regulator_info local_da9061_regulator_info[] = { sizeof(unsigned int) * 8 - __builtin_clz((DA9062AA_LDO3_SL_B_MASK)) - 1), .suspend_vsel_reg = DA9062AA_VLDO3_B, - .suspend = REG_FIELD(DA9062AA_DVC_1, - __builtin_ffs((int)DA9062AA_VLDO3_SEL_MASK) - 1, + .suspend = REG_FIELD(DA9062AA_LDO3_CONT, + __builtin_ffs((int)DA9062AA_LDO3_CONF_MASK) - 1, sizeof(unsigned int) * 8 - - __builtin_clz((DA9062AA_VLDO3_SEL_MASK)) - 1), + __builtin_clz(DA9062AA_LDO3_CONF_MASK) - 1), .oc_event = REG_FIELD(DA9062AA_STATUS_D, __builtin_ffs((int)DA9062AA_LDO3_ILIM_MASK) - 1, sizeof(unsigned int) * 8 - @@ -608,10 +584,10 @@ static const struct da9062_regulator_info local_da9061_regulator_info[] = { sizeof(unsigned int) * 8 - __builtin_clz((DA9062AA_LDO4_SL_B_MASK)) - 1), .suspend_vsel_reg = DA9062AA_VLDO4_B, - .suspend = REG_FIELD(DA9062AA_DVC_1, - __builtin_ffs((int)DA9062AA_VLDO4_SEL_MASK) - 1, + .suspend = REG_FIELD(DA9062AA_LDO4_CONT, + __builtin_ffs((int)DA9062AA_LDO4_CONF_MASK) - 1, sizeof(unsigned int) * 8 - - __builtin_clz((DA9062AA_VLDO4_SEL_MASK)) - 1), + __builtin_clz(DA9062AA_LDO4_CONF_MASK) - 1), .oc_event = REG_FIELD(DA9062AA_STATUS_D, __builtin_ffs((int)DA9062AA_LDO4_ILIM_MASK) - 1, sizeof(unsigned int) * 8 - @@ -652,10 +628,10 @@ static const struct da9062_regulator_info local_da9062_regulator_info[] = { __builtin_ffs((int)DA9062AA_BUCK1_MODE_MASK) - 1, sizeof(unsigned int) * 8 - __builtin_clz((DA9062AA_BUCK1_MODE_MASK)) - 1), - .suspend = REG_FIELD(DA9062AA_DVC_1, - __builtin_ffs((int)DA9062AA_VBUCK1_SEL_MASK) - 1, + .suspend = REG_FIELD(DA9062AA_BUCK1_CONT, + __builtin_ffs((int)DA9062AA_BUCK1_CONF_MASK) - 1, sizeof(unsigned int) * 8 - - __builtin_clz((DA9062AA_VBUCK1_SEL_MASK)) - 1), + __builtin_clz(DA9062AA_BUCK1_CONF_MASK) - 1), }, { .desc.id = DA9062_ID_BUCK2, @@ -688,10 +664,10 @@ static const struct da9062_regulator_info local_da9062_regulator_info[] = { __builtin_ffs((int)DA9062AA_BUCK2_MODE_MASK) - 1, sizeof(unsigned int) * 8 - __builtin_clz((DA9062AA_BUCK2_MODE_MASK)) - 1), - .suspend = REG_FIELD(DA9062AA_DVC_1, - __builtin_ffs((int)DA9062AA_VBUCK2_SEL_MASK) - 1, + .suspend = REG_FIELD(DA9062AA_BUCK2_CONT, + __builtin_ffs((int)DA9062AA_BUCK2_CONF_MASK) - 1, sizeof(unsigned int) * 8 - - __builtin_clz((DA9062AA_VBUCK2_SEL_MASK)) - 1), + __builtin_clz(DA9062AA_BUCK2_CONF_MASK) - 1), }, { .desc.id = DA9062_ID_BUCK3, @@ -724,10 +700,10 @@ static const struct da9062_regulator_info local_da9062_regulator_info[] = { __builtin_ffs((int)DA9062AA_BUCK3_MODE_MASK) - 1, sizeof(unsigned int) * 8 - __builtin_clz((DA9062AA_BUCK3_MODE_MASK)) - 1), - .suspend = REG_FIELD(DA9062AA_DVC_1, - __builtin_ffs((int)DA9062AA_VBUCK3_SEL_MASK) - 1, + .suspend = REG_FIELD(DA9062AA_BUCK3_CONT, + __builtin_ffs((int)DA9062AA_BUCK3_CONF_MASK) - 1, sizeof(unsigned int) * 8 - - __builtin_clz((DA9062AA_VBUCK3_SEL_MASK)) - 1), + __builtin_clz(DA9062AA_BUCK3_CONF_MASK) - 1), }, { .desc.id = DA9062_ID_BUCK4, @@ -760,10 +736,10 @@ static const struct da9062_regulator_info local_da9062_regulator_info[] = { __builtin_ffs((int)DA9062AA_BUCK4_MODE_MASK) - 1, sizeof(unsigned int) * 8 - __builtin_clz((DA9062AA_BUCK4_MODE_MASK)) - 1), - .suspend = REG_FIELD(DA9062AA_DVC_1, - __builtin_ffs((int)DA9062AA_VBUCK4_SEL_MASK) - 1, + .suspend = REG_FIELD(DA9062AA_BUCK4_CONT, + __builtin_ffs((int)DA9062AA_BUCK4_CONF_MASK) - 1, sizeof(unsigned int) * 8 - - __builtin_clz((DA9062AA_VBUCK4_SEL_MASK)) - 1), + __builtin_clz(DA9062AA_BUCK4_CONF_MASK) - 1), }, { .desc.id = DA9062_ID_LDO1, @@ -789,10 +765,10 @@ static const struct da9062_regulator_info local_da9062_regulator_info[] = { sizeof(unsigned int) * 8 - __builtin_clz((DA9062AA_LDO1_SL_B_MASK)) - 1), .suspend_vsel_reg = DA9062AA_VLDO1_B, - .suspend = REG_FIELD(DA9062AA_DVC_1, - __builtin_ffs((int)DA9062AA_VLDO1_SEL_MASK) - 1, + .suspend = REG_FIELD(DA9062AA_LDO1_CONT, + __builtin_ffs((int)DA9062AA_LDO1_CONF_MASK) - 1, sizeof(unsigned int) * 8 - - __builtin_clz((DA9062AA_VLDO1_SEL_MASK)) - 1), + __builtin_clz(DA9062AA_LDO1_CONF_MASK) - 1), .oc_event = REG_FIELD(DA9062AA_STATUS_D, __builtin_ffs((int)DA9062AA_LDO1_ILIM_MASK) - 1, sizeof(unsigned int) * 8 - @@ -822,10 +798,10 @@ static const struct da9062_regulator_info local_da9062_regulator_info[] = { sizeof(unsigned int) * 8 - __builtin_clz((DA9062AA_LDO2_SL_B_MASK)) - 1), .suspend_vsel_reg = DA9062AA_VLDO2_B, - .suspend = REG_FIELD(DA9062AA_DVC_1, - __builtin_ffs((int)DA9062AA_VLDO2_SEL_MASK) - 1, + .suspend = REG_FIELD(DA9062AA_LDO2_CONT, + __builtin_ffs((int)DA9062AA_LDO2_CONF_MASK) - 1, sizeof(unsigned int) * 8 - - __builtin_clz((DA9062AA_VLDO2_SEL_MASK)) - 1), + __builtin_clz(DA9062AA_LDO2_CONF_MASK) - 1), .oc_event = REG_FIELD(DA9062AA_STATUS_D, __builtin_ffs((int)DA9062AA_LDO2_ILIM_MASK) - 1, sizeof(unsigned int) * 8 - @@ -855,10 +831,10 @@ static const struct da9062_regulator_info local_da9062_regulator_info[] = { sizeof(unsigned int) * 8 - __builtin_clz((DA9062AA_LDO3_SL_B_MASK)) - 1), .suspend_vsel_reg = DA9062AA_VLDO3_B, - .suspend = REG_FIELD(DA9062AA_DVC_1, - __builtin_ffs((int)DA9062AA_VLDO3_SEL_MASK) - 1, + .suspend = REG_FIELD(DA9062AA_LDO3_CONT, + __builtin_ffs((int)DA9062AA_LDO3_CONF_MASK) - 1, sizeof(unsigned int) * 8 - - __builtin_clz((DA9062AA_VLDO3_SEL_MASK)) - 1), + __builtin_clz(DA9062AA_LDO3_CONF_MASK) - 1), .oc_event = REG_FIELD(DA9062AA_STATUS_D, __builtin_ffs((int)DA9062AA_LDO3_ILIM_MASK) - 1, sizeof(unsigned int) * 8 - @@ -888,10 +864,10 @@ static const struct da9062_regulator_info local_da9062_regulator_info[] = { sizeof(unsigned int) * 8 - __builtin_clz((DA9062AA_LDO4_SL_B_MASK)) - 1), .suspend_vsel_reg = DA9062AA_VLDO4_B, - .suspend = REG_FIELD(DA9062AA_DVC_1, - __builtin_ffs((int)DA9062AA_VLDO4_SEL_MASK) - 1, + .suspend = REG_FIELD(DA9062AA_LDO4_CONT, + __builtin_ffs((int)DA9062AA_LDO4_CONF_MASK) - 1, sizeof(unsigned int) * 8 - - __builtin_clz((DA9062AA_VLDO4_SEL_MASK)) - 1), + __builtin_clz(DA9062AA_LDO4_CONF_MASK) - 1), .oc_event = REG_FIELD(DA9062AA_STATUS_D, __builtin_ffs((int)DA9062AA_LDO4_ILIM_MASK) - 1, sizeof(unsigned int) * 8 - From 82a9ac7130cf51c2640800fb0ef19d3a05cb8fff Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Wed, 7 Aug 2019 16:49:47 +0200 Subject: [PATCH 013/572] scsi: core: fix missing .cleanup_rq for SCSI hosts without request batching This was missing from scsi_mq_ops_no_commit of linux-next commit 8930a6c20791 ("scsi: core: add support for request batching") from Martin's scsi/5.4/scsi-queue or James' scsi/misc. See also linux-next commit b7e9e1fb7a92 ("scsi: implement .cleanup_rq callback") from block/for-next. Signed-off-by: Steffen Maier Fixes: 8930a6c20791 ("scsi: core: add support for request batching") Cc: Paolo Bonzini Cc: Ming Lei Reviewed-by: Paolo Bonzini Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index dc210b9d4896..12db0c13a927 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1834,6 +1834,7 @@ static const struct blk_mq_ops scsi_mq_ops_no_commit = { .init_request = scsi_mq_init_request, .exit_request = scsi_mq_exit_request, .initialize_rq_fn = scsi_initialize_rq, + .cleanup_rq = scsi_cleanup_rq, .busy = scsi_mq_lld_busy, .map_queues = scsi_map_queues, }; From 6b6fa7a5c86e1269d9f0c9a5b902072351317387 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Wed, 7 Aug 2019 16:49:48 +0200 Subject: [PATCH 014/572] scsi: core: fix dh and multipathing for SCSI hosts without request batching This was missing from scsi_device_from_queue() due to the introduction of another new scsi_mq_ops_no_commit of linux-next commit 8930a6c20791 ("scsi: core: add support for request batching") from Martin's scsi/5.4/scsi-queue or James' scsi/misc. Only devicehandler code seems to call scsi_device_from_queue(): *** drivers/scsi/scsi_dh.c: scsi_dh_activate[255] sdev = scsi_device_from_queue(q); scsi_dh_set_params[302] sdev = scsi_device_from_queue(q); scsi_dh_attach[325] sdev = scsi_device_from_queue(q); scsi_dh_attached_handler_name[363] sdev = scsi_device_from_queue(q); Fixes multipath tools follow-on errors: $ multipath -v6 ... libdevmapper: ioctl/libdm-iface.c(1887): device-mapper: reload ioctl on mpatha failed: No such device ... mpatha: failed to load map, error 19 ... showing also as kernel messages: device-mapper: table: 252:0: multipath: error attaching hardware handler device-mapper: ioctl: error adding target to table Signed-off-by: Steffen Maier Fixes: 8930a6c20791 ("scsi: core: add support for request batching") Cc: Paolo Bonzini Reviewed-by: Paolo Bonzini Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 12db0c13a927..5447738906ac 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1922,7 +1922,8 @@ struct scsi_device *scsi_device_from_queue(struct request_queue *q) { struct scsi_device *sdev = NULL; - if (q->mq_ops == &scsi_mq_ops) + if (q->mq_ops == &scsi_mq_ops_no_commit || + q->mq_ops == &scsi_mq_ops) sdev = q->queuedata; if (!sdev || !get_device(&sdev->sdev_gendev)) sdev = NULL; From 752c938a5c14b8cbf0ed3ffbfa637fb166255c3f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 25 Sep 2019 14:06:24 +0300 Subject: [PATCH 015/572] ASoC: topology: Fix a signedness bug in soc_tplg_dapm_widget_create() The "template.id" variable is an enum and in this context GCC will treat it as an unsigned int so it can never be less than zero. Fixes: 8a9782346dcc ("ASoC: topology: Add topology core") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20190925110624.GR3264@mwanda Signed-off-by: Mark Brown --- sound/soc/soc-topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index b8690715abb5..c25939c5611b 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -1588,7 +1588,7 @@ static int soc_tplg_dapm_widget_create(struct soc_tplg *tplg, /* map user to kernel widget ID */ template.id = get_widget_id(le32_to_cpu(w->id)); - if (template.id < 0) + if ((int)template.id < 0) return template.id; /* strings are allocated here, but used and freed by the widget */ From 55d554f5d14071f7c2c5dbd88d0a2eb695c97d16 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Thu, 26 Sep 2019 19:13:44 -0600 Subject: [PATCH 016/572] tools: bpf: Use !building_out_of_srctree to determine srctree make TARGETS=bpf kselftest fails with: Makefile:127: tools/build/Makefile.include: No such file or directory When the bpf tool make is invoked from tools Makefile, srctree is cleared and the current logic check for srctree equals to empty string to determine srctree location from CURDIR. When the build in invoked from selftests/bpf Makefile, the srctree is set to "." and the same logic used for srctree equals to empty is needed to determine srctree. Check building_out_of_srctree undefined as the condition for both cases to fix "make TARGETS=bpf kselftest" build failure. Signed-off-by: Shuah Khan Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20190927011344.4695-1-skhan@linuxfoundation.org --- tools/bpf/Makefile | 6 +++++- tools/lib/bpf/Makefile | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile index fbf5e4a0cb9c..5d1995fd369c 100644 --- a/tools/bpf/Makefile +++ b/tools/bpf/Makefile @@ -12,7 +12,11 @@ INSTALL ?= install CFLAGS += -Wall -O2 CFLAGS += -D__EXPORTED_HEADERS__ -I$(srctree)/include/uapi -I$(srctree)/include -ifeq ($(srctree),) +# This will work when bpf is built in tools env. where srctree +# isn't set and when invoked from selftests build, where srctree +# is set to ".". building_out_of_srctree is undefined for in srctree +# builds +ifndef building_out_of_srctree srctree := $(patsubst %/,%,$(dir $(CURDIR))) srctree := $(patsubst %/,%,$(dir $(srctree))) endif diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index c6f94cffe06e..20772663d3e1 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -8,7 +8,11 @@ LIBBPF_MAJOR_VERSION := $(firstword $(subst ., ,$(LIBBPF_VERSION))) MAKEFLAGS += --no-print-directory -ifeq ($(srctree),) +# This will work when bpf is built in tools env. where srctree +# isn't set and when invoked from selftests build, where srctree +# is a ".". building_out_of_srctree is undefined for in srctree +# builds +ifndef building_out_of_srctree srctree := $(patsubst %/,%,$(dir $(CURDIR))) srctree := $(patsubst %/,%,$(dir $(srctree))) srctree := $(patsubst %/,%,$(dir $(srctree))) From 2511366797fa6ab4a404b4b000ef7cd262aaafe8 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Mon, 9 Sep 2019 20:42:35 +0200 Subject: [PATCH 017/572] arm64: dts: allwinner: a64: pine64-plus: Add PHY regulator delay Depending on kernel and bootloader configuration, it's possible that Realtek ethernet PHY isn't powered on properly. According to the datasheet, it needs 30ms to power up and then some more time before it can be used. Fix that by adding 100ms ramp delay to regulator responsible for powering PHY. Fixes: 94dcfdc77fc5 ("arm64: allwinner: pine64-plus: Enable dwmac-sun8i") Suggested-by: Ondrej Jirman Signed-off-by: Jernej Skrabec Signed-off-by: Maxime Ripard --- arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts index 24f1aac366d6..d5b6e8159a33 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts @@ -63,3 +63,12 @@ reg = <1>; }; }; + +®_dc1sw { + /* + * Ethernet PHY needs 30ms to properly power up and some more + * to initialize. 100ms should be plenty of time to finish + * whole process. + */ + regulator-enable-ramp-delay = <100000>; +}; From ed3e9406bcbc32f84dc4aa4cb4767852e5ab086c Mon Sep 17 00:00:00 2001 From: Vasily Khoruzhick Date: Tue, 6 Aug 2019 07:01:35 -0700 Subject: [PATCH 018/572] arm64: dts: allwinner: a64: Drop PMU node Looks like PMU in A64 is broken, it generates no interrupts at all and as result 'perf top' shows no events. Tested on Pine64-LTS. Fixes: 34a97fcc71c2 ("arm64: dts: allwinner: a64: Add PMU node") Cc: Harald Geyer Cc: Jared D. McNeill Signed-off-by: Vasily Khoruzhick Reviewed-by: Emmanuel Vadot Signed-off-by: Maxime Ripard --- arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi | 9 --------- 1 file changed, 9 deletions(-) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi index 69128a6dfc46..d0028934e11c 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi @@ -142,15 +142,6 @@ clock-output-names = "ext-osc32k"; }; - pmu { - compatible = "arm,cortex-a53-pmu"; - interrupts = , - , - , - ; - interrupt-affinity = <&cpu0>, <&cpu1>, <&cpu2>, <&cpu3>; - }; - psci { compatible = "arm,psci-0.2"; method = "smc"; From ccdf3aaa27ded6db9a93eed3ca7468bb2353b8fe Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Sun, 29 Sep 2019 10:52:59 +0200 Subject: [PATCH 019/572] arm64: dts: allwinner: a64: sopine-baseboard: Add PHY regulator delay It turns out that sopine-baseboard needs same fix as pine64-plus for ethernet PHY. Here too Realtek ethernet PHY chip needs additional power on delay to properly initialize. Datasheet mentions that chip needs 30 ms to be properly powered on and that it needs some more time to be initialized. Fix that by adding 100ms ramp delay to regulator responsible for powering PHY. Note that issue was found out and fix tested on pine64-lts, but it's basically the same as sopine-baseboard, only layout and connectors differ. Fixes: bdfe4cebea11 ("arm64: allwinner: a64: add Ethernet PHY regulator for several boards") Signed-off-by: Jernej Skrabec Signed-off-by: Maxime Ripard --- .../boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts index e6fb9683f213..25099202c52c 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts @@ -159,6 +159,12 @@ }; ®_dc1sw { + /* + * Ethernet PHY needs 30ms to properly power up and some more + * to initialize. 100ms should be plenty of time to finish + * whole process. + */ + regulator-enable-ramp-delay = <100000>; regulator-name = "vcc-phy"; }; From 1bd63524593b964934a33afd442df16b8f90e2b5 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 30 Sep 2019 14:02:03 -0700 Subject: [PATCH 020/572] libbpf: handle symbol versioning properly for libbpf.a bcc uses libbpf repo as a submodule. It brings in libbpf source code and builds everything together to produce shared libraries. With latest libbpf, I got the following errors: /bin/ld: libbcc_bpf.so.0.10.0: version node not found for symbol xsk_umem__create@LIBBPF_0.0.2 /bin/ld: failed to set dynamic section sizes: Bad value collect2: error: ld returned 1 exit status make[2]: *** [src/cc/libbcc_bpf.so.0.10.0] Error 1 In xsk.c, we have asm(".symver xsk_umem__create_v0_0_2, xsk_umem__create@LIBBPF_0.0.2"); asm(".symver xsk_umem__create_v0_0_4, xsk_umem__create@@LIBBPF_0.0.4"); The linker thinks the built is for LIBBPF but cannot find proper version LIBBPF_0.0.2/4, so emit errors. I also confirmed that using libbpf.a to produce a shared library also has issues: -bash-4.4$ cat t.c extern void *xsk_umem__create; void * test() { return xsk_umem__create; } -bash-4.4$ gcc -c -fPIC t.c -bash-4.4$ gcc -shared t.o libbpf.a -o t.so /bin/ld: t.so: version node not found for symbol xsk_umem__create@LIBBPF_0.0.2 /bin/ld: failed to set dynamic section sizes: Bad value collect2: error: ld returned 1 exit status -bash-4.4$ Symbol versioning does happens in commonly used libraries, e.g., elfutils and glibc. For static libraries, for a versioned symbol, the old definitions will be ignored, and the symbol will be an alias to the latest definition. For example, glibc sched_setaffinity is versioned. -bash-4.4$ readelf -s /usr/lib64/libc.so.6 | grep sched_setaffinity 756: 000000000013d3d0 13 FUNC GLOBAL DEFAULT 13 sched_setaffinity@GLIBC_2.3.3 757: 00000000000e2e70 455 FUNC GLOBAL DEFAULT 13 sched_setaffinity@@GLIBC_2.3.4 1800: 0000000000000000 0 FILE LOCAL DEFAULT ABS sched_setaffinity.c 4228: 00000000000e2e70 455 FUNC LOCAL DEFAULT 13 __sched_setaffinity_new 4648: 000000000013d3d0 13 FUNC LOCAL DEFAULT 13 __sched_setaffinity_old 7338: 000000000013d3d0 13 FUNC GLOBAL DEFAULT 13 sched_setaffinity@GLIBC_2 7380: 00000000000e2e70 455 FUNC GLOBAL DEFAULT 13 sched_setaffinity@@GLIBC_ -bash-4.4$ For static library, the definition of sched_setaffinity aliases to the new definition. -bash-4.4$ readelf -s /usr/lib64/libc.a | grep sched_setaffinity File: /usr/lib64/libc.a(sched_setaffinity.o) 8: 0000000000000000 455 FUNC GLOBAL DEFAULT 1 __sched_setaffinity_new 12: 0000000000000000 455 FUNC WEAK DEFAULT 1 sched_setaffinity For both elfutils and glibc, additional macros are used to control different handling of symbol versioning w.r.t static and shared libraries. For elfutils, the macro is SYMBOL_VERSIONING (https://sourceware.org/git/?p=elfutils.git;a=blob;f=lib/eu-config.h). For glibc, the macro is SHARED (https://sourceware.org/git/?p=glibc.git;a=blob;f=include/shlib-compat.h;hb=refs/heads/master) This patch used SHARED as the macro name. After this patch, the libbpf.a has -bash-4.4$ readelf -s libbpf.a | grep xsk_umem__create 372: 0000000000017145 1190 FUNC GLOBAL DEFAULT 1 xsk_umem__create_v0_0_4 405: 0000000000017145 1190 FUNC GLOBAL DEFAULT 1 xsk_umem__create 499: 00000000000175eb 103 FUNC GLOBAL DEFAULT 1 xsk_umem__create_v0_0_2 -bash-4.4$ No versioned symbols for xsk_umem__create. The libbpf.a can be used to build a shared library succesfully. -bash-4.4$ cat t.c extern void *xsk_umem__create; void * test() { return xsk_umem__create; } -bash-4.4$ gcc -c -fPIC t.c -bash-4.4$ gcc -shared t.o libbpf.a -o t.so -bash-4.4$ Fixes: 10d30e301732 ("libbpf: add flags to umem config") Cc: Kevin Laatz Cc: Arnaldo Carvalho de Melo Cc: Andrii Nakryiko Acked-by: Andrii Nakryiko Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/Makefile | 27 ++++++++++++++++++--------- tools/lib/bpf/libbpf_internal.h | 16 ++++++++++++++++ tools/lib/bpf/xsk.c | 4 ++-- 3 files changed, 36 insertions(+), 11 deletions(-) diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 20772663d3e1..56ce6292071b 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -114,6 +114,9 @@ override CFLAGS += $(INCLUDES) override CFLAGS += -fvisibility=hidden override CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 +# flags specific for shared library +SHLIB_FLAGS := -DSHARED + ifeq ($(VERBOSE),1) Q = else @@ -130,14 +133,17 @@ all: export srctree OUTPUT CC LD CFLAGS V include $(srctree)/tools/build/Makefile.include -BPF_IN := $(OUTPUT)libbpf-in.o +SHARED_OBJDIR := $(OUTPUT)sharedobjs/ +STATIC_OBJDIR := $(OUTPUT)staticobjs/ +BPF_IN_SHARED := $(SHARED_OBJDIR)libbpf-in.o +BPF_IN_STATIC := $(STATIC_OBJDIR)libbpf-in.o VERSION_SCRIPT := libbpf.map LIB_TARGET := $(addprefix $(OUTPUT),$(LIB_TARGET)) LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE)) PC_FILE := $(addprefix $(OUTPUT),$(PC_FILE)) -GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN) | \ +GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \ cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' | \ awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$8}' | \ sort -u | wc -l) @@ -159,7 +165,7 @@ all: fixdep all_cmd: $(CMD_TARGETS) check -$(BPF_IN): force elfdep bpfdep +$(BPF_IN_SHARED): force elfdep bpfdep @(test -f ../../include/uapi/linux/bpf.h -a -f ../../../include/uapi/linux/bpf.h && ( \ (diff -B ../../include/uapi/linux/bpf.h ../../../include/uapi/linux/bpf.h >/dev/null) || \ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf.h' differs from latest version at 'include/uapi/linux/bpf.h'" >&2 )) || true @@ -175,17 +181,20 @@ $(BPF_IN): force elfdep bpfdep @(test -f ../../include/uapi/linux/if_xdp.h -a -f ../../../include/uapi/linux/if_xdp.h && ( \ (diff -B ../../include/uapi/linux/if_xdp.h ../../../include/uapi/linux/if_xdp.h >/dev/null) || \ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_xdp.h' differs from latest version at 'include/uapi/linux/if_xdp.h'" >&2 )) || true - $(Q)$(MAKE) $(build)=libbpf + $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(SHARED_OBJDIR) CFLAGS="$(CFLAGS) $(SHLIB_FLAGS)" + +$(BPF_IN_STATIC): force elfdep bpfdep + $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR) $(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION) -$(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN) +$(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED) $(QUIET_LINK)$(CC) --shared -Wl,-soname,libbpf.so.$(LIBBPF_MAJOR_VERSION) \ -Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -o $@ @ln -sf $(@F) $(OUTPUT)libbpf.so @ln -sf $(@F) $(OUTPUT)libbpf.so.$(LIBBPF_MAJOR_VERSION) -$(OUTPUT)libbpf.a: $(BPF_IN) +$(OUTPUT)libbpf.a: $(BPF_IN_STATIC) $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ $(OUTPUT)test_libbpf: test_libbpf.cpp $(OUTPUT)libbpf.a @@ -201,7 +210,7 @@ check: check_abi check_abi: $(OUTPUT)libbpf.so @if [ "$(GLOBAL_SYM_COUNT)" != "$(VERSIONED_SYM_COUNT)" ]; then \ - echo "Warning: Num of global symbols in $(BPF_IN)" \ + echo "Warning: Num of global symbols in $(BPF_IN_SHARED)" \ "($(GLOBAL_SYM_COUNT)) does NOT match with num of" \ "versioned symbols in $^ ($(VERSIONED_SYM_COUNT))." \ "Please make sure all LIBBPF_API symbols are" \ @@ -259,9 +268,9 @@ config-clean: $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null clean: - $(call QUIET_CLEAN, libbpf) $(RM) $(TARGETS) $(CXX_TEST_TARGET) \ + $(call QUIET_CLEAN, libbpf) $(RM) -rf $(TARGETS) $(CXX_TEST_TARGET) \ *.o *~ *.a *.so *.so.$(LIBBPF_MAJOR_VERSION) .*.d .*.cmd \ - *.pc LIBBPF-CFLAGS + *.pc LIBBPF-CFLAGS $(SHARED_OBJDIR) $(STATIC_OBJDIR) $(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 2e83a34f8c79..98216a69c32f 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -34,6 +34,22 @@ (offsetof(TYPE, FIELD) + sizeof(((TYPE *)0)->FIELD)) #endif +/* Symbol versioning is different between static and shared library. + * Properly versioned symbols are needed for shared library, but + * only the symbol of the new version is needed for static library. + */ +#ifdef SHARED +# define COMPAT_VERSION(internal_name, api_name, version) \ + asm(".symver " #internal_name "," #api_name "@" #version); +# define DEFAULT_VERSION(internal_name, api_name, version) \ + asm(".symver " #internal_name "," #api_name "@@" #version); +#else +# define COMPAT_VERSION(internal_name, api_name, version) +# define DEFAULT_VERSION(internal_name, api_name, version) \ + extern typeof(internal_name) api_name \ + __attribute__((alias(#internal_name))); +#endif + extern void libbpf_print(enum libbpf_print_level level, const char *format, ...) __attribute__((format(printf, 2, 3))); diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index 24fa313524fb..a902838f9fcc 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -261,8 +261,8 @@ int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area, return xsk_umem__create_v0_0_4(umem_ptr, umem_area, size, fill, comp, &config); } -asm(".symver xsk_umem__create_v0_0_2, xsk_umem__create@LIBBPF_0.0.2"); -asm(".symver xsk_umem__create_v0_0_4, xsk_umem__create@@LIBBPF_0.0.4"); +COMPAT_VERSION(xsk_umem__create_v0_0_2, xsk_umem__create, LIBBPF_0.0.2) +DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4) static int xsk_load_xdp_prog(struct xsk_socket *xsk) { From 965f6603e3335a953f4f876792074cb36bf65f7f Mon Sep 17 00:00:00 2001 From: Rayagonda Kokatanur Date: Mon, 9 Sep 2019 14:05:27 +0530 Subject: [PATCH 021/572] arm64: dts: Fix gpio to pinmux mapping There are total of 151 non-secure gpio (0-150) and four pins of pinmux (91, 92, 93 and 94) are not mapped to any gpio pin, hence update same in DT. Fixes: 8aa428cc1e2e ("arm64: dts: Add pinctrl DT nodes for Stingray SOC") Signed-off-by: Rayagonda Kokatanur Reviewed-by: Ray Jui Signed-off-by: Florian Fainelli --- arch/arm64/boot/dts/broadcom/stingray/stingray-pinctrl.dtsi | 5 +++-- arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi | 3 +-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/broadcom/stingray/stingray-pinctrl.dtsi b/arch/arm64/boot/dts/broadcom/stingray/stingray-pinctrl.dtsi index 8a3a770e8f2c..56789ccf9454 100644 --- a/arch/arm64/boot/dts/broadcom/stingray/stingray-pinctrl.dtsi +++ b/arch/arm64/boot/dts/broadcom/stingray/stingray-pinctrl.dtsi @@ -42,13 +42,14 @@ pinmux: pinmux@14029c { compatible = "pinctrl-single"; - reg = <0x0014029c 0x250>; + reg = <0x0014029c 0x26c>; #address-cells = <1>; #size-cells = <1>; pinctrl-single,register-width = <32>; pinctrl-single,function-mask = <0xf>; pinctrl-single,gpio-range = < - &range 0 154 MODE_GPIO + &range 0 91 MODE_GPIO + &range 95 60 MODE_GPIO >; range: gpio-range { #pinctrl-single,gpio-range-cells = <3>; diff --git a/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi b/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi index 71e2e34400d4..0098dfdef96c 100644 --- a/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi +++ b/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi @@ -464,8 +464,7 @@ <&pinmux 108 16 27>, <&pinmux 135 77 6>, <&pinmux 141 67 4>, - <&pinmux 145 149 6>, - <&pinmux 151 91 4>; + <&pinmux 145 149 6>; }; i2c1: i2c@e0000 { From 21e3d6c81179bbdfa279efc8de456c34b814cfd2 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 3 Sep 2019 12:18:39 +0200 Subject: [PATCH 022/572] scsi: sd: Ignore a failure to sync cache due to lack of authorization I've got a report about a UAS drive enclosure reporting back Sense: Logical unit access not authorized if the drive it holds is password protected. While the drive is obviously unusable in that state as a mass storage device, it still exists as a sd device and when the system is asked to perform a suspend of the drive, it will be sent a SYNCHRONIZE CACHE. If that fails due to password protection, the error must be ignored. Cc: Link: https://lore.kernel.org/r/20190903101840.16483-1-oneukum@suse.com Signed-off-by: Oliver Neukum Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 91af598f2f53..0f96eb0ddbfa 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1655,7 +1655,8 @@ static int sd_sync_cache(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr) /* we need to evaluate the error return */ if (scsi_sense_valid(sshdr) && (sshdr->asc == 0x3a || /* medium not present */ - sshdr->asc == 0x20)) /* invalid command */ + sshdr->asc == 0x20 || /* invalid command */ + (sshdr->asc == 0x74 && sshdr->ascq == 0x71))) /* drive is password locked */ /* this is no error here */ return 0; From 9bc6157f5fd0e898c94f3018d088a3419bde0d8f Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Fri, 27 Sep 2019 09:30:31 +0200 Subject: [PATCH 023/572] scsi: qla2xxx: Remove WARN_ON_ONCE in qla2x00_status_cont_entry() Commit 88263208dd23 ("scsi: qla2xxx: Complain if sp->done() is not called from the completion path") introduced the WARN_ON_ONCE in qla2x00_status_cont_entry(). The assumption was that there is only one status continuations element. According to the firmware documentation it is possible that multiple status continuations are emitted by the firmware. Fixes: 88263208dd23 ("scsi: qla2xxx: Complain if sp->done() is not called from the completion path") Link: https://lore.kernel.org/r/20190927073031.62296-1-dwagner@suse.de Cc: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Bart Van Assche Signed-off-by: Daniel Wagner Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_isr.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 4c26630c1c3e..009fd5a33fcd 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -2837,8 +2837,6 @@ qla2x00_status_cont_entry(struct rsp_que *rsp, sts_cont_entry_t *pkt) if (sense_len == 0) { rsp->status_srb = NULL; sp->done(sp, cp->result); - } else { - WARN_ON_ONCE(true); } } From 6db7bfb431220d78e34d2d0afdb7c12683323588 Mon Sep 17 00:00:00 2001 From: Liu Xiang Date: Mon, 16 Sep 2019 21:53:00 +0800 Subject: [PATCH 024/572] iommu/arm-smmu: Free context bitmap in the err path of arm_smmu_init_domain_context When alloc_io_pgtable_ops is failed, context bitmap which is just allocated by __arm_smmu_alloc_bitmap should be freed to release the resource. Signed-off-by: Liu Xiang Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index b18aac4c105e..7c503a6bc585 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -812,6 +812,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, return 0; out_clear_smmu: + __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx); smmu_domain->smmu = NULL; out_unlock: mutex_unlock(&smmu_domain->init_mutex); From 52f325f4eb321ea2e8a0779f49a3866be58bc694 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 30 Sep 2019 15:11:00 +0100 Subject: [PATCH 025/572] iommu/io-pgtable-arm: Correct Mali attributes Whilst Midgard's MEMATTR follows a similar principle to the VMSA MAIR, the actual attribute values differ, so although it currently appears to work to some degree, we probably shouldn't be using our standard stage 1 MAIR for that. Instead, generate a reasonable MEMATTR with attribute values borrowed from the kbase driver; at this point we'll be overriding or ignoring pretty much all of the LPAE config, so just implement these Mali details in a dedicated allocator instead of pretending to subclass the standard VMSA format. Fixes: d08d42de6432 ("iommu: io-pgtable: Add ARM Mali midgard MMU page table format") Tested-by: Neil Armstrong Reviewed-by: Steven Price Reviewed-by: Rob Herring Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm.c | 53 +++++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 13 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 4c91359057c5..90cb37af761c 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -166,6 +166,9 @@ #define ARM_MALI_LPAE_TTBR_READ_INNER BIT(2) #define ARM_MALI_LPAE_TTBR_SHARE_OUTER BIT(4) +#define ARM_MALI_LPAE_MEMATTR_IMP_DEF 0x88ULL +#define ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC 0x8DULL + /* IOPTE accessors */ #define iopte_deref(pte,d) __va(iopte_to_paddr(pte, d)) @@ -1015,27 +1018,51 @@ arm_32_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) static struct io_pgtable * arm_mali_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) { - struct io_pgtable *iop; + struct arm_lpae_io_pgtable *data; + + /* No quirks for Mali (hopefully) */ + if (cfg->quirks) + return NULL; if (cfg->ias != 48 || cfg->oas > 40) return NULL; cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G); - iop = arm_64_lpae_alloc_pgtable_s1(cfg, cookie); - if (iop) { - u64 mair, ttbr; - /* Copy values as union fields overlap */ - mair = cfg->arm_lpae_s1_cfg.mair[0]; - ttbr = cfg->arm_lpae_s1_cfg.ttbr[0]; + data = arm_lpae_alloc_pgtable(cfg); + if (!data) + return NULL; - cfg->arm_mali_lpae_cfg.memattr = mair; - cfg->arm_mali_lpae_cfg.transtab = ttbr | - ARM_MALI_LPAE_TTBR_READ_INNER | - ARM_MALI_LPAE_TTBR_ADRMODE_TABLE; - } + /* + * MEMATTR: Mali has no actual notion of a non-cacheable type, so the + * best we can do is mimic the out-of-tree driver and hope that the + * "implementation-defined caching policy" is good enough. Similarly, + * we'll use it for the sake of a valid attribute for our 'device' + * index, although callers should never request that in practice. + */ + cfg->arm_mali_lpae_cfg.memattr = + (ARM_MALI_LPAE_MEMATTR_IMP_DEF + << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_NC)) | + (ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC + << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE)) | + (ARM_MALI_LPAE_MEMATTR_IMP_DEF + << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV)); - return iop; + data->pgd = __arm_lpae_alloc_pages(data->pgd_size, GFP_KERNEL, cfg); + if (!data->pgd) + goto out_free_data; + + /* Ensure the empty pgd is visible before TRANSTAB can be written */ + wmb(); + + cfg->arm_mali_lpae_cfg.transtab = virt_to_phys(data->pgd) | + ARM_MALI_LPAE_TTBR_READ_INNER | + ARM_MALI_LPAE_TTBR_ADRMODE_TABLE; + return &data->iop; + +out_free_data: + kfree(data); + return NULL; } struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns = { From 1be08f458d1602275b02f5357ef069957058f3fd Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 30 Sep 2019 15:11:01 +0100 Subject: [PATCH 026/572] iommu/io-pgtable-arm: Support all Mali configurations In principle, Midgard GPUs supporting smaller VA sizes should only require 3-level pagetables, since level 0 only resolves bits 48:40 of the address. However, the kbase driver does not appear to have any notion of a variable start level, and empirically T720 and T820 rapidly blow up with translation faults unless given a full 4-level table, despite only supporting a 33-bit VA size. The 'real' IAS value is still valuable in terms of validating addresses on map/unmap, so tweak the allocator to allow smaller values while still forcing the resultant tables to the full 4 levels. As far as I can test, this should make all known Midgard variants happy. Fixes: d08d42de6432 ("iommu: io-pgtable: Add ARM Mali midgard MMU page table format") Tested-by: Neil Armstrong Reviewed-by: Steven Price Reviewed-by: Rob Herring Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 90cb37af761c..ca51036aa53c 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -1024,7 +1024,7 @@ arm_mali_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) if (cfg->quirks) return NULL; - if (cfg->ias != 48 || cfg->oas > 40) + if (cfg->ias > 48 || cfg->oas > 40) return NULL; cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G); @@ -1033,6 +1033,11 @@ arm_mali_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) if (!data) return NULL; + /* Mali seems to need a full 4-level table regardless of IAS */ + if (data->levels < ARM_LPAE_MAX_LEVELS) { + data->levels = ARM_LPAE_MAX_LEVELS; + data->pgd_size = sizeof(arm_lpae_iopte); + } /* * MEMATTR: Mali has no actual notion of a non-cacheable type, so the * best we can do is mimic the out-of-tree driver and hope that the From f64db548799e0330897c3203680c2ee795ade518 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sun, 29 Sep 2019 17:58:48 +0800 Subject: [PATCH 027/572] regulator: ti-abb: Fix timeout in ti_abb_wait_txdone/ti_abb_clear_all_txdone ti_abb_wait_txdone() may return -ETIMEDOUT when ti_abb_check_txdone() returns true in the latest iteration of the while loop because the timeout value is abb->settling_time + 1. Similarly, ti_abb_clear_all_txdone() may return -ETIMEDOUT when ti_abb_check_txdone() returns false in the latest iteration of the while loop. Fix it. Signed-off-by: Axel Lin Acked-by: Nishanth Menon Link: https://lore.kernel.org/r/20190929095848.21960-1-axel.lin@ingics.com Signed-off-by: Mark Brown --- drivers/regulator/ti-abb-regulator.c | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/drivers/regulator/ti-abb-regulator.c b/drivers/regulator/ti-abb-regulator.c index cced1ffb896c..89b9314d64c9 100644 --- a/drivers/regulator/ti-abb-regulator.c +++ b/drivers/regulator/ti-abb-regulator.c @@ -173,19 +173,14 @@ static int ti_abb_wait_txdone(struct device *dev, struct ti_abb *abb) while (timeout++ <= abb->settling_time) { status = ti_abb_check_txdone(abb); if (status) - break; + return 0; udelay(1); } - if (timeout > abb->settling_time) { - dev_warn_ratelimited(dev, - "%s:TRANXDONE timeout(%duS) int=0x%08x\n", - __func__, timeout, readl(abb->int_base)); - return -ETIMEDOUT; - } - - return 0; + dev_warn_ratelimited(dev, "%s:TRANXDONE timeout(%duS) int=0x%08x\n", + __func__, timeout, readl(abb->int_base)); + return -ETIMEDOUT; } /** @@ -205,19 +200,14 @@ static int ti_abb_clear_all_txdone(struct device *dev, const struct ti_abb *abb) status = ti_abb_check_txdone(abb); if (!status) - break; + return 0; udelay(1); } - if (timeout > abb->settling_time) { - dev_warn_ratelimited(dev, - "%s:TRANXDONE timeout(%duS) int=0x%08x\n", - __func__, timeout, readl(abb->int_base)); - return -ETIMEDOUT; - } - - return 0; + dev_warn_ratelimited(dev, "%s:TRANXDONE timeout(%duS) int=0x%08x\n", + __func__, timeout, readl(abb->int_base)); + return -ETIMEDOUT; } /** From 4bb41984bf2f4cb8ed6ec1579d317790bd941788 Mon Sep 17 00:00:00 2001 From: Sathyanarayana Nujella Date: Sat, 28 Sep 2019 13:22:30 -0700 Subject: [PATCH 028/572] ASoC: max98373: check for device node before parsing Below Oops is caused in a system which uses ACPI instead of device node: of_get_named_gpiod_flags: can't parse 'maxim,reset-gpio' property of node '(null)[0]' BUG: kernel NULL pointer dereference, address: 0000000000000010 This patch avoids NULL pointer deferencing by adding a check before parsing and initializes to make reset-gpio pin as invalid. Signed-off-by: Sathyanarayana Nujella Signed-off-by: Jairaj Arava Link: https://lore.kernel.org/r/1569702150-11976-1-git-send-email-sathyanarayana.nujella@intel.com Signed-off-by: Mark Brown --- sound/soc/codecs/max98373.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/sound/soc/codecs/max98373.c b/sound/soc/codecs/max98373.c index e609abcf3220..eb709d528259 100644 --- a/sound/soc/codecs/max98373.c +++ b/sound/soc/codecs/max98373.c @@ -901,16 +901,20 @@ static void max98373_slot_config(struct i2c_client *i2c, max98373->i_slot = value & 0xF; else max98373->i_slot = 1; - - max98373->reset_gpio = of_get_named_gpio(dev->of_node, + if (dev->of_node) { + max98373->reset_gpio = of_get_named_gpio(dev->of_node, "maxim,reset-gpio", 0); - if (!gpio_is_valid(max98373->reset_gpio)) { - dev_err(dev, "Looking up %s property in node %s failed %d\n", - "maxim,reset-gpio", dev->of_node->full_name, - max98373->reset_gpio); + if (!gpio_is_valid(max98373->reset_gpio)) { + dev_err(dev, "Looking up %s property in node %s failed %d\n", + "maxim,reset-gpio", dev->of_node->full_name, + max98373->reset_gpio); + } else { + dev_dbg(dev, "maxim,reset-gpio=%d", + max98373->reset_gpio); + } } else { - dev_dbg(dev, "maxim,reset-gpio=%d", - max98373->reset_gpio); + /* this makes reset_gpio as invalid */ + max98373->reset_gpio = -1; } if (!device_property_read_u32(dev, "maxim,spkfb-slot-no", &value)) From 57ff2df1b952c7934d7b0e1d3a2ec403ec76edec Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 16 Sep 2019 17:47:51 +0300 Subject: [PATCH 029/572] pinctrl: intel: Allocate IRQ chip dynamic Keeping the IRQ chip definition static shares it with multiple instances of the GPIO chip in the system. This is bad and now we get this warning from GPIO library: "detected irqchip that is shared with multiple gpiochips: please fix the driver." Hence, move the IRQ chip definition from being driver static into the struct intel_pinctrl. So a unique IRQ chip is used for each GPIO chip instance. Fixes: ee1a6ca43dba ("pinctrl: intel: Add Intel Broxton pin controller support") Depends-on: 5ff56b015e85 ("pinctrl: intel: Disable GPIO pin interrupts in suspend") Reported-by: Federico Ricchiuto Suggested-by: Mika Westerberg Signed-off-by: Andy Shevchenko Signed-off-by: Mika Westerberg --- drivers/pinctrl/intel/pinctrl-intel.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index 1f13bcd0e4e1..bc013599a9a3 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -96,6 +96,7 @@ struct intel_pinctrl_context { * @pctldesc: Pin controller description * @pctldev: Pointer to the pin controller device * @chip: GPIO chip in this pin controller + * @irqchip: IRQ chip in this pin controller * @soc: SoC/PCH specific pin configuration data * @communities: All communities in this pin controller * @ncommunities: Number of communities in this pin controller @@ -108,6 +109,7 @@ struct intel_pinctrl { struct pinctrl_desc pctldesc; struct pinctrl_dev *pctldev; struct gpio_chip chip; + struct irq_chip irqchip; const struct intel_pinctrl_soc_data *soc; struct intel_community *communities; size_t ncommunities; @@ -1139,16 +1141,6 @@ static irqreturn_t intel_gpio_irq(int irq, void *data) return ret; } -static struct irq_chip intel_gpio_irqchip = { - .name = "intel-gpio", - .irq_ack = intel_gpio_irq_ack, - .irq_mask = intel_gpio_irq_mask, - .irq_unmask = intel_gpio_irq_unmask, - .irq_set_type = intel_gpio_irq_type, - .irq_set_wake = intel_gpio_irq_wake, - .flags = IRQCHIP_MASK_ON_SUSPEND, -}; - static int intel_gpio_add_pin_ranges(struct intel_pinctrl *pctrl, const struct intel_community *community) { @@ -1198,12 +1190,22 @@ static int intel_gpio_probe(struct intel_pinctrl *pctrl, int irq) pctrl->chip = intel_gpio_chip; + /* Setup GPIO chip */ pctrl->chip.ngpio = intel_gpio_ngpio(pctrl); pctrl->chip.label = dev_name(pctrl->dev); pctrl->chip.parent = pctrl->dev; pctrl->chip.base = -1; pctrl->irq = irq; + /* Setup IRQ chip */ + pctrl->irqchip.name = dev_name(pctrl->dev); + pctrl->irqchip.irq_ack = intel_gpio_irq_ack; + pctrl->irqchip.irq_mask = intel_gpio_irq_mask; + pctrl->irqchip.irq_unmask = intel_gpio_irq_unmask; + pctrl->irqchip.irq_set_type = intel_gpio_irq_type; + pctrl->irqchip.irq_set_wake = intel_gpio_irq_wake; + pctrl->irqchip.flags = IRQCHIP_MASK_ON_SUSPEND; + ret = devm_gpiochip_add_data(pctrl->dev, &pctrl->chip, pctrl); if (ret) { dev_err(pctrl->dev, "failed to register gpiochip\n"); @@ -1233,15 +1235,14 @@ static int intel_gpio_probe(struct intel_pinctrl *pctrl, int irq) return ret; } - ret = gpiochip_irqchip_add(&pctrl->chip, &intel_gpio_irqchip, 0, + ret = gpiochip_irqchip_add(&pctrl->chip, &pctrl->irqchip, 0, handle_bad_irq, IRQ_TYPE_NONE); if (ret) { dev_err(pctrl->dev, "failed to add irqchip\n"); return ret; } - gpiochip_set_chained_irqchip(&pctrl->chip, &intel_gpio_irqchip, irq, - NULL); + gpiochip_set_chained_irqchip(&pctrl->chip, &pctrl->irqchip, irq, NULL); return 0; } From 260996c30f4f3a732f45045e3e0efe27017615e4 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 23 Sep 2019 19:49:58 -0700 Subject: [PATCH 030/572] pinctrl: cherryview: restore Strago DMI workaround for all versions This is essentially a revert of: e3f72b749da2 pinctrl: cherryview: fix Strago DMI workaround 86c5dd6860a6 pinctrl: cherryview: limit Strago DMI workarounds to version 1.0 because even with 1.1 versions of BIOS there are some pins that are configured as interrupts but not claimed by any driver, and they sometimes fire up and result in interrupt storms that cause touchpad stop functioning and other issues. Given that we are unlikely to qualify another firmware version for a while it is better to keep the workaround active on all Strago boards. Reported-by: Alex Levin Fixes: 86c5dd6860a6 ("pinctrl: cherryview: limit Strago DMI workarounds to version 1.0") Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Reviewed-by: Andy Shevchenko Tested-by: Alex Levin Signed-off-by: Mika Westerberg --- drivers/pinctrl/intel/pinctrl-cherryview.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index aae51c507f59..c6251eac8946 100644 --- a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -1513,7 +1513,6 @@ static const struct dmi_system_id chv_no_valid_mask[] = { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"), DMI_MATCH(DMI_PRODUCT_FAMILY, "Intel_Strago"), - DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"), }, }, { @@ -1521,7 +1520,6 @@ static const struct dmi_system_id chv_no_valid_mask[] = { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "HP"), DMI_MATCH(DMI_PRODUCT_NAME, "Setzer"), - DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"), }, }, { @@ -1529,7 +1527,6 @@ static const struct dmi_system_id chv_no_valid_mask[] = { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"), DMI_MATCH(DMI_PRODUCT_NAME, "Cyan"), - DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"), }, }, { @@ -1537,7 +1534,6 @@ static const struct dmi_system_id chv_no_valid_mask[] = { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"), DMI_MATCH(DMI_PRODUCT_NAME, "Celes"), - DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"), }, }, {} From 9daf4fd0302b2559223cf90dae7dc510c6679047 Mon Sep 17 00:00:00 2001 From: Li Xu Date: Tue, 1 Oct 2019 14:09:11 +0100 Subject: [PATCH 031/572] ASoC: wm_adsp: Fix theoretical NULL pointer for alg_region Fix potential NULL pointer dereference for alg_region in wm_adsp_buffer_parse_legacy. In practice this can never happen as loading the firmware should have failed at the wm_adsp2_setup_algs stage, however probably better for the code to be robust against future changes and this is more helpful for static analysis. Signed-off-by: Li Xu Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20191001130911.19238-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/wm_adsp.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index ae28d9907c30..85396d920e0a 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -3697,11 +3697,16 @@ static int wm_adsp_buffer_parse_legacy(struct wm_adsp *dsp) u32 xmalg, addr, magic; int i, ret; + alg_region = wm_adsp_find_alg_region(dsp, WMFW_ADSP2_XM, dsp->fw_id); + if (!alg_region) { + adsp_err(dsp, "No algorithm region found\n"); + return -EINVAL; + } + buf = wm_adsp_buffer_alloc(dsp); if (!buf) return -ENOMEM; - alg_region = wm_adsp_find_alg_region(dsp, WMFW_ADSP2_XM, dsp->fw_id); xmalg = dsp->ops->sys_config_size / sizeof(__be32); addr = alg_region->base + xmalg + ALG_XM_FIELD(magic); From f75841aa3b4bf02fcc7941af2d3e00ff74a93bdb Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 1 Oct 2019 14:20:17 +0100 Subject: [PATCH 032/572] regulator: lochnagar: Add on_off_delay for VDDCORE The VDDCORE regulator takes a good length of time to discharge down, so add an on_off_delay to ensure DCVDD is removed before it is powered on again. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20191001132017.1785-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- drivers/regulator/lochnagar-regulator.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/regulator/lochnagar-regulator.c b/drivers/regulator/lochnagar-regulator.c index ff97cc50f2eb..9b05e03ba830 100644 --- a/drivers/regulator/lochnagar-regulator.c +++ b/drivers/regulator/lochnagar-regulator.c @@ -210,6 +210,7 @@ static const struct regulator_desc lochnagar_regulators[] = { .enable_time = 3000, .ramp_delay = 1000, + .off_on_delay = 15000, .owner = THIS_MODULE, }, From 798614885a0e1b867ceb0197c30c2d82575c73b0 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 27 Sep 2019 15:05:26 -0500 Subject: [PATCH 033/572] ASoC: SOF: loader: fix kernel oops on firmware boot failure When we fail to boot the firmware, we encounter a kernel oops in hda_dsp_get_registers(), which is called conditionally in hda_dsp_dump() when the sdev_>boot_complete flag is set. Setting this flag _after_ dumping the data fixes the issue and does not change the programming flow. Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20190927200538.660-2-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/loader.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/sof/loader.c b/sound/soc/sof/loader.c index d7f32745fefe..9a9a381a908d 100644 --- a/sound/soc/sof/loader.c +++ b/sound/soc/sof/loader.c @@ -546,10 +546,10 @@ int snd_sof_run_firmware(struct snd_sof_dev *sdev) msecs_to_jiffies(sdev->boot_timeout)); if (ret == 0) { dev_err(sdev->dev, "error: firmware boot failure\n"); - /* after this point FW_READY msg should be ignored */ - sdev->boot_complete = true; snd_sof_dsp_dbg_dump(sdev, SOF_DBG_REGS | SOF_DBG_MBOX | SOF_DBG_TEXT | SOF_DBG_PCI); + /* after this point FW_READY msg should be ignored */ + sdev->boot_complete = true; return -EIO; } From 2e305a074061121220a2828f97a57d315cf8efba Mon Sep 17 00:00:00 2001 From: Keyon Jie Date: Fri, 27 Sep 2019 15:05:27 -0500 Subject: [PATCH 034/572] ASoC: SOF: topology: fix parse fail issue for byte/bool tuple types We are using sof_parse_word_tokens() to parse tokens with bool/byte/short/word tuple types, here add the missing check, to fix the parsing failure at byte/bool tuple types. Signed-off-by: Keyon Jie Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20190927200538.660-3-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/topology.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/soc/sof/topology.c b/sound/soc/sof/topology.c index fc85efbad378..0aabb3190ddc 100644 --- a/sound/soc/sof/topology.c +++ b/sound/soc/sof/topology.c @@ -920,7 +920,9 @@ static void sof_parse_word_tokens(struct snd_soc_component *scomp, for (j = 0; j < count; j++) { /* match token type */ if (!(tokens[j].type == SND_SOC_TPLG_TUPLE_TYPE_WORD || - tokens[j].type == SND_SOC_TPLG_TUPLE_TYPE_SHORT)) + tokens[j].type == SND_SOC_TPLG_TUPLE_TYPE_SHORT || + tokens[j].type == SND_SOC_TPLG_TUPLE_TYPE_BYTE || + tokens[j].type == SND_SOC_TPLG_TUPLE_TYPE_BOOL)) continue; /* match token id */ From e66e52c5b7422824cedf0084c0766602dea7e8a7 Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Fri, 27 Sep 2019 15:05:30 -0500 Subject: [PATCH 035/572] ASoC: SOF: pcm: fix resource leak in hw_free Fix a bug in sof_pcm_hw_free() where some cleanup actions were skipped if STREAM_PCM_FREE IPC was already successfully sent to DSP when the stream was stopped or suspended. This is incorrect as hw_free should clean up also other resources, including pcm lib page allocations, period elapsed work queue and call to platform hw_free. Fixes: c29d96c3b9b4 ("ASoC: SOF: reset DMA state in prepare") Signed-off-by: Kai Vehmanen Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20190927200538.660-6-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/pcm.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/sound/soc/sof/pcm.c b/sound/soc/sof/pcm.c index e3f6a6dc0f36..fa7769dd825c 100644 --- a/sound/soc/sof/pcm.c +++ b/sound/soc/sof/pcm.c @@ -244,7 +244,7 @@ static int sof_pcm_hw_free(struct snd_pcm_substream *substream) snd_soc_rtdcom_lookup(rtd, DRV_NAME); struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(component); struct snd_sof_pcm *spcm; - int ret; + int ret, err = 0; /* nothing to do for BE */ if (rtd->dai_link->no_pcm) @@ -254,26 +254,26 @@ static int sof_pcm_hw_free(struct snd_pcm_substream *substream) if (!spcm) return -EINVAL; - if (!spcm->prepared[substream->stream]) - return 0; - dev_dbg(sdev->dev, "pcm: free stream %d dir %d\n", spcm->pcm.pcm_id, substream->stream); - ret = sof_pcm_dsp_pcm_free(substream, sdev, spcm); + if (spcm->prepared[substream->stream]) { + ret = sof_pcm_dsp_pcm_free(substream, sdev, spcm); + if (ret < 0) + err = ret; + } snd_pcm_lib_free_pages(substream); cancel_work_sync(&spcm->stream[substream->stream].period_elapsed_work); - if (ret < 0) - return ret; - ret = snd_sof_pcm_platform_hw_free(sdev, substream); - if (ret < 0) + if (ret < 0) { dev_err(sdev->dev, "error: platform hw free failed\n"); + err = ret; + } - return ret; + return err; } static int sof_pcm_prepare(struct snd_pcm_substream *substream) From 0a1b08345bc5d9214dc701f8ec5d67c473fab735 Mon Sep 17 00:00:00 2001 From: Pan Xiuli Date: Fri, 27 Sep 2019 15:05:31 -0500 Subject: [PATCH 036/572] ASoC: SOF: pcm: harden PCM STOP sequence The old STOP sequence is: 1. stop DMA 2. send STOP ipc If delay happen before the steps 1 and 2, the DMA buffer will be empty in short time and cause pipeline xrun then stop the pipeline. Then the step 2 ipc stop will return error as pipeline is already stopped. Suggested change to avoid the issue is to switch the order of steps 1 and 2 for the stop sequence. Signed-off-by: Pan Xiuli Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20190927200538.660-7-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/pcm.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/sound/soc/sof/pcm.c b/sound/soc/sof/pcm.c index fa7769dd825c..2b876d497447 100644 --- a/sound/soc/sof/pcm.c +++ b/sound/soc/sof/pcm.c @@ -323,6 +323,7 @@ static int sof_pcm_trigger(struct snd_pcm_substream *substream, int cmd) struct sof_ipc_stream stream; struct sof_ipc_reply reply; bool reset_hw_params = false; + bool ipc_first = false; int ret; /* nothing to do for BE */ @@ -343,6 +344,7 @@ static int sof_pcm_trigger(struct snd_pcm_substream *substream, int cmd) switch (cmd) { case SNDRV_PCM_TRIGGER_PAUSE_PUSH: stream.hdr.cmd |= SOF_IPC_STREAM_TRIG_PAUSE; + ipc_first = true; break; case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: stream.hdr.cmd |= SOF_IPC_STREAM_TRIG_RELEASE; @@ -363,6 +365,7 @@ static int sof_pcm_trigger(struct snd_pcm_substream *substream, int cmd) case SNDRV_PCM_TRIGGER_SUSPEND: case SNDRV_PCM_TRIGGER_STOP: stream.hdr.cmd |= SOF_IPC_STREAM_TRIG_STOP; + ipc_first = true; reset_hw_params = true; break; default: @@ -370,12 +373,22 @@ static int sof_pcm_trigger(struct snd_pcm_substream *substream, int cmd) return -EINVAL; } - snd_sof_pcm_platform_trigger(sdev, substream, cmd); + /* + * DMA and IPC sequence is different for start and stop. Need to send + * STOP IPC before stop DMA + */ + if (!ipc_first) + snd_sof_pcm_platform_trigger(sdev, substream, cmd); /* send IPC to the DSP */ ret = sof_ipc_tx_message(sdev->ipc, stream.hdr.cmd, &stream, sizeof(stream), &reply, sizeof(reply)); + /* need to STOP DMA even if STOP IPC failed */ + if (ipc_first) + snd_sof_pcm_platform_trigger(sdev, substream, cmd); + + /* free PCM if reset_hw_params is set and the STOP IPC is successful */ if (!ret && reset_hw_params) ret = sof_pcm_dsp_pcm_free(substream, sdev, spcm); From 4ff5f6439fe69624e8f7d559915e9b54a6477684 Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Fri, 27 Sep 2019 15:05:35 -0500 Subject: [PATCH 037/572] ASoC: SOF: Intel: hda: fix warnings during FW load The "snd_pcm_substream" handle was not initialized properly in hda-loader.c for firmware load. When the HDA DMAs were used to load the firmware, the interrupts related to firmware load also triggered calls to snd_sof_pcm_period_elapsed() on a non-existent ALSA PCM stream. This caused runtime kernel warnings from pcm_lib.c:snd_pcm_period_elapsed(). Signed-off-by: Kai Vehmanen Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20190927200538.660-11-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/hda-loader.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/sof/intel/hda-loader.c b/sound/soc/sof/intel/hda-loader.c index 6427f0b3a2f1..65c2af3fcaab 100644 --- a/sound/soc/sof/intel/hda-loader.c +++ b/sound/soc/sof/intel/hda-loader.c @@ -44,6 +44,7 @@ static int cl_stream_prepare(struct snd_sof_dev *sdev, unsigned int format, return -ENODEV; } hstream = &dsp_stream->hstream; + hstream->substream = NULL; /* allocate DMA buffer */ ret = snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV_SG, &pci->dev, size, dmab); From ff2be865633e6fa523cd2db3b73197d795dec991 Mon Sep 17 00:00:00 2001 From: Liam Girdwood Date: Fri, 27 Sep 2019 15:05:36 -0500 Subject: [PATCH 038/572] ASoC: SOF: Intel: initialise and verify FW crash dump data. FW mailbox offset was not set before use and HDR size was not validated. Fix this. Signed-off-by: Liam Girdwood Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20190927200538.660-12-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/bdw.c | 7 +++++++ sound/soc/sof/intel/byt.c | 6 ++++++ sound/soc/sof/intel/hda.c | 7 +++++++ 3 files changed, 20 insertions(+) diff --git a/sound/soc/sof/intel/bdw.c b/sound/soc/sof/intel/bdw.c index e282179263e8..80e2826fb447 100644 --- a/sound/soc/sof/intel/bdw.c +++ b/sound/soc/sof/intel/bdw.c @@ -37,6 +37,7 @@ #define MBOX_SIZE 0x1000 #define MBOX_DUMP_SIZE 0x30 #define EXCEPT_OFFSET 0x800 +#define EXCEPT_MAX_HDR_SIZE 0x400 /* DSP peripherals */ #define DMAC0_OFFSET 0xFE000 @@ -228,6 +229,11 @@ static void bdw_get_registers(struct snd_sof_dev *sdev, /* note: variable AR register array is not read */ /* then get panic info */ + if (xoops->arch_hdr.totalsize > EXCEPT_MAX_HDR_SIZE) { + dev_err(sdev->dev, "invalid header size 0x%x. FW oops is bogus\n", + xoops->arch_hdr.totalsize); + return; + } offset += xoops->arch_hdr.totalsize; sof_mailbox_read(sdev, offset, panic_info, sizeof(*panic_info)); @@ -451,6 +457,7 @@ static int bdw_probe(struct snd_sof_dev *sdev) /* TODO: add offsets */ sdev->mmio_bar = BDW_DSP_BAR; sdev->mailbox_bar = BDW_DSP_BAR; + sdev->dsp_oops_offset = MBOX_OFFSET; /* PCI base */ mmio = platform_get_resource(pdev, IORESOURCE_MEM, diff --git a/sound/soc/sof/intel/byt.c b/sound/soc/sof/intel/byt.c index 5e7a6aaa627a..a1e514f71739 100644 --- a/sound/soc/sof/intel/byt.c +++ b/sound/soc/sof/intel/byt.c @@ -28,6 +28,7 @@ #define MBOX_OFFSET 0x144000 #define MBOX_SIZE 0x1000 #define EXCEPT_OFFSET 0x800 +#define EXCEPT_MAX_HDR_SIZE 0x400 /* DSP peripherals */ #define DMAC0_OFFSET 0x098000 @@ -126,6 +127,11 @@ static void byt_get_registers(struct snd_sof_dev *sdev, /* note: variable AR register array is not read */ /* then get panic info */ + if (xoops->arch_hdr.totalsize > EXCEPT_MAX_HDR_SIZE) { + dev_err(sdev->dev, "invalid header size 0x%x. FW oops is bogus\n", + xoops->arch_hdr.totalsize); + return; + } offset += xoops->arch_hdr.totalsize; sof_mailbox_read(sdev, offset, panic_info, sizeof(*panic_info)); diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c index c72e9a09eee1..06e84679087b 100644 --- a/sound/soc/sof/intel/hda.c +++ b/sound/soc/sof/intel/hda.c @@ -35,6 +35,8 @@ #define IS_CFL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa348) #define IS_CNL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9dc8) +#define EXCEPT_MAX_HDR_SIZE 0x400 + /* * Debug */ @@ -131,6 +133,11 @@ static void hda_dsp_get_registers(struct snd_sof_dev *sdev, /* note: variable AR register array is not read */ /* then get panic info */ + if (xoops->arch_hdr.totalsize > EXCEPT_MAX_HDR_SIZE) { + dev_err(sdev->dev, "invalid header size 0x%x. FW oops is bogus\n", + xoops->arch_hdr.totalsize); + return; + } offset += xoops->arch_hdr.totalsize; sof_block_read(sdev, sdev->mmio_bar, offset, panic_info, sizeof(*panic_info)); From 43b2ab9009b13bfff47fcc1893de9244b39bdd54 Mon Sep 17 00:00:00 2001 From: Ranjani Sridharan Date: Fri, 27 Sep 2019 15:05:38 -0500 Subject: [PATCH 039/572] ASoC: SOF: Intel: hda: Disable DMI L1 entry during capture There is a known issue on some Intel platforms which causes pause/release to run into xrun's during capture usecases. The suggested workaround to address the issue is to disable the entry of lower power L1 state in the physical DMI link when there is a capture stream open. Signed-off-by: Ranjani Sridharan Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20190927200538.660-14-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/Kconfig | 10 +++++++ sound/soc/sof/intel/hda-ctrl.c | 12 +++------ sound/soc/sof/intel/hda-stream.c | 45 +++++++++++++++++++++++++++----- sound/soc/sof/intel/hda.h | 5 +++- 4 files changed, 56 insertions(+), 16 deletions(-) diff --git a/sound/soc/sof/intel/Kconfig b/sound/soc/sof/intel/Kconfig index 479ba249e219..d62f51d33be1 100644 --- a/sound/soc/sof/intel/Kconfig +++ b/sound/soc/sof/intel/Kconfig @@ -273,6 +273,16 @@ config SND_SOC_SOF_HDA_AUDIO_CODEC Say Y if you want to enable HDAudio codecs with SOF. If unsure select "N". +config SND_SOC_SOF_HDA_ALWAYS_ENABLE_DMI_L1 + bool "SOF enable DMI Link L1" + help + This option enables DMI L1 for both playback and capture + and disables known workarounds for specific HDaudio platforms. + Only use to look into power optimizations on platforms not + affected by DMI L1 issues. This option is not recommended. + Say Y if you want to enable DMI Link L1 + If unsure, select "N". + endif ## SND_SOC_SOF_HDA_COMMON config SND_SOC_SOF_HDA_LINK_BASELINE diff --git a/sound/soc/sof/intel/hda-ctrl.c b/sound/soc/sof/intel/hda-ctrl.c index bc41028a7a01..df1909e1d950 100644 --- a/sound/soc/sof/intel/hda-ctrl.c +++ b/sound/soc/sof/intel/hda-ctrl.c @@ -139,20 +139,16 @@ void hda_dsp_ctrl_misc_clock_gating(struct snd_sof_dev *sdev, bool enable) */ int hda_dsp_ctrl_clock_power_gating(struct snd_sof_dev *sdev, bool enable) { -#if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA) - struct hdac_bus *bus = sof_to_bus(sdev); -#endif u32 val; /* enable/disable audio dsp clock gating */ val = enable ? PCI_CGCTL_ADSPDCGE : 0; snd_sof_pci_update_bits(sdev, PCI_CGCTL, PCI_CGCTL_ADSPDCGE, val); -#if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA) - /* enable/disable L1 support */ - val = enable ? SOF_HDA_VS_EM2_L1SEN : 0; - snd_hdac_chip_updatel(bus, VS_EM2, SOF_HDA_VS_EM2_L1SEN, val); -#endif + /* enable/disable DMI Link L1 support */ + val = enable ? HDA_VS_INTEL_EM2_L1SEN : 0; + snd_sof_dsp_update_bits(sdev, HDA_DSP_HDA_BAR, HDA_VS_INTEL_EM2, + HDA_VS_INTEL_EM2_L1SEN, val); /* enable/disable audio dsp power gating */ val = enable ? 0 : PCI_PGCTL_ADSPPGD; diff --git a/sound/soc/sof/intel/hda-stream.c b/sound/soc/sof/intel/hda-stream.c index ad8d41f22e92..2c7447188402 100644 --- a/sound/soc/sof/intel/hda-stream.c +++ b/sound/soc/sof/intel/hda-stream.c @@ -185,6 +185,17 @@ hda_dsp_stream_get(struct snd_sof_dev *sdev, int direction) direction == SNDRV_PCM_STREAM_PLAYBACK ? "playback" : "capture"); + /* + * Disable DMI Link L1 entry when capture stream is opened. + * Workaround to address a known issue with host DMA that results + * in xruns during pause/release in capture scenarios. + */ + if (!IS_ENABLED(SND_SOC_SOF_HDA_ALWAYS_ENABLE_DMI_L1)) + if (stream && direction == SNDRV_PCM_STREAM_CAPTURE) + snd_sof_dsp_update_bits(sdev, HDA_DSP_HDA_BAR, + HDA_VS_INTEL_EM2, + HDA_VS_INTEL_EM2_L1SEN, 0); + return stream; } @@ -193,23 +204,43 @@ int hda_dsp_stream_put(struct snd_sof_dev *sdev, int direction, int stream_tag) { struct hdac_bus *bus = sof_to_bus(sdev); struct hdac_stream *s; + bool active_capture_stream = false; + bool found = false; spin_lock_irq(&bus->reg_lock); - /* find used stream */ + /* + * close stream matching the stream tag + * and check if there are any open capture streams. + */ list_for_each_entry(s, &bus->stream_list, list) { - if (s->direction == direction && - s->opened && s->stream_tag == stream_tag) { + if (!s->opened) + continue; + + if (s->direction == direction && s->stream_tag == stream_tag) { s->opened = false; - spin_unlock_irq(&bus->reg_lock); - return 0; + found = true; + } else if (s->direction == SNDRV_PCM_STREAM_CAPTURE) { + active_capture_stream = true; } } spin_unlock_irq(&bus->reg_lock); - dev_dbg(sdev->dev, "stream_tag %d not opened!\n", stream_tag); - return -ENODEV; + /* Enable DMI L1 entry if there are no capture streams open */ + if (!IS_ENABLED(SND_SOC_SOF_HDA_ALWAYS_ENABLE_DMI_L1)) + if (!active_capture_stream) + snd_sof_dsp_update_bits(sdev, HDA_DSP_HDA_BAR, + HDA_VS_INTEL_EM2, + HDA_VS_INTEL_EM2_L1SEN, + HDA_VS_INTEL_EM2_L1SEN); + + if (!found) { + dev_dbg(sdev->dev, "stream_tag %d not opened!\n", stream_tag); + return -ENODEV; + } + + return 0; } int hda_dsp_stream_trigger(struct snd_sof_dev *sdev, diff --git a/sound/soc/sof/intel/hda.h b/sound/soc/sof/intel/hda.h index 5591841a1b6f..23e430d3e056 100644 --- a/sound/soc/sof/intel/hda.h +++ b/sound/soc/sof/intel/hda.h @@ -39,7 +39,6 @@ #define SOF_HDA_WAKESTS 0x0E #define SOF_HDA_WAKESTS_INT_MASK ((1 << 8) - 1) #define SOF_HDA_RIRBSTS 0x5d -#define SOF_HDA_VS_EM2_L1SEN BIT(13) /* SOF_HDA_GCTL register bist */ #define SOF_HDA_GCTL_RESET BIT(0) @@ -228,6 +227,10 @@ #define HDA_DSP_REG_HIPCIE (HDA_DSP_IPC_BASE + 0x0C) #define HDA_DSP_REG_HIPCCTL (HDA_DSP_IPC_BASE + 0x10) +/* Intel Vendor Specific Registers */ +#define HDA_VS_INTEL_EM2 0x1030 +#define HDA_VS_INTEL_EM2_L1SEN BIT(13) + /* HIPCI */ #define HDA_DSP_REG_HIPCI_BUSY BIT(31) #define HDA_DSP_REG_HIPCI_MSG_MASK 0x7FFFFFFF From 4413adc4fd872579de87bedaecda633f999ef995 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Fri, 27 Sep 2019 15:14:06 -0500 Subject: [PATCH 040/572] ASoC: intel: sof_rt5682: use separate route map for dmic dmic map can only be added when dmic dai link is present. Signed-off-by: Bard Liao Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20190927201408.925-3-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/sof_rt5682.c | 35 +++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/sound/soc/intel/boards/sof_rt5682.c b/sound/soc/intel/boards/sof_rt5682.c index a437567b8cee..57b4ef75be15 100644 --- a/sound/soc/intel/boards/sof_rt5682.c +++ b/sound/soc/intel/boards/sof_rt5682.c @@ -308,6 +308,9 @@ static const struct snd_soc_dapm_widget sof_widgets[] = { SND_SOC_DAPM_HP("Headphone Jack", NULL), SND_SOC_DAPM_MIC("Headset Mic", NULL), SND_SOC_DAPM_SPK("Spk", NULL), +}; + +static const struct snd_soc_dapm_widget dmic_widgets[] = { SND_SOC_DAPM_MIC("SoC DMIC", NULL), }; @@ -318,10 +321,6 @@ static const struct snd_soc_dapm_route sof_map[] = { /* other jacks */ { "IN1P", NULL, "Headset Mic" }, - - /* digital mics */ - {"DMic", NULL, "SoC DMIC"}, - }; static const struct snd_soc_dapm_route speaker_map[] = { @@ -329,6 +328,11 @@ static const struct snd_soc_dapm_route speaker_map[] = { { "Spk", NULL, "Speaker" }, }; +static const struct snd_soc_dapm_route dmic_map[] = { + /* digital mics */ + {"DMic", NULL, "SoC DMIC"}, +}; + static int speaker_codec_init(struct snd_soc_pcm_runtime *rtd) { struct snd_soc_card *card = rtd->card; @@ -342,6 +346,28 @@ static int speaker_codec_init(struct snd_soc_pcm_runtime *rtd) return ret; } +static int dmic_init(struct snd_soc_pcm_runtime *rtd) +{ + struct snd_soc_card *card = rtd->card; + int ret; + + ret = snd_soc_dapm_new_controls(&card->dapm, dmic_widgets, + ARRAY_SIZE(dmic_widgets)); + if (ret) { + dev_err(card->dev, "DMic widget addition failed: %d\n", ret); + /* Don't need to add routes if widget addition failed */ + return ret; + } + + ret = snd_soc_dapm_add_routes(&card->dapm, dmic_map, + ARRAY_SIZE(dmic_map)); + + if (ret) + dev_err(card->dev, "DMic map addition failed: %d\n", ret); + + return ret; +} + /* sof audio machine driver for rt5682 codec */ static struct snd_soc_card sof_audio_card_rt5682 = { .name = "sof_rt5682", @@ -445,6 +471,7 @@ static struct snd_soc_dai_link *sof_card_dai_links_create(struct device *dev, links[id].name = "dmic01"; links[id].cpus = &cpus[id]; links[id].cpus->dai_name = "DMIC01 Pin"; + links[id].init = dmic_init; if (dmic_be_num > 1) { /* set up 2 BE links at most */ links[id + 1].name = "dmic16k"; From a315e76fc544f09daf619530a7b2f85865e6b25e Mon Sep 17 00:00:00 2001 From: Jaska Uimonen Date: Fri, 27 Sep 2019 15:14:07 -0500 Subject: [PATCH 041/572] ASoC: rt5682: add NULL handler to set_jack function Implement NULL handler in set_jack function to disable irq's. Signed-off-by: Jaska Uimonen Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20190927201408.925-4-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5682.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c index 1ef470700ed5..c50b75ce82e0 100644 --- a/sound/soc/codecs/rt5682.c +++ b/sound/soc/codecs/rt5682.c @@ -995,6 +995,16 @@ static int rt5682_set_jack_detect(struct snd_soc_component *component, { struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component); + rt5682->hs_jack = hs_jack; + + if (!hs_jack) { + regmap_update_bits(rt5682->regmap, RT5682_IRQ_CTRL_2, + RT5682_JD1_EN_MASK, RT5682_JD1_DIS); + regmap_update_bits(rt5682->regmap, RT5682_RC_CLK_CTRL, + RT5682_POW_JDH | RT5682_POW_JDL, 0); + return 0; + } + switch (rt5682->pdata.jd_src) { case RT5682_JD1: snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_2, @@ -1032,8 +1042,6 @@ static int rt5682_set_jack_detect(struct snd_soc_component *component, break; } - rt5682->hs_jack = hs_jack; - return 0; } From 6ba5041c23c1062d4e8287b2b76a1181538c6df1 Mon Sep 17 00:00:00 2001 From: Jaska Uimonen Date: Fri, 27 Sep 2019 15:14:08 -0500 Subject: [PATCH 042/572] ASoC: intel: sof_rt5682: add remove function to disable jack When removing sof module the rt5682 jack handler will oops if jack detection is not disabled. So add remove function, which disables the jack detection. Signed-off-by: Jaska Uimonen Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20190927201408.925-5-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/sof_rt5682.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/sound/soc/intel/boards/sof_rt5682.c b/sound/soc/intel/boards/sof_rt5682.c index 57b4ef75be15..5ce643d62faf 100644 --- a/sound/soc/intel/boards/sof_rt5682.c +++ b/sound/soc/intel/boards/sof_rt5682.c @@ -648,8 +648,24 @@ static int sof_audio_probe(struct platform_device *pdev) &sof_audio_card_rt5682); } +static int sof_rt5682_remove(struct platform_device *pdev) +{ + struct snd_soc_card *card = platform_get_drvdata(pdev); + struct snd_soc_component *component = NULL; + + for_each_card_components(card, component) { + if (!strcmp(component->name, rt5682_component[0].name)) { + snd_soc_component_set_jack(component, NULL, NULL); + break; + } + } + + return 0; +} + static struct platform_driver sof_audio = { .probe = sof_audio_probe, + .remove = sof_rt5682_remove, .driver = { .name = "sof_rt5682", .pm = &snd_soc_pm_ops, From 2bdf194e2030fce4f2e91300817338353414ab3b Mon Sep 17 00:00:00 2001 From: Jaska Uimonen Date: Fri, 27 Sep 2019 15:14:05 -0500 Subject: [PATCH 043/572] ASoC: intel: bytcr_rt5651: add null check to support_button_press When removing sof module the support_button_press function will oops because hp_jack pointer is not checked for NULL. So add a check to fix this. Signed-off-by: Jaska Uimonen Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20190927201408.925-2-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5651.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/codecs/rt5651.c b/sound/soc/codecs/rt5651.c index 762595de956c..c506c9305043 100644 --- a/sound/soc/codecs/rt5651.c +++ b/sound/soc/codecs/rt5651.c @@ -1770,6 +1770,9 @@ static int rt5651_detect_headset(struct snd_soc_component *component) static bool rt5651_support_button_press(struct rt5651_priv *rt5651) { + if (!rt5651->hp_jack) + return false; + /* Button press support only works with internal jack-detection */ return (rt5651->hp_jack->status & SND_JACK_MICROPHONE) && rt5651->gpiod_hp_det == NULL; From 1252b283141f03c3dffd139292c862cae10e174d Mon Sep 17 00:00:00 2001 From: Yizhuo Date: Sun, 29 Sep 2019 10:09:57 -0700 Subject: [PATCH 044/572] regulator: pfuze100-regulator: Variable "val" in pfuze100_regulator_probe() could be uninitialized In function pfuze100_regulator_probe(), variable "val" could be initialized if regmap_read() fails. However, "val" is used to decide the control flow later in the if statement, which is potentially unsafe. Signed-off-by: Yizhuo Link: https://lore.kernel.org/r/20190929170957.14775-1-yzhai003@ucr.edu Signed-off-by: Mark Brown --- drivers/regulator/pfuze100-regulator.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/regulator/pfuze100-regulator.c b/drivers/regulator/pfuze100-regulator.c index df5df1c495ad..689537927f6f 100644 --- a/drivers/regulator/pfuze100-regulator.c +++ b/drivers/regulator/pfuze100-regulator.c @@ -788,7 +788,13 @@ static int pfuze100_regulator_probe(struct i2c_client *client, /* SW2~SW4 high bit check and modify the voltage value table */ if (i >= sw_check_start && i <= sw_check_end) { - regmap_read(pfuze_chip->regmap, desc->vsel_reg, &val); + ret = regmap_read(pfuze_chip->regmap, + desc->vsel_reg, &val); + if (ret) { + dev_err(&client->dev, "Fails to read from the register.\n"); + return ret; + } + if (val & sw_hi) { if (pfuze_chip->chip_id == PFUZE3000 || pfuze_chip->chip_id == PFUZE3001) { From 86c1aea84b97120a6d428ce17a2ebd55be677f56 Mon Sep 17 00:00:00 2001 From: Brian Vazquez Date: Tue, 1 Oct 2019 10:37:27 -0700 Subject: [PATCH 045/572] selftests/bpf: test_progs: Don't leak server_fd in tcp_rtt server_fd needs to be closed if pthread can't be created. Fixes: 8a03222f508b ("selftests/bpf: test_progs: fix client/server race in tcp_rtt") Signed-off-by: Brian Vazquez Signed-off-by: Daniel Borkmann Reviewed-by: Stanislav Fomichev Link: https://lore.kernel.org/bpf/20191001173728.149786-2-brianvv@google.com --- tools/testing/selftests/bpf/prog_tests/tcp_rtt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c index a82da555b1b0..f4cd60d6fba2 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c @@ -260,13 +260,14 @@ void test_tcp_rtt(void) if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread, (void *)&server_fd))) - goto close_cgroup_fd; + goto close_server_fd; pthread_mutex_lock(&server_started_mtx); pthread_cond_wait(&server_started, &server_started_mtx); pthread_mutex_unlock(&server_started_mtx); CHECK_FAIL(run_test(cgroup_fd, server_fd)); +close_server_fd: close(server_fd); close_cgroup_fd: close(cgroup_fd); From a2d074e4c6e81ec9ab359d54f0b88273c738de37 Mon Sep 17 00:00:00 2001 From: Brian Vazquez Date: Tue, 1 Oct 2019 10:37:28 -0700 Subject: [PATCH 046/572] selftests/bpf: test_progs: Don't leak server_fd in test_sockopt_inherit server_fd needs to be closed if pthread can't be created. Fixes: e3e02e1d9c24 ("selftests/bpf: test_progs: convert test_sockopt_inherit") Signed-off-by: Brian Vazquez Signed-off-by: Daniel Borkmann Reviewed-by: Stanislav Fomichev Link: https://lore.kernel.org/bpf/20191001173728.149786-3-brianvv@google.com --- tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c index 6cbeea7b4bf1..8547ecbdc61f 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c @@ -195,7 +195,7 @@ static void run_test(int cgroup_fd) if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread, (void *)&server_fd))) - goto close_bpf_object; + goto close_server_fd; pthread_mutex_lock(&server_started_mtx); pthread_cond_wait(&server_started, &server_started_mtx); From c91a9cfe9f6d136172a52ff6e01b3f83ba850c19 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 26 Sep 2019 10:54:33 +0200 Subject: [PATCH 047/572] rt2x00: initialize last_reset Initialize last_reset variable to INITIAL_JIFFIES, otherwise it is not possible to test H/W reset for first 5 minutes of system run. Fixes: e403fa31ed71 ("rt2x00: add restart hw") Reported-and-tested-by: Jonathan Liu Signed-off-by: Stanislaw Gruszka Signed-off-by: Kalle Valo --- drivers/net/wireless/ralink/rt2x00/rt2x00debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c b/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c index 4d4e3888ef20..f2395309ec00 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c @@ -555,7 +555,7 @@ static ssize_t rt2x00debug_write_restart_hw(struct file *file, { struct rt2x00debug_intf *intf = file->private_data; struct rt2x00_dev *rt2x00dev = intf->rt2x00dev; - static unsigned long last_reset; + static unsigned long last_reset = INITIAL_JIFFIES; if (!rt2x00_has_cap_restart_hw(rt2x00dev)) return -EOPNOTSUPP; From 82af5b6609673f1cc7d3453d0be3d292dfffc813 Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Tue, 1 Oct 2019 19:37:18 -0400 Subject: [PATCH 048/572] sysfs: Fixes __BIN_ATTR_WO() macro This patch fixes the size and write parameter for the macro __BIN_ATTR_WO(). Fixes: 7f905761e15a8 ("sysfs: add BIN_ATTR_WO() macro") Signed-off-by: Nayna Jain Link: https://lore.kernel.org/r/1569973038-2710-1-git-send-email-nayna@linux.ibm.com Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 5420817ed317..fa7ee503fb76 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -196,9 +196,9 @@ struct bin_attribute { .size = _size, \ } -#define __BIN_ATTR_WO(_name) { \ +#define __BIN_ATTR_WO(_name, _size) { \ .attr = { .name = __stringify(_name), .mode = 0200 }, \ - .store = _name##_store, \ + .write = _name##_write, \ .size = _size, \ } From 3ae7359c0e39f42a96284d6798fc669acff38140 Mon Sep 17 00:00:00 2001 From: Stuart Henderson Date: Wed, 2 Oct 2019 09:42:40 +0100 Subject: [PATCH 049/572] ASoC: wm_adsp: Don't generate kcontrols without READ flags User space always expects to be able to read ALSA controls, so ensure no kcontrols are generated without an appropriate READ flag. In the case of a read of such a control zeros will be returned. Signed-off-by: Stuart Henderson Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20191002084240.21589-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/wm_adsp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 85396d920e0a..9b8bb7bbe945 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -1259,8 +1259,7 @@ static unsigned int wmfw_convert_flags(unsigned int in, unsigned int len) } if (in) { - if (in & WMFW_CTL_FLAG_READABLE) - out |= rd; + out |= rd; if (in & WMFW_CTL_FLAG_WRITEABLE) out |= wr; if (in & WMFW_CTL_FLAG_VOLATILE) From afce285b859cea91c182015fc9858ea58c26cd0e Mon Sep 17 00:00:00 2001 From: Marco Felsch Date: Mon, 16 Sep 2019 12:45:48 -0700 Subject: [PATCH 050/572] Input: da9063 - fix capability and drop KEY_SLEEP Since commit f889beaaab1c ("Input: da9063 - report KEY_POWER instead of KEY_SLEEP during power key-press") KEY_SLEEP isn't supported anymore. This caused input device to not generate any events if "dlg,disable-key-power" is set. Fix this by unconditionally setting KEY_POWER capability, and not declaring KEY_SLEEP. Fixes: f889beaaab1c ("Input: da9063 - report KEY_POWER instead of KEY_SLEEP during power key-press") Signed-off-by: Marco Felsch Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/misc/da9063_onkey.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/input/misc/da9063_onkey.c b/drivers/input/misc/da9063_onkey.c index dace8577fa43..79851923ee57 100644 --- a/drivers/input/misc/da9063_onkey.c +++ b/drivers/input/misc/da9063_onkey.c @@ -232,10 +232,7 @@ static int da9063_onkey_probe(struct platform_device *pdev) onkey->input->phys = onkey->phys; onkey->input->dev.parent = &pdev->dev; - if (onkey->key_power) - input_set_capability(onkey->input, EV_KEY, KEY_POWER); - - input_set_capability(onkey->input, EV_KEY, KEY_SLEEP); + input_set_capability(onkey->input, EV_KEY, KEY_POWER); INIT_DELAYED_WORK(&onkey->work, da9063_poll_on); From bd3b8480237680b5967aee3c814b92b2fd87a582 Mon Sep 17 00:00:00 2001 From: Yauhen Kharuzhy Date: Mon, 16 Sep 2019 16:32:24 -0700 Subject: [PATCH 051/572] Input: goodix - add support for 9-bytes reports Some variants of Goodix touchscreen firmwares use 9-bytes finger report format instead of common 8-bytes format. This report format may be present as: struct goodix_contact_data { uint8_t unknown1; uint8_t track_id; uint8_t unknown2; uint16_t x; uint16_t y; uint16_t w; }__attribute__((packed)); Add support for such format and use it for Lenovo Yoga Book notebook (which uses a Goodix touchpad as a touch keyboard). Signed-off-by: Yauhen Kharuzhy Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/goodix.c | 58 +++++++++++++++++++++++++----- 1 file changed, 50 insertions(+), 8 deletions(-) diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index 5178ea8b5f30..fb43aa708660 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -53,6 +53,7 @@ struct goodix_ts_data { const char *cfg_name; struct completion firmware_loading_complete; unsigned long irq_flags; + unsigned int contact_size; }; #define GOODIX_GPIO_INT_NAME "irq" @@ -62,6 +63,7 @@ struct goodix_ts_data { #define GOODIX_MAX_WIDTH 4096 #define GOODIX_INT_TRIGGER 1 #define GOODIX_CONTACT_SIZE 8 +#define GOODIX_MAX_CONTACT_SIZE 9 #define GOODIX_MAX_CONTACTS 10 #define GOODIX_CONFIG_MAX_LENGTH 240 @@ -144,6 +146,19 @@ static const struct dmi_system_id rotated_screen[] = { {} }; +static const struct dmi_system_id nine_bytes_report[] = { +#if defined(CONFIG_DMI) && defined(CONFIG_X86) + { + .ident = "Lenovo YogaBook", + /* YB1-X91L/F and YB1-X90L/F */ + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X9") + } + }, +#endif + {} +}; + /** * goodix_i2c_read - read data from a register of the i2c slave device. * @@ -249,7 +264,7 @@ static int goodix_ts_read_input_report(struct goodix_ts_data *ts, u8 *data) max_timeout = jiffies + msecs_to_jiffies(GOODIX_BUFFER_STATUS_TIMEOUT); do { error = goodix_i2c_read(ts->client, GOODIX_READ_COOR_ADDR, - data, GOODIX_CONTACT_SIZE + 1); + data, ts->contact_size + 1); if (error) { dev_err(&ts->client->dev, "I2C transfer error: %d\n", error); @@ -262,12 +277,12 @@ static int goodix_ts_read_input_report(struct goodix_ts_data *ts, u8 *data) return -EPROTO; if (touch_num > 1) { - data += 1 + GOODIX_CONTACT_SIZE; + data += 1 + ts->contact_size; error = goodix_i2c_read(ts->client, GOODIX_READ_COOR_ADDR + - 1 + GOODIX_CONTACT_SIZE, + 1 + ts->contact_size, data, - GOODIX_CONTACT_SIZE * + ts->contact_size * (touch_num - 1)); if (error) return error; @@ -286,7 +301,7 @@ static int goodix_ts_read_input_report(struct goodix_ts_data *ts, u8 *data) return 0; } -static void goodix_ts_report_touch(struct goodix_ts_data *ts, u8 *coor_data) +static void goodix_ts_report_touch_8b(struct goodix_ts_data *ts, u8 *coor_data) { int id = coor_data[0] & 0x0F; int input_x = get_unaligned_le16(&coor_data[1]); @@ -301,6 +316,21 @@ static void goodix_ts_report_touch(struct goodix_ts_data *ts, u8 *coor_data) input_report_abs(ts->input_dev, ABS_MT_WIDTH_MAJOR, input_w); } +static void goodix_ts_report_touch_9b(struct goodix_ts_data *ts, u8 *coor_data) +{ + int id = coor_data[1] & 0x0F; + int input_x = get_unaligned_le16(&coor_data[3]); + int input_y = get_unaligned_le16(&coor_data[5]); + int input_w = get_unaligned_le16(&coor_data[7]); + + input_mt_slot(ts->input_dev, id); + input_mt_report_slot_state(ts->input_dev, MT_TOOL_FINGER, true); + touchscreen_report_pos(ts->input_dev, &ts->prop, + input_x, input_y, true); + input_report_abs(ts->input_dev, ABS_MT_TOUCH_MAJOR, input_w); + input_report_abs(ts->input_dev, ABS_MT_WIDTH_MAJOR, input_w); +} + /** * goodix_process_events - Process incoming events * @@ -311,7 +341,7 @@ static void goodix_ts_report_touch(struct goodix_ts_data *ts, u8 *coor_data) */ static void goodix_process_events(struct goodix_ts_data *ts) { - u8 point_data[1 + GOODIX_CONTACT_SIZE * GOODIX_MAX_CONTACTS]; + u8 point_data[1 + GOODIX_MAX_CONTACT_SIZE * GOODIX_MAX_CONTACTS]; int touch_num; int i; @@ -326,8 +356,12 @@ static void goodix_process_events(struct goodix_ts_data *ts) input_report_key(ts->input_dev, KEY_LEFTMETA, point_data[0] & BIT(4)); for (i = 0; i < touch_num; i++) - goodix_ts_report_touch(ts, - &point_data[1 + GOODIX_CONTACT_SIZE * i]); + if (ts->contact_size == 9) + goodix_ts_report_touch_9b(ts, + &point_data[1 + ts->contact_size * i]); + else + goodix_ts_report_touch_8b(ts, + &point_data[1 + ts->contact_size * i]); input_mt_sync_frame(ts->input_dev); input_sync(ts->input_dev); @@ -730,6 +764,13 @@ static int goodix_configure_dev(struct goodix_ts_data *ts) "Applying '180 degrees rotated screen' quirk\n"); } + if (dmi_check_system(nine_bytes_report)) { + ts->contact_size = 9; + + dev_dbg(&ts->client->dev, + "Non-standard 9-bytes report format quirk\n"); + } + error = input_mt_init_slots(ts->input_dev, ts->max_touch_num, INPUT_MT_DIRECT | INPUT_MT_DROP_UNUSED); if (error) { @@ -810,6 +851,7 @@ static int goodix_ts_probe(struct i2c_client *client, ts->client = client; i2c_set_clientdata(client, ts); init_completion(&ts->firmware_loading_complete); + ts->contact_size = GOODIX_CONTACT_SIZE; error = goodix_get_gpio_config(ts); if (error) From 35b9ad840892d979dbeffe702dae95a3cbefa07b Mon Sep 17 00:00:00 2001 From: Daniel Black Date: Tue, 6 Aug 2019 14:24:39 +1000 Subject: [PATCH 052/572] ACPI: HMAT: ACPI_HMAT_MEMORY_PD_VALID is deprecated since ACPI-6.3 ACPI-6.3 corresponds to when HMAT revision was bumped from 1 to 2. In this version ACPI_HMAT_MEMORY_PD_VALID was deprecated and made reserved. As such in revision 2+ we shouldn't be testing this flag. This is as per ACPI-6.3, 5.2.27.3, Table 5-145 "Memory Proximity Domain Attributes Structure" for Flags. Signed-off-by: Daniel Black Reviewed-by: Tao Xu Signed-off-by: Rafael J. Wysocki --- drivers/acpi/hmat/hmat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/hmat/hmat.c b/drivers/acpi/hmat/hmat.c index 8f9a28a870b0..8b0de8a3c647 100644 --- a/drivers/acpi/hmat/hmat.c +++ b/drivers/acpi/hmat/hmat.c @@ -403,7 +403,7 @@ static int __init hmat_parse_proximity_domain(union acpi_subtable_headers *heade pr_info("HMAT: Memory Flags:%04x Processor Domain:%d Memory Domain:%d\n", p->flags, p->processor_PD, p->memory_PD); - if (p->flags & ACPI_HMAT_MEMORY_PD_VALID) { + if (p->flags & ACPI_HMAT_MEMORY_PD_VALID && hmat_revision == 1) { target = find_mem_target(p->memory_PD); if (!target) { pr_debug("HMAT: Memory Domain missing from SRAT\n"); From e8307ec51efebf579da5966aa5da5ab5353c61c7 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 2 Oct 2019 17:29:46 +0200 Subject: [PATCH 053/572] mmc: renesas_sdhi: Do not use platform_get_irq() to count interrupts As platform_get_irq() now prints an error when the interrupt does not exist, counting interrupts by looping until failure causes the printing of scary messages like: renesas_sdhi_internal_dmac ee140000.sd: IRQ index 1 not found Fix this by using the platform_irq_count() helper to avoid touching non-existent interrupts. Fixes: 7723f4c5ecdb8d83 ("driver core: platform: Add an error message to platform_get_irq*()") Signed-off-by: Geert Uytterhoeven Reviewed-by: Yoshihiro Shimoda Reviewed-by: Wolfram Sang Tested-by: Yoshihiro Shimoda Tested-by: Wolfram Sang Signed-off-by: Ulf Hansson --- drivers/mmc/host/renesas_sdhi_core.c | 31 +++++++++++++++++----------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index d4ada5cca2d1..234551a68739 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -646,8 +646,8 @@ int renesas_sdhi_probe(struct platform_device *pdev, struct tmio_mmc_dma *dma_priv; struct tmio_mmc_host *host; struct renesas_sdhi *priv; + int num_irqs, irq, ret, i; struct resource *res; - int irq, ret, i; u16 ver; of_data = of_device_get_match_data(&pdev->dev); @@ -825,24 +825,31 @@ int renesas_sdhi_probe(struct platform_device *pdev, host->hs400_complete = renesas_sdhi_hs400_complete; } - i = 0; - while (1) { + num_irqs = platform_irq_count(pdev); + if (num_irqs < 0) { + ret = num_irqs; + goto eirq; + } + + /* There must be at least one IRQ source */ + if (!num_irqs) { + ret = -ENXIO; + goto eirq; + } + + for (i = 0; i < num_irqs; i++) { irq = platform_get_irq(pdev, i); - if (irq < 0) - break; - i++; + if (irq < 0) { + ret = irq; + goto eirq; + } + ret = devm_request_irq(&pdev->dev, irq, tmio_mmc_irq, 0, dev_name(&pdev->dev), host); if (ret) goto eirq; } - /* There must be at least one IRQ source */ - if (!i) { - ret = irq; - goto eirq; - } - dev_info(&pdev->dev, "%s base at 0x%08lx max clock rate %u MHz\n", mmc_hostname(host->mmc), (unsigned long) (platform_get_resource(pdev, IORESOURCE_MEM, 0)->start), From faf97b84fa86d40eba5a30e9071dfb55133fb1b7 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 1 Oct 2019 20:08:34 +0200 Subject: [PATCH 054/572] mmc: sh_mmcif: Use platform_get_irq_optional() for optional interrupt As platform_get_irq() now prints an error when the interrupt does not exist, a scary warning may be printed for an optional interrupt: sh_mmcif ee200000.mmc: IRQ index 1 not found Fix this by calling platform_get_irq_optional() instead for the second interrupt, which is optional. Remove the now superfluous error printing for the first interrupt, which is mandatory. Fixes: 7723f4c5ecdb8d83 ("driver core: platform: Add an error message to platform_get_irq*()") Signed-off-by: Geert Uytterhoeven Reviewed-by: Yoshihiro Shimoda Tested-by: Yoshihiro Shimoda Reviewed-by: Wolfram Sang Tested-by: Wolfram Sang Signed-off-by: Ulf Hansson --- drivers/mmc/host/sh_mmcif.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c index 81bd9afb0980..98c575de43c7 100644 --- a/drivers/mmc/host/sh_mmcif.c +++ b/drivers/mmc/host/sh_mmcif.c @@ -1393,11 +1393,9 @@ static int sh_mmcif_probe(struct platform_device *pdev) const char *name; irq[0] = platform_get_irq(pdev, 0); - irq[1] = platform_get_irq(pdev, 1); - if (irq[0] < 0) { - dev_err(dev, "Get irq error\n"); + irq[1] = platform_get_irq_optional(pdev, 1); + if (irq[0] < 0) return -ENXIO; - } res = platform_get_resource(pdev, IORESOURCE_MEM, 0); reg = devm_ioremap_resource(dev, res); From b1e620e7d32f5aad5353cc3cfc13ed99fea65d3a Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 2 Oct 2019 16:30:37 +0100 Subject: [PATCH 055/572] ASoc: rockchip: i2s: Fix RPM imbalance If rockchip_pcm_platform_register() fails, e.g. upon deferring to wait for an absent DMA channel, we return without disabling RPM, which makes subsequent re-probe attempts scream with errors about the unbalanced enable. Don't do that. Fixes: ebb75c0bdba2 ("ASoC: rockchip: i2s: Adjust devm usage") Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/bcb12a849a05437fb18372bc7536c649b94bdf07.1570029862.git.robin.murphy@arm.com Signed-off-by: Mark Brown --- sound/soc/rockchip/rockchip_i2s.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/rockchip/rockchip_i2s.c b/sound/soc/rockchip/rockchip_i2s.c index af2d5a6124c8..61c984f10d8e 100644 --- a/sound/soc/rockchip/rockchip_i2s.c +++ b/sound/soc/rockchip/rockchip_i2s.c @@ -677,7 +677,7 @@ static int rockchip_i2s_probe(struct platform_device *pdev) ret = rockchip_pcm_platform_register(&pdev->dev); if (ret) { dev_err(&pdev->dev, "Could not register PCM\n"); - return ret; + goto err_suspend; } return 0; From e55190f26f92927e95aba22b069679311d5379ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Tue, 1 Oct 2019 13:22:49 +0200 Subject: [PATCH 056/572] samples/bpf: Fix build for task_fd_query_user.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add missing include for which was removed from perf-sys.h in commit 91854f9a077e ("perf tools: Move everything related to sys_perf_event_open() to perf-sys.h"). Fixes: 91854f9a077e ("perf tools: Move everything related to sys_perf_event_open() to perf-sys.h") Reported-by: KP Singh Reported-by: Florent Revest Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Tested-by: KP Singh Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20191001112249.27341-1-bjorn.topel@gmail.com --- samples/bpf/task_fd_query_user.c | 1 + 1 file changed, 1 insertion(+) diff --git a/samples/bpf/task_fd_query_user.c b/samples/bpf/task_fd_query_user.c index e39938058223..4c31b305e6ef 100644 --- a/samples/bpf/task_fd_query_user.c +++ b/samples/bpf/task_fd_query_user.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "libbpf.h" #include "bpf_load.h" From df551058f7a303bd3a17a4cef001349974962ce8 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Wed, 2 Oct 2019 08:31:59 +0200 Subject: [PATCH 057/572] xsk: Fix crash in poll when device does not support ndo_xsk_wakeup Fixes a crash in poll() when an AF_XDP socket is opened in copy mode and the bound device does not have ndo_xsk_wakeup defined. Avoid trying to call the non-existing ndo and instead call the internal xsk sendmsg function to send packets in the same way (from the application's point of view) as calling sendmsg() in any mode or poll() in zero-copy mode would have done. The application should behave in the same way independent on if zero-copy mode or copy mode is used. Fixes: 77cd0d7b3f25 ("xsk: add support for need_wakeup flag in AF_XDP rings") Reported-by: syzbot+a5765ed8cdb1cca4d249@syzkaller.appspotmail.com Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/1569997919-11541-1-git-send-email-magnus.karlsson@intel.com --- net/xdp/xsk.c | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index fa8fbb8fa3c8..9044073fbf22 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -305,9 +305,8 @@ out: } EXPORT_SYMBOL(xsk_umem_consume_tx); -static int xsk_zc_xmit(struct sock *sk) +static int xsk_zc_xmit(struct xdp_sock *xs) { - struct xdp_sock *xs = xdp_sk(sk); struct net_device *dev = xs->dev; return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, @@ -327,11 +326,10 @@ static void xsk_destruct_skb(struct sk_buff *skb) sock_wfree(skb); } -static int xsk_generic_xmit(struct sock *sk, struct msghdr *m, - size_t total_len) +static int xsk_generic_xmit(struct sock *sk) { - u32 max_batch = TX_BATCH_SIZE; struct xdp_sock *xs = xdp_sk(sk); + u32 max_batch = TX_BATCH_SIZE; bool sent_frame = false; struct xdp_desc desc; struct sk_buff *skb; @@ -394,6 +392,18 @@ out: return err; } +static int __xsk_sendmsg(struct sock *sk) +{ + struct xdp_sock *xs = xdp_sk(sk); + + if (unlikely(!(xs->dev->flags & IFF_UP))) + return -ENETDOWN; + if (unlikely(!xs->tx)) + return -ENOBUFS; + + return xs->zc ? xsk_zc_xmit(xs) : xsk_generic_xmit(sk); +} + static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) { bool need_wait = !(m->msg_flags & MSG_DONTWAIT); @@ -402,21 +412,18 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) if (unlikely(!xsk_is_bound(xs))) return -ENXIO; - if (unlikely(!(xs->dev->flags & IFF_UP))) - return -ENETDOWN; - if (unlikely(!xs->tx)) - return -ENOBUFS; - if (need_wait) + if (unlikely(need_wait)) return -EOPNOTSUPP; - return (xs->zc) ? xsk_zc_xmit(sk) : xsk_generic_xmit(sk, m, total_len); + return __xsk_sendmsg(sk); } static unsigned int xsk_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait) { unsigned int mask = datagram_poll(file, sock, wait); - struct xdp_sock *xs = xdp_sk(sock->sk); + struct sock *sk = sock->sk; + struct xdp_sock *xs = xdp_sk(sk); struct net_device *dev; struct xdp_umem *umem; @@ -426,9 +433,14 @@ static unsigned int xsk_poll(struct file *file, struct socket *sock, dev = xs->dev; umem = xs->umem; - if (umem->need_wakeup) - dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, - umem->need_wakeup); + if (umem->need_wakeup) { + if (dev->netdev_ops->ndo_xsk_wakeup) + dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, + umem->need_wakeup); + else + /* Poll needs to drive Tx also in copy mode */ + __xsk_sendmsg(sk); + } if (xs->rx && !xskq_empty_desc(xs->rx)) mask |= POLLIN | POLLRDNORM; From 98beb3edeb974e906a81f305d88f7bc96b2ec83e Mon Sep 17 00:00:00 2001 From: KP Singh Date: Wed, 2 Oct 2019 21:16:52 +0200 Subject: [PATCH 058/572] samples/bpf: Add a workaround for asm_inline This was added in commit eb111869301e ("compiler-types.h: add asm_inline definition") and breaks samples/bpf as clang does not support asm __inline. Fixes: eb111869301e ("compiler-types.h: add asm_inline definition") Co-developed-by: Florent Revest Signed-off-by: Florent Revest Signed-off-by: KP Singh Signed-off-by: Daniel Borkmann Acked-by: Song Liu Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20191002191652.11432-1-kpsingh@chromium.org --- samples/bpf/asm_goto_workaround.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/samples/bpf/asm_goto_workaround.h b/samples/bpf/asm_goto_workaround.h index 7409722727ca..7048bb3594d6 100644 --- a/samples/bpf/asm_goto_workaround.h +++ b/samples/bpf/asm_goto_workaround.h @@ -3,7 +3,8 @@ #ifndef __ASM_GOTO_WORKAROUND_H #define __ASM_GOTO_WORKAROUND_H -/* this will bring in asm_volatile_goto macro definition +/* + * This will bring in asm_volatile_goto and asm_inline macro definitions * if enabled by compiler and config options. */ #include @@ -13,5 +14,15 @@ #define asm_volatile_goto(x...) asm volatile("invalid use of asm_volatile_goto") #endif +/* + * asm_inline is defined as asm __inline in "include/linux/compiler_types.h" + * if supported by the kernel's CC (i.e CONFIG_CC_HAS_ASM_INLINE) which is not + * supported by CLANG. + */ +#ifdef asm_inline +#undef asm_inline +#define asm_inline asm +#endif + #define volatile(x...) volatile("") #endif From 96d49bbfe6c1a6bb43ccd00fb87aca100e32e5e2 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 3 Oct 2019 09:44:10 -0700 Subject: [PATCH 059/572] ARM: omap2plus_defconfig: Fix selected panels after generic panel changes The old omapdrm panels got removed for v5.4 in favor of generic panels, and the Kconfig options changed. Let's update omap2plus_defconfig accordingly so the same panels are still enabled. Cc: Jyri Sarha Cc: Laurent Pinchart Cc: Tomi Valkeinen Signed-off-by: Tony Lindgren --- arch/arm/configs/omap2plus_defconfig | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 64eb896907bf..b647e463b9c7 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -356,14 +356,14 @@ CONFIG_DRM_OMAP_CONNECTOR_HDMI=m CONFIG_DRM_OMAP_CONNECTOR_ANALOG_TV=m CONFIG_DRM_OMAP_PANEL_DPI=m CONFIG_DRM_OMAP_PANEL_DSI_CM=m -CONFIG_DRM_OMAP_PANEL_SONY_ACX565AKM=m -CONFIG_DRM_OMAP_PANEL_LGPHILIPS_LB035Q02=m -CONFIG_DRM_OMAP_PANEL_SHARP_LS037V7DW01=m -CONFIG_DRM_OMAP_PANEL_TPO_TD028TTEC1=m -CONFIG_DRM_OMAP_PANEL_TPO_TD043MTEA1=m -CONFIG_DRM_OMAP_PANEL_NEC_NL8048HL11=m CONFIG_DRM_TILCDC=m CONFIG_DRM_PANEL_SIMPLE=m +CONFIG_DRM_PANEL_LG_LB035Q02=m +CONFIG_DRM_PANEL_NEC_NL8048HL11=m +CONFIG_DRM_PANEL_SHARP_LS037V7DW01=m +CONFIG_DRM_PANEL_SONY_ACX565AKM=m +CONFIG_DRM_PANEL_TPO_TD028TTEC1=m +CONFIG_DRM_PANEL_TPO_TD043MTEA1=m CONFIG_FB=y CONFIG_FIRMWARE_EDID=y CONFIG_FB_MODE_HELPERS=y From deea9f5fc32040fd6f6132f2260ba410fb5cf98c Mon Sep 17 00:00:00 2001 From: Hugh Cole-Baker Date: Sat, 21 Sep 2019 14:14:57 +0100 Subject: [PATCH 060/572] arm64: dts: rockchip: fix Rockpro64 RK808 interrupt line Fix the pinctrl and interrupt specifier for RK808 to use GPIO3_B2. On the Rockpro64 schematic [1] page 16, it shows GPIO3_B2 used for the interrupt line PMIC_INT_L from the RK808, and there's a note which translates as: "PMU termination GPIO1_C5 changed to this". Tested by setting an RTC wakealarm and checking /proc/interrupts counters. Without this patch, neither the rockchip_gpio_irq counter for the RK808, nor the RTC alarm counter increment when the alarm time is reached. With this patch, both interrupt counters increment by 1 as expected. [1] http://files.pine64.org/doc/rockpro64/rockpro64_v21-SCH.pdf Fixes: e4f3fb490967 ("arm64: dts: rockchip: add initial dts support for Rockpro64") Signed-off-by: Hugh Cole-Baker Link: https://lore.kernel.org/r/20190921131457.36258-1-sigmaris@gmail.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts b/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts index 0401d4ec1f45..c27d8a6ae1c5 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts @@ -247,8 +247,8 @@ rk808: pmic@1b { compatible = "rockchip,rk808"; reg = <0x1b>; - interrupt-parent = <&gpio1>; - interrupts = <21 IRQ_TYPE_LEVEL_LOW>; + interrupt-parent = <&gpio3>; + interrupts = <10 IRQ_TYPE_LEVEL_LOW>; #clock-cells = <1>; clock-output-names = "xin32k", "rk808-clkout2"; pinctrl-names = "default"; @@ -574,7 +574,7 @@ pmic { pmic_int_l: pmic-int-l { - rockchip,pins = <1 RK_PC5 RK_FUNC_GPIO &pcfg_pull_up>; + rockchip,pins = <3 RK_PB2 RK_FUNC_GPIO &pcfg_pull_up>; }; vsel1_gpio: vsel1-gpio { From cb11a90e33c04623428eccb2c693a6b81947c686 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 17 Sep 2019 10:34:53 +0200 Subject: [PATCH 061/572] dt-bindings: arm: rockchip: fix Theobroma-System board bindings The naming convention for the existing Theobroma boards is soc-q7module-baseboard, so rk3399-puma-haikou and the in-kernel devicetrees also follow that scheme. For some reason in the binding a wrong or outdated naming slipped in which does not match the used devicetrees and makes the dt-schema complain now. Fix this by using the names used in the wild by actual boards. Fixes: a323a513c712 ("dt-bindings: arm: Convert Rockchip board/soc bindings to json-schema") [although the issue was also present in the old txt file] Signed-off-by: Heiko Stuebner Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20190917083453.25744-1-heiko@sntech.de --- Documentation/devicetree/bindings/arm/rockchip.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/arm/rockchip.yaml b/Documentation/devicetree/bindings/arm/rockchip.yaml index c82c5e57d44c..9c7e70335ac0 100644 --- a/Documentation/devicetree/bindings/arm/rockchip.yaml +++ b/Documentation/devicetree/bindings/arm/rockchip.yaml @@ -496,12 +496,12 @@ properties: - description: Theobroma Systems RK3368-uQ7 with Haikou baseboard items: - - const: tsd,rk3368-uq7-haikou + - const: tsd,rk3368-lion-haikou - const: rockchip,rk3368 - description: Theobroma Systems RK3399-Q7 with Haikou baseboard items: - - const: tsd,rk3399-q7-haikou + - const: tsd,rk3399-puma-haikou - const: rockchip,rk3399 - description: Tronsmart Orion R68 Meta From 8f8fed0cdbbd6cdbf28d9ebe662f45765d2f7d39 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 1 Oct 2019 16:48:39 +0900 Subject: [PATCH 062/572] scsi: core: save/restore command resid for error handling When a non-passthrough command is terminated with CHECK CONDITION, request sense is executed by hijacking the command descriptor. Since scsi_eh_prep_cmnd() and scsi_eh_restore_cmnd() do not save/restore the original command resid, the value returned on failure of the original command is lost and replaced with the value set by the execution of the request sense command. This value may in many instances be unaligned to the device sector size, causing sd_done() to print a warning message about the incorrect unaligned resid before the command is retried. Fix this problem by saving the original command residual in struct scsi_eh_save using scsi_eh_prep_cmnd() and restoring it in scsi_eh_restore_cmnd(). In addition, to make sure that the request sense command is executed with a correctly initialized command structure, also reset the residual to 0 in scsi_eh_prep_cmnd() after saving the original command value in struct scsi_eh_save. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20191001074839.1994-1-damien.lemoal@wdc.com Signed-off-by: Damien Le Moal Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_error.c | 3 +++ include/scsi/scsi_eh.h | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 1c470e31ae81..ae2fa170f6ad 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -967,6 +967,7 @@ void scsi_eh_prep_cmnd(struct scsi_cmnd *scmd, struct scsi_eh_save *ses, ses->data_direction = scmd->sc_data_direction; ses->sdb = scmd->sdb; ses->result = scmd->result; + ses->resid_len = scmd->req.resid_len; ses->underflow = scmd->underflow; ses->prot_op = scmd->prot_op; ses->eh_eflags = scmd->eh_eflags; @@ -977,6 +978,7 @@ void scsi_eh_prep_cmnd(struct scsi_cmnd *scmd, struct scsi_eh_save *ses, memset(scmd->cmnd, 0, BLK_MAX_CDB); memset(&scmd->sdb, 0, sizeof(scmd->sdb)); scmd->result = 0; + scmd->req.resid_len = 0; if (sense_bytes) { scmd->sdb.length = min_t(unsigned, SCSI_SENSE_BUFFERSIZE, @@ -1029,6 +1031,7 @@ void scsi_eh_restore_cmnd(struct scsi_cmnd* scmd, struct scsi_eh_save *ses) scmd->sc_data_direction = ses->data_direction; scmd->sdb = ses->sdb; scmd->result = ses->result; + scmd->req.resid_len = ses->resid_len; scmd->underflow = ses->underflow; scmd->prot_op = ses->prot_op; scmd->eh_eflags = ses->eh_eflags; diff --git a/include/scsi/scsi_eh.h b/include/scsi/scsi_eh.h index 3810b340551c..6bd5ed695a5e 100644 --- a/include/scsi/scsi_eh.h +++ b/include/scsi/scsi_eh.h @@ -32,6 +32,7 @@ extern int scsi_ioctl_reset(struct scsi_device *, int __user *); struct scsi_eh_save { /* saved state */ int result; + unsigned int resid_len; int eh_eflags; enum dma_data_direction data_direction; unsigned underflow; From 2190168aaea42c31bff7b9a967e7b045f07df095 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Tue, 1 Oct 2019 12:49:49 +0200 Subject: [PATCH 063/572] scsi: zfcp: fix reaction on bit error threshold notification On excessive bit errors for the FCP channel ingress fibre path, the channel notifies us. Previously, we only emitted a kernel message and a trace record. Since performance can become suboptimal with I/O timeouts due to bit errors, we now stop using an FCP device by default on channel notification so multipath on top can timely failover to other paths. A new module parameter zfcp.ber_stop can be used to get zfcp old behavior. User explanation of new kernel message: * Description: * The FCP channel reported that its bit error threshold has been exceeded. * These errors might result from a problem with the physical components * of the local fibre link into the FCP channel. * The problem might be damage or malfunction of the cable or * cable connection between the FCP channel and * the adjacent fabric switch port or the point-to-point peer. * Find details about the errors in the HBA trace for the FCP device. * The zfcp device driver closed down the FCP device * to limit the performance impact from possible I/O command timeouts. * User action: * Check for problems on the local fibre link, ensure that fibre optics are * clean and functional, and all cables are properly plugged. * After the repair action, you can manually recover the FCP device by * writing "0" into its "failed" sysfs attribute. * If recovery through sysfs is not possible, set the CHPID of the device * offline and back online on the service element. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: #2.6.30+ Link: https://lore.kernel.org/r/20191001104949.42810-1-maier@linux.ibm.com Reviewed-by: Jens Remus Reviewed-by: Benjamin Block Signed-off-by: Steffen Maier Signed-off-by: Martin K. Petersen --- drivers/s390/scsi/zfcp_fsf.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 296bbc3c4606..cf63916814cc 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -27,6 +27,11 @@ struct kmem_cache *zfcp_fsf_qtcb_cache; +static bool ber_stop = true; +module_param(ber_stop, bool, 0600); +MODULE_PARM_DESC(ber_stop, + "Shuts down FCP devices for FCP channels that report a bit-error count in excess of its threshold (default on)"); + static void zfcp_fsf_request_timeout_handler(struct timer_list *t) { struct zfcp_fsf_req *fsf_req = from_timer(fsf_req, t, timer); @@ -236,10 +241,15 @@ static void zfcp_fsf_status_read_handler(struct zfcp_fsf_req *req) case FSF_STATUS_READ_SENSE_DATA_AVAIL: break; case FSF_STATUS_READ_BIT_ERROR_THRESHOLD: - dev_warn(&adapter->ccw_device->dev, - "The error threshold for checksum statistics " - "has been exceeded\n"); zfcp_dbf_hba_bit_err("fssrh_3", req); + if (ber_stop) { + dev_warn(&adapter->ccw_device->dev, + "All paths over this FCP device are disused because of excessive bit errors\n"); + zfcp_erp_adapter_shutdown(adapter, 0, "fssrh_b"); + } else { + dev_warn(&adapter->ccw_device->dev, + "The error threshold for checksum statistics has been exceeded\n"); + } break; case FSF_STATUS_READ_LINK_DOWN: zfcp_fsf_status_read_link_down(req); From b23f330d5145b92f90cf16f1adc5444ad06764b4 Mon Sep 17 00:00:00 2001 From: Himanshu Madhani Date: Tue, 1 Oct 2019 08:33:38 -0700 Subject: [PATCH 064/572] scsi: MAINTAINERS: Update qla2xxx driver Update maintainer entry for qla2xxx driver now that email addresses have been changed to Marvell. Link: https://lore.kernel.org/r/20191001153338.28765-1-hmadhani@marvell.com Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 783569e3c4b4..91f33522393a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13184,7 +13184,7 @@ S: Maintained F: drivers/scsi/qla1280.[ch] QLOGIC QLA2XXX FC-SCSI DRIVER -M: qla2xxx-upstream@qlogic.com +M: hmadhani@marvell.com L: linux-scsi@vger.kernel.org S: Supported F: Documentation/scsi/LICENSE.qla2xxx From cd24ee2a9a091432deb8da461b9b1055107460d4 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 27 Nov 2018 21:50:56 +0100 Subject: [PATCH 065/572] MAINTAINERS: Add hp_sdc drivers to parisc arch Signed-off-by: Helge Deller --- MAINTAINERS | 3 +++ 1 file changed, 3 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 296de2b51c83..59dd4c8f070d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12310,12 +12310,15 @@ F: arch/parisc/ F: Documentation/parisc/ F: drivers/parisc/ F: drivers/char/agp/parisc-agp.c +F: drivers/input/misc/hp_sdc_rtc.c F: drivers/input/serio/gscps2.c +F: drivers/input/serio/hp_sdc* F: drivers/parport/parport_gsc.* F: drivers/tty/serial/8250/8250_gsc.c F: drivers/video/fbdev/sti* F: drivers/video/console/sti* F: drivers/video/logo/logo_parisc* +F: include/linux/hp_sdc.h PARMAN M: Jiri Pirko From 313c3fe9c2348e7147eca38bb446f295b45403a0 Mon Sep 17 00:00:00 2001 From: Michael Vassernis Date: Thu, 3 Oct 2019 07:31:38 +0000 Subject: [PATCH 066/572] mac80211_hwsim: fix incorrect dev_alloc_name failure goto If dev_alloc_name fails, hwsim_mon's memory allocated in alloc_netdev needs to be freed. Change goto command in dev_alloc_name failure to out_free_mon in order to perform free_netdev. Signed-off-by: Michael Vassernis Link: https://lore.kernel.org/r/20191003073049.3760-1-michael.vassernis@tandemg.com Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 45c73a6f09a1..14f562cd715c 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -4026,7 +4026,7 @@ static int __init init_mac80211_hwsim(void) err = dev_alloc_name(hwsim_mon, hwsim_mon->name); if (err < 0) { rtnl_unlock(); - goto out_free_radios; + goto out_free_mon; } err = register_netdevice(hwsim_mon); From 4152561f5da3fca92af7179dd538ea89e248f9d0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 4 Oct 2019 10:51:31 +0100 Subject: [PATCH 067/572] mac80211: Reject malformed SSID elements Although this shouldn't occur in practice, it's a good idea to bounds check the length field of the SSID element prior to using it for things like allocations or memcpy operations. Cc: Cc: Kees Cook Reported-by: Nicolas Waisman Signed-off-by: Will Deacon Link: https://lore.kernel.org/r/20191004095132.15777-1-will@kernel.org Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 26a2f49208b6..54dd8849d1cc 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2633,7 +2633,8 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw, rcu_read_lock(); ssid = ieee80211_bss_get_ie(cbss, WLAN_EID_SSID); - if (WARN_ON_ONCE(ssid == NULL)) + if (WARN_ONCE(!ssid || ssid[1] > IEEE80211_MAX_SSID_LEN, + "invalid SSID element (len=%d)", ssid ? ssid[1] : -1)) ssid_len = 0; else ssid_len = ssid[1]; @@ -5233,7 +5234,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, rcu_read_lock(); ssidie = ieee80211_bss_get_ie(req->bss, WLAN_EID_SSID); - if (!ssidie) { + if (!ssidie || ssidie[1] > sizeof(assoc_data->ssid)) { rcu_read_unlock(); kfree(assoc_data); return -EINVAL; From 4ac2813cc867ae563a1ba5a9414bfb554e5796fa Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 4 Oct 2019 10:51:32 +0100 Subject: [PATCH 068/572] cfg80211: wext: avoid copying malformed SSIDs Ensure the SSID element is bounds-checked prior to invoking memcpy() with its length field, when copying to userspace. Cc: Cc: Kees Cook Reported-by: Nicolas Waisman Signed-off-by: Will Deacon Link: https://lore.kernel.org/r/20191004095132.15777-2-will@kernel.org [adjust commit log a bit] Signed-off-by: Johannes Berg --- net/wireless/wext-sme.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index c67d7a82ab13..73fd0eae08ca 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -202,6 +202,7 @@ int cfg80211_mgd_wext_giwessid(struct net_device *dev, struct iw_point *data, char *ssid) { struct wireless_dev *wdev = dev->ieee80211_ptr; + int ret = 0; /* call only for station! */ if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) @@ -219,7 +220,10 @@ int cfg80211_mgd_wext_giwessid(struct net_device *dev, if (ie) { data->flags = 1; data->length = ie[1]; - memcpy(ssid, ie + 2, data->length); + if (data->length > IW_ESSID_MAX_SIZE) + ret = -EINVAL; + else + memcpy(ssid, ie + 2, data->length); } rcu_read_unlock(); } else if (wdev->wext.connect.ssid && wdev->wext.connect.ssid_len) { @@ -229,7 +233,7 @@ int cfg80211_mgd_wext_giwessid(struct net_device *dev, } wdev_unlock(wdev); - return 0; + return ret; } int cfg80211_mgd_wext_siwap(struct net_device *dev, From 6b512b0ee091edcb8e46218894e4c917d919d3dc Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Fri, 16 Aug 2019 17:58:12 -0500 Subject: [PATCH 069/572] ARM: dts: logicpd-torpedo-som: Remove twl_keypad The TWL4030 used on the Logit PD Torpedo SOM does not have the keypad pins routed. This patch disables the twl_keypad driver to remove some splat during boot: twl4030_keypad 48070000.i2c:twl@48:keypad: missing or malformed property linux,keymap: -22 twl4030_keypad 48070000.i2c:twl@48:keypad: Failed to build keymap twl4030_keypad: probe of 48070000.i2c:twl@48:keypad failed with error -22 Signed-off-by: Adam Ford [tony@atomide.com: removed error time stamps] Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/logicpd-torpedo-som.dtsi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi index 3fdd0a72f87f..506b118e511a 100644 --- a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi +++ b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi @@ -192,3 +192,7 @@ &twl_gpio { ti,use-leds; }; + +&twl_keypad { + status = "disabled"; +}; From 77fd66c9ff3e992718a79fa6407148935d34b50f Mon Sep 17 00:00:00 2001 From: Kiran Gunda Date: Fri, 4 Oct 2019 15:46:55 +0530 Subject: [PATCH 070/572] regulator: qcom-rpmh: Fix PMIC5 BoB min voltage Correct the PMIC5 BoB min voltage from 0.3V to 3V. Also correct the voltage selector accordingly. Signed-off-by: Kiran Gunda Link: https://lore.kernel.org/r/1570184215-5355-1-git-send-email-kgunda@codeaurora.org Signed-off-by: Mark Brown --- drivers/regulator/qcom-rpmh-regulator.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/regulator/qcom-rpmh-regulator.c b/drivers/regulator/qcom-rpmh-regulator.c index db6c085da65e..0246b6f99fb5 100644 --- a/drivers/regulator/qcom-rpmh-regulator.c +++ b/drivers/regulator/qcom-rpmh-regulator.c @@ -735,8 +735,8 @@ static const struct rpmh_vreg_hw_data pmic5_hfsmps515 = { static const struct rpmh_vreg_hw_data pmic5_bob = { .regulator_type = VRM, .ops = &rpmh_regulator_vrm_bypass_ops, - .voltage_range = REGULATOR_LINEAR_RANGE(300000, 0, 135, 32000), - .n_voltages = 136, + .voltage_range = REGULATOR_LINEAR_RANGE(3000000, 0, 31, 32000), + .n_voltages = 32, .pmic_mode_map = pmic_mode_map_pmic5_bob, .of_map_mode = rpmh_regulator_pmic4_bob_of_map_mode, }; From 0990c5e7573098117c69651821647c228483e31b Mon Sep 17 00:00:00 2001 From: Soeren Moch Date: Thu, 3 Oct 2019 23:50:34 +0200 Subject: [PATCH 071/572] arm64: dts: rockchip: fix RockPro64 vdd-log regulator settings The RockPro64 schematic [1] page 18 states a min voltage of 0.8V and a max voltage of 1.4V for the VDD_LOG pwm regulator. However, there is an additional note that the pwm parameter needs to be modified. From the schematics a voltage range of 0.8V to 1.7V can be calculated. Additional voltage measurements on the board show that this fix indeed leads to the correct voltage, while without this fix the voltage was set too high. [1] http://files.pine64.org/doc/rockpro64/rockpro64_v21-SCH.pdf Fixes: e4f3fb490967 ("arm64: dts: rockchip: add initial dts support for Rockpro64") Signed-off-by: Soeren Moch Link: https://lore.kernel.org/r/20191003215036.15023-1-smoch@web.de Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts b/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts index c27d8a6ae1c5..98a5f323bc1d 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts @@ -173,7 +173,7 @@ regulator-always-on; regulator-boot-on; regulator-min-microvolt = <800000>; - regulator-max-microvolt = <1400000>; + regulator-max-microvolt = <1700000>; vin-supply = <&vcc5v0_sys>; }; }; From 2558b3b1b11a1b32b336be2dd0aabfa6d35ddcb5 Mon Sep 17 00:00:00 2001 From: Soeren Moch Date: Thu, 3 Oct 2019 23:50:35 +0200 Subject: [PATCH 072/572] arm64: dts: rockchip: fix RockPro64 sdhci settings The RockPro64 schematics [1], [2] show that the rk3399 EMMC_STRB pin is connected to the RESET pin instead of the DATA_STROBE pin of the eMMC module. So the data strobe cannot be used for its intended purpose on this board, and so the HS400 eMMC mode is not functional. Limit the controller to HS200. [1] http://files.pine64.org/doc/rockpro64/rockpro64_v21-SCH.pdf [2] http://files.pine64.org/doc/rock64/PINE64_eMMC_Module_20170719.pdf Fixes: e4f3fb490967 ("arm64: dts: rockchip: add initial dts support for Rockpro64") Signed-off-by: Soeren Moch Link: https://lore.kernel.org/r/20191003215036.15023-2-smoch@web.de Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts b/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts index 98a5f323bc1d..aa06a6587a11 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts @@ -636,8 +636,7 @@ &sdhci { bus-width = <8>; - mmc-hs400-1_8v; - mmc-hs400-enhanced-strobe; + mmc-hs200-1_8v; non-removable; status = "okay"; }; From 20504fa1d2ffd5d03cdd9dc9c9dd4ed4579b97ef Mon Sep 17 00:00:00 2001 From: Patrick Williams Date: Tue, 1 Oct 2019 10:46:31 -0500 Subject: [PATCH 073/572] pinctrl: armada-37xx: fix control of pins 32 and up The 37xx configuration registers are only 32 bits long, so pins 32-35 spill over into the next register. The calculation for the register address was done, but the bitmask was not, so any configuration to pin 32 or above resulted in a bitmask that overflowed and performed no action. Fix the register / offset calculation to also adjust the offset. Fixes: 5715092a458c ("pinctrl: armada-37xx: Add gpio support") Signed-off-by: Patrick Williams Acked-by: Gregory CLEMENT Cc: Link: https://lore.kernel.org/r/20191001154634.96165-1-alpawi@amazon.com Signed-off-by: Linus Walleij --- drivers/pinctrl/mvebu/pinctrl-armada-37xx.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c index 6462d3ca7ceb..34c1fee52cbe 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c @@ -221,11 +221,11 @@ static const struct armada_37xx_pin_data armada_37xx_pin_sb = { }; static inline void armada_37xx_update_reg(unsigned int *reg, - unsigned int offset) + unsigned int *offset) { /* We never have more than 2 registers */ - if (offset >= GPIO_PER_REG) { - offset -= GPIO_PER_REG; + if (*offset >= GPIO_PER_REG) { + *offset -= GPIO_PER_REG; *reg += sizeof(u32); } } @@ -376,7 +376,7 @@ static inline void armada_37xx_irq_update_reg(unsigned int *reg, { int offset = irqd_to_hwirq(d); - armada_37xx_update_reg(reg, offset); + armada_37xx_update_reg(reg, &offset); } static int armada_37xx_gpio_direction_input(struct gpio_chip *chip, @@ -386,7 +386,7 @@ static int armada_37xx_gpio_direction_input(struct gpio_chip *chip, unsigned int reg = OUTPUT_EN; unsigned int mask; - armada_37xx_update_reg(®, offset); + armada_37xx_update_reg(®, &offset); mask = BIT(offset); return regmap_update_bits(info->regmap, reg, mask, 0); @@ -399,7 +399,7 @@ static int armada_37xx_gpio_get_direction(struct gpio_chip *chip, unsigned int reg = OUTPUT_EN; unsigned int val, mask; - armada_37xx_update_reg(®, offset); + armada_37xx_update_reg(®, &offset); mask = BIT(offset); regmap_read(info->regmap, reg, &val); @@ -413,7 +413,7 @@ static int armada_37xx_gpio_direction_output(struct gpio_chip *chip, unsigned int reg = OUTPUT_EN; unsigned int mask, val, ret; - armada_37xx_update_reg(®, offset); + armada_37xx_update_reg(®, &offset); mask = BIT(offset); ret = regmap_update_bits(info->regmap, reg, mask, mask); @@ -434,7 +434,7 @@ static int armada_37xx_gpio_get(struct gpio_chip *chip, unsigned int offset) unsigned int reg = INPUT_VAL; unsigned int val, mask; - armada_37xx_update_reg(®, offset); + armada_37xx_update_reg(®, &offset); mask = BIT(offset); regmap_read(info->regmap, reg, &val); @@ -449,7 +449,7 @@ static void armada_37xx_gpio_set(struct gpio_chip *chip, unsigned int offset, unsigned int reg = OUTPUT_VAL; unsigned int mask, val; - armada_37xx_update_reg(®, offset); + armada_37xx_update_reg(®, &offset); mask = BIT(offset); val = value ? mask : 0; From f876dbff857b41b0f5c6139fb1db2361cb708279 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 2 Oct 2019 15:02:17 +0200 Subject: [PATCH 074/572] pinctrl: bcm-iproc: Use SPDX header This convert the BCM IPROC driver to use the SPDX header for indicating GPL v2.0 only licensing. Cc: Pramod Kumar Cc: Ray Jui Cc: Scott Branden Signed-off-by: Linus Walleij Acked-by: Scott Branden Link: https://lore.kernel.org/r/20191002130217.4491-1-linus.walleij@linaro.org --- drivers/pinctrl/bcm/pinctrl-iproc-gpio.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c b/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c index 6f7d3a2f2e97..61352cc6c0d3 100644 --- a/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c +++ b/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c @@ -1,14 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2014-2017 Broadcom - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation version 2. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ /* From 39b65fbb813089e366b376bd8acc300b6fd646dc Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 26 Sep 2019 11:14:26 +0300 Subject: [PATCH 075/572] pinctrl: ns2: Fix off by one bugs in ns2_pinmux_enable() The pinctrl->functions[] array has pinctrl->num_functions elements and the pinctrl->groups[] array is the same way. These are set in ns2_pinmux_probe(). So the > comparisons should be >= so that we don't read one element beyond the end of the array. Fixes: b5aa1006e4a9 ("pinctrl: ns2: add pinmux driver support for Broadcom NS2 SoC") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20190926081426.GB2332@mwanda Acked-by: Scott Branden Signed-off-by: Linus Walleij --- drivers/pinctrl/bcm/pinctrl-ns2-mux.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/bcm/pinctrl-ns2-mux.c b/drivers/pinctrl/bcm/pinctrl-ns2-mux.c index 2bf6af7df7d9..9fabc451550e 100644 --- a/drivers/pinctrl/bcm/pinctrl-ns2-mux.c +++ b/drivers/pinctrl/bcm/pinctrl-ns2-mux.c @@ -640,8 +640,8 @@ static int ns2_pinmux_enable(struct pinctrl_dev *pctrl_dev, const struct ns2_pin_function *func; const struct ns2_pin_group *grp; - if (grp_select > pinctrl->num_groups || - func_select > pinctrl->num_functions) + if (grp_select >= pinctrl->num_groups || + func_select >= pinctrl->num_functions) return -EINVAL; func = &pinctrl->functions[func_select]; From 6abff1b9f7b8884a46b7bd80b49e7af0b5625aeb Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 2 Oct 2019 10:52:25 -0700 Subject: [PATCH 076/572] nvme: fix possible deadlock when nvme_update_formats fails nvme_update_formats may fail to revalidate the namespace and attempt to remove the namespace. This may lead to a deadlock as nvme_ns_remove will attempt to acquire the subsystem lock which is already acquired by the passthru command with effects. Move the invalid namepsace removal to after the passthru command releases the subsystem lock. Reported-by: Judy Brock Signed-off-by: Sagi Grimberg --- drivers/nvme/host/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index fd7dea36c3b6..ef1d8f81f69e 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1306,8 +1306,6 @@ static void nvme_update_formats(struct nvme_ctrl *ctrl) if (ns->disk && nvme_revalidate_disk(ns->disk)) nvme_set_queue_dying(ns); up_read(&ctrl->namespaces_rwsem); - - nvme_remove_invalid_namespaces(ctrl, NVME_NSID_ALL); } static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects) @@ -1323,6 +1321,7 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects) nvme_unfreeze(ctrl); nvme_mpath_unfreeze(ctrl->subsys); mutex_unlock(&ctrl->subsys->lock); + nvme_remove_invalid_namespaces(ctrl, NVME_NSID_ALL); mutex_unlock(&ctrl->scan_lock); } if (effects & NVME_CMD_EFFECTS_CCC) From 3a8ecc935efabdad106b5e06d07b150c394b4465 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 3 Oct 2019 13:57:29 +0200 Subject: [PATCH 077/572] nvme: retain split access workaround for capability reads Commit 7fd8930f26be4 "nvme: add a common helper to read Identify Controller data" has re-introduced an issue that we have attempted to work around in the past, in commit a310acd7a7ea ("NVMe: use split lo_hi_{read,write}q"). The problem is that some PCIe NVMe controllers do not implement 64-bit outbound accesses correctly, which is why the commit above switched to using lo_hi_[read|write]q for all 64-bit BAR accesses occuring in the code. In the mean time, the NVMe subsystem has been refactored, and now calls into the PCIe support layer for NVMe via a .reg_read64() method, which fails to use lo_hi_readq(), and thus reintroduces the problem that the workaround above aimed to address. Given that, at the moment, .reg_read64() is only used to read the capability register [which is known to tolerate split reads], let's switch .reg_read64() to lo_hi_readq() as well. This fixes a boot issue on some ARM boxes with NVMe behind a Synopsys DesignWare PCIe host controller. Fixes: 7fd8930f26be4 ("nvme: add a common helper to read Identify Controller data") Signed-off-by: Ard Biesheuvel Signed-off-by: Sagi Grimberg --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index bb88681f4dc3..78e403823646 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2672,7 +2672,7 @@ static int nvme_pci_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val) static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val) { - *val = readq(to_nvme_dev(ctrl)->bar + off); + *val = lo_hi_readq(to_nvme_dev(ctrl)->bar + off); return 0; } From 48659227e0a1d7897a4942c7b2cf925b581b6bf7 Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Thu, 3 Oct 2019 13:03:09 +1300 Subject: [PATCH 078/572] pinctrl: iproc: allow for error from platform_get_irq() platform_get_irq() can return an error code. Allow for this when getting the irq. Fixes: 6f265e5d4da7 ("pinctrl: bcm-iproc: Pass irqchip when adding gpiochip") Signed-off-by: Chris Packham Link: https://lore.kernel.org/r/20191003000310.17099-2-chris.packham@alliedtelesis.co.nz Acked-by: Scott Branden Signed-off-by: Linus Walleij --- drivers/pinctrl/bcm/pinctrl-iproc-gpio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c b/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c index 61352cc6c0d3..42f7ab383ad9 100644 --- a/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c +++ b/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c @@ -845,7 +845,7 @@ static int iproc_gpio_probe(struct platform_device *pdev) /* optional GPIO interrupt support */ irq = platform_get_irq(pdev, 0); - if (irq) { + if (irq > 0) { struct irq_chip *irqc; struct gpio_irq_chip *girq; From 2fd215b8fdbe4d3a609adbe3a323696393cb1e53 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Fri, 4 Oct 2019 14:23:42 +0200 Subject: [PATCH 079/572] pinctrl: stmfx: fix null pointer on remove dev_get_platdata(&pdev->dev) returns a pointer on struct stmfx_pinctrl, not on struct stmfx (platform_set_drvdata(pdev, pctl); in probe). Pointer on struct stmfx is stored in driver data of pdev parent (in probe: struct stmfx *stmfx = dev_get_drvdata(pdev->dev.parent);). Fixes: 1490d9f841b1 ("pinctrl: Add STMFX GPIO expander Pinctrl/GPIO driver") Signed-off-by: Amelie Delaunay Link: https://lore.kernel.org/r/20191004122342.22018-1-amelie.delaunay@st.com Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-stmfx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-stmfx.c b/drivers/pinctrl/pinctrl-stmfx.c index 974973777395..564660028fcc 100644 --- a/drivers/pinctrl/pinctrl-stmfx.c +++ b/drivers/pinctrl/pinctrl-stmfx.c @@ -705,7 +705,7 @@ static int stmfx_pinctrl_probe(struct platform_device *pdev) static int stmfx_pinctrl_remove(struct platform_device *pdev) { - struct stmfx *stmfx = dev_get_platdata(&pdev->dev); + struct stmfx *stmfx = dev_get_drvdata(pdev->dev.parent); return stmfx_function_disable(stmfx, STMFX_FUNC_GPIO | From 30ca9b04747ea865cbb5a7d05e10ef0a3761bf19 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Wed, 4 Sep 2019 15:13:14 -0400 Subject: [PATCH 080/572] soc: imx: imx-scu: Getting UID from SCU should have response The SCU firmware API for getting UID should have response, otherwise, the message stored in function stack could be released and then the response data received from SCU will be stored into that released stack and cause kernel NULL pointer dump. Fixes: 73feb4d0f8f1 ("soc: imx-scu: Add SoC UID(unique identifier) support") Signed-off-by: Anson Huang Signed-off-by: Shawn Guo --- drivers/soc/imx/soc-imx-scu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/imx/soc-imx-scu.c b/drivers/soc/imx/soc-imx-scu.c index 50831ebf126a..c68882eb80f7 100644 --- a/drivers/soc/imx/soc-imx-scu.c +++ b/drivers/soc/imx/soc-imx-scu.c @@ -46,7 +46,7 @@ static ssize_t soc_uid_show(struct device *dev, hdr->func = IMX_SC_MISC_FUNC_UNIQUE_ID; hdr->size = 1; - ret = imx_scu_call_rpc(soc_ipc_handle, &msg, false); + ret = imx_scu_call_rpc(soc_ipc_handle, &msg, true); if (ret) { pr_err("%s: get soc uid failed, ret %d\n", __func__, ret); return ret; From 21094ba5c1f4b15df096e8f6247a50b6ab57c869 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 6 Sep 2019 19:06:01 +0200 Subject: [PATCH 081/572] arm64: dts: zii-ultra: fix ARM regulator states The GPIO controlled regulator for the ARM power supply is supplying the higher voltage when the GPIO is driven high. This is opposite to the similar regulator setup on the EVK board and is impacting stability of the board as the ARM domain has been supplied with a too low voltage when to faster OPPs are in use. Fixes: 4a13b3bec3b4 (arm64: dts: imx: add Zii Ultra board support) Signed-off-by: Lucas Stach Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi b/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi index af99473ada04..087b5b6ebe89 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi @@ -89,8 +89,8 @@ regulator-min-microvolt = <900000>; regulator-max-microvolt = <1000000>; gpios = <&gpio3 19 GPIO_ACTIVE_HIGH>; - states = <1000000 0x0 - 900000 0x1>; + states = <1000000 0x1 + 900000 0x0>; regulator-always-on; }; }; From c763ac436b668d7417f0979430ec0312ede4093d Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 5 Oct 2019 15:05:18 -0700 Subject: [PATCH 082/572] net: dsa: b53: Do not clear existing mirrored port mask Clearing the existing bitmask of mirrored ports essentially prevents us from capturing more than one port at any given time. This is clearly wrong, do not clear the bitmask prior to setting up the new port. Reported-by: Hubert Feurstein Fixes: ed3af5fd08eb ("net: dsa: b53: Add support for port mirroring") Signed-off-by: Florian Fainelli Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 526ba2ab66f1..cc3536315eff 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1845,7 +1845,6 @@ int b53_mirror_add(struct dsa_switch *ds, int port, loc = B53_EG_MIR_CTL; b53_read16(dev, B53_MGMT_PAGE, loc, ®); - reg &= ~MIRROR_MASK; reg |= BIT(port); b53_write16(dev, B53_MGMT_PAGE, loc, reg); From b870b0f867c77b92d7fd17ace8421904270d3c6a Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Sun, 6 Oct 2019 13:08:55 +0200 Subject: [PATCH 083/572] net: stmmac: selftests: Check if filtering is available before running We need to check if the number of available Hash Filters is enough to run the test, otherwise we will get false failures. Fixes: 091810dbded9 ("net: stmmac: Introduce selftests support") Signed-off-by: Jose Abreu Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c index cc76a42c7466..ed3926d4471d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c @@ -496,6 +496,9 @@ static int stmmac_test_hfilt(struct stmmac_priv *priv) if (ret) return ret; + if (netdev_mc_count(priv->dev) >= priv->hw->multicast_filter_bins) + return -EOPNOTSUPP; + ret = dev_mc_add(priv->dev, gd_addr); if (ret) return ret; @@ -573,6 +576,8 @@ static int stmmac_test_mcfilt(struct stmmac_priv *priv) if (stmmac_filter_check(priv)) return -EOPNOTSUPP; + if (!priv->hw->multicast_filter_bins) + return -EOPNOTSUPP; /* Remove all MC addresses */ __dev_mc_unsync(priv->dev, NULL); @@ -611,6 +616,8 @@ static int stmmac_test_ucfilt(struct stmmac_priv *priv) if (stmmac_filter_check(priv)) return -EOPNOTSUPP; + if (!priv->hw->multicast_filter_bins) + return -EOPNOTSUPP; /* Remove all UC addresses */ __dev_uc_unsync(priv->dev, NULL); From 25683bab09a70542b9f8e3e28f79b3369e56701f Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Sun, 6 Oct 2019 13:08:56 +0200 Subject: [PATCH 084/572] net: stmmac: gmac4+: Not all Unicast addresses may be available Some setups may not have all Unicast addresses filters available. Check the number of available filters before trying to setup it. Fixes: 477286b53f55 ("stmmac: add GMAC4 core support") Signed-off-by: Jose Abreu Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 2cb9c53f93b8..5a7b0aca1d31 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -448,7 +448,7 @@ static void dwmac4_set_filter(struct mac_device_info *hw, value |= GMAC_PACKET_FILTER_HPF; /* Handle multiple unicast addresses */ - if (netdev_uc_count(dev) > GMAC_MAX_PERFECT_ADDRESSES) { + if (netdev_uc_count(dev) > hw->unicast_filter_entries) { /* Switch to promiscuous mode if more than 128 addrs * are required */ From 2809fc13163f4946d7cde092807bea44bbae4478 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Sun, 6 Oct 2019 13:08:57 +0200 Subject: [PATCH 085/572] net: stmmac: selftests: Fix L2 Hash Filter test With the current MAC addresses hard-coded in the test we can get some false positives as we use the Hash Filtering method. Let's change the MAC addresses in the tests to be unique when hashed. Fixes: 091810dbded9 ("net: stmmac: Introduce selftests support") Signed-off-by: Jose Abreu Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c index ed3926d4471d..e4ac3c401432 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c @@ -487,8 +487,8 @@ static int stmmac_filter_check(struct stmmac_priv *priv) static int stmmac_test_hfilt(struct stmmac_priv *priv) { - unsigned char gd_addr[ETH_ALEN] = {0x01, 0x00, 0xcc, 0xcc, 0xdd, 0xdd}; - unsigned char bd_addr[ETH_ALEN] = {0x09, 0x00, 0xaa, 0xaa, 0xbb, 0xbb}; + unsigned char gd_addr[ETH_ALEN] = {0x01, 0xee, 0xdd, 0xcc, 0xbb, 0xaa}; + unsigned char bd_addr[ETH_ALEN] = {0x01, 0x01, 0x02, 0x03, 0x04, 0x05}; struct stmmac_packet_attrs attr = { }; int ret; From c90012ac85c24547e5c3468ef00aabf44aa7332d Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Sun, 6 Oct 2019 10:30:28 +1100 Subject: [PATCH 086/572] lib: test_user_copy: style cleanup While writing the tests for copy_struct_from_user(), I used a construct that Linus doesn't appear to be too fond of: On 2019-10-04, Linus Torvalds wrote: > Hmm. That code is ugly, both before and after the fix. > > This just doesn't make sense for so many reasons: > > if ((ret |= test(umem_src == NULL, "kmalloc failed"))) > > where the insanity comes from > > - why "|=" when you know that "ret" was zero before (and it had to > be, for the test to make sense) > > - why do this as a single line anyway? > > - don't do the stupid "double parenthesis" to hide a warning. Make it > use an actual comparison if you add a layer of parentheses. So instead, use a bog-standard check that isn't nearly as ugly. Fixes: 341115822f88 ("usercopy: Add parentheses around assignment in test_copy_struct_from_user") Fixes: f5a1a536fa14 ("lib: introduce copy_struct_from_user() helper") Signed-off-by: Aleksa Sarai Reviewed-by: Nathan Chancellor Reviewed-by: Christian Brauner Link: https://lore.kernel.org/r/20191005233028.18566-1-cyphar@cyphar.com Signed-off-by: Christian Brauner --- lib/test_user_copy.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/lib/test_user_copy.c b/lib/test_user_copy.c index e365ace06538..ad2372727b1b 100644 --- a/lib/test_user_copy.c +++ b/lib/test_user_copy.c @@ -52,13 +52,14 @@ static int test_check_nonzero_user(char *kmem, char __user *umem, size_t size) size_t zero_end = size - zero_start; /* - * We conduct a series of check_nonzero_user() tests on a block of memory - * with the following byte-pattern (trying every possible [start,end] - * pair): + * We conduct a series of check_nonzero_user() tests on a block of + * memory with the following byte-pattern (trying every possible + * [start,end] pair): * * [ 00 ff 00 ff ... 00 00 00 00 ... ff 00 ff 00 ] * - * And we verify that check_nonzero_user() acts identically to memchr_inv(). + * And we verify that check_nonzero_user() acts identically to + * memchr_inv(). */ memset(kmem, 0x0, size); @@ -93,11 +94,13 @@ static int test_copy_struct_from_user(char *kmem, char __user *umem, size_t ksize, usize; umem_src = kmalloc(size, GFP_KERNEL); - if ((ret |= test(umem_src == NULL, "kmalloc failed"))) + ret = test(umem_src == NULL, "kmalloc failed"); + if (ret) goto out_free; expected = kmalloc(size, GFP_KERNEL); - if ((ret |= test(expected == NULL, "kmalloc failed"))) + ret = test(expected == NULL, "kmalloc failed"); + if (ret) goto out_free; /* Fill umem with a fixed byte pattern. */ From 1099f48457d06b816359fb43ac32a4a07e33219b Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Thu, 3 Oct 2019 12:39:19 +0800 Subject: [PATCH 087/572] ALSA: hda/realtek: Reduce the Headphone static noise on XPS 9350/9360 Headphone on XPS 9350/9360 produces a background white noise. The The noise level somehow correlates with "Headphone Mic Boost", when it sets to 1 the noise disappears. However, doing this has a side effect, which also decreases the overall headphone volume so I didn't send the patch upstream. The noise was bearable back then, but after commit 717f43d81afc ("ALSA: hda/realtek - Update headset mode for ALC256") the noise exacerbates to a point it starts hurting ears. So let's use the workaround to set "Headphone Mic Boost" to 1 and lock it so it's not touchable by userspace. Fixes: 717f43d81afc ("ALSA: hda/realtek - Update headset mode for ALC256") BugLink: https://bugs.launchpad.net/bugs/1654448 BugLink: https://bugs.launchpad.net/bugs/1845810 Signed-off-by: Kai-Heng Feng Link: https://lore.kernel.org/r/20191003043919.10960-1-kai.heng.feng@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index b000b36ac3c6..b5c225a56b98 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5358,6 +5358,17 @@ static void alc271_hp_gate_mic_jack(struct hda_codec *codec, } } +static void alc256_fixup_dell_xps_13_headphone_noise2(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + if (action != HDA_FIXUP_ACT_PRE_PROBE) + return; + + snd_hda_codec_amp_stereo(codec, 0x1a, HDA_INPUT, 0, HDA_AMP_VOLMASK, 1); + snd_hda_override_wcaps(codec, 0x1a, get_wcaps(codec, 0x1a) & ~AC_WCAP_IN_AMP); +} + static void alc269_fixup_limit_int_mic_boost(struct hda_codec *codec, const struct hda_fixup *fix, int action) @@ -5822,6 +5833,7 @@ enum { ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE, ALC275_FIXUP_DELL_XPS, ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE, + ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE2, ALC293_FIXUP_LENOVO_SPK_NOISE, ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY, ALC255_FIXUP_DELL_SPK_NOISE, @@ -6558,6 +6570,12 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE }, + [ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE2] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc256_fixup_dell_xps_13_headphone_noise2, + .chained = true, + .chain_id = ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE + }, [ALC293_FIXUP_LENOVO_SPK_NOISE] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_disable_aamix, @@ -7001,17 +7019,17 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x06de, "Dell", ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK), SND_PCI_QUIRK(0x1028, 0x06df, "Dell", ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK), SND_PCI_QUIRK(0x1028, 0x06e0, "Dell", ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK), - SND_PCI_QUIRK(0x1028, 0x0704, "Dell XPS 13 9350", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE), + SND_PCI_QUIRK(0x1028, 0x0704, "Dell XPS 13 9350", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE2), SND_PCI_QUIRK(0x1028, 0x0706, "Dell Inspiron 7559", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER), SND_PCI_QUIRK(0x1028, 0x0725, "Dell Inspiron 3162", ALC255_FIXUP_DELL_SPK_NOISE), SND_PCI_QUIRK(0x1028, 0x0738, "Dell Precision 5820", ALC269_FIXUP_NO_SHUTUP), - SND_PCI_QUIRK(0x1028, 0x075b, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE), + SND_PCI_QUIRK(0x1028, 0x075b, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE2), SND_PCI_QUIRK(0x1028, 0x075c, "Dell XPS 27 7760", ALC298_FIXUP_SPK_VOLUME), SND_PCI_QUIRK(0x1028, 0x075d, "Dell AIO", ALC298_FIXUP_SPK_VOLUME), SND_PCI_QUIRK(0x1028, 0x07b0, "Dell Precision 7520", ALC295_FIXUP_DISABLE_DAC3), SND_PCI_QUIRK(0x1028, 0x0798, "Dell Inspiron 17 7000 Gaming", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER), SND_PCI_QUIRK(0x1028, 0x080c, "Dell WYSE", ALC225_FIXUP_DELL_WYSE_MIC_NO_PRESENCE), - SND_PCI_QUIRK(0x1028, 0x082a, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE), + SND_PCI_QUIRK(0x1028, 0x082a, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE2), SND_PCI_QUIRK(0x1028, 0x084b, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB), SND_PCI_QUIRK(0x1028, 0x084e, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB), SND_PCI_QUIRK(0x1028, 0x0871, "Dell Precision 3630", ALC255_FIXUP_DELL_HEADSET_MIC), From 130bce3afbbbbe585cba8604f2124c28e8d86fb0 Mon Sep 17 00:00:00 2001 From: Rander Wang Date: Mon, 30 Sep 2019 09:29:45 -0500 Subject: [PATCH 088/572] ALSA: hdac: clear link output stream mapping Fix potential DMA hang upon starting playback on devices in HDA mode on Intel platforms (Gemini Lake/Whiskey Lake/Comet Lake/Ice Lake). It doesn't affect platforms before Gemini Lake or any Intel device in non-HDA mode. The reset value for the LOSDIV register is all output streams valid. Clear this register to invalidate non-existent streams when the bus is powered up. Signed-off-by: Rander Wang Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20190930142945.7805-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Takashi Iwai --- include/sound/hda_register.h | 3 +++ sound/hda/ext/hdac_ext_controller.c | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/include/sound/hda_register.h b/include/sound/hda_register.h index 0fd39295b426..057d2a2d0bd0 100644 --- a/include/sound/hda_register.h +++ b/include/sound/hda_register.h @@ -264,6 +264,9 @@ enum { SDI0, SDI1, SDI2, SDI3, SDO0, SDO1, SDO2, SDO3 }; #define AZX_REG_ML_LOUTPAY 0x20 #define AZX_REG_ML_LINPAY 0x30 +/* bit0 is reserved, with BIT(1) mapping to stream1 */ +#define ML_LOSIDV_STREAM_MASK 0xFFFE + #define ML_LCTL_SCF_MASK 0xF #define AZX_MLCTL_SPA (0x1 << 16) #define AZX_MLCTL_CPA (0x1 << 23) diff --git a/sound/hda/ext/hdac_ext_controller.c b/sound/hda/ext/hdac_ext_controller.c index 211ca85acd8c..cfab60d88c92 100644 --- a/sound/hda/ext/hdac_ext_controller.c +++ b/sound/hda/ext/hdac_ext_controller.c @@ -270,6 +270,11 @@ int snd_hdac_ext_bus_link_get(struct hdac_bus *bus, ret = snd_hdac_ext_bus_link_power_up(link); + /* + * clear the register to invalidate all the output streams + */ + snd_hdac_updatew(link->ml_addr, AZX_REG_ML_LOSIDV, + ML_LOSIDV_STREAM_MASK, 0); /* * wait for 521usec for codec to report status * HDA spec section 4.3 - Codec Discovery From c48fc11b69e95007109206311b0187a3090591f3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 7 Oct 2019 10:58:28 +0100 Subject: [PATCH 089/572] rxrpc: Fix call ref leak When sendmsg() finds a call to continue on with, if the call is in an inappropriate state, it doesn't release the ref it just got on that call before returning an error. This causes the following symptom to show up with kasan: BUG: KASAN: use-after-free in rxrpc_send_keepalive+0x8a2/0x940 net/rxrpc/output.c:635 Read of size 8 at addr ffff888064219698 by task kworker/0:3/11077 where line 635 is: whdr.epoch = htonl(peer->local->rxnet->epoch); The local endpoint (which cannot be pinned by the call) has been released, but not the peer (which is pinned by the call). Fix this by releasing the call in the error path. Fixes: 37411cad633f ("rxrpc: Fix potential NULL-pointer exception") Reported-by: syzbot+d850c266e3df14da1d31@syzkaller.appspotmail.com Signed-off-by: David Howells --- net/rxrpc/sendmsg.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 6a1547b270fe..22f51a7e356e 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -661,6 +661,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) case RXRPC_CALL_SERVER_PREALLOC: case RXRPC_CALL_SERVER_SECURING: case RXRPC_CALL_SERVER_ACCEPTING: + rxrpc_put_call(call, rxrpc_call_put); ret = -EBUSY; goto error_release_sock; default: From e1056f9bbf0d79e9120dc4cbdc96ce7fee6cd15f Mon Sep 17 00:00:00 2001 From: Pragnesh Patel Date: Wed, 18 Sep 2019 17:31:00 +0530 Subject: [PATCH 090/572] media: dt-bindings: Fix building error for dt_binding_check $id doesn't match the actual filename, so update the $id Fixes: c5e8f4ccd7750 ("media: dt-bindings: media: Add Allwinner A10 CSI binding") Signed-off-by: Pragnesh Patel Signed-off-by: Maxime Ripard --- .../devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml b/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml index 27f38eed389e..5dd1cf490cd9 100644 --- a/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml +++ b/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml @@ -1,7 +1,7 @@ # SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) %YAML 1.2 --- -$id: http://devicetree.org/schemas/arm/allwinner,sun4i-a10-csi.yaml# +$id: http://devicetree.org/schemas/media/allwinner,sun4i-a10-csi.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# title: Allwinner A10 CMOS Sensor Interface (CSI) Device Tree Bindings From 55f6c98e3674ce16038a1949c3f9ca5a9a99f289 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 7 Oct 2019 10:58:29 +0100 Subject: [PATCH 091/572] rxrpc: Fix trace-after-put looking at the put peer record rxrpc_put_peer() calls trace_rxrpc_peer() after it has done the decrement of the refcount - which looks at the debug_id in the peer record. But unless the refcount was reduced to zero, we no longer have the right to look in the record and, indeed, it may be deleted by some other thread. Fix this by getting the debug_id out before decrementing the refcount and then passing that into the tracepoint. This can cause the following symptoms: BUG: KASAN: use-after-free in __rxrpc_put_peer net/rxrpc/peer_object.c:411 [inline] BUG: KASAN: use-after-free in rxrpc_put_peer+0x685/0x6a0 net/rxrpc/peer_object.c:435 Read of size 8 at addr ffff888097ec0058 by task syz-executor823/24216 Fixes: 1159d4b496f5 ("rxrpc: Add a tracepoint to track rxrpc_peer refcounting") Reported-by: syzbot+b9be979c55f2bea8ed30@syzkaller.appspotmail.com Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 6 +++--- net/rxrpc/peer_object.c | 11 +++++++---- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index edc5c887a44c..45556fe771c3 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -519,10 +519,10 @@ TRACE_EVENT(rxrpc_local, ); TRACE_EVENT(rxrpc_peer, - TP_PROTO(struct rxrpc_peer *peer, enum rxrpc_peer_trace op, + TP_PROTO(unsigned int peer_debug_id, enum rxrpc_peer_trace op, int usage, const void *where), - TP_ARGS(peer, op, usage, where), + TP_ARGS(peer_debug_id, op, usage, where), TP_STRUCT__entry( __field(unsigned int, peer ) @@ -532,7 +532,7 @@ TRACE_EVENT(rxrpc_peer, ), TP_fast_assign( - __entry->peer = peer->debug_id; + __entry->peer = peer_debug_id; __entry->op = op; __entry->usage = usage; __entry->where = where; diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 9c3ac96f71cb..b700b7ecaa3d 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -382,7 +382,7 @@ struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer) int n; n = atomic_inc_return(&peer->usage); - trace_rxrpc_peer(peer, rxrpc_peer_got, n, here); + trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, n, here); return peer; } @@ -396,7 +396,7 @@ struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *peer) if (peer) { int n = atomic_fetch_add_unless(&peer->usage, 1, 0); if (n > 0) - trace_rxrpc_peer(peer, rxrpc_peer_got, n + 1, here); + trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, n + 1, here); else peer = NULL; } @@ -426,11 +426,13 @@ static void __rxrpc_put_peer(struct rxrpc_peer *peer) void rxrpc_put_peer(struct rxrpc_peer *peer) { const void *here = __builtin_return_address(0); + unsigned int debug_id; int n; if (peer) { + debug_id = peer->debug_id; n = atomic_dec_return(&peer->usage); - trace_rxrpc_peer(peer, rxrpc_peer_put, n, here); + trace_rxrpc_peer(debug_id, rxrpc_peer_put, n, here); if (n == 0) __rxrpc_put_peer(peer); } @@ -443,10 +445,11 @@ void rxrpc_put_peer(struct rxrpc_peer *peer) void rxrpc_put_peer_locked(struct rxrpc_peer *peer) { const void *here = __builtin_return_address(0); + unsigned int debug_id = peer->debug_id; int n; n = atomic_dec_return(&peer->usage); - trace_rxrpc_peer(peer, rxrpc_peer_put, n, here); + trace_rxrpc_peer(debug_id, rxrpc_peer_put, n, here); if (n == 0) { hash_del_rcu(&peer->hash_link); list_del_init(&peer->keepalive_link); From 4c1295dccc0afe0905b6ca4c62ade7f2406f2cfb Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 7 Oct 2019 10:58:29 +0100 Subject: [PATCH 092/572] rxrpc: Fix trace-after-put looking at the put connection record rxrpc_put_*conn() calls trace_rxrpc_conn() after they have done the decrement of the refcount - which looks at the debug_id in the connection record. But unless the refcount was reduced to zero, we no longer have the right to look in the record and, indeed, it may be deleted by some other thread. Fix this by getting the debug_id out before decrementing the refcount and then passing that into the tracepoint. Fixes: 363deeab6d0f ("rxrpc: Add connection tracepoint and client conn state tracepoint") Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 6 +++--- net/rxrpc/call_accept.c | 2 +- net/rxrpc/conn_client.c | 6 ++++-- net/rxrpc/conn_object.c | 13 +++++++------ net/rxrpc/conn_service.c | 2 +- 5 files changed, 16 insertions(+), 13 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 45556fe771c3..38a97e890cb6 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -546,10 +546,10 @@ TRACE_EVENT(rxrpc_peer, ); TRACE_EVENT(rxrpc_conn, - TP_PROTO(struct rxrpc_connection *conn, enum rxrpc_conn_trace op, + TP_PROTO(unsigned int conn_debug_id, enum rxrpc_conn_trace op, int usage, const void *where), - TP_ARGS(conn, op, usage, where), + TP_ARGS(conn_debug_id, op, usage, where), TP_STRUCT__entry( __field(unsigned int, conn ) @@ -559,7 +559,7 @@ TRACE_EVENT(rxrpc_conn, ), TP_fast_assign( - __entry->conn = conn->debug_id; + __entry->conn = conn_debug_id; __entry->op = op; __entry->usage = usage; __entry->where = where; diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 00c095d74145..c1b1b7dd2924 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -84,7 +84,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, smp_store_release(&b->conn_backlog_head, (head + 1) & (size - 1)); - trace_rxrpc_conn(conn, rxrpc_conn_new_service, + trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_service, atomic_read(&conn->usage), here); } diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 3f1da1b49f69..700eb77173fc 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -212,7 +212,8 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp) rxrpc_get_local(conn->params.local); key_get(conn->params.key); - trace_rxrpc_conn(conn, rxrpc_conn_new_client, atomic_read(&conn->usage), + trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_client, + atomic_read(&conn->usage), __builtin_return_address(0)); trace_rxrpc_client(conn, -1, rxrpc_client_alloc); _leave(" = %p", conn); @@ -985,11 +986,12 @@ rxrpc_put_one_client_conn(struct rxrpc_connection *conn) void rxrpc_put_client_conn(struct rxrpc_connection *conn) { const void *here = __builtin_return_address(0); + unsigned int debug_id = conn->debug_id; int n; do { n = atomic_dec_return(&conn->usage); - trace_rxrpc_conn(conn, rxrpc_conn_put_client, n, here); + trace_rxrpc_conn(debug_id, rxrpc_conn_put_client, n, here); if (n > 0) return; ASSERTCMP(n, >=, 0); diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index ed05b6922132..38d718e90dc6 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -269,7 +269,7 @@ bool rxrpc_queue_conn(struct rxrpc_connection *conn) if (n == 0) return false; if (rxrpc_queue_work(&conn->processor)) - trace_rxrpc_conn(conn, rxrpc_conn_queued, n + 1, here); + trace_rxrpc_conn(conn->debug_id, rxrpc_conn_queued, n + 1, here); else rxrpc_put_connection(conn); return true; @@ -284,7 +284,7 @@ void rxrpc_see_connection(struct rxrpc_connection *conn) if (conn) { int n = atomic_read(&conn->usage); - trace_rxrpc_conn(conn, rxrpc_conn_seen, n, here); + trace_rxrpc_conn(conn->debug_id, rxrpc_conn_seen, n, here); } } @@ -296,7 +296,7 @@ void rxrpc_get_connection(struct rxrpc_connection *conn) const void *here = __builtin_return_address(0); int n = atomic_inc_return(&conn->usage); - trace_rxrpc_conn(conn, rxrpc_conn_got, n, here); + trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, n, here); } /* @@ -310,7 +310,7 @@ rxrpc_get_connection_maybe(struct rxrpc_connection *conn) if (conn) { int n = atomic_fetch_add_unless(&conn->usage, 1, 0); if (n > 0) - trace_rxrpc_conn(conn, rxrpc_conn_got, n + 1, here); + trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, n + 1, here); else conn = NULL; } @@ -333,10 +333,11 @@ static void rxrpc_set_service_reap_timer(struct rxrpc_net *rxnet, void rxrpc_put_service_conn(struct rxrpc_connection *conn) { const void *here = __builtin_return_address(0); + unsigned int debug_id = conn->debug_id; int n; n = atomic_dec_return(&conn->usage); - trace_rxrpc_conn(conn, rxrpc_conn_put_service, n, here); + trace_rxrpc_conn(debug_id, rxrpc_conn_put_service, n, here); ASSERTCMP(n, >=, 0); if (n == 1) rxrpc_set_service_reap_timer(conn->params.local->rxnet, @@ -420,7 +421,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work) */ if (atomic_cmpxchg(&conn->usage, 1, 0) != 1) continue; - trace_rxrpc_conn(conn, rxrpc_conn_reap_service, 0, NULL); + trace_rxrpc_conn(conn->debug_id, rxrpc_conn_reap_service, 0, NULL); if (rxrpc_conn_is_client(conn)) BUG(); diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c index b30e13f6d95f..123d6ceab15c 100644 --- a/net/rxrpc/conn_service.c +++ b/net/rxrpc/conn_service.c @@ -134,7 +134,7 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxn list_add_tail(&conn->proc_link, &rxnet->conn_proc_list); write_unlock(&rxnet->conn_lock); - trace_rxrpc_conn(conn, rxrpc_conn_new_service, + trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_service, atomic_read(&conn->usage), __builtin_return_address(0)); } From 48c9e0ec7cbbb7370448f859ccc8e3b7eb69e755 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 7 Oct 2019 10:58:29 +0100 Subject: [PATCH 093/572] rxrpc: Fix trace-after-put looking at the put call record rxrpc_put_call() calls trace_rxrpc_call() after it has done the decrement of the refcount - which looks at the debug_id in the call record. But unless the refcount was reduced to zero, we no longer have the right to look in the record and, indeed, it may be deleted by some other thread. Fix this by getting the debug_id out before decrementing the refcount and then passing that into the tracepoint. Fixes: e34d4234b0b7 ("rxrpc: Trace rxrpc_call usage") Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 6 +++--- net/rxrpc/call_accept.c | 2 +- net/rxrpc/call_object.c | 28 +++++++++++++++++----------- 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 38a97e890cb6..191fe447f990 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -606,10 +606,10 @@ TRACE_EVENT(rxrpc_client, ); TRACE_EVENT(rxrpc_call, - TP_PROTO(struct rxrpc_call *call, enum rxrpc_call_trace op, + TP_PROTO(unsigned int call_debug_id, enum rxrpc_call_trace op, int usage, const void *where, const void *aux), - TP_ARGS(call, op, usage, where, aux), + TP_ARGS(call_debug_id, op, usage, where, aux), TP_STRUCT__entry( __field(unsigned int, call ) @@ -620,7 +620,7 @@ TRACE_EVENT(rxrpc_call, ), TP_fast_assign( - __entry->call = call->debug_id; + __entry->call = call_debug_id; __entry->op = op; __entry->usage = usage; __entry->where = where; diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index c1b1b7dd2924..1f778102ed8d 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -97,7 +97,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, call->flags |= (1 << RXRPC_CALL_IS_SERVICE); call->state = RXRPC_CALL_SERVER_PREALLOC; - trace_rxrpc_call(call, rxrpc_call_new_service, + trace_rxrpc_call(call->debug_id, rxrpc_call_new_service, atomic_read(&call->usage), here, (const void *)user_call_ID); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 32d8dc677142..6dace078971a 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -240,7 +240,8 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, if (p->intr) __set_bit(RXRPC_CALL_IS_INTR, &call->flags); call->tx_total_len = p->tx_total_len; - trace_rxrpc_call(call, rxrpc_call_new_client, atomic_read(&call->usage), + trace_rxrpc_call(call->debug_id, rxrpc_call_new_client, + atomic_read(&call->usage), here, (const void *)p->user_call_ID); /* We need to protect a partially set up call against the user as we @@ -290,8 +291,8 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, if (ret < 0) goto error; - trace_rxrpc_call(call, rxrpc_call_connected, atomic_read(&call->usage), - here, NULL); + trace_rxrpc_call(call->debug_id, rxrpc_call_connected, + atomic_read(&call->usage), here, NULL); rxrpc_start_call_timer(call); @@ -313,8 +314,8 @@ error_dup_user_ID: error: __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, RX_CALL_DEAD, ret); - trace_rxrpc_call(call, rxrpc_call_error, atomic_read(&call->usage), - here, ERR_PTR(ret)); + trace_rxrpc_call(call->debug_id, rxrpc_call_error, + atomic_read(&call->usage), here, ERR_PTR(ret)); rxrpc_release_call(rx, call); mutex_unlock(&call->user_mutex); rxrpc_put_call(call, rxrpc_call_put); @@ -376,7 +377,8 @@ bool rxrpc_queue_call(struct rxrpc_call *call) if (n == 0) return false; if (rxrpc_queue_work(&call->processor)) - trace_rxrpc_call(call, rxrpc_call_queued, n + 1, here, NULL); + trace_rxrpc_call(call->debug_id, rxrpc_call_queued, n + 1, + here, NULL); else rxrpc_put_call(call, rxrpc_call_put_noqueue); return true; @@ -391,7 +393,8 @@ bool __rxrpc_queue_call(struct rxrpc_call *call) int n = atomic_read(&call->usage); ASSERTCMP(n, >=, 1); if (rxrpc_queue_work(&call->processor)) - trace_rxrpc_call(call, rxrpc_call_queued_ref, n, here, NULL); + trace_rxrpc_call(call->debug_id, rxrpc_call_queued_ref, n, + here, NULL); else rxrpc_put_call(call, rxrpc_call_put_noqueue); return true; @@ -406,7 +409,8 @@ void rxrpc_see_call(struct rxrpc_call *call) if (call) { int n = atomic_read(&call->usage); - trace_rxrpc_call(call, rxrpc_call_seen, n, here, NULL); + trace_rxrpc_call(call->debug_id, rxrpc_call_seen, n, + here, NULL); } } @@ -418,7 +422,7 @@ void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op) const void *here = __builtin_return_address(0); int n = atomic_inc_return(&call->usage); - trace_rxrpc_call(call, op, n, here, NULL); + trace_rxrpc_call(call->debug_id, op, n, here, NULL); } /* @@ -445,7 +449,8 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) _enter("{%d,%d}", call->debug_id, atomic_read(&call->usage)); - trace_rxrpc_call(call, rxrpc_call_release, atomic_read(&call->usage), + trace_rxrpc_call(call->debug_id, rxrpc_call_release, + atomic_read(&call->usage), here, (const void *)call->flags); ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); @@ -534,12 +539,13 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op) { struct rxrpc_net *rxnet = call->rxnet; const void *here = __builtin_return_address(0); + unsigned int debug_id = call->debug_id; int n; ASSERT(call != NULL); n = atomic_dec_return(&call->usage); - trace_rxrpc_call(call, op, n, here, NULL); + trace_rxrpc_call(debug_id, op, n, here, NULL); ASSERTCMP(n, >=, 0); if (n == 0) { _debug("call %d dead", call->debug_id); From 9ebeddef58c41bd700419cdcece24cf64ce32276 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 7 Oct 2019 10:58:29 +0100 Subject: [PATCH 094/572] rxrpc: rxrpc_peer needs to hold a ref on the rxrpc_local record The rxrpc_peer record needs to hold a reference on the rxrpc_local record it points as the peer is used as a base to access information in the rxrpc_local record. This can cause problems in __rxrpc_put_peer(), where we need the network namespace pointer, and in rxrpc_send_keepalive(), where we need to access the UDP socket, leading to symptoms like: BUG: KASAN: use-after-free in __rxrpc_put_peer net/rxrpc/peer_object.c:411 [inline] BUG: KASAN: use-after-free in rxrpc_put_peer+0x685/0x6a0 net/rxrpc/peer_object.c:435 Read of size 8 at addr ffff888097ec0058 by task syz-executor823/24216 Fix this by taking a ref on the local record for the peer record. Fixes: ace45bec6d77 ("rxrpc: Fix firewall route keepalive") Fixes: 2baec2c3f854 ("rxrpc: Support network namespacing") Reported-by: syzbot+b9be979c55f2bea8ed30@syzkaller.appspotmail.com Signed-off-by: David Howells --- net/rxrpc/peer_object.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index b700b7ecaa3d..64830d8c1fdb 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -216,7 +216,7 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp) peer = kzalloc(sizeof(struct rxrpc_peer), gfp); if (peer) { atomic_set(&peer->usage, 1); - peer->local = local; + peer->local = rxrpc_get_local(local); INIT_HLIST_HEAD(&peer->error_targets); peer->service_conns = RB_ROOT; seqlock_init(&peer->service_conn_lock); @@ -307,7 +307,6 @@ void rxrpc_new_incoming_peer(struct rxrpc_sock *rx, struct rxrpc_local *local, unsigned long hash_key; hash_key = rxrpc_peer_hash_key(local, &peer->srx); - peer->local = local; rxrpc_init_peer(rx, peer, hash_key); spin_lock(&rxnet->peer_hash_lock); @@ -417,6 +416,7 @@ static void __rxrpc_put_peer(struct rxrpc_peer *peer) list_del_init(&peer->keepalive_link); spin_unlock_bh(&rxnet->peer_hash_lock); + rxrpc_put_local(peer->local); kfree_rcu(peer, rcu); } @@ -453,6 +453,7 @@ void rxrpc_put_peer_locked(struct rxrpc_peer *peer) if (n == 0) { hash_del_rcu(&peer->hash_link); list_del_init(&peer->keepalive_link); + rxrpc_put_local(peer->local); kfree_rcu(peer, rcu); } } From 91fcfbe8852edb929ff8702534525031a15d0aa6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 7 Oct 2019 10:58:29 +0100 Subject: [PATCH 095/572] rxrpc: Fix call crypto state cleanup Fix the cleanup of the crypto state on a call after the call has been disconnected. As the call has been disconnected, its connection ref has been discarded and so we can't go through that to get to the security ops table. Fix this by caching the security ops pointer in the rxrpc_call struct and using that when freeing the call security state. Also use this in other places we're dealing with call-specific security. The symptoms look like: BUG: KASAN: use-after-free in rxrpc_release_call+0xb2d/0xb60 net/rxrpc/call_object.c:481 Read of size 8 at addr ffff888062ffeb50 by task syz-executor.5/4764 Fixes: 1db88c534371 ("rxrpc: Fix -Wframe-larger-than= warnings from on-stack crypto") Reported-by: syzbot+eed305768ece6682bb7f@syzkaller.appspotmail.com Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 1 + net/rxrpc/call_accept.c | 1 + net/rxrpc/call_object.c | 6 +++--- net/rxrpc/conn_client.c | 3 +++ net/rxrpc/recvmsg.c | 6 +++--- net/rxrpc/sendmsg.c | 2 +- 6 files changed, 12 insertions(+), 7 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 1091bf35a199..ecc17dabec8f 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -556,6 +556,7 @@ struct rxrpc_call { struct rxrpc_peer *peer; /* Peer record for remote address */ struct rxrpc_sock __rcu *socket; /* socket responsible */ struct rxrpc_net *rxnet; /* Network namespace to which call belongs */ + const struct rxrpc_security *security; /* applied security module */ struct mutex user_mutex; /* User access mutex */ unsigned long ack_at; /* When deferred ACK needs to happen */ unsigned long ack_lost_at; /* When ACK is figured as lost */ diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 1f778102ed8d..135bf5cd8dd5 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -307,6 +307,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, rxrpc_see_call(call); call->conn = conn; + call->security = conn->security; call->peer = rxrpc_get_peer(conn->params.peer); call->cong_cwnd = call->peer->cong_cwnd; return call; diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 6dace078971a..a31c18c09894 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -493,10 +493,10 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn); - if (conn) { + if (conn) rxrpc_disconnect_call(call); - conn->security->free_call_crypto(call); - } + if (call->security) + call->security->free_call_crypto(call); rxrpc_cleanup_ring(call); _leave(""); diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 700eb77173fc..376370cd9285 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -353,6 +353,7 @@ static int rxrpc_get_client_conn(struct rxrpc_sock *rx, if (cp->exclusive) { call->conn = candidate; + call->security = candidate->security; call->security_ix = candidate->security_ix; call->service_id = candidate->service_id; _leave(" = 0 [exclusive %d]", candidate->debug_id); @@ -404,6 +405,7 @@ static int rxrpc_get_client_conn(struct rxrpc_sock *rx, candidate_published: set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags); call->conn = candidate; + call->security = candidate->security; call->security_ix = candidate->security_ix; call->service_id = candidate->service_id; spin_unlock(&local->client_conns_lock); @@ -426,6 +428,7 @@ found_extant_conn: spin_lock(&conn->channel_lock); call->conn = conn; + call->security = conn->security; call->security_ix = conn->security_ix; call->service_id = conn->service_id; list_add_tail(&call->chan_wait_link, &conn->waiting_calls); diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 3b0becb12041..a4090797c9b2 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -251,8 +251,8 @@ static int rxrpc_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, seq += subpacket; } - return call->conn->security->verify_packet(call, skb, offset, len, - seq, cksum); + return call->security->verify_packet(call, skb, offset, len, + seq, cksum); } /* @@ -291,7 +291,7 @@ static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb, *_offset = offset; *_len = len; - call->conn->security->locate_data(call, skb, _offset, _len); + call->security->locate_data(call, skb, _offset, _len); return 0; } diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 22f51a7e356e..813fd6888142 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -419,7 +419,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, call->tx_winsize) sp->hdr.flags |= RXRPC_MORE_PACKETS; - ret = conn->security->secure_packet( + ret = call->security->secure_packet( call, skb, skb->mark, skb->head); if (ret < 0) goto out; From 90b32268e15c003d894e5567b7181bcc2bad6b2c Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 3 Oct 2019 17:48:41 +0200 Subject: [PATCH 096/572] dt-bindings: media: sun4i-csi: Drop the module clock It turns out that what was thought to be the module clock was actually the clock meant to be used by the sensor, and isn't playing any role with the CSI controller itself. Let's drop that clock from our binding. Fixes: c5e8f4ccd775 ("media: dt-bindings: media: Add Allwinner A10 CSI binding") Reported-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard --- .../devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml b/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml index 5dd1cf490cd9..d3e423fcb6c2 100644 --- a/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml +++ b/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml @@ -27,14 +27,12 @@ properties: clocks: items: - description: The CSI interface clock - - description: The CSI module clock - description: The CSI ISP clock - description: The CSI DRAM clock clock-names: items: - const: bus - - const: mod - const: isp - const: ram @@ -89,9 +87,8 @@ examples: compatible = "allwinner,sun7i-a20-csi0"; reg = <0x01c09000 0x1000>; interrupts = ; - clocks = <&ccu CLK_AHB_CSI0>, <&ccu CLK_CSI0>, - <&ccu CLK_CSI_SCLK>, <&ccu CLK_DRAM_CSI0>; - clock-names = "bus", "mod", "isp", "ram"; + clocks = <&ccu CLK_AHB_CSI0>, <&ccu CLK_CSI_SCLK>, <&ccu CLK_DRAM_CSI0>; + clock-names = "bus", "isp", "ram"; resets = <&ccu RST_CSI0>; port { From cf03c691eb959f438c16d58ffd11f150b1a95b1e Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 3 Oct 2019 17:48:42 +0200 Subject: [PATCH 097/572] ARM: dts: sun7i: Drop the module clock from the device tree What we thought would be the module clock is actually the clock meant to be used by the sensors, and play no role in the CSI controller. Now that the binding has been updated to reflect that, let's update the device tree too. Fixes: d2b9c6444301 ("ARM: dts: sun7i: Add CSI0 controller") Reported-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard --- arch/arm/boot/dts/sun7i-a20.dtsi | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi index 49380de754a9..31631a3504c0 100644 --- a/arch/arm/boot/dts/sun7i-a20.dtsi +++ b/arch/arm/boot/dts/sun7i-a20.dtsi @@ -380,9 +380,8 @@ compatible = "allwinner,sun7i-a20-csi0"; reg = <0x01c09000 0x1000>; interrupts = ; - clocks = <&ccu CLK_AHB_CSI0>, <&ccu CLK_CSI0>, - <&ccu CLK_CSI_SCLK>, <&ccu CLK_DRAM_CSI0>; - clock-names = "bus", "mod", "isp", "ram"; + clocks = <&ccu CLK_AHB_CSI0>, <&ccu CLK_CSI_SCLK>, <&ccu CLK_DRAM_CSI0>; + clock-names = "bus", "isp", "ram"; resets = <&ccu RST_CSI0>; status = "disabled"; }; From 0632fa042541dbb3b8b960a8cd519eb9b6b584c0 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 5 Oct 2019 23:22:02 +0200 Subject: [PATCH 098/572] ASoC: core: Fix pcm code debugfs error We can have 2 dcpm-s with the same backend and frontend name (capture + playback pair), this causes the following debugfs error on Intel Bay Trail systems: [ 298.969049] debugfs: Directory 'SSP2-Codec' with parent 'Baytrail Audio Port' already present! This commit adds a ":playback" or ":capture" postfix to the debugfs dir name fixing this. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20191005212202.5206-1-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/soc-pcm.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index a1b99ac57d9e..b600d3eaaf5c 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -1146,6 +1146,7 @@ static int dpcm_be_connect(struct snd_soc_pcm_runtime *fe, { struct snd_soc_dpcm *dpcm; unsigned long flags; + char *name; /* only add new dpcms */ for_each_dpcm_be(fe, stream, dpcm) { @@ -1171,9 +1172,15 @@ static int dpcm_be_connect(struct snd_soc_pcm_runtime *fe, stream ? "<-" : "->", be->dai_link->name); #ifdef CONFIG_DEBUG_FS - dpcm->debugfs_state = debugfs_create_dir(be->dai_link->name, - fe->debugfs_dpcm_root); - debugfs_create_u32("state", 0644, dpcm->debugfs_state, &dpcm->state); + name = kasprintf(GFP_KERNEL, "%s:%s", be->dai_link->name, + stream ? "capture" : "playback"); + if (name) { + dpcm->debugfs_state = debugfs_create_dir(name, + fe->debugfs_dpcm_root); + debugfs_create_u32("state", 0644, dpcm->debugfs_state, + &dpcm->state); + kfree(name); + } #endif return 1; } From 4b5149365faa8fddf7dc58e47cfed51b19db051e Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Sun, 6 Oct 2019 13:21:07 -0700 Subject: [PATCH 099/572] ionic: fix stats memory dereference When the netdev is down, the queues and their debug stats do not exist, so don't try using a pointer to them when when printing the ethtool stats. Fixes: e470355bd96a ("ionic: Add driver stats") Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- .../net/ethernet/pensando/ionic/ionic_lif.h | 2 ++ .../net/ethernet/pensando/ionic/ionic_stats.c | 29 ++++++++++++------- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h index 812190e729c2..6a95b42a8d8c 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h @@ -182,6 +182,8 @@ struct ionic_lif { #define lif_to_txqcq(lif, i) ((lif)->txqcqs[i].qcq) #define lif_to_rxqcq(lif, i) ((lif)->rxqcqs[i].qcq) +#define lif_to_txstats(lif, i) ((lif)->txqcqs[i].stats->tx) +#define lif_to_rxstats(lif, i) ((lif)->rxqcqs[i].stats->rx) #define lif_to_txq(lif, i) (&lif_to_txqcq((lif), i)->q) #define lif_to_rxq(lif, i) (&lif_to_txqcq((lif), i)->q) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_stats.c b/drivers/net/ethernet/pensando/ionic/ionic_stats.c index e2907884f843..03916b6d47f2 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_stats.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_stats.c @@ -117,7 +117,8 @@ static u64 ionic_sw_stats_get_count(struct ionic_lif *lif) /* rx stats */ total += MAX_Q(lif) * IONIC_NUM_RX_STATS; - if (test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) { + if (test_bit(IONIC_LIF_UP, lif->state) && + test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) { /* tx debug stats */ total += MAX_Q(lif) * (IONIC_NUM_DBG_CQ_STATS + IONIC_NUM_TX_Q_STATS + @@ -149,7 +150,8 @@ static void ionic_sw_stats_get_strings(struct ionic_lif *lif, u8 **buf) *buf += ETH_GSTRING_LEN; } - if (test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) { + if (test_bit(IONIC_LIF_UP, lif->state) && + test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) { for (i = 0; i < IONIC_NUM_TX_Q_STATS; i++) { snprintf(*buf, ETH_GSTRING_LEN, "txq_%d_%s", @@ -187,7 +189,8 @@ static void ionic_sw_stats_get_strings(struct ionic_lif *lif, u8 **buf) *buf += ETH_GSTRING_LEN; } - if (test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) { + if (test_bit(IONIC_LIF_UP, lif->state) && + test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) { for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) { snprintf(*buf, ETH_GSTRING_LEN, "rxq_%d_cq_%s", @@ -223,6 +226,8 @@ static void ionic_sw_stats_get_values(struct ionic_lif *lif, u64 **buf) { struct ionic_lif_sw_stats lif_stats; struct ionic_qcq *txqcq, *rxqcq; + struct ionic_tx_stats *txstats; + struct ionic_rx_stats *rxstats; int i, q_num; ionic_get_lif_stats(lif, &lif_stats); @@ -233,15 +238,17 @@ static void ionic_sw_stats_get_values(struct ionic_lif *lif, u64 **buf) } for (q_num = 0; q_num < MAX_Q(lif); q_num++) { - txqcq = lif_to_txqcq(lif, q_num); + txstats = &lif_to_txstats(lif, q_num); for (i = 0; i < IONIC_NUM_TX_STATS; i++) { - **buf = IONIC_READ_STAT64(&txqcq->stats->tx, + **buf = IONIC_READ_STAT64(txstats, &ionic_tx_stats_desc[i]); (*buf)++; } - if (test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) { + if (test_bit(IONIC_LIF_UP, lif->state) && + test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) { + txqcq = lif_to_txqcq(lif, q_num); for (i = 0; i < IONIC_NUM_TX_Q_STATS; i++) { **buf = IONIC_READ_STAT64(&txqcq->q, &ionic_txq_stats_desc[i]); @@ -258,22 +265,24 @@ static void ionic_sw_stats_get_values(struct ionic_lif *lif, u64 **buf) (*buf)++; } for (i = 0; i < IONIC_MAX_NUM_SG_CNTR; i++) { - **buf = txqcq->stats->tx.sg_cntr[i]; + **buf = txstats->sg_cntr[i]; (*buf)++; } } } for (q_num = 0; q_num < MAX_Q(lif); q_num++) { - rxqcq = lif_to_rxqcq(lif, q_num); + rxstats = &lif_to_rxstats(lif, q_num); for (i = 0; i < IONIC_NUM_RX_STATS; i++) { - **buf = IONIC_READ_STAT64(&rxqcq->stats->rx, + **buf = IONIC_READ_STAT64(rxstats, &ionic_rx_stats_desc[i]); (*buf)++; } - if (test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) { + if (test_bit(IONIC_LIF_UP, lif->state) && + test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) { + rxqcq = lif_to_rxqcq(lif, q_num); for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) { **buf = IONIC_READ_STAT64(&rxqcq->cq, &ionic_dbg_cq_stats_desc[i]); From 503c9addef613c872679e24fc8a78f3febeb5a08 Mon Sep 17 00:00:00 2001 From: Antonio Borneo Date: Mon, 7 Oct 2019 17:43:02 +0200 Subject: [PATCH 100/572] ptp: fix typo of "mechanism" in Kconfig help text Fix typo s/mechansim/mechanism/ Signed-off-by: Antonio Borneo Signed-off-by: David S. Miller --- drivers/ptp/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig index 960961fb0d7c..0517272a268e 100644 --- a/drivers/ptp/Kconfig +++ b/drivers/ptp/Kconfig @@ -97,8 +97,8 @@ config PTP_1588_CLOCK_PCH help This driver adds support for using the PCH EG20T as a PTP clock. The hardware supports time stamping of PTP packets - when using the end-to-end delay (E2E) mechansim. The peer - delay mechansim (P2P) is not supported. + when using the end-to-end delay (E2E) mechanism. The peer + delay mechanism (P2P) is not supported. This clock is only useful if your PTP programs are getting hardware time stamps on the PTP Ethernet packets using the From 1399c59fa92984836db90538cf92397fe7caaa57 Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Fri, 4 Oct 2019 14:42:19 -0500 Subject: [PATCH 101/572] nl80211: fix memory leak in nl80211_get_ftm_responder_stats In nl80211_get_ftm_responder_stats, a new skb is created via nlmsg_new named msg. If nl80211hdr_put() fails, then msg should be released. The return statement should be replace by goto to error handling code. Fixes: 81e54d08d9d8 ("cfg80211: support FTM responder configuration/statistics") Signed-off-by: Navid Emamdoost Link: https://lore.kernel.org/r/20191004194220.19412-1-navid.emamdoost@gmail.com Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 141cdb171665..4453dd375de9 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -13682,7 +13682,7 @@ static int nl80211_get_ftm_responder_stats(struct sk_buff *skb, hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_GET_FTM_RESPONDER_STATS); if (!hdr) - return -ENOBUFS; + goto nla_put_failure; if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; From 461c4c2b4c0731d7452bad4e77c0cdbdcea1804c Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Fri, 4 Oct 2019 15:37:06 +0300 Subject: [PATCH 102/572] cfg80211: fix a bunch of RCU issues in multi-bssid code cfg80211_update_notlisted_nontrans() leaves the RCU critical session too early, while still using nontrans_ssid which is RCU protected. In addition, it performs a bunch of RCU pointer update operations such as rcu_access_pointer and rcu_assign_pointer. The caller, cfg80211_inform_bss_frame_data(), also accesses the RCU pointer without holding the lock. Just wrap all of this with bss_lock. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/20191004123706.15768-3-luca@coelho.fi Signed-off-by: Johannes Berg --- net/wireless/scan.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index ff1016607f0b..aef240fdf8df 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1703,8 +1703,7 @@ cfg80211_parse_mbssid_frame_data(struct wiphy *wiphy, static void cfg80211_update_notlisted_nontrans(struct wiphy *wiphy, struct cfg80211_bss *nontrans_bss, - struct ieee80211_mgmt *mgmt, size_t len, - gfp_t gfp) + struct ieee80211_mgmt *mgmt, size_t len) { u8 *ie, *new_ie, *pos; const u8 *nontrans_ssid, *trans_ssid, *mbssid; @@ -1715,6 +1714,8 @@ cfg80211_update_notlisted_nontrans(struct wiphy *wiphy, const struct cfg80211_bss_ies *old; u8 cpy_len; + lockdep_assert_held(&wiphy_to_rdev(wiphy)->bss_lock); + ie = mgmt->u.probe_resp.variable; new_ie_len = ielen; @@ -1731,23 +1732,22 @@ cfg80211_update_notlisted_nontrans(struct wiphy *wiphy, if (!mbssid || mbssid < trans_ssid) return; new_ie_len -= mbssid[1]; - rcu_read_lock(); + nontrans_ssid = ieee80211_bss_get_ie(nontrans_bss, WLAN_EID_SSID); - if (!nontrans_ssid) { - rcu_read_unlock(); + if (!nontrans_ssid) return; - } + new_ie_len += nontrans_ssid[1]; - rcu_read_unlock(); /* generate new ie for nontrans BSS * 1. replace SSID with nontrans BSS' SSID * 2. skip MBSSID IE */ - new_ie = kzalloc(new_ie_len, gfp); + new_ie = kzalloc(new_ie_len, GFP_ATOMIC); if (!new_ie) return; - new_ies = kzalloc(sizeof(*new_ies) + new_ie_len, gfp); + + new_ies = kzalloc(sizeof(*new_ies) + new_ie_len, GFP_ATOMIC); if (!new_ies) goto out_free; @@ -1901,6 +1901,8 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, cfg80211_parse_mbssid_frame_data(wiphy, data, mgmt, len, &non_tx_data, gfp); + spin_lock_bh(&wiphy_to_rdev(wiphy)->bss_lock); + /* check if the res has other nontransmitting bss which is not * in MBSSID IE */ @@ -1915,8 +1917,9 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, ies2 = rcu_access_pointer(tmp_bss->ies); if (ies2->tsf < ies1->tsf) cfg80211_update_notlisted_nontrans(wiphy, tmp_bss, - mgmt, len, gfp); + mgmt, len); } + spin_unlock_bh(&wiphy_to_rdev(wiphy)->bss_lock); return res; } From 95697f9907bfe3eab0ef20265a766b22e27dde64 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 4 Oct 2019 15:37:05 +0300 Subject: [PATCH 103/572] mac80211: accept deauth frames in IBSS mode We can process deauth frames and all, but we drop them very early in the RX path today - this could never have worked. Fixes: 2cc59e784b54 ("mac80211: reply to AUTH with DEAUTH if sta allocation fails in IBSS") Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/20191004123706.15768-2-luca@coelho.fi Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 768d14c9a716..0e05ff037672 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3467,9 +3467,18 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) case cpu_to_le16(IEEE80211_STYPE_PROBE_RESP): /* process for all: mesh, mlme, ibss */ break; + case cpu_to_le16(IEEE80211_STYPE_DEAUTH): + if (is_multicast_ether_addr(mgmt->da) && + !is_broadcast_ether_addr(mgmt->da)) + return RX_DROP_MONITOR; + + /* process only for station/IBSS */ + if (sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_ADHOC) + return RX_DROP_MONITOR; + break; case cpu_to_le16(IEEE80211_STYPE_ASSOC_RESP): case cpu_to_le16(IEEE80211_STYPE_REASSOC_RESP): - case cpu_to_le16(IEEE80211_STYPE_DEAUTH): case cpu_to_le16(IEEE80211_STYPE_DISASSOC): if (is_multicast_ether_addr(mgmt->da) && !is_broadcast_ether_addr(mgmt->da)) From dc0c18ed229cdcca283dd78fefa38273ec37a42c Mon Sep 17 00:00:00 2001 From: Aaron Komisar Date: Wed, 2 Oct 2019 13:59:07 +0000 Subject: [PATCH 104/572] mac80211: fix scan when operating on DFS channels in ETSI domains In non-ETSI regulatory domains scan is blocked when operating channel is a DFS channel. For ETSI, however, once DFS channel is marked as available after the CAC, this channel will remain available (for some time) even after leaving this channel. Therefore a scan can be done without any impact on the availability of the DFS channel as no new CAC is required after the scan. Enable scan in mac80211 in these cases. Signed-off-by: Aaron Komisar Link: https://lore.kernel.org/r/1570024728-17284-1-git-send-email-aaron.komisar@tandemg.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 8 ++++++++ net/mac80211/scan.c | 30 ++++++++++++++++++++++++++++-- net/wireless/reg.c | 1 + net/wireless/reg.h | 8 -------- 4 files changed, 37 insertions(+), 10 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index ff45c3e1abff..4ab2c49423dc 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5549,6 +5549,14 @@ const struct ieee80211_reg_rule *freq_reg_info(struct wiphy *wiphy, */ const char *reg_initiator_name(enum nl80211_reg_initiator initiator); +/** + * regulatory_pre_cac_allowed - check if pre-CAC allowed in the current regdom + * @wiphy: wiphy for which pre-CAC capability is checked. + * + * Pre-CAC is allowed only in some regdomains (notable ETSI). + */ +bool regulatory_pre_cac_allowed(struct wiphy *wiphy); + /** * DOC: Internal regulatory db functions * diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index adf94ba1ed77..4d31d9688dc2 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -520,10 +520,33 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local, return 0; } +static bool __ieee80211_can_leave_ch(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_sub_if_data *sdata_iter; + + if (!ieee80211_is_radar_required(local)) + return true; + + if (!regulatory_pre_cac_allowed(local->hw.wiphy)) + return false; + + mutex_lock(&local->iflist_mtx); + list_for_each_entry(sdata_iter, &local->interfaces, list) { + if (sdata_iter->wdev.cac_started) { + mutex_unlock(&local->iflist_mtx); + return false; + } + } + mutex_unlock(&local->iflist_mtx); + + return true; +} + static bool ieee80211_can_scan(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { - if (ieee80211_is_radar_required(local)) + if (!__ieee80211_can_leave_ch(sdata)) return false; if (!list_empty(&local->roc_list)) @@ -630,7 +653,10 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, lockdep_assert_held(&local->mtx); - if (local->scan_req || ieee80211_is_radar_required(local)) + if (local->scan_req) + return -EBUSY; + + if (!__ieee80211_can_leave_ch(sdata)) return -EBUSY; if (!ieee80211_can_scan(local, sdata)) { diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 420c4207ab59..446c76d44e65 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -3883,6 +3883,7 @@ bool regulatory_pre_cac_allowed(struct wiphy *wiphy) return pre_cac_allowed; } +EXPORT_SYMBOL(regulatory_pre_cac_allowed); void regulatory_propagate_dfs_state(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, diff --git a/net/wireless/reg.h b/net/wireless/reg.h index 504133d76de4..dc8f689bd469 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -155,14 +155,6 @@ bool regulatory_indoor_allowed(void); */ #define REG_PRE_CAC_EXPIRY_GRACE_MS 2000 -/** - * regulatory_pre_cac_allowed - if pre-CAC allowed in the current dfs domain - * @wiphy: wiphy for which pre-CAC capability is checked. - - * Pre-CAC is allowed only in ETSI domain. - */ -bool regulatory_pre_cac_allowed(struct wiphy *wiphy); - /** * regulatory_propagate_dfs_state - Propagate DFS channel state to other wiphys * @wiphy - wiphy on which radar is detected and the event will be propagated From d3ec3a08fa700c8b46abb137dce4e2514a6f9668 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 7 Feb 2019 16:01:21 +0000 Subject: [PATCH 105/572] arm64: KVM: Trap VM ops when ARM64_WORKAROUND_CAVIUM_TX2_219_TVM is set In order to workaround the TX2-219 erratum, it is necessary to trap TTBRx_EL1 accesses to EL2. This is done by setting HCR_EL2.TVM on guest entry, which has the side effect of trapping all the other VM-related sysregs as well. To minimize the overhead, a fast path is used so that we don't have to go all the way back to the main sysreg handling code, unless the rest of the hypervisor expects to see these accesses. Cc: Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpucaps.h | 3 +- arch/arm64/kvm/hyp/switch.c | 69 +++++++++++++++++++++++++++++++- 2 files changed, 69 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index f19fe4b9acc4..e81e0cbd728f 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -52,7 +52,8 @@ #define ARM64_HAS_IRQ_PRIO_MASKING 42 #define ARM64_HAS_DCPODP 43 #define ARM64_WORKAROUND_1463225 44 +#define ARM64_WORKAROUND_CAVIUM_TX2_219_TVM 45 -#define ARM64_NCAPS 45 +#define ARM64_NCAPS 46 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 3d3815020e36..799e84a40335 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -124,6 +124,9 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) { u64 hcr = vcpu->arch.hcr_el2; + if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM)) + hcr |= HCR_TVM; + write_sysreg(hcr, hcr_el2); if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE)) @@ -174,8 +177,10 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) * the crucial bit is "On taking a vSError interrupt, * HCR_EL2.VSE is cleared to 0." */ - if (vcpu->arch.hcr_el2 & HCR_VSE) - vcpu->arch.hcr_el2 = read_sysreg(hcr_el2); + if (vcpu->arch.hcr_el2 & HCR_VSE) { + vcpu->arch.hcr_el2 &= ~HCR_VSE; + vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE; + } if (has_vhe()) deactivate_traps_vhe(); @@ -380,6 +385,61 @@ static bool __hyp_text __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) return true; } +static bool __hyp_text handle_tx2_tvm(struct kvm_vcpu *vcpu) +{ + u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_hsr(vcpu)); + int rt = kvm_vcpu_sys_get_rt(vcpu); + u64 val = vcpu_get_reg(vcpu, rt); + + /* + * The normal sysreg handling code expects to see the traps, + * let's not do anything here. + */ + if (vcpu->arch.hcr_el2 & HCR_TVM) + return false; + + switch (sysreg) { + case SYS_SCTLR_EL1: + write_sysreg_el1(val, SYS_SCTLR); + break; + case SYS_TTBR0_EL1: + write_sysreg_el1(val, SYS_TTBR0); + break; + case SYS_TTBR1_EL1: + write_sysreg_el1(val, SYS_TTBR1); + break; + case SYS_TCR_EL1: + write_sysreg_el1(val, SYS_TCR); + break; + case SYS_ESR_EL1: + write_sysreg_el1(val, SYS_ESR); + break; + case SYS_FAR_EL1: + write_sysreg_el1(val, SYS_FAR); + break; + case SYS_AFSR0_EL1: + write_sysreg_el1(val, SYS_AFSR0); + break; + case SYS_AFSR1_EL1: + write_sysreg_el1(val, SYS_AFSR1); + break; + case SYS_MAIR_EL1: + write_sysreg_el1(val, SYS_MAIR); + break; + case SYS_AMAIR_EL1: + write_sysreg_el1(val, SYS_AMAIR); + break; + case SYS_CONTEXTIDR_EL1: + write_sysreg_el1(val, SYS_CONTEXTIDR); + break; + default: + return false; + } + + __kvm_skip_instr(vcpu); + return true; +} + /* * Return true when we were able to fixup the guest exit and should return to * the guest, false when we should restore the host state and return to the @@ -399,6 +459,11 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) if (*exit_code != ARM_EXCEPTION_TRAP) goto exit; + if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) && + kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 && + handle_tx2_tvm(vcpu)) + return true; + /* * We trap the first access to the FP/SIMD to save the host context * and restore the guest context lazily. From 93916beb70143c46bf1d2bacf814be3a124b253b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 9 Apr 2019 16:26:21 +0100 Subject: [PATCH 106/572] arm64: Enable workaround for Cavium TX2 erratum 219 when running SMT It appears that the only case where we need to apply the TX2_219_TVM mitigation is when the core is in SMT mode. So let's condition the enabling on detecting a CPU whose MPIDR_EL1.Aff0 is non-zero. Cc: Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_errata.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 1e43ba5c79b7..d999ca2dd760 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -12,6 +12,7 @@ #include #include #include +#include static bool __maybe_unused is_affected_midr_range(const struct arm64_cpu_capabilities *entry, int scope) @@ -623,6 +624,30 @@ check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope) return (need_wa > 0); } +static const __maybe_unused struct midr_range tx2_family_cpus[] = { + MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), + MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), + {}, +}; + +static bool __maybe_unused +needs_tx2_tvm_workaround(const struct arm64_cpu_capabilities *entry, + int scope) +{ + int i; + + if (!is_affected_midr_range_list(entry, scope) || + !is_hyp_mode_available()) + return false; + + for_each_possible_cpu(i) { + if (MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 0) != 0) + return true; + } + + return false; +} + #ifdef CONFIG_HARDEN_EL2_VECTORS static const struct midr_range arm64_harden_el2_vectors[] = { @@ -851,6 +876,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .matches = has_cortex_a76_erratum_1463225, }, +#endif +#ifdef CONFIG_CAVIUM_TX2_ERRATUM_219 + { + .desc = "Cavium ThunderX2 erratum 219 (KVM guest sysreg trapping)", + .capability = ARM64_WORKAROUND_CAVIUM_TX2_219_TVM, + ERRATA_MIDR_RANGE_LIST(tx2_family_cpus), + .matches = needs_tx2_tvm_workaround, + }, #endif { } From 9405447ef79bc93101373e130f72e9e6cbf17dbb Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 9 Apr 2019 16:22:24 +0100 Subject: [PATCH 107/572] arm64: Avoid Cavium TX2 erratum 219 when switching TTBR As a PRFM instruction racing against a TTBR update can have undesirable effects on TX2, NOP-out such PRFM on cores that are affected by the TX2-219 erratum. Cc: Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpucaps.h | 3 ++- arch/arm64/kernel/cpu_errata.c | 5 +++++ arch/arm64/kernel/entry.S | 2 ++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index e81e0cbd728f..ac1dbca3d0cd 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -53,7 +53,8 @@ #define ARM64_HAS_DCPODP 43 #define ARM64_WORKAROUND_1463225 44 #define ARM64_WORKAROUND_CAVIUM_TX2_219_TVM 45 +#define ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM 46 -#define ARM64_NCAPS 46 +#define ARM64_NCAPS 47 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index d999ca2dd760..a19bb3e4bcfb 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -884,6 +884,11 @@ const struct arm64_cpu_capabilities arm64_errata[] = { ERRATA_MIDR_RANGE_LIST(tx2_family_cpus), .matches = needs_tx2_tvm_workaround, }, + { + .desc = "Cavium ThunderX2 erratum 219 (PRFM removal)", + .capability = ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM, + ERRATA_MIDR_RANGE_LIST(tx2_family_cpus), + }, #endif { } diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 84a822748c84..109894bd3194 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -1070,7 +1070,9 @@ alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003 #else ldr x30, =vectors #endif +alternative_if_not ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM prfm plil1strm, [x30, #(1b - tramp_vectors)] +alternative_else_nop_endif msr vbar_el1, x30 add x30, x30, #(1b - tramp_vectors) isb From 603afdc9438ac546181e843f807253d75d3dbc45 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 13 Sep 2019 10:57:50 +0100 Subject: [PATCH 108/572] arm64: Allow CAVIUM_TX2_ERRATUM_219 to be selected Allow the user to select the workaround for TX2-219, and update the silicon-errata.rst file to reflect this. Cc: Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- Documentation/arm64/silicon-errata.rst | 2 ++ arch/arm64/Kconfig | 17 +++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 17ea3fecddaa..ab7ed2fd072f 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -107,6 +107,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | Cavium | ThunderX2 SMMUv3| #126 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +| Cavium | ThunderX2 Core | #219 | CAVIUM_TX2_ERRATUM_219 | ++----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ | Freescale/NXP | LS2080A/LS1043A | A-008585 | FSL_ERRATUM_A008585 | +----------------+-----------------+-----------------+-----------------------------+ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 41a9b4257b72..7d36fd95ae5a 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -617,6 +617,23 @@ config CAVIUM_ERRATUM_30115 If unsure, say Y. +config CAVIUM_TX2_ERRATUM_219 + bool "Cavium ThunderX2 erratum 219: PRFM between TTBR change and ISB fails" + default y + help + On Cavium ThunderX2, a load, store or prefetch instruction between a + TTBR update and the corresponding context synchronizing operation can + cause a spurious Data Abort to be delivered to any hardware thread in + the CPU core. + + Work around the issue by avoiding the problematic code sequence and + trapping KVM guest TTBRx_EL1 writes to EL2 when SMT is enabled. The + trap handler performs the corresponding register access, skips the + instruction and ensures context synchronization by virtue of the + exception return. + + If unsure, say Y. + config QCOM_FALKOR_ERRATUM_1003 bool "Falkor E1003: Incorrect translation due to ASID change" default y From 734a9b21bb061acdbb0b9c08cd37f80368f381dd Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Tue, 8 Oct 2019 10:56:42 +0300 Subject: [PATCH 109/572] ARM: dts: omap5: fix gpu_cm clock provider name The clkctrl code searches for the parent clockdomain based on the name of the CM provider node. The introduction of SGX node for omap5 made the node name for the gpu_cm to be clock-controller. There is no clockdomain named like this, so the lookup fails. Fix by changing the node name properly. Fixes: 394534cb07d8 ("ARM: dts: Configure sgx for omap5") Signed-off-by: Tero Kristo Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap54xx-clocks.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/omap54xx-clocks.dtsi b/arch/arm/boot/dts/omap54xx-clocks.dtsi index fac2e57dcca9..4791834dacb2 100644 --- a/arch/arm/boot/dts/omap54xx-clocks.dtsi +++ b/arch/arm/boot/dts/omap54xx-clocks.dtsi @@ -1146,7 +1146,7 @@ }; }; - gpu_cm: clock-controller@1500 { + gpu_cm: gpu_cm@1500 { compatible = "ti,omap4-cm"; reg = <0x1500 0x100>; #address-cells = <1>; From 647c8977e111c0a62c93a489ebc4b045c833fdb4 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Thu, 3 Oct 2019 18:45:48 -0700 Subject: [PATCH 110/572] ARM: dts: am3874-iceboard: Fix 'i2c-mux-idle-disconnect' usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to Documentation/devicetree/bindings/i2c/i2c-mux-pca954x.txt, i2c-mux-idle-disconnect is a property of a parent node since it pertains to the mux/switch as a whole, so move it there and drop all of the concurrences in child nodes. Fixes: d031773169df ("ARM: dts: Adds device tree file for McGill's IceBoard, based on TI AM3874") Signed-off-by: Andrey Smirnov Cc: Benoît Cousson Cc: Tony Lindgren Cc: Graeme Smecher Cc: linux-omap@vger.kernel.org Cc: devicetree@vger.kernel.org Cc: linux-kernel@vger.kernel.org Tested-by: Graeme Smecher Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am3874-iceboard.dts | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/arch/arm/boot/dts/am3874-iceboard.dts b/arch/arm/boot/dts/am3874-iceboard.dts index 883fb85135d4..1b4b2b0500e4 100644 --- a/arch/arm/boot/dts/am3874-iceboard.dts +++ b/arch/arm/boot/dts/am3874-iceboard.dts @@ -111,13 +111,13 @@ reg = <0x70>; #address-cells = <1>; #size-cells = <0>; + i2c-mux-idle-disconnect; i2c@0 { /* FMC A */ #address-cells = <1>; #size-cells = <0>; reg = <0>; - i2c-mux-idle-disconnect; }; i2c@1 { @@ -125,7 +125,6 @@ #address-cells = <1>; #size-cells = <0>; reg = <1>; - i2c-mux-idle-disconnect; }; i2c@2 { @@ -133,7 +132,6 @@ #address-cells = <1>; #size-cells = <0>; reg = <2>; - i2c-mux-idle-disconnect; }; i2c@3 { @@ -141,7 +139,6 @@ #address-cells = <1>; #size-cells = <0>; reg = <3>; - i2c-mux-idle-disconnect; }; i2c@4 { @@ -149,14 +146,12 @@ #address-cells = <1>; #size-cells = <0>; reg = <4>; - i2c-mux-idle-disconnect; }; i2c@5 { #address-cells = <1>; #size-cells = <0>; reg = <5>; - i2c-mux-idle-disconnect; ina230@40 { compatible = "ti,ina230"; reg = <0x40>; shunt-resistor = <5000>; }; ina230@41 { compatible = "ti,ina230"; reg = <0x41>; shunt-resistor = <5000>; }; @@ -182,14 +177,12 @@ #address-cells = <1>; #size-cells = <0>; reg = <6>; - i2c-mux-idle-disconnect; }; i2c@7 { #address-cells = <1>; #size-cells = <0>; reg = <7>; - i2c-mux-idle-disconnect; u41: pca9575@20 { compatible = "nxp,pca9575"; From 98d22b01f9f6f85eb8870290006241b316829fd3 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 2 Oct 2019 14:50:52 -0700 Subject: [PATCH 111/572] rt2x00: remove input-polldev.h header The driver does not use input subsystem so we do not need this header, and it is being removed, so stop pulling it in. Signed-off-by: Dmitry Torokhov Signed-off-by: Kalle Valo --- drivers/net/wireless/ralink/rt2x00/rt2x00.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00.h b/drivers/net/wireless/ralink/rt2x00/rt2x00.h index 2b216edd0c7d..a90a518b40d3 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00.h +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00.h @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include From 0a005856d35936e351cc20ff3de04499bd7ffbfd Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Mon, 23 Sep 2019 14:11:11 +0800 Subject: [PATCH 112/572] dm clone: Make __hash_find static drivers/md/dm-clone-target.c:594:34: warning: symbol '__hash_find' was not declared. Should it be static? Reported-by: Hulk Robot Signed-off-by: YueHaibing Signed-off-by: Mike Snitzer --- drivers/md/dm-clone-target.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-clone-target.c b/drivers/md/dm-clone-target.c index cd6f9e9fc98e..4ca8f1977222 100644 --- a/drivers/md/dm-clone-target.c +++ b/drivers/md/dm-clone-target.c @@ -591,8 +591,8 @@ static struct hash_table_bucket *get_hash_table_bucket(struct clone *clone, * * NOTE: Must be called with the bucket lock held */ -struct dm_clone_region_hydration *__hash_find(struct hash_table_bucket *bucket, - unsigned long region_nr) +static struct dm_clone_region_hydration *__hash_find(struct hash_table_bucket *bucket, + unsigned long region_nr) { struct dm_clone_region_hydration *hd; From c6ee11c39fcc1fb55130748990a8f199e76263b4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 6 Oct 2019 14:24:24 -0700 Subject: [PATCH 113/572] llc: fix sk_buff leak in llc_sap_state_process() syzbot reported: BUG: memory leak unreferenced object 0xffff888116270800 (size 224): comm "syz-executor641", pid 7047, jiffies 4294947360 (age 13.860s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 20 e1 2a 81 88 ff ff 00 40 3d 2a 81 88 ff ff . .*.....@=*.... backtrace: [<000000004d41b4cc>] kmemleak_alloc_recursive include/linux/kmemleak.h:55 [inline] [<000000004d41b4cc>] slab_post_alloc_hook mm/slab.h:439 [inline] [<000000004d41b4cc>] slab_alloc_node mm/slab.c:3269 [inline] [<000000004d41b4cc>] kmem_cache_alloc_node+0x153/0x2a0 mm/slab.c:3579 [<00000000506a5965>] __alloc_skb+0x6e/0x210 net/core/skbuff.c:198 [<000000001ba5a161>] alloc_skb include/linux/skbuff.h:1058 [inline] [<000000001ba5a161>] alloc_skb_with_frags+0x5f/0x250 net/core/skbuff.c:5327 [<0000000047d9c78b>] sock_alloc_send_pskb+0x269/0x2a0 net/core/sock.c:2225 [<000000003828fe54>] sock_alloc_send_skb+0x32/0x40 net/core/sock.c:2242 [<00000000e34d94f9>] llc_ui_sendmsg+0x10a/0x540 net/llc/af_llc.c:933 [<00000000de2de3fb>] sock_sendmsg_nosec net/socket.c:652 [inline] [<00000000de2de3fb>] sock_sendmsg+0x54/0x70 net/socket.c:671 [<000000008fe16e7a>] __sys_sendto+0x148/0x1f0 net/socket.c:1964 [...] The bug is that llc_sap_state_process() always takes an extra reference to the skb, but sometimes neither llc_sap_next_state() nor llc_sap_state_process() itself drops this reference. Fix it by changing llc_sap_next_state() to never consume a reference to the skb, rather than sometimes do so and sometimes not. Then remove the extra skb_get() and kfree_skb() from llc_sap_state_process(). Reported-by: syzbot+6bf095f9becf5efef645@syzkaller.appspotmail.com Reported-by: syzbot+31c16aa4202dace3812e@syzkaller.appspotmail.com Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Biggers Signed-off-by: Jakub Kicinski --- net/llc/llc_s_ac.c | 12 +++++++++--- net/llc/llc_sap.c | 23 ++++++++--------------- 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/net/llc/llc_s_ac.c b/net/llc/llc_s_ac.c index a94bd56bcac6..7ae4cc684d3a 100644 --- a/net/llc/llc_s_ac.c +++ b/net/llc/llc_s_ac.c @@ -58,8 +58,10 @@ int llc_sap_action_send_ui(struct llc_sap *sap, struct sk_buff *skb) ev->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_ui_cmd(skb); rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); - if (likely(!rc)) + if (likely(!rc)) { + skb_get(skb); rc = dev_queue_xmit(skb); + } return rc; } @@ -81,8 +83,10 @@ int llc_sap_action_send_xid_c(struct llc_sap *sap, struct sk_buff *skb) ev->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_xid_cmd(skb, LLC_XID_NULL_CLASS_2, 0); rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); - if (likely(!rc)) + if (likely(!rc)) { + skb_get(skb); rc = dev_queue_xmit(skb); + } return rc; } @@ -135,8 +139,10 @@ int llc_sap_action_send_test_c(struct llc_sap *sap, struct sk_buff *skb) ev->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_test_cmd(skb); rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); - if (likely(!rc)) + if (likely(!rc)) { + skb_get(skb); rc = dev_queue_xmit(skb); + } return rc; } diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index a7f7b8ff4729..be419062e19a 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -197,29 +197,22 @@ out: * After executing actions of the event, upper layer will be indicated * if needed(on receiving an UI frame). sk can be null for the * datalink_proto case. + * + * This function always consumes a reference to the skb. */ static void llc_sap_state_process(struct llc_sap *sap, struct sk_buff *skb) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); - /* - * We have to hold the skb, because llc_sap_next_state - * will kfree it in the sending path and we need to - * look at the skb->cb, where we encode llc_sap_state_ev. - */ - skb_get(skb); ev->ind_cfm_flag = 0; llc_sap_next_state(sap, skb); - if (ev->ind_cfm_flag == LLC_IND) { - if (skb->sk->sk_state == TCP_LISTEN) - kfree_skb(skb); - else { - llc_save_primitive(skb->sk, skb, ev->prim); - /* queue skb to the user. */ - if (sock_queue_rcv_skb(skb->sk, skb)) - kfree_skb(skb); - } + if (ev->ind_cfm_flag == LLC_IND && skb->sk->sk_state != TCP_LISTEN) { + llc_save_primitive(skb->sk, skb, ev->prim); + + /* queue skb to the user. */ + if (sock_queue_rcv_skb(skb->sk, skb) == 0) + return; } kfree_skb(skb); } From b74555de21acd791f12c4a1aeaf653dd7ac21133 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 6 Oct 2019 14:24:25 -0700 Subject: [PATCH 114/572] llc: fix sk_buff leak in llc_conn_service() syzbot reported: BUG: memory leak unreferenced object 0xffff88811eb3de00 (size 224): comm "syz-executor559", pid 7315, jiffies 4294943019 (age 10.300s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 a0 38 24 81 88 ff ff 00 c0 f2 15 81 88 ff ff ..8$............ backtrace: [<000000008d1c66a1>] kmemleak_alloc_recursive include/linux/kmemleak.h:55 [inline] [<000000008d1c66a1>] slab_post_alloc_hook mm/slab.h:439 [inline] [<000000008d1c66a1>] slab_alloc_node mm/slab.c:3269 [inline] [<000000008d1c66a1>] kmem_cache_alloc_node+0x153/0x2a0 mm/slab.c:3579 [<00000000447d9496>] __alloc_skb+0x6e/0x210 net/core/skbuff.c:198 [<000000000cdbf82f>] alloc_skb include/linux/skbuff.h:1058 [inline] [<000000000cdbf82f>] llc_alloc_frame+0x66/0x110 net/llc/llc_sap.c:54 [<000000002418b52e>] llc_conn_ac_send_sabme_cmd_p_set_x+0x2f/0x140 net/llc/llc_c_ac.c:777 [<000000001372ae17>] llc_exec_conn_trans_actions net/llc/llc_conn.c:475 [inline] [<000000001372ae17>] llc_conn_service net/llc/llc_conn.c:400 [inline] [<000000001372ae17>] llc_conn_state_process+0x1ac/0x640 net/llc/llc_conn.c:75 [<00000000f27e53c1>] llc_establish_connection+0x110/0x170 net/llc/llc_if.c:109 [<00000000291b2ca0>] llc_ui_connect+0x10e/0x370 net/llc/af_llc.c:477 [<000000000f9c740b>] __sys_connect+0x11d/0x170 net/socket.c:1840 [...] The bug is that most callers of llc_conn_send_pdu() assume it consumes a reference to the skb, when actually due to commit b85ab56c3f81 ("llc: properly handle dev_queue_xmit() return value") it doesn't. Revert most of that commit, and instead make the few places that need llc_conn_send_pdu() to *not* consume a reference call skb_get() before. Fixes: b85ab56c3f81 ("llc: properly handle dev_queue_xmit() return value") Reported-by: syzbot+6b825a6494a04cc0e3f7@syzkaller.appspotmail.com Signed-off-by: Eric Biggers Signed-off-by: Jakub Kicinski --- include/net/llc_conn.h | 2 +- net/llc/llc_c_ac.c | 8 ++++++-- net/llc/llc_conn.c | 32 +++++++++----------------------- 3 files changed, 16 insertions(+), 26 deletions(-) diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h index df528a623548..ea985aa7a6c5 100644 --- a/include/net/llc_conn.h +++ b/include/net/llc_conn.h @@ -104,7 +104,7 @@ void llc_sk_reset(struct sock *sk); /* Access to a connection */ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb); -int llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb); +void llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb); void llc_conn_rtn_pdu(struct sock *sk, struct sk_buff *skb); void llc_conn_resend_i_pdu_as_cmd(struct sock *sk, u8 nr, u8 first_p_bit); void llc_conn_resend_i_pdu_as_rsp(struct sock *sk, u8 nr, u8 first_f_bit); diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index 4d78375f9872..647c0554d04c 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -372,6 +372,7 @@ int llc_conn_ac_send_i_cmd_p_set_1(struct sock *sk, struct sk_buff *skb) llc_pdu_init_as_i_cmd(skb, 1, llc->vS, llc->vR); rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac); if (likely(!rc)) { + skb_get(skb); llc_conn_send_pdu(sk, skb); llc_conn_ac_inc_vs_by_1(sk, skb); } @@ -389,7 +390,8 @@ static int llc_conn_ac_send_i_cmd_p_set_0(struct sock *sk, struct sk_buff *skb) llc_pdu_init_as_i_cmd(skb, 0, llc->vS, llc->vR); rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac); if (likely(!rc)) { - rc = llc_conn_send_pdu(sk, skb); + skb_get(skb); + llc_conn_send_pdu(sk, skb); llc_conn_ac_inc_vs_by_1(sk, skb); } return rc; @@ -406,6 +408,7 @@ int llc_conn_ac_send_i_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) llc_pdu_init_as_i_cmd(skb, 0, llc->vS, llc->vR); rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac); if (likely(!rc)) { + skb_get(skb); llc_conn_send_pdu(sk, skb); llc_conn_ac_inc_vs_by_1(sk, skb); } @@ -916,7 +919,8 @@ static int llc_conn_ac_send_i_rsp_f_set_ackpf(struct sock *sk, llc_pdu_init_as_i_cmd(skb, llc->ack_pf, llc->vS, llc->vR); rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac); if (likely(!rc)) { - rc = llc_conn_send_pdu(sk, skb); + skb_get(skb); + llc_conn_send_pdu(sk, skb); llc_conn_ac_inc_vs_by_1(sk, skb); } return rc; diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 4ff89cb7c86f..ed2aca12460c 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -30,7 +30,7 @@ #endif static int llc_find_offset(int state, int ev_type); -static int llc_conn_send_pdus(struct sock *sk, struct sk_buff *skb); +static void llc_conn_send_pdus(struct sock *sk); static int llc_conn_service(struct sock *sk, struct sk_buff *skb); static int llc_exec_conn_trans_actions(struct sock *sk, struct llc_conn_state_trans *trans, @@ -193,11 +193,11 @@ out_skb_put: return rc; } -int llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb) +void llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb) { /* queue PDU to send to MAC layer */ skb_queue_tail(&sk->sk_write_queue, skb); - return llc_conn_send_pdus(sk, skb); + llc_conn_send_pdus(sk); } /** @@ -255,7 +255,7 @@ void llc_conn_resend_i_pdu_as_cmd(struct sock *sk, u8 nr, u8 first_p_bit) if (howmany_resend > 0) llc->vS = (llc->vS + 1) % LLC_2_SEQ_NBR_MODULO; /* any PDUs to re-send are queued up; start sending to MAC */ - llc_conn_send_pdus(sk, NULL); + llc_conn_send_pdus(sk); out:; } @@ -296,7 +296,7 @@ void llc_conn_resend_i_pdu_as_rsp(struct sock *sk, u8 nr, u8 first_f_bit) if (howmany_resend > 0) llc->vS = (llc->vS + 1) % LLC_2_SEQ_NBR_MODULO; /* any PDUs to re-send are queued up; start sending to MAC */ - llc_conn_send_pdus(sk, NULL); + llc_conn_send_pdus(sk); out:; } @@ -340,16 +340,12 @@ out: /** * llc_conn_send_pdus - Sends queued PDUs * @sk: active connection - * @hold_skb: the skb held by caller, or NULL if does not care * - * Sends queued pdus to MAC layer for transmission. When @hold_skb is - * NULL, always return 0. Otherwise, return 0 if @hold_skb is sent - * successfully, or 1 for failure. + * Sends queued pdus to MAC layer for transmission. */ -static int llc_conn_send_pdus(struct sock *sk, struct sk_buff *hold_skb) +static void llc_conn_send_pdus(struct sock *sk) { struct sk_buff *skb; - int ret = 0; while ((skb = skb_dequeue(&sk->sk_write_queue)) != NULL) { struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); @@ -361,20 +357,10 @@ static int llc_conn_send_pdus(struct sock *sk, struct sk_buff *hold_skb) skb_queue_tail(&llc_sk(sk)->pdu_unack_q, skb); if (!skb2) break; - dev_queue_xmit(skb2); - } else { - bool is_target = skb == hold_skb; - int rc; - - if (is_target) - skb_get(skb); - rc = dev_queue_xmit(skb); - if (is_target) - ret = rc; + skb = skb2; } + dev_queue_xmit(skb); } - - return ret; } /** From fc8d5db10cbe1338a52ebc74e7feab9276721774 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 6 Oct 2019 14:24:26 -0700 Subject: [PATCH 115/572] llc: fix another potential sk_buff leak in llc_ui_sendmsg() All callers of llc_conn_state_process() except llc_build_and_send_pkt() (via llc_ui_sendmsg() -> llc_ui_send_data()) assume that it always consumes a reference to the skb. Fix this caller to do the same. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Biggers Signed-off-by: Jakub Kicinski --- net/llc/af_llc.c | 34 ++++++++++++++++++++-------------- net/llc/llc_conn.c | 2 ++ net/llc/llc_if.c | 12 ++++++++---- 3 files changed, 30 insertions(+), 18 deletions(-) diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 2017b7d780f5..c74f44dfaa22 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -113,22 +113,26 @@ static inline u8 llc_ui_header_len(struct sock *sk, struct sockaddr_llc *addr) * * Send data via reliable llc2 connection. * Returns 0 upon success, non-zero if action did not succeed. + * + * This function always consumes a reference to the skb. */ static int llc_ui_send_data(struct sock* sk, struct sk_buff *skb, int noblock) { struct llc_sock* llc = llc_sk(sk); - int rc = 0; if (unlikely(llc_data_accept_state(llc->state) || llc->remote_busy_flag || llc->p_flag)) { long timeout = sock_sndtimeo(sk, noblock); + int rc; rc = llc_ui_wait_for_busy_core(sk, timeout); + if (rc) { + kfree_skb(skb); + return rc; + } } - if (unlikely(!rc)) - rc = llc_build_and_send_pkt(sk, skb); - return rc; + return llc_build_and_send_pkt(sk, skb); } static void llc_ui_sk_init(struct socket *sock, struct sock *sk) @@ -899,7 +903,7 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) DECLARE_SOCKADDR(struct sockaddr_llc *, addr, msg->msg_name); int flags = msg->msg_flags; int noblock = flags & MSG_DONTWAIT; - struct sk_buff *skb; + struct sk_buff *skb = NULL; size_t size = 0; int rc = -EINVAL, copied = 0, hdrlen; @@ -908,10 +912,10 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) lock_sock(sk); if (addr) { if (msg->msg_namelen < sizeof(*addr)) - goto release; + goto out; } else { if (llc_ui_addr_null(&llc->addr)) - goto release; + goto out; addr = &llc->addr; } /* must bind connection to sap if user hasn't done it. */ @@ -919,7 +923,7 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) /* bind to sap with null dev, exclusive. */ rc = llc_ui_autobind(sock, addr); if (rc) - goto release; + goto out; } hdrlen = llc->dev->hard_header_len + llc_ui_header_len(sk, addr); size = hdrlen + len; @@ -928,12 +932,12 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) copied = size - hdrlen; rc = -EINVAL; if (copied < 0) - goto release; + goto out; release_sock(sk); skb = sock_alloc_send_skb(sk, size, noblock, &rc); lock_sock(sk); if (!skb) - goto release; + goto out; skb->dev = llc->dev; skb->protocol = llc_proto_type(addr->sllc_arphrd); skb_reserve(skb, hdrlen); @@ -943,29 +947,31 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) if (sk->sk_type == SOCK_DGRAM || addr->sllc_ua) { llc_build_and_send_ui_pkt(llc->sap, skb, addr->sllc_mac, addr->sllc_sap); + skb = NULL; goto out; } if (addr->sllc_test) { llc_build_and_send_test_pkt(llc->sap, skb, addr->sllc_mac, addr->sllc_sap); + skb = NULL; goto out; } if (addr->sllc_xid) { llc_build_and_send_xid_pkt(llc->sap, skb, addr->sllc_mac, addr->sllc_sap); + skb = NULL; goto out; } rc = -ENOPROTOOPT; if (!(sk->sk_type == SOCK_STREAM && !addr->sllc_ua)) goto out; rc = llc_ui_send_data(sk, skb, noblock); + skb = NULL; out: - if (rc) { - kfree_skb(skb); -release: + kfree_skb(skb); + if (rc) dprintk("%s: failed sending from %02X to %02X: %d\n", __func__, llc->laddr.lsap, llc->daddr.lsap, rc); - } release_sock(sk); return rc ? : copied; } diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index ed2aca12460c..0b0c6f12153b 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -55,6 +55,8 @@ int sysctl_llc2_busy_timeout = LLC2_BUSY_TIME * HZ; * (executing it's actions and changing state), upper layer will be * indicated or confirmed, if needed. Returns 0 for success, 1 for * failure. The socket lock has to be held before calling this function. + * + * This function always consumes a reference to the skb. */ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) { diff --git a/net/llc/llc_if.c b/net/llc/llc_if.c index 8db03c2d5440..ad6547736c21 100644 --- a/net/llc/llc_if.c +++ b/net/llc/llc_if.c @@ -38,6 +38,8 @@ * closed and -EBUSY when sending data is not permitted in this state or * LLC has send an I pdu with p bit set to 1 and is waiting for it's * response. + * + * This function always consumes a reference to the skb. */ int llc_build_and_send_pkt(struct sock *sk, struct sk_buff *skb) { @@ -46,20 +48,22 @@ int llc_build_and_send_pkt(struct sock *sk, struct sk_buff *skb) struct llc_sock *llc = llc_sk(sk); if (unlikely(llc->state == LLC_CONN_STATE_ADM)) - goto out; + goto out_free; rc = -EBUSY; if (unlikely(llc_data_accept_state(llc->state) || /* data_conn_refuse */ llc->p_flag)) { llc->failed_data_req = 1; - goto out; + goto out_free; } ev = llc_conn_ev(skb); ev->type = LLC_CONN_EV_TYPE_PRIM; ev->prim = LLC_DATA_PRIM; ev->prim_type = LLC_PRIM_TYPE_REQ; skb->dev = llc->dev; - rc = llc_conn_state_process(sk, skb); -out: + return llc_conn_state_process(sk, skb); + +out_free: + kfree_skb(skb); return rc; } From 36453c852816f19947ca482a595dffdd2efa4965 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 6 Oct 2019 14:24:27 -0700 Subject: [PATCH 116/572] llc: fix sk_buff refcounting in llc_conn_state_process() If llc_conn_state_process() sees that llc_conn_service() put the skb on a list, it will drop one fewer references to it. This is wrong because the current behavior is that llc_conn_service() never consumes a reference to the skb. The code also makes the number of skb references being dropped conditional on which of ind_prim and cfm_prim are nonzero, yet neither of these affects how many references are *acquired*. So there is extra code that tries to fix this up by sometimes taking another reference. Remove the unnecessary/broken refcounting logic and instead just add an skb_get() before the only two places where an extra reference is actually consumed. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Biggers Signed-off-by: Jakub Kicinski --- net/llc/llc_conn.c | 33 ++++++--------------------------- 1 file changed, 6 insertions(+), 27 deletions(-) diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 0b0c6f12153b..a79b739eb223 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -64,12 +64,6 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) struct llc_sock *llc = llc_sk(skb->sk); struct llc_conn_state_ev *ev = llc_conn_ev(skb); - /* - * We have to hold the skb, because llc_conn_service will kfree it in - * the sending path and we need to look at the skb->cb, where we encode - * llc_conn_state_ev. - */ - skb_get(skb); ev->ind_prim = ev->cfm_prim = 0; /* * Send event to state machine @@ -77,21 +71,12 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) rc = llc_conn_service(skb->sk, skb); if (unlikely(rc != 0)) { printk(KERN_ERR "%s: llc_conn_service failed\n", __func__); - goto out_kfree_skb; - } - - if (unlikely(!ev->ind_prim && !ev->cfm_prim)) { - /* indicate or confirm not required */ - if (!skb->next) - goto out_kfree_skb; goto out_skb_put; } - if (unlikely(ev->ind_prim && ev->cfm_prim)) /* Paranoia */ - skb_get(skb); - switch (ev->ind_prim) { case LLC_DATA_PRIM: + skb_get(skb); llc_save_primitive(sk, skb, LLC_DATA_PRIM); if (unlikely(sock_queue_rcv_skb(sk, skb))) { /* @@ -108,6 +93,7 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) * skb->sk pointing to the newly created struct sock in * llc_conn_handler. -acme */ + skb_get(skb); skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_state_change(sk); break; @@ -123,7 +109,6 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) sk->sk_state_change(sk); } } - kfree_skb(skb); sock_put(sk); break; case LLC_RESET_PRIM: @@ -132,14 +117,11 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) * RESET is not being notified to upper layers for now */ printk(KERN_INFO "%s: received a reset ind!\n", __func__); - kfree_skb(skb); break; default: - if (ev->ind_prim) { + if (ev->ind_prim) printk(KERN_INFO "%s: received unknown %d prim!\n", __func__, ev->ind_prim); - kfree_skb(skb); - } /* No indication */ break; } @@ -181,15 +163,12 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) printk(KERN_INFO "%s: received a reset conf!\n", __func__); break; default: - if (ev->cfm_prim) { + if (ev->cfm_prim) printk(KERN_INFO "%s: received unknown %d prim!\n", __func__, ev->cfm_prim); - break; - } - goto out_skb_put; /* No confirmation */ + /* No confirmation */ + break; } -out_kfree_skb: - kfree_skb(skb); out_skb_put: kfree_skb(skb); return rc; From fd418b01fe26c2430b1091675cceb3ab2b52e1e0 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Tue, 8 Oct 2019 15:10:44 +0200 Subject: [PATCH 117/572] selftests/bpf: Set rp_filter in test_flow_dissector Many distributions enable rp_filter. However, the flow dissector test generates packets that have 1.1.1.1 set as (inner) source address without this address being reachable. This causes the selftest to fail. The selftests should not assume a particular initial configuration. Switch off rp_filter. Fixes: 50b3ed57dee9 ("selftests/bpf: test bpf flow dissection") Signed-off-by: Jiri Benc Signed-off-by: Daniel Borkmann Acked-by: Petar Penkov Link: https://lore.kernel.org/bpf/513a298f53e99561d2f70b2e60e2858ea6cda754.1570539863.git.jbenc@redhat.com --- tools/testing/selftests/bpf/test_flow_dissector.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh index d23d4da66b83..e2d06191bd35 100755 --- a/tools/testing/selftests/bpf/test_flow_dissector.sh +++ b/tools/testing/selftests/bpf/test_flow_dissector.sh @@ -63,6 +63,9 @@ fi # Setup tc qdisc add dev lo ingress +echo 0 > /proc/sys/net/ipv4/conf/default/rp_filter +echo 0 > /proc/sys/net/ipv4/conf/all/rp_filter +echo 0 > /proc/sys/net/ipv4/conf/lo/rp_filter echo "Testing IPv4..." # Drops all IP/UDP packets coming from port 9 From 106c35dda32f8b63f88cad7433f1b8bb0056958a Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Tue, 8 Oct 2019 15:10:45 +0200 Subject: [PATCH 118/572] selftests/bpf: More compatible nc options in test_lwt_ip_encap Out of the three nc implementations widely in use, at least two (BSD netcat and nmap-ncat) do not support -l combined with -s. Modify the nc invocation to be accepted by all of them. Fixes: 17a90a788473 ("selftests/bpf: test that GSO works in lwt_ip_encap") Signed-off-by: Jiri Benc Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/9f177682c387f3f943bb64d849e6c6774df3c5b4.1570539863.git.jbenc@redhat.com --- tools/testing/selftests/bpf/test_lwt_ip_encap.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh index acf7a74f97cd..59ea56945e6c 100755 --- a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh +++ b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh @@ -314,15 +314,15 @@ test_gso() command -v nc >/dev/null 2>&1 || \ { echo >&2 "nc is not available: skipping TSO tests"; return; } - # listen on IPv*_DST, capture TCP into $TMPFILE + # listen on port 9000, capture TCP into $TMPFILE if [ "${PROTO}" == "IPv4" ] ; then IP_DST=${IPv4_DST} ip netns exec ${NS3} bash -c \ - "nc -4 -l -s ${IPv4_DST} -p 9000 > ${TMPFILE} &" + "nc -4 -l -p 9000 > ${TMPFILE} &" elif [ "${PROTO}" == "IPv6" ] ; then IP_DST=${IPv6_DST} ip netns exec ${NS3} bash -c \ - "nc -6 -l -s ${IPv6_DST} -p 9000 > ${TMPFILE} &" + "nc -6 -l -p 9000 > ${TMPFILE} &" RET=$? else echo " test_gso: unknown PROTO: ${PROTO}" From 0041412694eca70387aee4076254fbed8222700a Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Mon, 7 Oct 2019 16:13:25 +0300 Subject: [PATCH 119/572] net/mlx5: DR, Allow insertion of duplicate rules Duplicate rules were not allowed to be configured with SW steering. This restriction caused failures with the replace rule logic done by upper layers. This fix allows for multiple rules with the same match values, in such case the first inserted rules will match. Fixes: 41d07074154c ("net/mlx5: DR, Expose steering rule functionality") Signed-off-by: Alex Vesker Signed-off-by: Tariq Toukan Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/steering/dr_rule.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c index 4187f2b112b8..e8b656075c6f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c @@ -788,12 +788,10 @@ again: * it means that all the previous stes are the same, * if so, this rule is duplicated. */ - if (mlx5dr_ste_is_last_in_rule(nic_matcher, - matched_ste->ste_chain_location)) { - mlx5dr_info(dmn, "Duplicate rule inserted, aborting!!\n"); - return NULL; - } - return matched_ste; + if (!mlx5dr_ste_is_last_in_rule(nic_matcher, ste_location)) + return matched_ste; + + mlx5dr_dbg(dmn, "Duplicate rule inserted\n"); } if (!skip_rehash && dr_rule_need_enlarge_hash(cur_htbl, dmn, nic_dmn)) { From 6f96c3c6904c26cea9ca2726d5d8a9b0b8205b3c Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 7 Oct 2019 13:26:28 -0700 Subject: [PATCH 120/572] net_sched: fix backward compatibility for TCA_KIND Marcelo noticed a backward compatibility issue of TCA_KIND after we move from NLA_STRING to NLA_NUL_STRING, so it is probably too late to change it. Instead, to make everyone happy, we can just insert a NUL to terminate the string with nla_strlcpy() like we do for TC actions. Fixes: 62794fc4fbf5 ("net_sched: add max len check for TCA_KIND") Reported-by: Marcelo Ricardo Leitner Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Reviewed-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- net/sched/cls_api.c | 36 +++++++++++++++++++++++++++++++++--- net/sched/sch_api.c | 3 +-- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 64584a1df425..8717c0b26c90 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -162,11 +162,22 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp) return TC_H_MAJ(first); } +static bool tcf_proto_check_kind(struct nlattr *kind, char *name) +{ + if (kind) + return nla_strlcpy(name, kind, IFNAMSIZ) >= IFNAMSIZ; + memset(name, 0, IFNAMSIZ); + return false; +} + static bool tcf_proto_is_unlocked(const char *kind) { const struct tcf_proto_ops *ops; bool ret; + if (strlen(kind) == 0) + return false; + ops = tcf_proto_lookup_ops(kind, false, NULL); /* On error return false to take rtnl lock. Proto lookup/create * functions will perform lookup again and properly handle errors. @@ -1843,6 +1854,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; + char name[IFNAMSIZ]; struct tcmsg *t; u32 protocol; u32 prio; @@ -1899,13 +1911,19 @@ replay: if (err) return err; + if (tcf_proto_check_kind(tca[TCA_KIND], name)) { + NL_SET_ERR_MSG(extack, "Specified TC filter name too long"); + err = -EINVAL; + goto errout; + } + /* Take rtnl mutex if rtnl_held was set to true on previous iteration, * block is shared (no qdisc found), qdisc is not unlocked, classifier * type is not specified, classifier is not unlocked. */ if (rtnl_held || (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) || - !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) { + !tcf_proto_is_unlocked(name)) { rtnl_held = true; rtnl_lock(); } @@ -2063,6 +2081,7 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; + char name[IFNAMSIZ]; struct tcmsg *t; u32 protocol; u32 prio; @@ -2102,13 +2121,18 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, if (err) return err; + if (tcf_proto_check_kind(tca[TCA_KIND], name)) { + NL_SET_ERR_MSG(extack, "Specified TC filter name too long"); + err = -EINVAL; + goto errout; + } /* Take rtnl mutex if flushing whole chain, block is shared (no qdisc * found), qdisc is not unlocked, classifier type is not specified, * classifier is not unlocked. */ if (!prio || (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) || - !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) { + !tcf_proto_is_unlocked(name)) { rtnl_held = true; rtnl_lock(); } @@ -2216,6 +2240,7 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; + char name[IFNAMSIZ]; struct tcmsg *t; u32 protocol; u32 prio; @@ -2252,12 +2277,17 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, if (err) return err; + if (tcf_proto_check_kind(tca[TCA_KIND], name)) { + NL_SET_ERR_MSG(extack, "Specified TC filter name too long"); + err = -EINVAL; + goto errout; + } /* Take rtnl mutex if block is shared (no qdisc found), qdisc is not * unlocked, classifier type is not specified, classifier is not * unlocked. */ if ((q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) || - !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) { + !tcf_proto_is_unlocked(name)) { rtnl_held = true; rtnl_lock(); } diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 81d58b280612..1047825d9f48 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1390,8 +1390,7 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w) } const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = { - [TCA_KIND] = { .type = NLA_NUL_STRING, - .len = IFNAMSIZ - 1 }, + [TCA_KIND] = { .type = NLA_STRING }, [TCA_RATE] = { .type = NLA_BINARY, .len = sizeof(struct tc_estimator) }, [TCA_STAB] = { .type = NLA_NESTED }, From 4b793feccae3b06764268377a4030eb774ed924e Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 7 Oct 2019 13:26:29 -0700 Subject: [PATCH 121/572] net_sched: fix backward compatibility for TCA_ACT_KIND For TCA_ACT_KIND, we have to keep the backward compatibility too, and rely on nla_strlcpy() to check and terminate the string with a NUL. Note for TC actions, nla_strcmp() is already used to compare kind strings, so we don't need to fix other places. Fixes: 199ce850ce11 ("net_sched: add policy validation for action attributes") Reported-by: Marcelo Ricardo Leitner Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Reviewed-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- net/sched/act_api.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 2558f00f6b3e..4e7429c6f864 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -832,8 +832,7 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb) } static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = { - [TCA_ACT_KIND] = { .type = NLA_NUL_STRING, - .len = IFNAMSIZ - 1 }, + [TCA_ACT_KIND] = { .type = NLA_STRING }, [TCA_ACT_INDEX] = { .type = NLA_U32 }, [TCA_ACT_COOKIE] = { .type = NLA_BINARY, .len = TC_COOKIE_MAX_SIZE }, @@ -865,8 +864,10 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, NL_SET_ERR_MSG(extack, "TC action kind must be specified"); goto err_out; } - nla_strlcpy(act_name, kind, IFNAMSIZ); - + if (nla_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ) { + NL_SET_ERR_MSG(extack, "TC action name too long"); + goto err_out; + } if (tb[TCA_ACT_COOKIE]) { cookie = nla_memdup_cookie(tb); if (!cookie) { From bcf059578980526d6067a2b1b0cd8344b58bf2d5 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 8 Oct 2019 16:40:33 -0700 Subject: [PATCH 122/572] Input: soc_button_array - partial revert of support for newer surface devices Commit c394159310d0 ("Input: soc_button_array - add support for newer surface devices") not only added support for the MSHW0040 ACPI HID, but for some reason it also makes changes to the error handling of the soc_button_lookup_gpio() call in soc_button_device_create(). Note ideally this seamingly unrelated change would have been made in a separate commit, with a message explaining the what and why of this change. I guess this change may have been added to deal with -EPROBE_DEFER errors, but in case of the existing support for PNP0C40 devices, treating -EPROBE_DEFER as any other error is deliberate, see the comment this commit adds for why. The actual returning of -EPROBE_DEFER to the caller of soc_button_probe() introduced by the new error checking causes a serious regression: On devices with so called virtual GPIOs soc_button_lookup_gpio() will always return -EPROBE_DEFER for these fake GPIOs, when this happens during the second call of soc_button_device_create() we already have successfully registered our first child. This causes the kernel to think we are making progress with probing things even though we unregister the child before again before we return the -EPROBE_DEFER. Since we are making progress the kernel will retry deferred-probes again immediately ending up stuck in a loop with the following showing in dmesg: [ 124.022697] input: gpio-keys as /devices/platform/INTCFD9:00/gpio-keys.0.auto/input/input6537 [ 124.040764] input: gpio-keys as /devices/platform/INTCFD9:00/gpio-keys.0.auto/input/input6538 [ 124.056967] input: gpio-keys as /devices/platform/INTCFD9:00/gpio-keys.0.auto/input/input6539 [ 124.072143] input: gpio-keys as /devices/platform/INTCFD9:00/gpio-keys.0.auto/input/input6540 [ 124.092373] input: gpio-keys as /devices/platform/INTCFD9:00/gpio-keys.0.auto/input/input6541 [ 124.108065] input: gpio-keys as /devices/platform/INTCFD9:00/gpio-keys.0.auto/input/input6542 [ 124.128483] input: gpio-keys as /devices/platform/INTCFD9:00/gpio-keys.0.auto/input/input6543 [ 124.147141] input: gpio-keys as /devices/platform/INTCFD9:00/gpio-keys.0.auto/input/input6544 [ 124.165070] input: gpio-keys as /devices/platform/INTCFD9:00/gpio-keys.0.auto/input/input6545 [ 124.179775] input: gpio-keys as /devices/platform/INTCFD9:00/gpio-keys.0.auto/input/input6546 [ 124.202726] input: gpio-keys as /devices/platform/INTCFD9:00/gpio-keys.0.auto/input/input6547 And 1 CPU core being stuck at 100% and udev hanging since it is waiting for the modprobe of soc_button_array to return. This patch reverts the soc_button_lookup_gpio() error handling changes, fixing this regression. Fixes: c394159310d0 ("Input: soc_button_array - add support for newer surface devices") BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=205031 Signed-off-by: Hans de Goede Tested-by: Maximilian Luz Acked-by: Maximilian Luz Link: https://lore.kernel.org/r/20191005105551.353273-1-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov --- drivers/input/misc/soc_button_array.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/input/misc/soc_button_array.c b/drivers/input/misc/soc_button_array.c index 97e3639e99d0..08520b3a18b8 100644 --- a/drivers/input/misc/soc_button_array.c +++ b/drivers/input/misc/soc_button_array.c @@ -92,11 +92,18 @@ soc_button_device_create(struct platform_device *pdev, continue; gpio = soc_button_lookup_gpio(&pdev->dev, info->acpi_index); - if (gpio < 0 && gpio != -ENOENT) { - error = gpio; - goto err_free_mem; - } else if (!gpio_is_valid(gpio)) { - /* Skip GPIO if not present */ + if (!gpio_is_valid(gpio)) { + /* + * Skip GPIO if not present. Note we deliberately + * ignore -EPROBE_DEFER errors here. On some devices + * Intel is using so called virtual GPIOs which are not + * GPIOs at all but some way for AML code to check some + * random status bits without need a custom opregion. + * In some cases the resources table we parse points to + * such a virtual GPIO, since these are not real GPIOs + * we do not have a driver for these so they will never + * show up, therefore we ignore -EPROBE_DEFER. + */ continue; } From 6af3aa57a0984e061f61308fe181a9a12359fecc Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 7 Oct 2019 18:40:59 +0200 Subject: [PATCH 123/572] NFC: pn533: fix use-after-free and memleaks The driver would fail to deregister and its class device and free related resources on late probe errors. Reported-by: syzbot+cb035c75c03dbe34b796@syzkaller.appspotmail.com Fixes: 32ecc75ded72 ("NFC: pn533: change order operations in dev registation") Signed-off-by: Johan Hovold Signed-off-by: Jakub Kicinski --- drivers/nfc/pn533/usb.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/nfc/pn533/usb.c b/drivers/nfc/pn533/usb.c index c5289eaf17ee..e897e4d768ef 100644 --- a/drivers/nfc/pn533/usb.c +++ b/drivers/nfc/pn533/usb.c @@ -547,18 +547,25 @@ static int pn533_usb_probe(struct usb_interface *interface, rc = pn533_finalize_setup(priv); if (rc) - goto error; + goto err_deregister; usb_set_intfdata(interface, phy); return 0; +err_deregister: + pn533_unregister_device(phy->priv); error: + usb_kill_urb(phy->in_urb); + usb_kill_urb(phy->out_urb); + usb_kill_urb(phy->ack_urb); + usb_free_urb(phy->in_urb); usb_free_urb(phy->out_urb); usb_free_urb(phy->ack_urb); usb_put_dev(phy->udev); kfree(in_buf); + kfree(phy->ack_buffer); return rc; } From b82316d25522e8ee748b8cc9d8aadeb3c135c6fe Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 8 Oct 2019 08:35:51 -0700 Subject: [PATCH 124/572] Doc: networking/device_drivers/pensando: fix ionic.rst warnings Fix documentation build warnings for Pensando ionic: Documentation/networking/device_drivers/pensando/ionic.rst:39: WARNING: Unexpected indentation. Documentation/networking/device_drivers/pensando/ionic.rst:43: WARNING: Unexpected indentation. Fixes: df69ba43217d ("ionic: Add basic framework for IONIC Network device driver") Signed-off-by: Randy Dunlap Acked-by: Shannon Nelson Signed-off-by: Jakub Kicinski --- Documentation/networking/device_drivers/pensando/ionic.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/networking/device_drivers/pensando/ionic.rst b/Documentation/networking/device_drivers/pensando/ionic.rst index 67b6839d516b..13935896bee6 100644 --- a/Documentation/networking/device_drivers/pensando/ionic.rst +++ b/Documentation/networking/device_drivers/pensando/ionic.rst @@ -36,8 +36,10 @@ Support ======= For general Linux networking support, please use the netdev mailing list, which is monitored by Pensando personnel:: + netdev@vger.kernel.org For more specific support needs, please use the Pensando driver support email:: - drivers@pensando.io + + drivers@pensando.io From 4123f637a5129470ff9d3cb00a5a4e213f2e15cc Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Tue, 8 Oct 2019 17:56:03 +0800 Subject: [PATCH 125/572] ip6erspan: remove the incorrect mtu limit for ip6erspan ip6erspan driver calls ether_setup(), after commit 61e84623ace3 ("net: centralize net_device min/max MTU checking"), the range of mtu is [min_mtu, max_mtu], which is [68, 1500] by default. It causes the dev mtu of the erspan device to not be greater than 1500, this limit value is not correct for ip6erspan tap device. Fixes: 61e84623ace3 ("net: centralize net_device min/max MTU checking") Signed-off-by: Haishuang Yan Acked-by: William Tu Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_gre.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index d5779d6a6065..787d9f2a6e99 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -2192,6 +2192,7 @@ static void ip6erspan_tap_setup(struct net_device *dev) { ether_setup(dev); + dev->max_mtu = 0; dev->netdev_ops = &ip6erspan_netdev_ops; dev->needs_free_netdev = true; dev->priv_destructor = ip6gre_dev_free; From 187c195ac57fbb84a84ca07e93c6f16fd8edfbe4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Tue, 8 Oct 2019 16:59:44 +0200 Subject: [PATCH 126/572] arm64: dts: armada-3720-turris-mox: convert usb-phy to phy-supply MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update Turris Mox device tree to use the phy-supply property of the generic PHY framework instead of the legacy usb-phy property. This is needed since it caused a regression on Turris Mox since "usb: host: xhci-plat: Prevent an abnormally restrictive PHY init skipping". Signed-off-by: Marek Behún Fixes: eb6c2eb6c7fb ("usb: host: xhci-plat: Prevent an abnormally restrictive PHY init skipping") Reviewed-by: Miquel Raynal Cc: Andrew Lunn Signed-off-by: Gregory CLEMENT --- .../boot/dts/marvell/armada-3720-turris-mox.dts | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts index d105986c6be1..5f350cc71a2f 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts @@ -60,11 +60,6 @@ gpio = <&gpiosb 0 GPIO_ACTIVE_HIGH>; }; - usb3_phy: usb3-phy { - compatible = "usb-nop-xceiv"; - vcc-supply = <&exp_usb3_vbus>; - }; - vsdc_reg: vsdc-reg { compatible = "regulator-gpio"; regulator-name = "vsdc"; @@ -255,10 +250,16 @@ status = "okay"; }; +&comphy2 { + connector { + compatible = "usb-a-connector"; + phy-supply = <&exp_usb3_vbus>; + }; +}; + &usb3 { status = "okay"; phys = <&comphy2 0>; - usb-phy = <&usb3_phy>; }; &mdio { From b835d6953009dc350d61402a854b5a7178d8c615 Mon Sep 17 00:00:00 2001 From: Patrick Williams Date: Tue, 1 Oct 2019 10:51:38 -0500 Subject: [PATCH 127/572] pinctrl: armada-37xx: swap polarity on LED group The configuration registers for the LED group have inverted polarity, which puts the GPIO into open-drain state when used in GPIO mode. Switch to '0' for GPIO and '1' for LED modes. Fixes: 87466ccd9401 ("pinctrl: armada-37xx: Add pin controller support for Armada 37xx") Signed-off-by: Patrick Williams Cc: Link: https://lore.kernel.org/r/20191001155154.99710-1-alpawi@amazon.com Signed-off-by: Linus Walleij --- drivers/pinctrl/mvebu/pinctrl-armada-37xx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c index 34c1fee52cbe..f2f5fcd9a237 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c @@ -183,10 +183,10 @@ static struct armada_37xx_pin_group armada_37xx_nb_groups[] = { PIN_GRP_EXTRA("uart2", 9, 2, BIT(1) | BIT(13) | BIT(14) | BIT(19), BIT(1) | BIT(13) | BIT(14), BIT(1) | BIT(19), 18, 2, "gpio", "uart"), - PIN_GRP_GPIO("led0_od", 11, 1, BIT(20), "led"), - PIN_GRP_GPIO("led1_od", 12, 1, BIT(21), "led"), - PIN_GRP_GPIO("led2_od", 13, 1, BIT(22), "led"), - PIN_GRP_GPIO("led3_od", 14, 1, BIT(23), "led"), + PIN_GRP_GPIO_2("led0_od", 11, 1, BIT(20), BIT(20), 0, "led"), + PIN_GRP_GPIO_2("led1_od", 12, 1, BIT(21), BIT(21), 0, "led"), + PIN_GRP_GPIO_2("led2_od", 13, 1, BIT(22), BIT(22), 0, "led"), + PIN_GRP_GPIO_2("led3_od", 14, 1, BIT(23), BIT(23), 0, "led"), }; From 19ec6bb80290e496021488084944b77f03a87dd7 Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Fri, 4 Oct 2019 15:44:52 +0200 Subject: [PATCH 128/572] mmc: sdhci-iproc: fix spurious interrupts on Multiblock reads with bcm2711 The Raspberry Pi 4 SDHCI hardware seems to automatically issue CMD12 after multiblock reads even when ACMD12 is disabled. This triggers spurious interrupts after the data transfer is done with the following message: mmc1: Got data interrupt 0x00000002 even though no data operation was in progress. mmc1: sdhci: ============ SDHCI REGISTER DUMP =========== mmc1: sdhci: Sys addr: 0x00000000 | Version: 0x00001002 mmc1: sdhci: Blk size: 0x00007200 | Blk cnt: 0x00000000 mmc1: sdhci: Argument: 0x00000000 | Trn mode: 0x00000033 mmc1: sdhci: Present: 0x1fff0000 | Host ctl: 0x00000017 mmc1: sdhci: Power: 0x0000000f | Blk gap: 0x00000080 mmc1: sdhci: Wake-up: 0x00000000 | Clock: 0x00000107 mmc1: sdhci: Timeout: 0x00000000 | Int stat: 0x00000000 mmc1: sdhci: Int enab: 0x03ff100b | Sig enab: 0x03ff100b mmc1: sdhci: ACmd stat: 0x00000000 | Slot int: 0x00000000 mmc1: sdhci: Caps: 0x45ee6432 | Caps_1: 0x0000a525 mmc1: sdhci: Cmd: 0x00000c1a | Max curr: 0x00080008 mmc1: sdhci: Resp[0]: 0x00000b00 | Resp[1]: 0x00edc87f mmc1: sdhci: Resp[2]: 0x325b5900 | Resp[3]: 0x00400e00 mmc1: sdhci: Host ctl2: 0x00000001 mmc1: sdhci: ADMA Err: 0x00000000 | ADMA Ptr: 0xf3025208 mmc1: sdhci: ============================================ Enable SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12 to enable ACMD12 on multiblock reads and suppress the spurious interrupts. Fixes: f84e411c85be ("mmc: sdhci-iproc: Add support for emmc2 of the BCM2711") Signed-off-by: Nicolas Saenz Julienne Tested-by: Matthias Brugger Acked-by: Stefan Wahren Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-iproc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/host/sdhci-iproc.c b/drivers/mmc/host/sdhci-iproc.c index 2b9cdcd1dd9d..f4f5f0a70cda 100644 --- a/drivers/mmc/host/sdhci-iproc.c +++ b/drivers/mmc/host/sdhci-iproc.c @@ -262,6 +262,7 @@ static const struct sdhci_iproc_data bcm2835_data = { }; static const struct sdhci_pltfm_data sdhci_bcm2711_pltfm_data = { + .quirks = SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12, .ops = &sdhci_iproc_32only_ops, }; From 28c9fac09ab0147158db0baeec630407a5e9b892 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 5 Oct 2019 13:21:01 +0200 Subject: [PATCH 129/572] memstick: jmb38x_ms: Fix an error handling path in 'jmb38x_ms_probe()' If 'jmb38x_ms_count_slots()' returns 0, we must undo the previous 'pci_request_regions()' call. Goto 'err_out_int' to fix it. Fixes: 60fdd931d577 ("memstick: add support for JMicron jmb38x MemoryStick host controller") Cc: stable@vger.kernel.org Signed-off-by: Christophe JAILLET Signed-off-by: Ulf Hansson --- drivers/memstick/host/jmb38x_ms.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/memstick/host/jmb38x_ms.c b/drivers/memstick/host/jmb38x_ms.c index 32747425297d..64fff6abe60e 100644 --- a/drivers/memstick/host/jmb38x_ms.c +++ b/drivers/memstick/host/jmb38x_ms.c @@ -941,7 +941,7 @@ static int jmb38x_ms_probe(struct pci_dev *pdev, if (!cnt) { rc = -ENODEV; pci_dev_busy = 1; - goto err_out; + goto err_out_int; } jm = kzalloc(sizeof(struct jmb38x_ms) From 1fee35d04a42290a3c44b8a4e3c66dbdd6180c42 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Thu, 8 Aug 2019 13:52:50 +0300 Subject: [PATCH 130/572] iwlwifi: don't access trans_cfg via cfg We copy cfg->trans to trans->trans_cfg at the very beginning, so don't try to access it via cfg->trans anymore, because the cfg may be unset in later cases. Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-io.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-io.h b/drivers/net/wireless/intel/iwlwifi/iwl-io.h index f8e4f0f5de0c..f09e368c7040 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-io.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-io.h @@ -112,38 +112,38 @@ int iwl_dump_fh(struct iwl_trans *trans, char **buf); */ static inline u32 iwl_umac_prph(struct iwl_trans *trans, u32 ofs) { - return ofs + trans->cfg->trans.umac_prph_offset; + return ofs + trans->trans_cfg->umac_prph_offset; } static inline u32 iwl_read_umac_prph_no_grab(struct iwl_trans *trans, u32 ofs) { return iwl_read_prph_no_grab(trans, ofs + - trans->cfg->trans.umac_prph_offset); + trans->trans_cfg->umac_prph_offset); } static inline u32 iwl_read_umac_prph(struct iwl_trans *trans, u32 ofs) { - return iwl_read_prph(trans, ofs + trans->cfg->trans.umac_prph_offset); + return iwl_read_prph(trans, ofs + trans->trans_cfg->umac_prph_offset); } static inline void iwl_write_umac_prph_no_grab(struct iwl_trans *trans, u32 ofs, u32 val) { - iwl_write_prph_no_grab(trans, ofs + trans->cfg->trans.umac_prph_offset, + iwl_write_prph_no_grab(trans, ofs + trans->trans_cfg->umac_prph_offset, val); } static inline void iwl_write_umac_prph(struct iwl_trans *trans, u32 ofs, u32 val) { - iwl_write_prph(trans, ofs + trans->cfg->trans.umac_prph_offset, val); + iwl_write_prph(trans, ofs + trans->trans_cfg->umac_prph_offset, val); } static inline int iwl_poll_umac_prph_bit(struct iwl_trans *trans, u32 addr, u32 bits, u32 mask, int timeout) { return iwl_poll_prph_bit(trans, addr + - trans->cfg->trans.umac_prph_offset, + trans->trans_cfg->umac_prph_offset, bits, mask, timeout); } From 3ed83da39aed275a5b74c74f77e85c839ad2efe9 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Fri, 23 Aug 2019 11:59:09 +0300 Subject: [PATCH 131/572] iwlwifi: fix ACPI table revision checks We can't check for the ACPI table revision validity in the same if where we check if the package was read correctly, because we return PTR_ERR(pkg) and if the table is not valid but the pointer is, we would return a valid pointer as an error. Fix that by moving the table checks to a separate if and return -EINVAL if it's not valid. Reported-by: Dan Carpenter Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/acpi.c | 10 ++++---- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 24 +++++++++++++++----- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c index 7573af2d88ce..c2db758b9d54 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c @@ -162,12 +162,13 @@ int iwl_acpi_get_mcc(struct device *dev, char *mcc) wifi_pkg = iwl_acpi_get_wifi_pkg(dev, data, ACPI_WRDD_WIFI_DATA_SIZE, &tbl_rev); - if (IS_ERR(wifi_pkg) || tbl_rev != 0) { + if (IS_ERR(wifi_pkg)) { ret = PTR_ERR(wifi_pkg); goto out_free; } - if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) { + if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER || + tbl_rev != 0) { ret = -EINVAL; goto out_free; } @@ -224,12 +225,13 @@ int iwl_acpi_get_eckv(struct device *dev, u32 *extl_clk) wifi_pkg = iwl_acpi_get_wifi_pkg(dev, data, ACPI_ECKV_WIFI_DATA_SIZE, &tbl_rev); - if (IS_ERR(wifi_pkg) || tbl_rev != 0) { + if (IS_ERR(wifi_pkg)) { ret = PTR_ERR(wifi_pkg); goto out_free; } - if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) { + if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER || + tbl_rev != 0) { ret = -EINVAL; goto out_free; } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 32a5e4e5461f..b2eb18eedf02 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -694,12 +694,13 @@ static int iwl_mvm_sar_get_wrds_table(struct iwl_mvm *mvm) wifi_pkg = iwl_acpi_get_wifi_pkg(mvm->dev, data, ACPI_WRDS_WIFI_DATA_SIZE, &tbl_rev); - if (IS_ERR(wifi_pkg) || tbl_rev != 0) { + if (IS_ERR(wifi_pkg)) { ret = PTR_ERR(wifi_pkg); goto out_free; } - if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) { + if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER || + tbl_rev != 0) { ret = -EINVAL; goto out_free; } @@ -731,13 +732,14 @@ static int iwl_mvm_sar_get_ewrd_table(struct iwl_mvm *mvm) wifi_pkg = iwl_acpi_get_wifi_pkg(mvm->dev, data, ACPI_EWRD_WIFI_DATA_SIZE, &tbl_rev); - if (IS_ERR(wifi_pkg) || tbl_rev != 0) { + if (IS_ERR(wifi_pkg)) { ret = PTR_ERR(wifi_pkg); goto out_free; } if ((wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) || - (wifi_pkg->package.elements[2].type != ACPI_TYPE_INTEGER)) { + (wifi_pkg->package.elements[2].type != ACPI_TYPE_INTEGER) || + tbl_rev != 0) { ret = -EINVAL; goto out_free; } @@ -791,11 +793,16 @@ static int iwl_mvm_sar_get_wgds_table(struct iwl_mvm *mvm) wifi_pkg = iwl_acpi_get_wifi_pkg(mvm->dev, data, ACPI_WGDS_WIFI_DATA_SIZE, &tbl_rev); - if (IS_ERR(wifi_pkg) || tbl_rev > 1) { + if (IS_ERR(wifi_pkg)) { ret = PTR_ERR(wifi_pkg); goto out_free; } + if (tbl_rev != 0) { + ret = -EINVAL; + goto out_free; + } + mvm->geo_rev = tbl_rev; for (i = 0; i < ACPI_NUM_GEO_PROFILES; i++) { for (j = 0; j < ACPI_GEO_TABLE_SIZE; j++) { @@ -1020,11 +1027,16 @@ static int iwl_mvm_get_ppag_table(struct iwl_mvm *mvm) wifi_pkg = iwl_acpi_get_wifi_pkg(mvm->dev, data, ACPI_PPAG_WIFI_DATA_SIZE, &tbl_rev); - if (IS_ERR(wifi_pkg) || tbl_rev != 0) { + if (IS_ERR(wifi_pkg)) { ret = PTR_ERR(wifi_pkg); goto out_free; } + if (tbl_rev != 0) { + ret = -EINVAL; + goto out_free; + } + enabled = &wifi_pkg->package.elements[1]; if (enabled->type != ACPI_TYPE_INTEGER || (enabled->integer.value != 0 && enabled->integer.value != 1)) { From a4584729291c71c5c14718ff00ea6c5f971c45b2 Mon Sep 17 00:00:00 2001 From: Haim Dreyfuss Date: Mon, 12 Aug 2019 13:25:42 +0300 Subject: [PATCH 132/572] iwlwifi: mvm: force single phy init The PHY is initialized during device initialization, but devices with the tx_siso_diversity flag set need to send PHY_CONFIGURATION_CMD first, otherwise the PHY would be reinitialized, causing a SYSASSERT. To fix this, use a bit that tells the FW not to complete the PHY initialization before a PHY_CONFIGURATION_CMD is received. Signed-off-by: Haim Dreyfuss Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index b2eb18eedf02..0d2229319261 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -420,6 +420,9 @@ static int iwl_run_unified_mvm_ucode(struct iwl_mvm *mvm, bool read_nvm) }; int ret; + if (mvm->trans->cfg->tx_with_siso_diversity) + init_cfg.init_flags |= cpu_to_le32(BIT(IWL_INIT_PHY)); + lockdep_assert_held(&mvm->mutex); mvm->rfkill_safe_init_done = false; From a2113cc44d4387a7c3ef3a9082d273524f9230ac Mon Sep 17 00:00:00 2001 From: Naftali Goldstein Date: Mon, 9 Sep 2019 13:11:04 +0300 Subject: [PATCH 133/572] iwlwifi: mvm: fix race in sync rx queue notification Consider the following flow: 1. Driver starts to sync the rx queues due to a delba. mvm->queue_sync_cookie=1. This rx-queues-sync is synchronous, so it doesn't increment the cookie until all rx queues handle the notification from FW. 2. During this time, driver starts to sync rx queues due to nssn sync required. The cookie's value is still 1, but it doesn't matter since this rx-queue-sync is non-synchronous so in the notification handler the cookie is ignored. What _does_ matter is that this flow increments the cookie to 2 immediately. Remember though that the FW won't start servicing this command until it's done with the previous one. 3. FW is still handling the first command, so it sends a notification with internal_notif->sync=1, and internal_notif->cookie=0, which triggers a WARN_ONCE. The solution for this race is to only use the mvm->queue_sync_cookie in case of a synchronous sync-rx-queues. This way in step 2 the cookie's value won't change so we avoid the WARN. The commit in the "fixes" field is the first commit to introduce non-synchronous sending of this command to FW. Fixes: 3c514bf831ac ("iwlwifi: mvm: add a loose synchronization of the NSSN across Rx queues") Signed-off-by: Naftali Goldstein Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index cd1b10042fbf..d31f96c3f925 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -4881,11 +4881,11 @@ void iwl_mvm_sync_rx_queues_internal(struct iwl_mvm *mvm, if (!iwl_mvm_has_new_rx_api(mvm)) return; - notif->cookie = mvm->queue_sync_cookie; - - if (notif->sync) + if (notif->sync) { + notif->cookie = mvm->queue_sync_cookie; atomic_set(&mvm->queue_sync_counter, mvm->trans->num_rx_queues); + } ret = iwl_mvm_notify_rx_queue(mvm, qmask, (u8 *)notif, size, !notif->sync); @@ -4905,7 +4905,8 @@ void iwl_mvm_sync_rx_queues_internal(struct iwl_mvm *mvm, out: atomic_set(&mvm->queue_sync_counter, 0); - mvm->queue_sync_cookie++; + if (notif->sync) + mvm->queue_sync_cookie++; } static void iwl_mvm_sync_rx_queues(struct ieee80211_hw *hw) From 08326a97afbe808e50e1610c2835fa2747bdb908 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 4 Sep 2019 16:23:27 +0200 Subject: [PATCH 134/572] iwlwifi: pcie: fix indexing in command dump for new HW We got a crash in iwl_trans_pcie_get_cmdlen(), while the TFD was being accessed to sum up the lengths. We want to access the TFD here, which is the information for the hardware. We always only allocate 32 buffers for the cmd queue, but on newer hardware (using TFH) we can also allocate only a shorter hardware array, also only 32 TFDs. Prior to the TFH, we had to allocate a bigger TFD array but would make those point to a smaller set of buffers. Additionally, now max_tfd_queue_size is up to 65536, so we can access *way* out of bounds of a really only 32-entry array, so it crashes. Fix this by making the TFD index depend on which hardware we are using right now. While changing the calculation, also fix it to not use void ptr arithmetic, but cast to u8 * before. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index f8a1f985a1d8..ab7480a85015 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -3272,11 +3272,17 @@ static struct iwl_trans_dump_data ptr = cmdq->write_ptr; for (i = 0; i < cmdq->n_window; i++) { u8 idx = iwl_pcie_get_cmd_index(cmdq, ptr); + u8 tfdidx; u32 caplen, cmdlen; + if (trans->trans_cfg->use_tfh) + tfdidx = idx; + else + tfdidx = ptr; + cmdlen = iwl_trans_pcie_get_cmdlen(trans, - cmdq->tfds + - tfd_size * ptr); + (u8 *)cmdq->tfds + + tfd_size * tfdidx); caplen = min_t(u32, TFD_MAX_PAYLOAD_SIZE, cmdlen); if (cmdlen) { From 8188a18ee2e48c9a7461139838048363bfce3fef Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 19 Sep 2019 09:04:09 +0200 Subject: [PATCH 135/572] iwlwifi: pcie: fix rb_allocator workqueue allocation We don't handle failures in the rb_allocator workqueue allocation correctly. To fix that, move the code earlier so the cleanup is easier and we don't have to undo all the interrupt allocations in this case. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index ab7480a85015..6961f00ff812 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -3456,6 +3456,15 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, spin_lock_init(&trans_pcie->reg_lock); mutex_init(&trans_pcie->mutex); init_waitqueue_head(&trans_pcie->ucode_write_waitq); + + trans_pcie->rba.alloc_wq = alloc_workqueue("rb_allocator", + WQ_HIGHPRI | WQ_UNBOUND, 1); + if (!trans_pcie->rba.alloc_wq) { + ret = -ENOMEM; + goto out_free_trans; + } + INIT_WORK(&trans_pcie->rba.rx_alloc, iwl_pcie_rx_allocator_work); + trans_pcie->tso_hdr_page = alloc_percpu(struct iwl_tso_hdr_page); if (!trans_pcie->tso_hdr_page) { ret = -ENOMEM; @@ -3590,10 +3599,6 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, trans_pcie->inta_mask = CSR_INI_SET_MASK; } - trans_pcie->rba.alloc_wq = alloc_workqueue("rb_allocator", - WQ_HIGHPRI | WQ_UNBOUND, 1); - INIT_WORK(&trans_pcie->rba.rx_alloc, iwl_pcie_rx_allocator_work); - #ifdef CONFIG_IWLWIFI_DEBUGFS trans_pcie->fw_mon_data.state = IWL_FW_MON_DBGFS_STATE_CLOSED; mutex_init(&trans_pcie->fw_mon_data.mutex); @@ -3605,6 +3610,8 @@ out_free_ict: iwl_pcie_free_ict(trans); out_no_pci: free_percpu(trans_pcie->tso_hdr_page); + destroy_workqueue(trans_pcie->rba.alloc_wq); +out_free_trans: iwl_trans_free(trans); return ERR_PTR(ret); } From b4b814fec1a5a849383f7b3886b654a13abbda7d Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Thu, 12 Sep 2019 23:23:27 -0500 Subject: [PATCH 136/572] iwlwifi: dbg_ini: fix memory leak in alloc_sgtable In alloc_sgtable if alloc_page fails, the alocated table should be released. Signed-off-by: Navid Emamdoost Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index 5c8602de9168..87421807e040 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -646,6 +646,7 @@ static struct scatterlist *alloc_sgtable(int size) if (new_page) __free_page(new_page); } + kfree(table); return NULL; } alloc_size = min_t(int, size, PAGE_SIZE); From 0f4f199443faca715523b0659aa536251d8b978f Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Fri, 27 Sep 2019 15:56:04 -0500 Subject: [PATCH 137/572] iwlwifi: pcie: fix memory leaks in iwl_pcie_ctxt_info_gen3_init In iwl_pcie_ctxt_info_gen3_init there are cases that the allocated dma memory is leaked in case of error. DMA memories prph_scratch, prph_info, and ctxt_info_gen3 are allocated and initialized to be later assigned to trans_pcie. But in any error case before such assignment the allocated memories should be released. First of such error cases happens when iwl_pcie_init_fw_sec fails. Current implementation correctly releases prph_scratch. But in two sunsequent error cases where dma_alloc_coherent may fail, such releases are missing. This commit adds release for prph_scratch when allocation for prph_info fails, and adds releases for prph_scratch and prph_info when allocation for ctxt_info_gen3 fails. Fixes: 2ee824026288 ("iwlwifi: pcie: support context information for 22560 devices") Signed-off-by: Navid Emamdoost Signed-off-by: Luca Coelho --- .../intel/iwlwifi/pcie/ctxt-info-gen3.c | 36 +++++++++++++------ 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c index 75fa8a6aafee..74980382e64c 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c @@ -107,13 +107,9 @@ int iwl_pcie_ctxt_info_gen3_init(struct iwl_trans *trans, /* allocate ucode sections in dram and set addresses */ ret = iwl_pcie_init_fw_sec(trans, fw, &prph_scratch->dram); - if (ret) { - dma_free_coherent(trans->dev, - sizeof(*prph_scratch), - prph_scratch, - trans_pcie->prph_scratch_dma_addr); - return ret; - } + if (ret) + goto err_free_prph_scratch; + /* Allocate prph information * currently we don't assign to the prph info anything, but it would get @@ -121,16 +117,20 @@ int iwl_pcie_ctxt_info_gen3_init(struct iwl_trans *trans, prph_info = dma_alloc_coherent(trans->dev, sizeof(*prph_info), &trans_pcie->prph_info_dma_addr, GFP_KERNEL); - if (!prph_info) - return -ENOMEM; + if (!prph_info) { + ret = -ENOMEM; + goto err_free_prph_scratch; + } /* Allocate context info */ ctxt_info_gen3 = dma_alloc_coherent(trans->dev, sizeof(*ctxt_info_gen3), &trans_pcie->ctxt_info_dma_addr, GFP_KERNEL); - if (!ctxt_info_gen3) - return -ENOMEM; + if (!ctxt_info_gen3) { + ret = -ENOMEM; + goto err_free_prph_info; + } ctxt_info_gen3->prph_info_base_addr = cpu_to_le64(trans_pcie->prph_info_dma_addr); @@ -186,6 +186,20 @@ int iwl_pcie_ctxt_info_gen3_init(struct iwl_trans *trans, iwl_set_bit(trans, CSR_GP_CNTRL, CSR_AUTO_FUNC_INIT); return 0; + +err_free_prph_info: + dma_free_coherent(trans->dev, + sizeof(*prph_info), + prph_info, + trans_pcie->prph_info_dma_addr); + +err_free_prph_scratch: + dma_free_coherent(trans->dev, + sizeof(*prph_scratch), + prph_scratch, + trans_pcie->prph_scratch_dma_addr); + return ret; + } void iwl_pcie_ctxt_info_gen3_free(struct iwl_trans *trans) From 12e36d98d3e5acf5fc57774e0a15906d55f30cb9 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Tue, 8 Oct 2019 13:10:53 +0300 Subject: [PATCH 138/572] iwlwifi: exclude GEO SAR support for 3168 We currently support two NICs in FW version 29, namely 7265D and 3168. Out of these, only 7265D supports GEO SAR, so adjust the function that checks for it accordingly. Signed-off-by: Luca Coelho Fixes: f5a47fae6aa3 ("iwlwifi: mvm: fix version check for GEO_TX_POWER_LIMIT support") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 0d2229319261..d9eb2b286438 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -899,15 +899,17 @@ static bool iwl_mvm_sar_geo_support(struct iwl_mvm *mvm) * firmware versions. Unfortunately, we don't have a TLV API * flag to rely on, so rely on the major version which is in * the first byte of ucode_ver. This was implemented - * initially on version 38 and then backported to29 and 17. - * The intention was to have it in 36 as well, but not all - * 8000 family got this feature enabled. The 8000 family is - * the only one using version 36, so skip this version - * entirely. + * initially on version 38 and then backported to 17. It was + * also backported to 29, but only for 7265D devices. The + * intention was to have it in 36 as well, but not all 8000 + * family got this feature enabled. The 8000 family is the + * only one using version 36, so skip this version entirely. */ return IWL_UCODE_SERIAL(mvm->fw->ucode_ver) >= 38 || - IWL_UCODE_SERIAL(mvm->fw->ucode_ver) == 29 || - IWL_UCODE_SERIAL(mvm->fw->ucode_ver) == 17; + IWL_UCODE_SERIAL(mvm->fw->ucode_ver) == 17 || + (IWL_UCODE_SERIAL(mvm->fw->ucode_ver) == 29 && + ((mvm->trans->hw_rev & CSR_HW_REV_TYPE_MSK) == + CSR_HW_REV_TYPE_7265D)); } int iwl_mvm_get_sar_geo_profile(struct iwl_mvm *mvm) From aa0cc7dde17bb6b8cc533bbcfe3f53d70e0dd269 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Tue, 8 Oct 2019 13:21:02 +0300 Subject: [PATCH 139/572] iwlwifi: pcie: change qu with jf devices to use qu configuration There were a bunch of devices with qu and jf that were loading the configuration with pu and jf, which is wrong. Fix them all accordingly. Additionally, remove 0x1010 and 0x1210 subsytem IDs from the list, since they are obviously wrong, and 0x0044 and 0x0244, which were duplicate. Cc: stable@vger.kernel.org # 5.1+ Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 274 +++++++++--------- 1 file changed, 137 insertions(+), 137 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index e29c47744ef5..6f4bb7ce71a5 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -513,31 +513,33 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x24FD, 0x9074, iwl8265_2ac_cfg)}, /* 9000 Series */ - {IWL_PCI_DEVICE(0x02F0, 0x0030, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x0034, iwl9560_2ac_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x0038, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x003C, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x0060, iwl9461_2ac_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x0064, iwl9461_2ac_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x00A0, iwl9462_2ac_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x00A4, iwl9462_2ac_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x0230, iwl9560_2ac_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x0234, iwl9560_2ac_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x0238, iwl9560_2ac_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x023C, iwl9560_2ac_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x0260, iwl9461_2ac_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x0264, iwl9461_2ac_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x02A0, iwl9462_2ac_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x02A4, iwl9462_2ac_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x1551, iwl9560_killer_s_2ac_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x1552, iwl9560_killer_i_2ac_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x2030, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x2034, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x4030, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x4034, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x40A4, iwl9462_2ac_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x4234, iwl9560_2ac_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x42A4, iwl9462_2ac_cfg_quz_a0_jf_b0_soc)}, + {IWL_PCI_DEVICE(0x02F0, 0x0030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x0034, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x0038, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x003C, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x0060, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x0064, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x00A0, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x00A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x0230, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x0234, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x0238, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x023C, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x0260, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x0264, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x02A0, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x02A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x1030, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x1551, killer1550s_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x1552, killer1550i_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x2030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x2034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x4030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x4034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x40A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x4234, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x42A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x06F0, 0x0030, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)}, {IWL_PCI_DEVICE(0x06F0, 0x0034, iwl9560_2ac_cfg_quz_a0_jf_b0_soc)}, {IWL_PCI_DEVICE(0x06F0, 0x0038, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)}, @@ -643,34 +645,34 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x2720, 0x40A4, iwl9462_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2720, 0x4234, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2720, 0x42A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x0030, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x0034, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x0038, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x003C, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x0060, iwl9460_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x0064, iwl9461_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x00A0, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x00A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x0230, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x0234, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x0238, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x023C, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x0260, iwl9461_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x0264, iwl9461_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x02A0, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x02A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x1010, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x30DC, 0x1030, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x1210, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x30DC, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x1552, iwl9560_killer_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x2030, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x2034, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x4030, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x4034, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x40A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x4234, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x42A4, iwl9462_2ac_cfg_soc)}, + + {IWL_PCI_DEVICE(0x30DC, 0x0030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x0034, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x0038, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x003C, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x0060, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x0064, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x00A0, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x00A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x0230, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x0234, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x0238, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x023C, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x0260, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x0264, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x02A0, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x02A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x1030, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x1551, killer1550s_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x1552, killer1550i_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x2030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x2034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x4030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x4034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x40A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x4234, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x42A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x31DC, 0x0030, iwl9560_2ac_160_cfg_shared_clk)}, {IWL_PCI_DEVICE(0x31DC, 0x0034, iwl9560_2ac_cfg_shared_clk)}, {IWL_PCI_DEVICE(0x31DC, 0x0038, iwl9560_2ac_160_cfg_shared_clk)}, @@ -726,62 +728,60 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x34F0, 0x4234, iwl9560_2ac_cfg_qu_b0_jf_b0)}, {IWL_PCI_DEVICE(0x34F0, 0x42A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, - {IWL_PCI_DEVICE(0x3DF0, 0x0030, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x0034, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x0038, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x003C, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x0060, iwl9461_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x0064, iwl9461_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x00A0, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x00A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x0230, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x0234, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x0238, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x023C, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x0260, iwl9461_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x0264, iwl9461_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x02A0, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x02A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x1010, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x3DF0, 0x1030, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x1210, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x3DF0, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x1552, iwl9560_killer_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x2030, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x2034, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x4030, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x4034, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x40A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x4234, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x42A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x0030, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x0034, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x0038, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x003C, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x0060, iwl9461_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x0064, iwl9461_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x00A0, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x00A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x0230, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x0234, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x0238, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x023C, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x0260, iwl9461_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x0264, iwl9461_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x02A0, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x02A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x1010, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x43F0, 0x1030, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x1210, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x43F0, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x1552, iwl9560_killer_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x2030, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x2034, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x4030, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x4034, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x40A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x4234, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x42A4, iwl9462_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x3DF0, 0x0030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x0034, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x0038, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x003C, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x0060, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x0064, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x00A0, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x00A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x0230, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x0234, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x0238, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x023C, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x0260, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x0264, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x02A0, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x02A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x1030, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x1551, killer1550s_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x1552, killer1550i_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x2030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x2034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x4030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x4034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x40A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x4234, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x42A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + + {IWL_PCI_DEVICE(0x43F0, 0x0030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x0034, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x0038, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x003C, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x0060, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x0064, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x00A0, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x00A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x0230, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x0234, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x0238, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x023C, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x0260, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x0264, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x02A0, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x02A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x1030, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x1551, killer1550s_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x1552, killer1550i_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x2030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x2034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x4030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x4034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x40A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x4234, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x42A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0000, iwl9460_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x9DF0, 0x0010, iwl9460_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x9DF0, 0x0030, iwl9560_2ac_160_cfg_soc)}, @@ -821,34 +821,34 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x9DF0, 0x40A4, iwl9462_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x9DF0, 0x4234, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x9DF0, 0x42A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x0030, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x0034, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x0038, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x003C, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x0060, iwl9461_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x0064, iwl9461_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x00A0, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x00A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x0230, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x0234, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x0238, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x023C, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x0260, iwl9461_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x0264, iwl9461_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x02A0, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x02A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x1010, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0xA0F0, 0x1030, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x1210, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0xA0F0, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x1552, iwl9560_killer_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x2030, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x2034, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x4030, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x4034, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x40A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x4234, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x42A4, iwl9462_2ac_cfg_soc)}, + + {IWL_PCI_DEVICE(0xA0F0, 0x0030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x0034, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x0038, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x003C, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x0060, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x0064, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x00A0, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x00A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x0230, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x0234, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x0238, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x023C, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x0260, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x0264, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x02A0, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x02A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x1030, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x1551, killer1550s_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x1552, killer1550i_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x2030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x2034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x4030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x4034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x40A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x4234, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x42A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA370, 0x0030, iwl9560_2ac_160_cfg_soc)}, {IWL_PCI_DEVICE(0xA370, 0x0034, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0xA370, 0x0038, iwl9560_2ac_160_cfg_soc)}, From 598c30dbcc9434706f29a085a8eba4730573bcc2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 2 Oct 2019 16:10:24 -0500 Subject: [PATCH 140/572] drm/amdgpu/powerplay: fix typo in mvdd table setup Polaris and vegam use count for the value rather than level. This looks like a copy paste typo from when the code was adapted from previous asics. I'm not sure that the SMU actually uses this value, so I don't know that it actually is a bug per se. Bug: https://bugs.freedesktop.org/show_bug.cgi?id=108609 Reported-by: Robert Strube Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c | 2 +- drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c index dc754447f0dd..23c12018dbc1 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c @@ -655,7 +655,7 @@ static int polaris10_populate_smc_mvdd_table(struct pp_hwmgr *hwmgr, count = SMU_MAX_SMIO_LEVELS; for (level = 0; level < count; level++) { table->SmioTable2.Pattern[level].Voltage = - PP_HOST_TO_SMC_US(data->mvdd_voltage_table.entries[count].value * VOLTAGE_SCALE); + PP_HOST_TO_SMC_US(data->mvdd_voltage_table.entries[level].value * VOLTAGE_SCALE); /* Index into DpmTable.Smio. Drive bits from Smio entry to get this voltage level.*/ table->SmioTable2.Pattern[level].Smio = (uint8_t) level; diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c index 7c960b07746f..ae18fbcb26fb 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c @@ -456,7 +456,7 @@ static int vegam_populate_smc_mvdd_table(struct pp_hwmgr *hwmgr, count = SMU_MAX_SMIO_LEVELS; for (level = 0; level < count; level++) { table->SmioTable2.Pattern[level].Voltage = PP_HOST_TO_SMC_US( - data->mvdd_voltage_table.entries[count].value * VOLTAGE_SCALE); + data->mvdd_voltage_table.entries[level].value * VOLTAGE_SCALE); /* Index into DpmTable.Smio. Drive bits from Smio entry to get this voltage level.*/ table->SmioTable2.Pattern[level].Smio = (uint8_t) level; From bcab05880f9306e94531b0009c627421db110a74 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Wed, 9 Oct 2019 12:19:44 +0100 Subject: [PATCH 141/572] ASoC: msm8916-wcd-digital: add missing MIX2 path for RX1/2 This patch adds missing MIX2 path on RX1/2 which take IIR1 and IIR2 as inputs. Without this patch sound card fails to intialize with below warning: ASoC: no sink widget found for RX1 MIX2 INP1 ASoC: Failed to add route IIR1 -> IIR1 -> RX1 MIX2 INP1 ASoC: no sink widget found for RX2 MIX2 INP1 ASoC: Failed to add route IIR1 -> IIR1 -> RX2 MIX2 INP1 ASoC: no sink widget found for RX1 MIX2 INP1 ASoC: Failed to add route IIR2 -> IIR2 -> RX1 MIX2 INP1 ASoC: no sink widget found for RX2 MIX2 INP1 ASoC: Failed to add route IIR2 -> IIR2 -> RX2 MIX2 INP1 Reported-by: Stephan Gerhold Signed-off-by: Srinivas Kandagatla Tested-by: Stephan Gerhold Link: https://lore.kernel.org/r/20191009111944.28069-1-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/msm8916-wcd-digital.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/sound/soc/codecs/msm8916-wcd-digital.c b/sound/soc/codecs/msm8916-wcd-digital.c index 9fa5d44fdc79..58b2468fb2a7 100644 --- a/sound/soc/codecs/msm8916-wcd-digital.c +++ b/sound/soc/codecs/msm8916-wcd-digital.c @@ -243,6 +243,10 @@ static const char *const rx_mix1_text[] = { "ZERO", "IIR1", "IIR2", "RX1", "RX2", "RX3" }; +static const char * const rx_mix2_text[] = { + "ZERO", "IIR1", "IIR2" +}; + static const char *const dec_mux_text[] = { "ZERO", "ADC1", "ADC2", "ADC3", "DMIC1", "DMIC2" }; @@ -270,6 +274,16 @@ static const struct soc_enum rx3_mix1_inp_enum[] = { SOC_ENUM_SINGLE(LPASS_CDC_CONN_RX3_B2_CTL, 0, 6, rx_mix1_text), }; +/* RX1 MIX2 */ +static const struct soc_enum rx_mix2_inp1_chain_enum = + SOC_ENUM_SINGLE(LPASS_CDC_CONN_RX1_B3_CTL, + 0, 3, rx_mix2_text); + +/* RX2 MIX2 */ +static const struct soc_enum rx2_mix2_inp1_chain_enum = + SOC_ENUM_SINGLE(LPASS_CDC_CONN_RX2_B3_CTL, + 0, 3, rx_mix2_text); + /* DEC */ static const struct soc_enum dec1_mux_enum = SOC_ENUM_SINGLE( LPASS_CDC_CONN_TX_B1_CTL, 0, 6, dec_mux_text); @@ -309,6 +323,10 @@ static const struct snd_kcontrol_new rx3_mix1_inp2_mux = SOC_DAPM_ENUM( "RX3 MIX1 INP2 Mux", rx3_mix1_inp_enum[1]); static const struct snd_kcontrol_new rx3_mix1_inp3_mux = SOC_DAPM_ENUM( "RX3 MIX1 INP3 Mux", rx3_mix1_inp_enum[2]); +static const struct snd_kcontrol_new rx1_mix2_inp1_mux = SOC_DAPM_ENUM( + "RX1 MIX2 INP1 Mux", rx_mix2_inp1_chain_enum); +static const struct snd_kcontrol_new rx2_mix2_inp1_mux = SOC_DAPM_ENUM( + "RX2 MIX2 INP1 Mux", rx2_mix2_inp1_chain_enum); /* Digital Gain control -38.4 dB to +38.4 dB in 0.3 dB steps */ static const DECLARE_TLV_DB_SCALE(digital_gain, -3840, 30, 0); @@ -740,6 +758,10 @@ static const struct snd_soc_dapm_widget msm8916_wcd_digital_dapm_widgets[] = { &rx3_mix1_inp2_mux), SND_SOC_DAPM_MUX("RX3 MIX1 INP3", SND_SOC_NOPM, 0, 0, &rx3_mix1_inp3_mux), + SND_SOC_DAPM_MUX("RX1 MIX2 INP1", SND_SOC_NOPM, 0, 0, + &rx1_mix2_inp1_mux), + SND_SOC_DAPM_MUX("RX2 MIX2 INP1", SND_SOC_NOPM, 0, 0, + &rx2_mix2_inp1_mux), SND_SOC_DAPM_MUX("CIC1 MUX", SND_SOC_NOPM, 0, 0, &cic1_mux), SND_SOC_DAPM_MUX("CIC2 MUX", SND_SOC_NOPM, 0, 0, &cic2_mux), From af6219590b541418d3192e9bfa03989834ca0e78 Mon Sep 17 00:00:00 2001 From: Daniel Baluta Date: Wed, 9 Oct 2019 18:36:15 +0300 Subject: [PATCH 142/572] ASoC: simple_card_utils.h: Fix potential multiple redefinition error asoc_simple_debug_info and asoc_simple_debug_dai must be static otherwise we might a compilation error if the compiler decides not to inline the given function. Fixes: 0580dde59438686d ("ASoC: simple-card-utils: add asoc_simple_debug_info()") Signed-off-by: Daniel Baluta Link: https://lore.kernel.org/r/20191009153615.32105-3-daniel.baluta@nxp.com Signed-off-by: Mark Brown --- include/sound/simple_card_utils.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/sound/simple_card_utils.h b/include/sound/simple_card_utils.h index 985a5f583de4..31f76b6abf71 100644 --- a/include/sound/simple_card_utils.h +++ b/include/sound/simple_card_utils.h @@ -135,9 +135,9 @@ int asoc_simple_init_priv(struct asoc_simple_priv *priv, struct link_info *li); #ifdef DEBUG -inline void asoc_simple_debug_dai(struct asoc_simple_priv *priv, - char *name, - struct asoc_simple_dai *dai) +static inline void asoc_simple_debug_dai(struct asoc_simple_priv *priv, + char *name, + struct asoc_simple_dai *dai) { struct device *dev = simple_priv_to_dev(priv); @@ -167,7 +167,7 @@ inline void asoc_simple_debug_dai(struct asoc_simple_priv *priv, dev_dbg(dev, "%s clk %luHz\n", name, clk_get_rate(dai->clk)); } -inline void asoc_simple_debug_info(struct asoc_simple_priv *priv) +static inline void asoc_simple_debug_info(struct asoc_simple_priv *priv) { struct snd_soc_card *card = simple_priv_to_card(priv); struct device *dev = simple_priv_to_dev(priv); From e963408e8ff439e2b9da20e5399d7dca21462fcc Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 2 Oct 2019 17:45:11 +0100 Subject: [PATCH 143/572] drivers/amba: fix reset control error handling With commit 79bdcb202a35 ("ARM: 8906/1: drivers/amba: add reset control to amba bus probe") it is possible for the the amba bus driver to defer probing the device for its IDs because the reset driver may be probed later. However when a subsequent probe occurs, the call to request_resource() in the driver returns -EBUSY as the driver has not released the resource from the initial probe attempt - or cleaned up any of the preceding actions. Fix this both for the deferred probe case as well as a failure to get the reset. Fixes: 79bdcb202a35 ("ARM: 8906/1: drivers/amba: add reset control to amba bus probe") Reported-by: Dinh Nguyen Tested-by: Dinh Nguyen Signed-off-by: Russell King --- drivers/amba/bus.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c index f39f075abff9..fe1523664816 100644 --- a/drivers/amba/bus.c +++ b/drivers/amba/bus.c @@ -409,9 +409,11 @@ static int amba_device_try_add(struct amba_device *dev, struct resource *parent) */ rstc = of_reset_control_array_get_optional_shared(dev->dev.of_node); if (IS_ERR(rstc)) { - if (PTR_ERR(rstc) != -EPROBE_DEFER) - dev_err(&dev->dev, "Can't get amba reset!\n"); - return PTR_ERR(rstc); + ret = PTR_ERR(rstc); + if (ret != -EPROBE_DEFER) + dev_err(&dev->dev, "can't get reset: %d\n", + ret); + goto err_reset; } reset_control_deassert(rstc); reset_control_put(rstc); @@ -472,6 +474,12 @@ static int amba_device_try_add(struct amba_device *dev, struct resource *parent) release_resource(&dev->res); err_out: return ret; + + err_reset: + amba_put_disable_pclk(dev); + iounmap(tmp); + dev_pm_domain_detach(&dev->dev, true); + goto err_release; } /* From 087a2b7ec973f6f30f6e7b72cb50b6f7734ffdd2 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 9 Oct 2019 15:11:27 -0700 Subject: [PATCH 144/572] ARM: dts: Use level interrupt for omap4 & 5 wlcore Commit 572cf7d7b07d ("ARM: dts: Improve omap l4per idling with wlcore edge sensitive interrupt") changed wlcore interrupts to use edge interrupt based on what's specified in the wl1835mod.pdf data sheet. However, there are still cases where we can have lost interrupts as described in omap_gpio_unidle(). And using a level interrupt instead of edge interrupt helps as we avoid the check for untriggered GPIO interrupts in omap_gpio_unidle(). And with commit e6818d29ea15 ("gpio: gpio-omap: configure edge detection for level IRQs for idle wakeup") GPIOs idle just fine with level interrupts. Let's change omap4 and 5 wlcore users back to using level interrupt instead of edge interrupt. Let's not change the others as I've only seen this on omap4 and 5, probably because the other SoCs don't have l4per idle independent of the CPUs. Fixes: 572cf7d7b07d ("ARM: dts: Improve omap l4per idling with wlcore edge sensitive interrupt") Depends-on: e6818d29ea15 ("gpio: gpio-omap: configure edge detection for level IRQs for idle wakeup") Cc: Anders Roxell Cc: Eyal Reizer Cc: Guy Mishol Cc: John Stultz Cc: Ulf Hansson Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap4-droid4-xt894.dts | 2 +- arch/arm/boot/dts/omap4-panda-common.dtsi | 2 +- arch/arm/boot/dts/omap4-sdp.dts | 2 +- arch/arm/boot/dts/omap4-var-som-om44-wlan.dtsi | 2 +- arch/arm/boot/dts/omap5-board-common.dtsi | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm/boot/dts/omap4-droid4-xt894.dts b/arch/arm/boot/dts/omap4-droid4-xt894.dts index 4454449de00c..a40fe8d49da6 100644 --- a/arch/arm/boot/dts/omap4-droid4-xt894.dts +++ b/arch/arm/boot/dts/omap4-droid4-xt894.dts @@ -369,7 +369,7 @@ compatible = "ti,wl1285", "ti,wl1283"; reg = <2>; /* gpio_100 with gpmc_wait2 pad as wakeirq */ - interrupts-extended = <&gpio4 4 IRQ_TYPE_EDGE_RISING>, + interrupts-extended = <&gpio4 4 IRQ_TYPE_LEVEL_HIGH>, <&omap4_pmx_core 0x4e>; interrupt-names = "irq", "wakeup"; ref-clock-frequency = <26000000>; diff --git a/arch/arm/boot/dts/omap4-panda-common.dtsi b/arch/arm/boot/dts/omap4-panda-common.dtsi index 14be2ecb62b1..55ea8b6189af 100644 --- a/arch/arm/boot/dts/omap4-panda-common.dtsi +++ b/arch/arm/boot/dts/omap4-panda-common.dtsi @@ -474,7 +474,7 @@ compatible = "ti,wl1271"; reg = <2>; /* gpio_53 with gpmc_ncs3 pad as wakeup */ - interrupts-extended = <&gpio2 21 IRQ_TYPE_EDGE_RISING>, + interrupts-extended = <&gpio2 21 IRQ_TYPE_LEVEL_HIGH>, <&omap4_pmx_core 0x3a>; interrupt-names = "irq", "wakeup"; ref-clock-frequency = <38400000>; diff --git a/arch/arm/boot/dts/omap4-sdp.dts b/arch/arm/boot/dts/omap4-sdp.dts index 3c274965ff40..91480ac1f328 100644 --- a/arch/arm/boot/dts/omap4-sdp.dts +++ b/arch/arm/boot/dts/omap4-sdp.dts @@ -512,7 +512,7 @@ compatible = "ti,wl1281"; reg = <2>; interrupt-parent = <&gpio1>; - interrupts = <21 IRQ_TYPE_EDGE_RISING>; /* gpio 53 */ + interrupts = <21 IRQ_TYPE_LEVEL_HIGH>; /* gpio 53 */ ref-clock-frequency = <26000000>; tcxo-clock-frequency = <26000000>; }; diff --git a/arch/arm/boot/dts/omap4-var-som-om44-wlan.dtsi b/arch/arm/boot/dts/omap4-var-som-om44-wlan.dtsi index 6dbbc9b3229c..d0032213101e 100644 --- a/arch/arm/boot/dts/omap4-var-som-om44-wlan.dtsi +++ b/arch/arm/boot/dts/omap4-var-som-om44-wlan.dtsi @@ -69,7 +69,7 @@ compatible = "ti,wl1271"; reg = <2>; interrupt-parent = <&gpio2>; - interrupts = <9 IRQ_TYPE_EDGE_RISING>; /* gpio 41 */ + interrupts = <9 IRQ_TYPE_LEVEL_HIGH>; /* gpio 41 */ ref-clock-frequency = <38400000>; }; }; diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi index 7fff555ee394..68ac04641bdb 100644 --- a/arch/arm/boot/dts/omap5-board-common.dtsi +++ b/arch/arm/boot/dts/omap5-board-common.dtsi @@ -362,7 +362,7 @@ pinctrl-names = "default"; pinctrl-0 = <&wlcore_irq_pin>; interrupt-parent = <&gpio1>; - interrupts = <14 IRQ_TYPE_EDGE_RISING>; /* gpio 14 */ + interrupts = <14 IRQ_TYPE_LEVEL_HIGH>; /* gpio 14 */ ref-clock-frequency = <26000000>; }; }; From 67e15fa5b487adb9b78a92789eeff2d6ec8f5cee Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 31 Aug 2019 17:01:58 +0100 Subject: [PATCH 145/572] ARM: mm: fix alignment handler faults under memory pressure When the system has high memory pressure, the page containing the instruction may be paged out. Using probe_kernel_address() means that if the page is swapped out, the resulting page fault will not be handled because page faults are disabled by this function. Use get_user() to read the instruction instead. Reported-by: Jing Xiangfeng Fixes: b255188f90e2 ("ARM: fix scheduling while atomic warning in alignment handling code") Signed-off-by: Russell King --- arch/arm/mm/alignment.c | 44 +++++++++++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 04b36436cbc0..6587432faf05 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -767,6 +767,36 @@ do_alignment_t32_to_handler(unsigned long *pinstr, struct pt_regs *regs, return NULL; } +static int alignment_get_arm(struct pt_regs *regs, u32 *ip, unsigned long *inst) +{ + u32 instr = 0; + int fault; + + if (user_mode(regs)) + fault = get_user(instr, ip); + else + fault = probe_kernel_address(ip, instr); + + *inst = __mem_to_opcode_arm(instr); + + return fault; +} + +static int alignment_get_thumb(struct pt_regs *regs, u16 *ip, u16 *inst) +{ + u16 instr = 0; + int fault; + + if (user_mode(regs)) + fault = get_user(instr, ip); + else + fault = probe_kernel_address(ip, instr); + + *inst = __mem_to_opcode_thumb16(instr); + + return fault; +} + static int do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { @@ -774,10 +804,10 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) unsigned long instr = 0, instrptr; int (*handler)(unsigned long addr, unsigned long instr, struct pt_regs *regs); unsigned int type; - unsigned int fault; u16 tinstr = 0; int isize = 4; int thumb2_32b = 0; + int fault; if (interrupts_enabled(regs)) local_irq_enable(); @@ -786,15 +816,14 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (thumb_mode(regs)) { u16 *ptr = (u16 *)(instrptr & ~1); - fault = probe_kernel_address(ptr, tinstr); - tinstr = __mem_to_opcode_thumb16(tinstr); + + fault = alignment_get_thumb(regs, ptr, &tinstr); if (!fault) { if (cpu_architecture() >= CPU_ARCH_ARMv7 && IS_T32(tinstr)) { /* Thumb-2 32-bit */ - u16 tinst2 = 0; - fault = probe_kernel_address(ptr + 1, tinst2); - tinst2 = __mem_to_opcode_thumb16(tinst2); + u16 tinst2; + fault = alignment_get_thumb(regs, ptr + 1, &tinst2); instr = __opcode_thumb32_compose(tinstr, tinst2); thumb2_32b = 1; } else { @@ -803,8 +832,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) } } } else { - fault = probe_kernel_address((void *)instrptr, instr); - instr = __mem_to_opcode_arm(instr); + fault = alignment_get_arm(regs, (void *)instrptr, &instr); } if (fault) { From 1bb9fb0a147f9db11a34b2469ba5a52fc70b0349 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 6 Sep 2019 16:07:54 +0100 Subject: [PATCH 146/572] ARM: mm: alignment: use "u32" for 32-bit instructions Rather than using "unsigned long", use "u32" for 32-bit instructions in the alignment fault handler. Signed-off-by: Russell King --- arch/arm/mm/alignment.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 6587432faf05..788c5cf46de5 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -324,7 +324,7 @@ union offset_union { __put32_unaligned_check("strbt", val, addr) static void -do_alignment_finish_ldst(unsigned long addr, unsigned long instr, struct pt_regs *regs, union offset_union offset) +do_alignment_finish_ldst(unsigned long addr, u32 instr, struct pt_regs *regs, union offset_union offset) { if (!LDST_U_BIT(instr)) offset.un = -offset.un; @@ -337,7 +337,7 @@ do_alignment_finish_ldst(unsigned long addr, unsigned long instr, struct pt_regs } static int -do_alignment_ldrhstrh(unsigned long addr, unsigned long instr, struct pt_regs *regs) +do_alignment_ldrhstrh(unsigned long addr, u32 instr, struct pt_regs *regs) { unsigned int rd = RD_BITS(instr); @@ -386,8 +386,7 @@ do_alignment_ldrhstrh(unsigned long addr, unsigned long instr, struct pt_regs *r } static int -do_alignment_ldrdstrd(unsigned long addr, unsigned long instr, - struct pt_regs *regs) +do_alignment_ldrdstrd(unsigned long addr, u32 instr, struct pt_regs *regs) { unsigned int rd = RD_BITS(instr); unsigned int rd2; @@ -449,7 +448,7 @@ do_alignment_ldrdstrd(unsigned long addr, unsigned long instr, } static int -do_alignment_ldrstr(unsigned long addr, unsigned long instr, struct pt_regs *regs) +do_alignment_ldrstr(unsigned long addr, u32 instr, struct pt_regs *regs) { unsigned int rd = RD_BITS(instr); @@ -498,7 +497,7 @@ do_alignment_ldrstr(unsigned long addr, unsigned long instr, struct pt_regs *reg * PU = 10 A B */ static int -do_alignment_ldmstm(unsigned long addr, unsigned long instr, struct pt_regs *regs) +do_alignment_ldmstm(unsigned long addr, u32 instr, struct pt_regs *regs) { unsigned int rd, rn, correction, nr_regs, regbits; unsigned long eaddr, newaddr; @@ -539,7 +538,7 @@ do_alignment_ldmstm(unsigned long addr, unsigned long instr, struct pt_regs *reg * processor for us. */ if (addr != eaddr) { - pr_err("LDMSTM: PC = %08lx, instr = %08lx, " + pr_err("LDMSTM: PC = %08lx, instr = %08x, " "addr = %08lx, eaddr = %08lx\n", instruction_pointer(regs), instr, addr, eaddr); show_regs(regs); @@ -716,10 +715,10 @@ thumb2arm(u16 tinstr) * 2. Register name Rt from ARMv7 is same as Rd from ARMv6 (Rd is Rt) */ static void * -do_alignment_t32_to_handler(unsigned long *pinstr, struct pt_regs *regs, +do_alignment_t32_to_handler(u32 *pinstr, struct pt_regs *regs, union offset_union *poffset) { - unsigned long instr = *pinstr; + u32 instr = *pinstr; u16 tinst1 = (instr >> 16) & 0xffff; u16 tinst2 = instr & 0xffff; @@ -767,7 +766,7 @@ do_alignment_t32_to_handler(unsigned long *pinstr, struct pt_regs *regs, return NULL; } -static int alignment_get_arm(struct pt_regs *regs, u32 *ip, unsigned long *inst) +static int alignment_get_arm(struct pt_regs *regs, u32 *ip, u32 *inst) { u32 instr = 0; int fault; @@ -801,9 +800,10 @@ static int do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { union offset_union uninitialized_var(offset); - unsigned long instr = 0, instrptr; - int (*handler)(unsigned long addr, unsigned long instr, struct pt_regs *regs); + unsigned long instrptr; + int (*handler)(unsigned long addr, u32 instr, struct pt_regs *regs); unsigned int type; + u32 instr = 0; u16 tinstr = 0; int isize = 4; int thumb2_32b = 0; @@ -954,7 +954,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) * Oops, we didn't handle the instruction. */ pr_err("Alignment trap: not handling instruction " - "%0*lx at [<%08lx>]\n", + "%0*x at [<%08lx>]\n", isize << 1, isize == 2 ? tinstr : instr, instrptr); ai_skipped += 1; @@ -964,7 +964,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) ai_user += 1; if (ai_usermode & UM_WARN) - printk("Alignment trap: %s (%d) PC=0x%08lx Instr=0x%0*lx " + printk("Alignment trap: %s (%d) PC=0x%08lx Instr=0x%0*x " "Address=0x%08lx FSR 0x%03x\n", current->comm, task_pid_nr(current), instrptr, isize << 1, From 5da202c88f8c355ad79bc2e8eb582e6d433060e7 Mon Sep 17 00:00:00 2001 From: Antonio Borneo Date: Mon, 7 Oct 2019 17:43:04 +0200 Subject: [PATCH 147/572] net: stmmac: fix length of PTP clock's name string The field "name" in struct ptp_clock_info has a fixed size of 16 chars and is used as zero terminated string by clock_name_show() in drivers/ptp/ptp_sysfs.c The current initialization value requires 17 chars to fit also the null termination, and this causes overflow to the next bytes in the struct when the string is read as null terminated: hexdump -C /sys/class/ptp/ptp0/clock_name 00000000 73 74 6d 6d 61 63 5f 70 74 70 5f 63 6c 6f 63 6b |stmmac_ptp_clock| 00000010 a0 ac b9 03 0a |.....| where the extra 4 bytes (excluding the newline) after the string represent the integer 0x03b9aca0 = 62500000 assigned to the field "max_adj" that follows "name" in the same struct. There is no strict requirement for the "name" content and in the comment in ptp_clock_kernel.h it's reported it should just be 'A short "friendly name" to identify the clock'. Replace it with "stmmac ptp". Signed-off-by: Antonio Borneo Fixes: 92ba6888510c ("stmmac: add the support for PTP hw clock driver") Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index 173493db038c..df638b18b72c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -164,7 +164,7 @@ static int stmmac_enable(struct ptp_clock_info *ptp, /* structure describing a PTP hardware clock */ static struct ptp_clock_info stmmac_ptp_clock_ops = { .owner = THIS_MODULE, - .name = "stmmac_ptp_clock", + .name = "stmmac ptp", .max_adj = 62500000, .n_alarm = 0, .n_ext_ts = 0, From 520cf6002147281d1e7b522bb338416b623dcb93 Mon Sep 17 00:00:00 2001 From: Antonio Borneo Date: Mon, 7 Oct 2019 17:43:05 +0200 Subject: [PATCH 148/572] net: stmmac: fix disabling flexible PPS output Accordingly to Synopsys documentation [1] and [2], when bit PPSEN0 in register MAC_PPS_CONTROL is set it selects the functionality command in the same register, otherwise selects the functionality control. Command functionality is required to either enable (command 0x2) and disable (command 0x5) the flexible PPS output, but the bit PPSEN0 is currently set only for enabling. Set the bit PPSEN0 to properly disable flexible PPS output. Tested on STM32MP15x, based on dwmac 4.10a. [1] DWC Ethernet QoS Databook 4.10a October 2014 [2] DWC Ethernet QoS Databook 5.00a September 2017 Signed-off-by: Antonio Borneo Fixes: 9a8a02c9d46d ("net: stmmac: Add Flexible PPS support") Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac5.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c index 3f4f3132e16b..e436fa160c7d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c @@ -515,6 +515,7 @@ int dwmac5_flex_pps_config(void __iomem *ioaddr, int index, if (!enable) { val |= PPSCMDx(index, 0x5); + val |= PPSEN0; writel(val, ioaddr + MAC_PPS_CONTROL); return 0; } From a7137534b597b7c303203e6bc3ed87e87a273bb8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Oct 2019 15:43:01 -0700 Subject: [PATCH 149/572] bonding: fix potential NULL deref in bond_update_slave_arr syzbot got a NULL dereference in bond_update_slave_arr() [1], happening after a failure to allocate bond->slave_arr A workqueue (bond_slave_arr_handler) is supposed to retry the allocation later, but if the slave is removed before the workqueue had a chance to complete, bond->slave_arr can still be NULL. [1] Failed to build slave-array. kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN PTI Modules linked in: Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:bond_update_slave_arr.cold+0xc6/0x198 drivers/net/bonding/bond_main.c:4039 RSP: 0018:ffff88018fe33678 EFLAGS: 00010246 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: ffffc9000290b000 RDX: 0000000000000000 RSI: ffffffff82b63037 RDI: ffff88019745ea20 RBP: ffff88018fe33760 R08: ffff880170754280 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: ffff88019745ea00 R14: 0000000000000000 R15: ffff88018fe338b0 FS: 00007febd837d700(0000) GS:ffff8801dad00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000004540a0 CR3: 00000001c242e005 CR4: 00000000001626f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: [] __bond_release_one+0x43e/0x500 drivers/net/bonding/bond_main.c:1923 [] bond_release drivers/net/bonding/bond_main.c:2039 [inline] [] bond_do_ioctl+0x416/0x870 drivers/net/bonding/bond_main.c:3562 [] dev_ifsioc+0x6f4/0x940 net/core/dev_ioctl.c:328 [] dev_ioctl+0x1b8/0xc70 net/core/dev_ioctl.c:495 [] sock_do_ioctl+0x1bd/0x300 net/socket.c:1088 [] sock_ioctl+0x300/0x5d0 net/socket.c:1196 [] vfs_ioctl fs/ioctl.c:47 [inline] [] file_ioctl fs/ioctl.c:501 [inline] [] do_vfs_ioctl+0xacb/0x1300 fs/ioctl.c:688 [] SYSC_ioctl fs/ioctl.c:705 [inline] [] SyS_ioctl+0xb6/0xe0 fs/ioctl.c:696 [] do_syscall_64+0x528/0x770 arch/x86/entry/common.c:305 [] entry_SYSCALL_64_after_hwframe+0x42/0xb7 Fixes: ee6377147409 ("bonding: Simplify the xmit function for modes that use xmit_hash") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Mahesh Bandewar Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 931d9d935686..21d8fcc83c9c 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4039,7 +4039,7 @@ out: * this to-be-skipped slave to send a packet out. */ old_arr = rtnl_dereference(bond->slave_arr); - for (idx = 0; idx < old_arr->count; idx++) { + for (idx = 0; old_arr != NULL && idx < old_arr->count; idx++) { if (skipslave == old_arr->arr[idx]) { old_arr->arr[idx] = old_arr->arr[old_arr->count-1]; From 819be8108fded0b9e710bbbf81193e52f7bab2f7 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 8 Oct 2019 19:09:23 +0800 Subject: [PATCH 150/572] sctp: add chunks to sk_backlog when the newsk sk_socket is not set This patch is to fix a NULL-ptr deref in selinux_socket_connect_helper: [...] kasan: GPF could be caused by NULL-ptr deref or user memory access [...] RIP: 0010:selinux_socket_connect_helper+0x94/0x460 [...] Call Trace: [...] selinux_sctp_bind_connect+0x16a/0x1d0 [...] security_sctp_bind_connect+0x58/0x90 [...] sctp_process_asconf+0xa52/0xfd0 [sctp] [...] sctp_sf_do_asconf+0x785/0x980 [sctp] [...] sctp_do_sm+0x175/0x5a0 [sctp] [...] sctp_assoc_bh_rcv+0x285/0x5b0 [sctp] [...] sctp_backlog_rcv+0x482/0x910 [sctp] [...] __release_sock+0x11e/0x310 [...] release_sock+0x4f/0x180 [...] sctp_accept+0x3f9/0x5a0 [sctp] [...] inet_accept+0xe7/0x720 It was caused by that the 'newsk' sk_socket was not set before going to security sctp hook when processing asconf chunk with SCTP_PARAM_ADD_IP or SCTP_PARAM_SET_PRIMARY: inet_accept()-> sctp_accept(): lock_sock(): lock listening 'sk' do_softirq(): sctp_rcv(): <-- [1] asconf chunk arrives and enqueued in 'sk' backlog sctp_sock_migrate(): set asoc's sk to 'newsk' release_sock(): sctp_backlog_rcv(): lock 'newsk' sctp_process_asconf() <-- [2] unlock 'newsk' sock_graft(): set sk_socket <-- [3] As it shows, at [1] the asconf chunk would be put into the listening 'sk' backlog, as accept() was holding its sock lock. Then at [2] asconf would get processed with 'newsk' as asoc's sk had been set to 'newsk'. However, 'newsk' sk_socket is not set until [3], while selinux_sctp_bind_connect() would deref it, then kernel crashed. Here to fix it by adding the chunk to sk_backlog until newsk sk_socket is set when .accept() is done. Note that sk->sk_socket can be NULL when the sock is closed, so SOCK_DEAD flag is also needed to check in sctp_newsk_ready(). Thanks to Ondrej for reviewing the code. Fixes: d452930fd3b9 ("selinux: Add SCTP support") Reported-by: Ying Xu Suggested-by: Marcelo Ricardo Leitner Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: Jakub Kicinski --- include/net/sctp/sctp.h | 5 +++++ net/sctp/input.c | 12 +++++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 5d60f13d2347..3ab5c6bbb90b 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -610,4 +610,9 @@ static inline __u32 sctp_min_frag_point(struct sctp_sock *sp, __u16 datasize) return sctp_mtu_payload(sp, SCTP_DEFAULT_MINSEGMENT, datasize); } +static inline bool sctp_newsk_ready(const struct sock *sk) +{ + return sock_flag(sk, SOCK_DEAD) || sk->sk_socket; +} + #endif /* __net_sctp_h__ */ diff --git a/net/sctp/input.c b/net/sctp/input.c index 5a070fb5b278..f2771375bfc0 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -243,7 +243,7 @@ int sctp_rcv(struct sk_buff *skb) bh_lock_sock(sk); } - if (sock_owned_by_user(sk)) { + if (sock_owned_by_user(sk) || !sctp_newsk_ready(sk)) { if (sctp_add_backlog(sk, skb)) { bh_unlock_sock(sk); sctp_chunk_free(chunk); @@ -321,7 +321,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) local_bh_disable(); bh_lock_sock(sk); - if (sock_owned_by_user(sk)) { + if (sock_owned_by_user(sk) || !sctp_newsk_ready(sk)) { if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) sctp_chunk_free(chunk); else @@ -336,7 +336,13 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) if (backloged) return 0; } else { - sctp_inq_push(inqueue, chunk); + if (!sctp_newsk_ready(sk)) { + if (!sk_add_backlog(sk, skb, sk->sk_rcvbuf)) + return 0; + sctp_chunk_free(chunk); + } else { + sctp_inq_push(inqueue, chunk); + } } done: From 9db74e51ec08d75c3e97f851299bc25e645cc5ad Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 8 Oct 2019 08:39:04 -0700 Subject: [PATCH 151/572] phylink: fix kernel-doc warnings Fix kernel-doc warnings in phylink.c: ../drivers/net/phy/phylink.c:595: warning: Function parameter or member 'config' not described in 'phylink_create' ../drivers/net/phy/phylink.c:595: warning: Excess function parameter 'ndev' description in 'phylink_create' Fixes: 8796c8923d9c ("phylink: add documentation for kernel APIs") Signed-off-by: Randy Dunlap Cc: Russell King Signed-off-by: Jakub Kicinski --- drivers/net/phy/phylink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index a5a57ca94c1a..20e2ebe458f2 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -576,7 +576,7 @@ static int phylink_register_sfp(struct phylink *pl, /** * phylink_create() - create a phylink instance - * @ndev: a pointer to the &struct net_device + * @config: a pointer to the target &struct phylink_config * @fwnode: a pointer to a &struct fwnode_handle describing the network * interface * @iface: the desired link mode defined by &typedef phy_interface_t From b528965bcc827dad32a8d21745feaacfc76c9703 Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Tue, 8 Oct 2019 18:21:06 +0200 Subject: [PATCH 152/572] s390/qeth: Fix error handling during VNICC initialization Smatch discovered the use of uninitialized variable sup_cmds in error paths. Fixes: caa1f0b10d18 ("s390/qeth: add VNICC enable/disable support") Signed-off-by: Alexandra Winter Signed-off-by: Julian Wiedmann Signed-off-by: Jakub Kicinski --- drivers/s390/net/qeth_l2_main.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index b8799cd3e7aa..de62a9ccc882 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -2021,10 +2021,10 @@ static bool qeth_l2_vnicc_recover_char(struct qeth_card *card, u32 vnicc, static void qeth_l2_vnicc_init(struct qeth_card *card) { u32 *timeout = &card->options.vnicc.learning_timeout; + bool enable, error = false; unsigned int chars_len, i; unsigned long chars_tmp; u32 sup_cmds, vnicc; - bool enable, error; QETH_CARD_TEXT(card, 2, "vniccini"); /* reset rx_bcast */ @@ -2045,7 +2045,10 @@ static void qeth_l2_vnicc_init(struct qeth_card *card) chars_len = sizeof(card->options.vnicc.sup_chars) * BITS_PER_BYTE; for_each_set_bit(i, &chars_tmp, chars_len) { vnicc = BIT(i); - qeth_l2_vnicc_query_cmds(card, vnicc, &sup_cmds); + if (qeth_l2_vnicc_query_cmds(card, vnicc, &sup_cmds)) { + sup_cmds = 0; + error = true; + } if (!(sup_cmds & IPA_VNICC_SET_TIMEOUT) || !(sup_cmds & IPA_VNICC_GET_TIMEOUT)) card->options.vnicc.getset_timeout_sup &= ~vnicc; @@ -2054,8 +2057,8 @@ static void qeth_l2_vnicc_init(struct qeth_card *card) card->options.vnicc.set_char_sup &= ~vnicc; } /* enforce assumed default values and recover settings, if changed */ - error = qeth_l2_vnicc_recover_timeout(card, QETH_VNICC_LEARNING, - timeout); + error |= qeth_l2_vnicc_recover_timeout(card, QETH_VNICC_LEARNING, + timeout); chars_tmp = card->options.vnicc.wanted_chars ^ QETH_VNICC_DEFAULT; chars_tmp |= QETH_VNICC_BRIDGE_INVISIBLE; chars_len = sizeof(card->options.vnicc.wanted_chars) * BITS_PER_BYTE; From be40a86c319706f90caca144343c64743c32b953 Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Tue, 8 Oct 2019 18:21:07 +0200 Subject: [PATCH 153/572] s390/qeth: Fix initialization of vnicc cmd masks during set online Without this patch, a command bit in the supported commands mask is only ever set to unsupported during set online. If a command is ever marked as unsupported (e.g. because of error during qeth_l2_vnicc_query_cmds), subsequent successful initialization (offline/online) would not bring it back. Fixes: caa1f0b10d18 ("s390/qeth: add VNICC enable/disable support") Signed-off-by: Alexandra Winter Signed-off-by: Julian Wiedmann Signed-off-by: Jakub Kicinski --- drivers/s390/net/qeth_l2_main.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index de62a9ccc882..bd8143e51747 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -2049,11 +2049,15 @@ static void qeth_l2_vnicc_init(struct qeth_card *card) sup_cmds = 0; error = true; } - if (!(sup_cmds & IPA_VNICC_SET_TIMEOUT) || - !(sup_cmds & IPA_VNICC_GET_TIMEOUT)) + if ((sup_cmds & IPA_VNICC_SET_TIMEOUT) && + (sup_cmds & IPA_VNICC_GET_TIMEOUT)) + card->options.vnicc.getset_timeout_sup |= vnicc; + else card->options.vnicc.getset_timeout_sup &= ~vnicc; - if (!(sup_cmds & IPA_VNICC_ENABLE) || - !(sup_cmds & IPA_VNICC_DISABLE)) + if ((sup_cmds & IPA_VNICC_ENABLE) && + (sup_cmds & IPA_VNICC_DISABLE)) + card->options.vnicc.set_char_sup |= vnicc; + else card->options.vnicc.set_char_sup &= ~vnicc; } /* enforce assumed default values and recover settings, if changed */ From a954380acde6adf584de76c41e1d520097820dd5 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Tue, 8 Oct 2019 16:20:07 -0700 Subject: [PATCH 154/572] net: taprio: Fix returning EINVAL when configuring without flags When configuring a taprio instance if "flags" is not specified (or it's zero), taprio currently replies with an "Invalid argument" error. So, set the return value to zero after we are done with all the checks. Fixes: 9c66d1564676 ("taprio: Add support for hardware offloading") Signed-off-by: Vinicius Costa Gomes Acked-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- net/sched/sch_taprio.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 68b543f85a96..6719a65169d4 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1341,6 +1341,10 @@ static int taprio_parse_clockid(struct Qdisc *sch, struct nlattr **tb, NL_SET_ERR_MSG(extack, "Specifying a 'clockid' is mandatory"); goto out; } + + /* Everything went ok, return success. */ + err = 0; + out: return err; } From 11c9a7d38af524217efb7a176ad322b97ac2f163 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 9 Oct 2019 11:10:52 +0800 Subject: [PATCH 155/572] act_mirred: Fix mirred_init_module error handling If tcf_register_action failed, mirred_device_notifier should be unregistered. Fixes: 3b87956ea645 ("net sched: fix race in mirred device removal") Signed-off-by: YueHaibing Signed-off-by: Jakub Kicinski --- net/sched/act_mirred.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 9ce073a05414..08923b21e566 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -484,7 +484,11 @@ static int __init mirred_init_module(void) return err; pr_info("Mirror/redirect action on\n"); - return tcf_register_action(&act_mirred_ops, &mirred_net_ops); + err = tcf_register_action(&act_mirred_ops, &mirred_net_ops); + if (err) + unregister_netdevice_notifier(&mirred_device_notifier); + + return err; } static void __exit mirred_cleanup_module(void) From e0ae2c578d3909e60e9448207f5d83f785f1129f Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Wed, 9 Oct 2019 11:07:18 +0200 Subject: [PATCH 156/572] net: usb: qmi_wwan: add Telit 0x1050 composition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds support for Telit FN980 0x1050 composition 0x1050: tty, adb, rmnet, tty, tty, tty, tty Signed-off-by: Daniele Palmas Acked-by: Bjørn Mork Signed-off-by: Jakub Kicinski --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 3d77cd402ba9..596428ec71df 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1327,6 +1327,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */ {QMI_FIXED_INTF(0x2357, 0x9000, 4)}, /* TP-LINK MA260 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1040, 2)}, /* Telit LE922A */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x1050, 2)}, /* Telit FN980 */ {QMI_FIXED_INTF(0x1bc7, 0x1100, 3)}, /* Telit ME910 */ {QMI_FIXED_INTF(0x1bc7, 0x1101, 3)}, /* Telit ME910 dual modem */ {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */ From 35a79a63517981a8aea395497c548776347deda8 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Wed, 18 Sep 2019 22:06:58 +0530 Subject: [PATCH 157/572] scsi: qla2xxx: fix a potential NULL pointer dereference alloc_workqueue is not checked for errors and as a result a potential NULL dereference could occur. Link: https://lore.kernel.org/r/1568824618-4366-1-git-send-email-allen.pais@oracle.com Signed-off-by: Allen Pais Reviewed-by: Martin Wilck Acked-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_os.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index ee47de9fbc05..e4d765fc03ea 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -3224,6 +3224,10 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) req->req_q_in, req->req_q_out, rsp->rsp_q_in, rsp->rsp_q_out); ha->wq = alloc_workqueue("qla2xxx_wq", 0, 0); + if (unlikely(!ha->wq)) { + ret = -ENOMEM; + goto probe_failed; + } if (ha->isp_ops->initialize_adapter(base_vha)) { ql_log(ql_log_fatal, base_vha, 0x00d6, From b6ce6fb121a655aefe41dccc077141c102145a37 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 7 Oct 2019 15:57:01 +0200 Subject: [PATCH 158/572] scsi: scsi_dh_alua: handle RTPG sense code correctly during state transitions Some arrays are not capable of returning RTPG data during state transitioning, but rather return an 'LUN not accessible, asymmetric access state transition' sense code. In these cases we can set the state to 'transitioning' directly and don't need to evaluate the RTPG data (which we won't have anyway). Link: https://lore.kernel.org/r/20191007135701.32389-1-hare@suse.de Reviewed-by: Laurence Oberman Reviewed-by: Ewan D. Milne Reviewed-by: Bart Van Assche Signed-off-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/device_handler/scsi_dh_alua.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index f0066f8a1786..c6437a442ffc 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -511,6 +511,7 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg) unsigned int tpg_desc_tbl_off; unsigned char orig_transition_tmo; unsigned long flags; + bool transitioning_sense = false; if (!pg->expiry) { unsigned long transition_tmo = ALUA_FAILOVER_TIMEOUT * HZ; @@ -571,13 +572,19 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg) goto retry; } /* - * Retry on ALUA state transition or if any - * UNIT ATTENTION occurred. + * If the array returns with 'ALUA state transition' + * sense code here it cannot return RTPG data during + * transition. So set the state to 'transitioning' directly. */ if (sense_hdr.sense_key == NOT_READY && - sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a) - err = SCSI_DH_RETRY; - else if (sense_hdr.sense_key == UNIT_ATTENTION) + sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a) { + transitioning_sense = true; + goto skip_rtpg; + } + /* + * Retry on any other UNIT ATTENTION occurred. + */ + if (sense_hdr.sense_key == UNIT_ATTENTION) err = SCSI_DH_RETRY; if (err == SCSI_DH_RETRY && pg->expiry != 0 && time_before(jiffies, pg->expiry)) { @@ -665,7 +672,11 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg) off = 8 + (desc[7] * 4); } + skip_rtpg: spin_lock_irqsave(&pg->lock, flags); + if (transitioning_sense) + pg->state = SCSI_ACCESS_STATE_TRANSITIONING; + sdev_printk(KERN_INFO, sdev, "%s: port group %02x state %c %s supports %c%c%c%c%c%c%c\n", ALUA_DH_NAME, pg->group_id, print_alua_state(pg->state), From 0ee6211408a8e939428f662833c7301394125b80 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Wed, 9 Oct 2019 17:11:18 +0200 Subject: [PATCH 159/572] scsi: sni_53c710: fix compilation error Drop out memory dev_printk() with wrong device pointer argument. [mkp: typo] Link: https://lore.kernel.org/r/20191009151118.32350-1-tbogendoerfer@suse.de Signed-off-by: Thomas Bogendoerfer Signed-off-by: Martin K. Petersen --- drivers/scsi/sni_53c710.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/scsi/sni_53c710.c b/drivers/scsi/sni_53c710.c index aef4881d8e21..a85d52b5dc32 100644 --- a/drivers/scsi/sni_53c710.c +++ b/drivers/scsi/sni_53c710.c @@ -66,10 +66,8 @@ static int snirm710_probe(struct platform_device *dev) base = res->start; hostdata = kzalloc(sizeof(*hostdata), GFP_KERNEL); - if (!hostdata) { - dev_printk(KERN_ERR, dev, "Failed to allocate host data\n"); + if (!hostdata) return -ENOMEM; - } hostdata->dev = &dev->dev; dma_set_mask(&dev->dev, DMA_BIT_MASK(32)); From 8cbf0c173aa096dda526d1ccd66fc751c31da346 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Wed, 9 Oct 2019 17:11:28 +0200 Subject: [PATCH 160/572] scsi: fix kconfig dependency warning related to 53C700_LE_ON_BE When building a kernel with SCSI_SNI_53C710 enabled, Kconfig warns: WARNING: unmet direct dependencies detected for 53C700_LE_ON_BE Depends on [n]: SCSI_LOWLEVEL [=y] && SCSI [=y] && SCSI_LASI700 [=n] Selected by [y]: - SCSI_SNI_53C710 [=y] && SCSI_LOWLEVEL [=y] && SNI_RM [=y] && SCSI [=y] Add the missing depends SCSI_SNI_53C710 to 53C700_LE_ON_BE to fix it. Link: https://lore.kernel.org/r/20191009151128.32411-1-tbogendoerfer@suse.de Signed-off-by: Thomas Bogendoerfer Signed-off-by: Martin K. Petersen --- drivers/scsi/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 75f66f8ad3ea..d00e1ee09af3 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -898,7 +898,7 @@ config SCSI_SNI_53C710 config 53C700_LE_ON_BE bool - depends on SCSI_LASI700 + depends on SCSI_LASI700 || SCSI_SNI_53C710 default y config SCSI_STEX From 6a0990eaa768dfb7064f06777743acc6d392084b Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 9 Oct 2019 10:35:36 -0700 Subject: [PATCH 161/572] scsi: ch: Make it possible to open a ch device multiple times again Clearing ch->device in ch_release() is wrong because that pointer must remain valid until ch_remove() is called. This patch fixes the following crash the second time a ch device is opened: BUG: kernel NULL pointer dereference, address: 0000000000000790 RIP: 0010:scsi_device_get+0x5/0x60 Call Trace: ch_open+0x4c/0xa0 [ch] chrdev_open+0xa2/0x1c0 do_dentry_open+0x13a/0x380 path_openat+0x591/0x1470 do_filp_open+0x91/0x100 do_sys_open+0x184/0x220 do_syscall_64+0x5f/0x1a0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 085e56766f74 ("scsi: ch: add refcounting") Cc: Hannes Reinecke Cc: Link: https://lore.kernel.org/r/20191009173536.247889-1-bvanassche@acm.org Reported-by: Rob Turk Suggested-by: Rob Turk Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/ch.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/ch.c b/drivers/scsi/ch.c index 5f8153c37f77..76751d6c7f0d 100644 --- a/drivers/scsi/ch.c +++ b/drivers/scsi/ch.c @@ -579,7 +579,6 @@ ch_release(struct inode *inode, struct file *file) scsi_changer *ch = file->private_data; scsi_device_put(ch->device); - ch->device = NULL; file->private_data = NULL; kref_put(&ch->ref, ch_destroy); return 0; From 993e4c929a073595d22c85f59082f0c387e31c21 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 9 Oct 2019 11:19:10 +0200 Subject: [PATCH 162/572] netns: fix NLM_F_ECHO mechanism for RTM_NEWNSID The flag NLM_F_ECHO aims to reply to the user the message notified to all listeners. It was not the case with the command RTM_NEWNSID, let's fix this. Fixes: 0c7aecd4bde4 ("netns: add rtnl cmd to add and get peer netns ids") Reported-by: Guillaume Nault Signed-off-by: Nicolas Dichtel Acked-by: Guillaume Nault Tested-by: Guillaume Nault Signed-off-by: Jakub Kicinski --- net/core/net_namespace.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index a0e0d298c991..6d3e4821b02d 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -245,7 +245,8 @@ static int __peernet2id(struct net *net, struct net *peer) return __peernet2id_alloc(net, peer, &no); } -static void rtnl_net_notifyid(struct net *net, int cmd, int id); +static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid, + struct nlmsghdr *nlh); /* This function returns the id of a peer netns. If no id is assigned, one will * be allocated and returned. */ @@ -268,7 +269,7 @@ int peernet2id_alloc(struct net *net, struct net *peer) id = __peernet2id_alloc(net, peer, &alloc); spin_unlock_bh(&net->nsid_lock); if (alloc && id >= 0) - rtnl_net_notifyid(net, RTM_NEWNSID, id); + rtnl_net_notifyid(net, RTM_NEWNSID, id, 0, NULL); if (alive) put_net(peer); return id; @@ -532,7 +533,7 @@ static void unhash_nsid(struct net *net, struct net *last) idr_remove(&tmp->netns_ids, id); spin_unlock_bh(&tmp->nsid_lock); if (id >= 0) - rtnl_net_notifyid(tmp, RTM_DELNSID, id); + rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL); if (tmp == last) break; } @@ -764,7 +765,8 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh, err = alloc_netid(net, peer, nsid); spin_unlock_bh(&net->nsid_lock); if (err >= 0) { - rtnl_net_notifyid(net, RTM_NEWNSID, err); + rtnl_net_notifyid(net, RTM_NEWNSID, err, NETLINK_CB(skb).portid, + nlh); err = 0; } else if (err == -ENOSPC && nsid >= 0) { err = -EEXIST; @@ -1051,9 +1053,12 @@ end: return err < 0 ? err : skb->len; } -static void rtnl_net_notifyid(struct net *net, int cmd, int id) +static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid, + struct nlmsghdr *nlh) { struct net_fill_args fillargs = { + .portid = portid, + .seq = nlh ? nlh->nlmsg_seq : 0, .cmd = cmd, .nsid = id, }; @@ -1068,7 +1073,7 @@ static void rtnl_net_notifyid(struct net *net, int cmd, int id) if (err < 0) goto err_out; - rtnl_notify(msg, net, 0, RTNLGRP_NSID, NULL, 0); + rtnl_notify(msg, net, portid, RTNLGRP_NSID, nlh, 0); return; err_out: From e37542ba111f3974dc622ae0a21c1787318de500 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Oct 2019 09:19:13 -0700 Subject: [PATCH 163/572] netfilter: conntrack: avoid possible false sharing As hinted by KCSAN, we need at least one READ_ONCE() to prevent a compiler optimization. More details on : https://github.com/google/ktsan/wiki/READ_ONCE-and-WRITE_ONCE#it-may-improve-performance sysbot report : BUG: KCSAN: data-race in __nf_ct_refresh_acct / __nf_ct_refresh_acct read to 0xffff888123eb4f08 of 4 bytes by interrupt on cpu 0: __nf_ct_refresh_acct+0xd4/0x1b0 net/netfilter/nf_conntrack_core.c:1796 nf_ct_refresh_acct include/net/netfilter/nf_conntrack.h:201 [inline] nf_conntrack_tcp_packet+0xd40/0x3390 net/netfilter/nf_conntrack_proto_tcp.c:1161 nf_conntrack_handle_packet net/netfilter/nf_conntrack_core.c:1633 [inline] nf_conntrack_in+0x410/0xaa0 net/netfilter/nf_conntrack_core.c:1727 ipv4_conntrack_in+0x27/0x40 net/netfilter/nf_conntrack_proto.c:178 nf_hook_entry_hookfn include/linux/netfilter.h:135 [inline] nf_hook_slow+0x83/0x160 net/netfilter/core.c:512 nf_hook include/linux/netfilter.h:260 [inline] NF_HOOK include/linux/netfilter.h:303 [inline] ip_rcv+0x12f/0x1a0 net/ipv4/ip_input.c:523 __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5004 __netif_receive_skb+0x37/0xf0 net/core/dev.c:5118 netif_receive_skb_internal+0x59/0x190 net/core/dev.c:5208 napi_skb_finish net/core/dev.c:5671 [inline] napi_gro_receive+0x28f/0x330 net/core/dev.c:5704 receive_buf+0x284/0x30b0 drivers/net/virtio_net.c:1061 virtnet_receive drivers/net/virtio_net.c:1323 [inline] virtnet_poll+0x436/0x7d0 drivers/net/virtio_net.c:1428 napi_poll net/core/dev.c:6352 [inline] net_rx_action+0x3ae/0xa50 net/core/dev.c:6418 __do_softirq+0x115/0x33f kernel/softirq.c:292 write to 0xffff888123eb4f08 of 4 bytes by task 7191 on cpu 1: __nf_ct_refresh_acct+0xfb/0x1b0 net/netfilter/nf_conntrack_core.c:1797 nf_ct_refresh_acct include/net/netfilter/nf_conntrack.h:201 [inline] nf_conntrack_tcp_packet+0xd40/0x3390 net/netfilter/nf_conntrack_proto_tcp.c:1161 nf_conntrack_handle_packet net/netfilter/nf_conntrack_core.c:1633 [inline] nf_conntrack_in+0x410/0xaa0 net/netfilter/nf_conntrack_core.c:1727 ipv4_conntrack_local+0xbe/0x130 net/netfilter/nf_conntrack_proto.c:200 nf_hook_entry_hookfn include/linux/netfilter.h:135 [inline] nf_hook_slow+0x83/0x160 net/netfilter/core.c:512 nf_hook include/linux/netfilter.h:260 [inline] __ip_local_out+0x1f7/0x2b0 net/ipv4/ip_output.c:114 ip_local_out+0x31/0x90 net/ipv4/ip_output.c:123 __ip_queue_xmit+0x3a8/0xa40 net/ipv4/ip_output.c:532 ip_queue_xmit+0x45/0x60 include/net/ip.h:236 __tcp_transmit_skb+0xdeb/0x1cd0 net/ipv4/tcp_output.c:1158 __tcp_send_ack+0x246/0x300 net/ipv4/tcp_output.c:3685 tcp_send_ack+0x34/0x40 net/ipv4/tcp_output.c:3691 tcp_cleanup_rbuf+0x130/0x360 net/ipv4/tcp.c:1575 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 7191 Comm: syz-fuzzer Not tainted 5.3.0+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: cc16921351d8 ("netfilter: conntrack: avoid same-timeout update") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Jozsef Kadlecsik Cc: Florian Westphal Acked-by: Pablo Neira Ayuso Signed-off-by: Jakub Kicinski --- net/netfilter/nf_conntrack_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 0c63120b2db2..5cd610b547e0 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1792,8 +1792,8 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, if (nf_ct_is_confirmed(ct)) extra_jiffies += nfct_time_stamp; - if (ct->timeout != extra_jiffies) - ct->timeout = extra_jiffies; + if (READ_ONCE(ct->timeout) != extra_jiffies) + WRITE_ONCE(ct->timeout, extra_jiffies); acct: if (do_acct) nf_ct_acct_update(ct, ctinfo, skb->len); From 4ffdd22e49f47db543906d75453a0048a53071ab Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Oct 2019 09:20:02 -0700 Subject: [PATCH 164/572] tun: remove possible false sharing in tun_flow_update() As mentioned in https://github.com/google/ktsan/wiki/READ_ONCE-and-WRITE_ONCE#it-may-improve-performance a C compiler can legally transform if (e->queue_index != queue_index) e->queue_index = queue_index; to : e->queue_index = queue_index; Note that the code using jiffies has no issue, since jiffies has volatile attribute. if (e->updated != jiffies) e->updated = jiffies; Fixes: 83b1bc122cab ("tun: align write-heavy flow entry members to a cache line") Signed-off-by: Eric Dumazet Cc: Zhang Yu Cc: Wang Li Cc: Li RongQing Acked-by: Jason Wang Signed-off-by: Jakub Kicinski --- drivers/net/tun.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 812dc3a65efb..a8d3141582a5 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -526,8 +526,8 @@ static void tun_flow_update(struct tun_struct *tun, u32 rxhash, e = tun_flow_find(head, rxhash); if (likely(e)) { /* TODO: keep queueing to old queue until it's empty? */ - if (e->queue_index != queue_index) - e->queue_index = queue_index; + if (READ_ONCE(e->queue_index) != queue_index) + WRITE_ONCE(e->queue_index, queue_index); if (e->updated != jiffies) e->updated = jiffies; sock_rps_record_flow_hash(e->rps_rxhash); From 503978aca46124cd714703e180b9c8292ba50ba7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Oct 2019 12:55:53 -0700 Subject: [PATCH 165/572] net: avoid possible false sharing in sk_leave_memory_pressure() As mentioned in https://github.com/google/ktsan/wiki/READ_ONCE-and-WRITE_ONCE#it-may-improve-performance a C compiler can legally transform : if (memory_pressure && *memory_pressure) *memory_pressure = 0; to : if (memory_pressure) *memory_pressure = 0; Fixes: 0604475119de ("tcp: add TCPMemoryPressuresChrono counter") Fixes: 180d8cd942ce ("foundations of per-cgroup memory pressure controlling.") Fixes: 3ab224be6d69 ("[NET] CORE: Introducing new memory accounting interface.") Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- net/core/sock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index fac2b4d80de5..50647a10fdb7 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2334,8 +2334,8 @@ static void sk_leave_memory_pressure(struct sock *sk) } else { unsigned long *memory_pressure = sk->sk_prot->memory_pressure; - if (memory_pressure && *memory_pressure) - *memory_pressure = 0; + if (memory_pressure && READ_ONCE(*memory_pressure)) + WRITE_ONCE(*memory_pressure, 0); } } From 60b173ca3d1cd1782bd0096dc17298ec242f6fb1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Oct 2019 14:51:20 -0700 Subject: [PATCH 166/572] net: add {READ|WRITE}_ONCE() annotations on ->rskq_accept_head reqsk_queue_empty() is called from inet_csk_listen_poll() while other cpus might write ->rskq_accept_head value. Use {READ|WRITE}_ONCE() to avoid compiler tricks and potential KCSAN splats. Fixes: fff1f3001cc5 ("tcp: add a spinlock to protect struct request_sock_queue") Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- drivers/xen/pvcalls-back.c | 2 +- include/net/request_sock.h | 4 ++-- net/ipv4/inet_connection_sock.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c index 69a626b0e594..c57c71b7d53d 100644 --- a/drivers/xen/pvcalls-back.c +++ b/drivers/xen/pvcalls-back.c @@ -775,7 +775,7 @@ static int pvcalls_back_poll(struct xenbus_device *dev, mappass->reqcopy = *req; icsk = inet_csk(mappass->sock->sk); queue = &icsk->icsk_accept_queue; - data = queue->rskq_accept_head != NULL; + data = READ_ONCE(queue->rskq_accept_head) != NULL; if (data) { mappass->reqcopy.cmd = 0; ret = 0; diff --git a/include/net/request_sock.h b/include/net/request_sock.h index fd178d58fa84..cf8b33213bbc 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -185,7 +185,7 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, static inline bool reqsk_queue_empty(const struct request_sock_queue *queue) { - return queue->rskq_accept_head == NULL; + return READ_ONCE(queue->rskq_accept_head) == NULL; } static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue *queue, @@ -197,7 +197,7 @@ static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue req = queue->rskq_accept_head; if (req) { sk_acceptq_removed(parent); - queue->rskq_accept_head = req->dl_next; + WRITE_ONCE(queue->rskq_accept_head, req->dl_next); if (queue->rskq_accept_head == NULL) queue->rskq_accept_tail = NULL; } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index a9183543ca30..dbcf34ec8dd2 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -934,7 +934,7 @@ struct sock *inet_csk_reqsk_queue_add(struct sock *sk, req->sk = child; req->dl_next = NULL; if (queue->rskq_accept_head == NULL) - queue->rskq_accept_head = req; + WRITE_ONCE(queue->rskq_accept_head, req); else queue->rskq_accept_tail->dl_next = req; queue->rskq_accept_tail = req; From 1f142c17d19a5618d5a633195a46f2c8be9bf232 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Oct 2019 15:10:15 -0700 Subject: [PATCH 167/572] tcp: annotate lockless access to tcp_memory_pressure tcp_memory_pressure is read without holding any lock, and its value could be changed on other cpus. Use READ_ONCE() to annotate these lockless reads. The write side is already using atomic ops. Fixes: b8da51ebb1aa ("tcp: introduce tcp_under_memory_pressure()") Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 2 +- net/ipv4/tcp.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index c9a3f9688223..88e63d64c698 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -258,7 +258,7 @@ static inline bool tcp_under_memory_pressure(const struct sock *sk) mem_cgroup_under_socket_pressure(sk->sk_memcg)) return true; - return tcp_memory_pressure; + return READ_ONCE(tcp_memory_pressure); } /* * The next routines deal with comparing 32 bit unsigned ints diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f98a1882e537..888c92b63f5a 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -326,7 +326,7 @@ void tcp_enter_memory_pressure(struct sock *sk) { unsigned long val; - if (tcp_memory_pressure) + if (READ_ONCE(tcp_memory_pressure)) return; val = jiffies; @@ -341,7 +341,7 @@ void tcp_leave_memory_pressure(struct sock *sk) { unsigned long val; - if (!tcp_memory_pressure) + if (!READ_ONCE(tcp_memory_pressure)) return; val = xchg(&tcp_memory_pressure, 0); if (val) From 8265792bf8871acc2d00fd03883d830e2249d395 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Oct 2019 15:21:13 -0700 Subject: [PATCH 168/572] net: silence KCSAN warnings around sk_add_backlog() calls sk_add_backlog() callers usually read sk->sk_rcvbuf without owning the socket lock. This means sk_rcvbuf value can be changed by other cpus, and KCSAN complains. Add READ_ONCE() annotations to document the lockless nature of these reads. Note that writes over sk_rcvbuf should also use WRITE_ONCE(), but this will be done in separate patches to ease stable backports (if we decide this is relevant for stable trees). BUG: KCSAN: data-race in tcp_add_backlog / tcp_recvmsg write to 0xffff88812ab369f8 of 8 bytes by interrupt on cpu 1: __sk_add_backlog include/net/sock.h:902 [inline] sk_add_backlog include/net/sock.h:933 [inline] tcp_add_backlog+0x45a/0xcc0 net/ipv4/tcp_ipv4.c:1737 tcp_v4_rcv+0x1aba/0x1bf0 net/ipv4/tcp_ipv4.c:1925 ip_protocol_deliver_rcu+0x51/0x470 net/ipv4/ip_input.c:204 ip_local_deliver_finish+0x110/0x140 net/ipv4/ip_input.c:231 NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ip_local_deliver+0x133/0x210 net/ipv4/ip_input.c:252 dst_input include/net/dst.h:442 [inline] ip_rcv_finish+0x121/0x160 net/ipv4/ip_input.c:413 NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ip_rcv+0x18f/0x1a0 net/ipv4/ip_input.c:523 __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5004 __netif_receive_skb+0x37/0xf0 net/core/dev.c:5118 netif_receive_skb_internal+0x59/0x190 net/core/dev.c:5208 napi_skb_finish net/core/dev.c:5671 [inline] napi_gro_receive+0x28f/0x330 net/core/dev.c:5704 receive_buf+0x284/0x30b0 drivers/net/virtio_net.c:1061 virtnet_receive drivers/net/virtio_net.c:1323 [inline] virtnet_poll+0x436/0x7d0 drivers/net/virtio_net.c:1428 napi_poll net/core/dev.c:6352 [inline] net_rx_action+0x3ae/0xa50 net/core/dev.c:6418 read to 0xffff88812ab369f8 of 8 bytes by task 7271 on cpu 0: tcp_recvmsg+0x470/0x1a30 net/ipv4/tcp.c:2047 inet_recvmsg+0xbb/0x250 net/ipv4/af_inet.c:838 sock_recvmsg_nosec net/socket.c:871 [inline] sock_recvmsg net/socket.c:889 [inline] sock_recvmsg+0x92/0xb0 net/socket.c:885 sock_read_iter+0x15f/0x1e0 net/socket.c:967 call_read_iter include/linux/fs.h:1864 [inline] new_sync_read+0x389/0x4f0 fs/read_write.c:414 __vfs_read+0xb1/0xc0 fs/read_write.c:427 vfs_read fs/read_write.c:461 [inline] vfs_read+0x143/0x2c0 fs/read_write.c:446 ksys_read+0xd5/0x1b0 fs/read_write.c:587 __do_sys_read fs/read_write.c:597 [inline] __se_sys_read fs/read_write.c:595 [inline] __x64_sys_read+0x4c/0x60 fs/read_write.c:595 do_syscall_64+0xcf/0x2f0 arch/x86/entry/common.c:296 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 7271 Comm: syz-fuzzer Not tainted 5.3.0+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: Jakub Kicinski --- net/core/sock.c | 2 +- net/ipv4/tcp_ipv4.c | 2 +- net/llc/llc_conn.c | 2 +- net/sctp/input.c | 6 +++--- net/tipc/socket.c | 6 +++--- net/x25/x25_dev.c | 2 +- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index 50647a10fdb7..1cf06934da50 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -522,7 +522,7 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, rc = sk_backlog_rcv(sk, skb); mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); - } else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) { + } else if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf))) { bh_unlock_sock(sk); atomic_inc(&sk->sk_drops); goto discard_and_relse; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index bf124b1742df..492bf6a6b023 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1644,7 +1644,7 @@ int tcp_v4_early_demux(struct sk_buff *skb) bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) { - u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf; + u32 limit = READ_ONCE(sk->sk_rcvbuf) + READ_ONCE(sk->sk_sndbuf); struct skb_shared_info *shinfo; const struct tcphdr *th; struct tcphdr *thtail; diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index a79b739eb223..7b620acaca9e 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -813,7 +813,7 @@ void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb) else { dprintk("%s: adding to backlog...\n", __func__); llc_set_backlog_type(skb, LLC_PACKET); - if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) + if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf))) goto drop_unlock; } out: diff --git a/net/sctp/input.c b/net/sctp/input.c index f2771375bfc0..2277981559d0 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -322,7 +322,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) bh_lock_sock(sk); if (sock_owned_by_user(sk) || !sctp_newsk_ready(sk)) { - if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) + if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf))) sctp_chunk_free(chunk); else backloged = 1; @@ -337,7 +337,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) return 0; } else { if (!sctp_newsk_ready(sk)) { - if (!sk_add_backlog(sk, skb, sk->sk_rcvbuf)) + if (!sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf))) return 0; sctp_chunk_free(chunk); } else { @@ -364,7 +364,7 @@ static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb) struct sctp_ep_common *rcvr = chunk->rcvr; int ret; - ret = sk_add_backlog(sk, skb, sk->sk_rcvbuf); + ret = sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf)); if (!ret) { /* Hold the assoc/ep while hanging on the backlog queue. * This way, we know structures we need will not disappear diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 3b9f8cc328f5..7c736cfec57f 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2119,13 +2119,13 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb) struct tipc_msg *hdr = buf_msg(skb); if (unlikely(msg_in_group(hdr))) - return sk->sk_rcvbuf; + return READ_ONCE(sk->sk_rcvbuf); if (unlikely(!msg_connected(hdr))) - return sk->sk_rcvbuf << msg_importance(hdr); + return READ_ONCE(sk->sk_rcvbuf) << msg_importance(hdr); if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL)) - return sk->sk_rcvbuf; + return READ_ONCE(sk->sk_rcvbuf); return FLOWCTL_MSG_LIM; } diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index 5c111bc3c8ea..00e782335cb0 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -55,7 +55,7 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb) if (!sock_owned_by_user(sk)) { queued = x25_process_rx_frame(sk, skb); } else { - queued = !sk_add_backlog(sk, skb, sk->sk_rcvbuf); + queued = !sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf)); } bh_unlock_sock(sk); sock_put(sk); From eac66402d1c342f07ff38f8d631ff95eb7ad3220 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Oct 2019 15:32:35 -0700 Subject: [PATCH 169/572] net: annotate sk->sk_rcvlowat lockless reads sock_rcvlowat() or int_sk_rcvlowat() might be called without the socket lock for example from tcp_poll(). Use READ_ONCE() to document the fact that other cpus might change sk->sk_rcvlowat under us and avoid KCSAN splats. Use WRITE_ONCE() on write sides too. Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/net/sock.h | 4 +++- net/core/filter.c | 2 +- net/core/sock.c | 2 +- net/ipv4/tcp.c | 2 +- net/sched/em_meta.c | 2 +- 5 files changed, 7 insertions(+), 5 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 2c53f1a1d905..79f54e1f8827 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2271,7 +2271,9 @@ static inline long sock_sndtimeo(const struct sock *sk, bool noblock) static inline int sock_rcvlowat(const struct sock *sk, int waitall, int len) { - return (waitall ? len : min_t(int, sk->sk_rcvlowat, len)) ? : 1; + int v = waitall ? len : min_t(int, READ_ONCE(sk->sk_rcvlowat), len); + + return v ?: 1; } /* Alas, with timeout socket operations are not restartable. diff --git a/net/core/filter.c b/net/core/filter.c index ed6563622ce3..a50c0b6846f2 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4274,7 +4274,7 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, case SO_RCVLOWAT: if (val < 0) val = INT_MAX; - sk->sk_rcvlowat = val ? : 1; + WRITE_ONCE(sk->sk_rcvlowat, val ? : 1); break; case SO_MARK: if (sk->sk_mark != val) { diff --git a/net/core/sock.c b/net/core/sock.c index 1cf06934da50..b7c5c6ea51ba 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -974,7 +974,7 @@ set_rcvbuf: if (sock->ops->set_rcvlowat) ret = sock->ops->set_rcvlowat(sk, val); else - sk->sk_rcvlowat = val ? : 1; + WRITE_ONCE(sk->sk_rcvlowat, val ? : 1); break; case SO_RCVTIMEO_OLD: diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 888c92b63f5a..8781a92ea4b6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1699,7 +1699,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val) else cap = sock_net(sk)->ipv4.sysctl_tcp_rmem[2] >> 1; val = min(val, cap); - sk->sk_rcvlowat = val ? : 1; + WRITE_ONCE(sk->sk_rcvlowat, val ? : 1); /* Check if we need to signal EPOLLIN right now */ tcp_data_ready(sk); diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 82bd14e7ac93..4c9122fc35c9 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -554,7 +554,7 @@ META_COLLECTOR(int_sk_rcvlowat) *err = -1; return; } - dst->value = sk->sk_rcvlowat; + dst->value = READ_ONCE(sk->sk_rcvlowat); } META_COLLECTOR(int_sk_rcvtimeo) From 70c2655849a25431f31b505a07fe0c861e5e41fb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Oct 2019 15:41:03 -0700 Subject: [PATCH 170/572] net: silence KCSAN warnings about sk->sk_backlog.len reads sk->sk_backlog.len can be written by BH handlers, and read from process contexts in a lockless way. Note the write side should also use WRITE_ONCE() or a variant. We need some agreement about the best way to do this. syzbot reported : BUG: KCSAN: data-race in tcp_add_backlog / tcp_grow_window.isra.0 write to 0xffff88812665f32c of 4 bytes by interrupt on cpu 1: sk_add_backlog include/net/sock.h:934 [inline] tcp_add_backlog+0x4a0/0xcc0 net/ipv4/tcp_ipv4.c:1737 tcp_v4_rcv+0x1aba/0x1bf0 net/ipv4/tcp_ipv4.c:1925 ip_protocol_deliver_rcu+0x51/0x470 net/ipv4/ip_input.c:204 ip_local_deliver_finish+0x110/0x140 net/ipv4/ip_input.c:231 NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ip_local_deliver+0x133/0x210 net/ipv4/ip_input.c:252 dst_input include/net/dst.h:442 [inline] ip_rcv_finish+0x121/0x160 net/ipv4/ip_input.c:413 NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ip_rcv+0x18f/0x1a0 net/ipv4/ip_input.c:523 __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5004 __netif_receive_skb+0x37/0xf0 net/core/dev.c:5118 netif_receive_skb_internal+0x59/0x190 net/core/dev.c:5208 napi_skb_finish net/core/dev.c:5671 [inline] napi_gro_receive+0x28f/0x330 net/core/dev.c:5704 receive_buf+0x284/0x30b0 drivers/net/virtio_net.c:1061 virtnet_receive drivers/net/virtio_net.c:1323 [inline] virtnet_poll+0x436/0x7d0 drivers/net/virtio_net.c:1428 napi_poll net/core/dev.c:6352 [inline] net_rx_action+0x3ae/0xa50 net/core/dev.c:6418 read to 0xffff88812665f32c of 4 bytes by task 7292 on cpu 0: tcp_space include/net/tcp.h:1373 [inline] tcp_grow_window.isra.0+0x6b/0x480 net/ipv4/tcp_input.c:413 tcp_event_data_recv+0x68f/0x990 net/ipv4/tcp_input.c:717 tcp_rcv_established+0xbfe/0xf50 net/ipv4/tcp_input.c:5618 tcp_v4_do_rcv+0x381/0x4e0 net/ipv4/tcp_ipv4.c:1542 sk_backlog_rcv include/net/sock.h:945 [inline] __release_sock+0x135/0x1e0 net/core/sock.c:2427 release_sock+0x61/0x160 net/core/sock.c:2943 tcp_recvmsg+0x63b/0x1a30 net/ipv4/tcp.c:2181 inet_recvmsg+0xbb/0x250 net/ipv4/af_inet.c:838 sock_recvmsg_nosec net/socket.c:871 [inline] sock_recvmsg net/socket.c:889 [inline] sock_recvmsg+0x92/0xb0 net/socket.c:885 sock_read_iter+0x15f/0x1e0 net/socket.c:967 call_read_iter include/linux/fs.h:1864 [inline] new_sync_read+0x389/0x4f0 fs/read_write.c:414 __vfs_read+0xb1/0xc0 fs/read_write.c:427 vfs_read fs/read_write.c:461 [inline] vfs_read+0x143/0x2c0 fs/read_write.c:446 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 7292 Comm: syz-fuzzer Not tainted 5.3.0+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 3 ++- net/core/sock.c | 2 +- net/sctp/diag.c | 2 +- net/tipc/socket.c | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 88e63d64c698..35f6f7e0fdc2 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1380,7 +1380,8 @@ static inline int tcp_win_from_space(const struct sock *sk, int space) /* Note: caller must be prepared to deal with negative returns */ static inline int tcp_space(const struct sock *sk) { - return tcp_win_from_space(sk, sk->sk_rcvbuf - sk->sk_backlog.len - + return tcp_win_from_space(sk, sk->sk_rcvbuf - + READ_ONCE(sk->sk_backlog.len) - atomic_read(&sk->sk_rmem_alloc)); } diff --git a/net/core/sock.c b/net/core/sock.c index b7c5c6ea51ba..2a053999df11 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3210,7 +3210,7 @@ void sk_get_meminfo(const struct sock *sk, u32 *mem) mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc; mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued; mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc); - mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len; + mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len); mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops); } diff --git a/net/sctp/diag.c b/net/sctp/diag.c index fc9a4c6629ce..0851166b9175 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -175,7 +175,7 @@ static int inet_sctp_diag_fill(struct sock *sk, struct sctp_association *asoc, mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc; mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued; mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc); - mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len; + mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len); mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops); if (nla_put(skb, INET_DIAG_SKMEMINFO, sizeof(mem), &mem) < 0) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 7c736cfec57f..f8bbc4aab213 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -3790,7 +3790,7 @@ int tipc_sk_dump(struct sock *sk, u16 dqueues, char *buf) i += scnprintf(buf + i, sz - i, " %d", sk->sk_sndbuf); i += scnprintf(buf + i, sz - i, " | %d", sk_rmem_alloc_get(sk)); i += scnprintf(buf + i, sz - i, " %d", sk->sk_rcvbuf); - i += scnprintf(buf + i, sz - i, " | %d\n", sk->sk_backlog.len); + i += scnprintf(buf + i, sz - i, " | %d\n", READ_ONCE(sk->sk_backlog.len)); if (dqueues & TIPC_DUMP_SK_SNDQ) { i += scnprintf(buf + i, sz - i, "sk_write_queue: "); From 2189624b3c5a6fb03416129e35c09aa5151b7392 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 26 Sep 2019 11:08:57 -0500 Subject: [PATCH 171/572] ACPI: PM: Drop Dell XPS13 9360 from LPS0 Idle _DSM blacklist This reverts part of commit 71630b7a832f ("ACPI / PM: Blacklist Low Power S0 Idle _DSM for Dell XPS13 9360") to remove the S0ix blacklist for the XPS 9360. The problems with this system occurred in one possible NVME SSD when putting system into s0ix. As the NVME sleep behavior has been adjusted in commit d916b1be94b6 ("nvme-pci: use host managed power state for suspend") this is expected to be now resolved. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=196907 Signed-off-by: Mario Limonciello Tested-by: Paul Menzel Signed-off-by: Rafael J. Wysocki --- drivers/acpi/sleep.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 9fa77d72ef27..2af937a8b1c5 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -361,19 +361,6 @@ static const struct dmi_system_id acpisleep_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "80E3"), }, }, - /* - * https://bugzilla.kernel.org/show_bug.cgi?id=196907 - * Some Dell XPS13 9360 cannot do suspend-to-idle using the Low Power - * S0 Idle firmware interface. - */ - { - .callback = init_default_s3, - .ident = "Dell XPS13 9360", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "XPS 13 9360"), - }, - }, /* * ThinkPad X1 Tablet(2016) cannot do suspend-to-idle using * the Low Power S0 Idle firmware interface (see From 65650b35133ff20f0c9ef0abd5c3c66dbce3ae57 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 9 Oct 2019 01:29:10 +0200 Subject: [PATCH 172/572] cpufreq: Avoid cpufreq_suspend() deadlock on system shutdown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is incorrect to set the cpufreq syscore shutdown callback pointer to cpufreq_suspend(), because that function cannot be run in the syscore stage of system shutdown for two reasons: (a) it may attempt to carry out actions depending on devices that have already been shut down at that point and (b) the RCU synchronization carried out by it may not be able to make progress then. The latter issue has been present since commit 45975c7d21a1 ("rcu: Define RCU-sched API in terms of RCU for Tree RCU PREEMPT builds"), but the former one has been there since commit 90de2a4aa9f3 ("cpufreq: suspend cpufreq governors on shutdown") regardless. Fix that by dropping cpufreq_syscore_ops altogether and making device_shutdown() call cpufreq_suspend() directly before shutting down devices, which is along the lines of what system-wide power management does. Fixes: 45975c7d21a1 ("rcu: Define RCU-sched API in terms of RCU for Tree RCU PREEMPT builds") Fixes: 90de2a4aa9f3 ("cpufreq: suspend cpufreq governors on shutdown") Reported-by: Ville Syrjälä Tested-by: Ville Syrjälä Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Cc: 4.0+ # 4.0+ --- drivers/base/core.c | 3 +++ drivers/cpufreq/cpufreq.c | 10 ---------- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 2db62d98e395..7bd9cd366d41 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -9,6 +9,7 @@ */ #include +#include #include #include #include @@ -3179,6 +3180,8 @@ void device_shutdown(void) wait_for_device_probe(); device_block_probing(); + cpufreq_suspend(); + spin_lock(&devices_kset->list_lock); /* * Walk the devices list backward, shutting down each in turn. diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index c52d6fa32aac..bffc11b87247 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2737,14 +2737,6 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver) } EXPORT_SYMBOL_GPL(cpufreq_unregister_driver); -/* - * Stop cpufreq at shutdown to make sure it isn't holding any locks - * or mutexes when secondary CPUs are halted. - */ -static struct syscore_ops cpufreq_syscore_ops = { - .shutdown = cpufreq_suspend, -}; - struct kobject *cpufreq_global_kobject; EXPORT_SYMBOL(cpufreq_global_kobject); @@ -2756,8 +2748,6 @@ static int __init cpufreq_core_init(void) cpufreq_global_kobject = kobject_create_and_add("cpufreq", &cpu_subsys.dev_root->kobj); BUG_ON(!cpufreq_global_kobject); - register_syscore_ops(&cpufreq_syscore_ops); - return 0; } module_param(off, int, 0444); From f49249d58abdc12067d69f15d509487171148b30 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Tue, 8 Oct 2019 11:46:46 +0100 Subject: [PATCH 173/572] PM: sleep: include for pm_wq Include the for the definition of pm_wq to avoid the following warning: kernel/power/main.c:890:25: warning: symbol 'pm_wq' was not declared. Should it be static? Signed-off-by: Ben Dooks Signed-off-by: Rafael J. Wysocki --- kernel/power/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/power/main.c b/kernel/power/main.c index e8710d179b35..e26de7af520b 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "power.h" From f2edbb6699b0bc6e4f789846b99007200546c6c2 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 10 Oct 2019 15:55:33 +0530 Subject: [PATCH 174/572] opp: of: drop incorrect lockdep_assert_held() _find_opp_of_np() doesn't traverse the list of OPP tables but instead just the entries within an OPP table and so only requires to lock the OPP table itself. The lockdep_assert_held() was added there by mistake and isn't really required. Fixes: 5d6d106fa455 ("OPP: Populate required opp tables from "required-opps" property") Cc: v5.0+ # v5.0+ Reported-by: Niklas Cassel Signed-off-by: Viresh Kumar --- drivers/opp/of.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 1813f5ad5fa2..6dc41faf74b5 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -77,8 +77,6 @@ static struct dev_pm_opp *_find_opp_of_np(struct opp_table *opp_table, { struct dev_pm_opp *opp; - lockdep_assert_held(&opp_table_lock); - mutex_lock(&opp_table->lock); list_for_each_entry(opp, &opp_table->opp_list, node) { From 18380f52dbac230032a16545e67a6d90b548cf18 Mon Sep 17 00:00:00 2001 From: yu kuai Date: Sun, 29 Sep 2019 21:42:49 +0800 Subject: [PATCH 175/572] platform/x86: classmate-laptop: remove unused variable Fixes gcc '-Wunused-but-set-variable' warning: drivers/platform/x86/classmate-laptop.c: In function cmpc_accel_remove_v4: drivers/platform/x86/classmate-laptop.c:424:21: warning: variable accel set but not used [-Wunused-but-set-variable] drivers/platform/x86/classmate-laptop.c: In function cmpc_accel_remove: drivers/platform/x86/classmate-laptop.c:660:21: warning: variable accel set but not used [-Wunused-but-set-variable] In function cmpc_accel_remove_v4 and cmpc_accel_remove, variable accel is set but not used, so it can be removed. In that case, variable inputdev is set but not used and can be removed. Fixes: 7125587df4e8 ("classmate-laptop: Add support for Classmate V4 accelerometer.") Reported-by: Hulk Robot Signed-off-by: yu kuai Signed-off-by: Andy Shevchenko --- drivers/platform/x86/classmate-laptop.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/platform/x86/classmate-laptop.c b/drivers/platform/x86/classmate-laptop.c index 86cc2cc68fb5..af063f690846 100644 --- a/drivers/platform/x86/classmate-laptop.c +++ b/drivers/platform/x86/classmate-laptop.c @@ -420,12 +420,6 @@ failed_sensitivity: static int cmpc_accel_remove_v4(struct acpi_device *acpi) { - struct input_dev *inputdev; - struct cmpc_accel *accel; - - inputdev = dev_get_drvdata(&acpi->dev); - accel = dev_get_drvdata(&inputdev->dev); - device_remove_file(&acpi->dev, &cmpc_accel_sensitivity_attr_v4); device_remove_file(&acpi->dev, &cmpc_accel_g_select_attr_v4); return cmpc_remove_acpi_notify_device(acpi); @@ -656,12 +650,6 @@ failed_file: static int cmpc_accel_remove(struct acpi_device *acpi) { - struct input_dev *inputdev; - struct cmpc_accel *accel; - - inputdev = dev_get_drvdata(&acpi->dev); - accel = dev_get_drvdata(&inputdev->dev); - device_remove_file(&acpi->dev, &cmpc_accel_sensitivity_attr); return cmpc_remove_acpi_notify_device(acpi); } From 71eea7071583b04e9b796ee1d6f7a07334426495 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 10 Oct 2019 11:15:20 +0300 Subject: [PATCH 176/572] platform/x86: intel_punit_ipc: Avoid error message when retrieving IRQ Since the commit 7723f4c5ecdb ("driver core: platform: Add an error message to platform_get_irq*()") the platform_get_irq() started issuing an error message which is not what we want here. Switch to platform_get_irq_optional() to have only warning message provided by the driver. Fixes: 7723f4c5ecdb ("driver core: platform: Add an error message to platform_get_irq*()") Signed-off-by: Andy Shevchenko --- drivers/platform/x86/intel_punit_ipc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/platform/x86/intel_punit_ipc.c b/drivers/platform/x86/intel_punit_ipc.c index ab7ae1950867..fa97834fdb78 100644 --- a/drivers/platform/x86/intel_punit_ipc.c +++ b/drivers/platform/x86/intel_punit_ipc.c @@ -293,9 +293,8 @@ static int intel_punit_ipc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, punit_ipcdev); - irq = platform_get_irq(pdev, 0); + irq = platform_get_irq_optional(pdev, 0); if (irq < 0) { - punit_ipcdev->irq = 0; dev_warn(&pdev->dev, "Invalid IRQ, using polling mode\n"); } else { ret = devm_request_irq(&pdev->dev, irq, intel_punit_ioc, From b33a654a5bd6bba0052ca2d86098826b309f27b8 Mon Sep 17 00:00:00 2001 From: Ulf Magnusson Date: Fri, 27 Sep 2019 19:42:32 +0200 Subject: [PATCH 177/572] drm/tiny: Kconfig: Remove always-y THERMAL dep. from TINYDRM_REPAPER MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [cherry-picked to drm-misc-fixes: drm-misc-next commit dfef959803c7] Commit 554b3529fe01 ("thermal/drivers/core: Remove the module Kconfig's option") changed the type of THERMAL from tristate to bool, so THERMAL || !THERMAL is now always y. Remove the redundant dependency. Discovered through Kconfiglib detecting a dependency loop. The C tools simplify the expression to y before running dependency loop detection, and so don't see it. Changing the type of THERMAL back to tristate makes the C tools detect the same loop. Not sure if running dep. loop detection after simplification can be called a bug. Fixing this nit unbreaks Kconfiglib on the kernel at least. Signed-off-by: Ulf Magnusson Signed-off-by: Noralf Trønnes Link: https://patchwork.freedesktop.org/patch/msgid/20190927174218.GA32085@huvuddator --- drivers/gpu/drm/tiny/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/tiny/Kconfig b/drivers/gpu/drm/tiny/Kconfig index 504763423d46..a46ac284dd5e 100644 --- a/drivers/gpu/drm/tiny/Kconfig +++ b/drivers/gpu/drm/tiny/Kconfig @@ -63,7 +63,6 @@ config TINYDRM_REPAPER depends on DRM && SPI select DRM_KMS_HELPER select DRM_KMS_CMA_HELPER - depends on THERMAL || !THERMAL help DRM driver for the following Pervasive Displays panels: 1.44" TFT EPD Panel (E1144CS021) From a2f83e8b0c82c9500421a26c49eb198b25fcdea3 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 2 Oct 2019 06:14:17 -0400 Subject: [PATCH 178/572] dm snapshot: introduce account_start_copy() and account_end_copy() This simple refactoring moves code for modifying the semaphore cow_count into separate functions to prepare for changes that will extend these methods to provide for a more sophisticated mechanism for COW throttling. Signed-off-by: Mikulas Patocka Reviewed-by: Nikos Tsironis Signed-off-by: Mike Snitzer --- drivers/md/dm-snap.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index f150f5c5492b..da3bd1794ee0 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1512,6 +1512,16 @@ static void snapshot_dtr(struct dm_target *ti) kfree(s); } +static void account_start_copy(struct dm_snapshot *s) +{ + down(&s->cow_count); +} + +static void account_end_copy(struct dm_snapshot *s) +{ + up(&s->cow_count); +} + /* * Flush a list of buffers. */ @@ -1732,7 +1742,7 @@ static void copy_callback(int read_err, unsigned long write_err, void *context) rb_link_node(&pe->out_of_order_node, parent, p); rb_insert_color(&pe->out_of_order_node, &s->out_of_order_tree); } - up(&s->cow_count); + account_end_copy(s); } /* @@ -1756,7 +1766,7 @@ static void start_copy(struct dm_snap_pending_exception *pe) dest.count = src.count; /* Hand over to kcopyd */ - down(&s->cow_count); + account_start_copy(s); dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe); } @@ -1776,7 +1786,7 @@ static void start_full_bio(struct dm_snap_pending_exception *pe, pe->full_bio = bio; pe->full_bio_end_io = bio->bi_end_io; - down(&s->cow_count); + account_start_copy(s); callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client, copy_callback, pe); @@ -1866,7 +1876,7 @@ static void zero_callback(int read_err, unsigned long write_err, void *context) struct bio *bio = context; struct dm_snapshot *s = bio->bi_private; - up(&s->cow_count); + account_end_copy(s); bio->bi_status = write_err ? BLK_STS_IOERR : 0; bio_endio(bio); } @@ -1880,7 +1890,7 @@ static void zero_exception(struct dm_snapshot *s, struct dm_exception *e, dest.sector = bio->bi_iter.bi_sector; dest.count = s->store->chunk_size; - down(&s->cow_count); + account_start_copy(s); WARN_ON_ONCE(bio->bi_private); bio->bi_private = s; dm_kcopyd_zero(s->kcopyd_client, 1, &dest, 0, zero_callback, bio); From b21555786f18cd77f2311ad89074533109ae3ffa Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 2 Oct 2019 06:15:53 -0400 Subject: [PATCH 179/572] dm snapshot: rework COW throttling to fix deadlock Commit 721b1d98fb517a ("dm snapshot: Fix excessive memory usage and workqueue stalls") introduced a semaphore to limit the maximum number of in-flight kcopyd (COW) jobs. The implementation of this throttling mechanism is prone to a deadlock: 1. One or more threads write to the origin device causing COW, which is performed by kcopyd. 2. At some point some of these threads might reach the s->cow_count semaphore limit and block in down(&s->cow_count), holding a read lock on _origins_lock. 3. Someone tries to acquire a write lock on _origins_lock, e.g., snapshot_ctr(), which blocks because the threads at step (2) already hold a read lock on it. 4. A COW operation completes and kcopyd runs dm-snapshot's completion callback, which ends up calling pending_complete(). pending_complete() tries to resubmit any deferred origin bios. This requires acquiring a read lock on _origins_lock, which blocks. This happens because the read-write semaphore implementation gives priority to writers, meaning that as soon as a writer tries to enter the critical section, no readers will be allowed in, until all writers have completed their work. So, pending_complete() waits for the writer at step (3) to acquire and release the lock. This writer waits for the readers at step (2) to release the read lock and those readers wait for pending_complete() (the kcopyd thread) to signal the s->cow_count semaphore: DEADLOCK. The above was thoroughly analyzed and documented by Nikos Tsironis as part of his initial proposal for fixing this deadlock, see: https://www.redhat.com/archives/dm-devel/2019-October/msg00001.html Fix this deadlock by reworking COW throttling so that it waits without holding any locks. Add a variable 'in_progress' that counts how many kcopyd jobs are running. A function wait_for_in_progress() will sleep if 'in_progress' is over the limit. It drops _origins_lock in order to avoid the deadlock. Reported-by: Guruswamy Basavaiah Reported-by: Nikos Tsironis Reviewed-by: Nikos Tsironis Tested-by: Nikos Tsironis Fixes: 721b1d98fb51 ("dm snapshot: Fix excessive memory usage and workqueue stalls") Cc: stable@vger.kernel.org # v5.0+ Depends-on: 4a3f111a73a8c ("dm snapshot: introduce account_start_copy() and account_end_copy()") Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-snap.c | 78 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 64 insertions(+), 14 deletions(-) diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index da3bd1794ee0..4fb1a40e68a0 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -18,7 +18,6 @@ #include #include #include -#include #include "dm.h" @@ -107,8 +106,8 @@ struct dm_snapshot { /* The on disk metadata handler */ struct dm_exception_store *store; - /* Maximum number of in-flight COW jobs. */ - struct semaphore cow_count; + unsigned in_progress; + struct wait_queue_head in_progress_wait; struct dm_kcopyd_client *kcopyd_client; @@ -162,8 +161,8 @@ struct dm_snapshot { */ #define DEFAULT_COW_THRESHOLD 2048 -static int cow_threshold = DEFAULT_COW_THRESHOLD; -module_param_named(snapshot_cow_threshold, cow_threshold, int, 0644); +static unsigned cow_threshold = DEFAULT_COW_THRESHOLD; +module_param_named(snapshot_cow_threshold, cow_threshold, uint, 0644); MODULE_PARM_DESC(snapshot_cow_threshold, "Maximum number of chunks being copied on write"); DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle, @@ -1327,7 +1326,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad_hash_tables; } - sema_init(&s->cow_count, (cow_threshold > 0) ? cow_threshold : INT_MAX); + init_waitqueue_head(&s->in_progress_wait); s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle); if (IS_ERR(s->kcopyd_client)) { @@ -1509,17 +1508,54 @@ static void snapshot_dtr(struct dm_target *ti) dm_put_device(ti, s->origin); + WARN_ON(s->in_progress); + kfree(s); } static void account_start_copy(struct dm_snapshot *s) { - down(&s->cow_count); + spin_lock(&s->in_progress_wait.lock); + s->in_progress++; + spin_unlock(&s->in_progress_wait.lock); } static void account_end_copy(struct dm_snapshot *s) { - up(&s->cow_count); + spin_lock(&s->in_progress_wait.lock); + BUG_ON(!s->in_progress); + s->in_progress--; + if (likely(s->in_progress <= cow_threshold) && + unlikely(waitqueue_active(&s->in_progress_wait))) + wake_up_locked(&s->in_progress_wait); + spin_unlock(&s->in_progress_wait.lock); +} + +static bool wait_for_in_progress(struct dm_snapshot *s, bool unlock_origins) +{ + if (unlikely(s->in_progress > cow_threshold)) { + spin_lock(&s->in_progress_wait.lock); + if (likely(s->in_progress > cow_threshold)) { + /* + * NOTE: this throttle doesn't account for whether + * the caller is servicing an IO that will trigger a COW + * so excess throttling may result for chunks not required + * to be COW'd. But if cow_threshold was reached, extra + * throttling is unlikely to negatively impact performance. + */ + DECLARE_WAITQUEUE(wait, current); + __add_wait_queue(&s->in_progress_wait, &wait); + __set_current_state(TASK_UNINTERRUPTIBLE); + spin_unlock(&s->in_progress_wait.lock); + if (unlock_origins) + up_read(&_origins_lock); + io_schedule(); + remove_wait_queue(&s->in_progress_wait, &wait); + return false; + } + spin_unlock(&s->in_progress_wait.lock); + } + return true; } /* @@ -1537,7 +1573,7 @@ static void flush_bios(struct bio *bio) } } -static int do_origin(struct dm_dev *origin, struct bio *bio); +static int do_origin(struct dm_dev *origin, struct bio *bio, bool limit); /* * Flush a list of buffers. @@ -1550,7 +1586,7 @@ static void retry_origin_bios(struct dm_snapshot *s, struct bio *bio) while (bio) { n = bio->bi_next; bio->bi_next = NULL; - r = do_origin(s->origin, bio); + r = do_origin(s->origin, bio, false); if (r == DM_MAPIO_REMAPPED) generic_make_request(bio); bio = n; @@ -1926,6 +1962,11 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) if (!s->valid) return DM_MAPIO_KILL; + if (bio_data_dir(bio) == WRITE) { + while (unlikely(!wait_for_in_progress(s, false))) + ; /* wait_for_in_progress() has slept */ + } + down_read(&s->lock); dm_exception_table_lock(&lock); @@ -2122,7 +2163,7 @@ redirect_to_origin: if (bio_data_dir(bio) == WRITE) { up_write(&s->lock); - return do_origin(s->origin, bio); + return do_origin(s->origin, bio, false); } out_unlock: @@ -2497,15 +2538,24 @@ next_snapshot: /* * Called on a write from the origin driver. */ -static int do_origin(struct dm_dev *origin, struct bio *bio) +static int do_origin(struct dm_dev *origin, struct bio *bio, bool limit) { struct origin *o; int r = DM_MAPIO_REMAPPED; +again: down_read(&_origins_lock); o = __lookup_origin(origin->bdev); - if (o) + if (o) { + if (limit) { + struct dm_snapshot *s; + list_for_each_entry(s, &o->snapshots, list) + if (unlikely(!wait_for_in_progress(s, true))) + goto again; + } + r = __origin_write(&o->snapshots, bio->bi_iter.bi_sector, bio); + } up_read(&_origins_lock); return r; @@ -2618,7 +2668,7 @@ static int origin_map(struct dm_target *ti, struct bio *bio) dm_accept_partial_bio(bio, available_sectors); /* Only tell snapshots if this is a write */ - return do_origin(o->dev, bio); + return do_origin(o->dev, bio, true); } /* From 11bcf5f78905b90baae8fb01e16650664ed0cb00 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 2 Apr 2019 11:30:37 +0800 Subject: [PATCH 180/572] drm/edid: Add 6 bpc quirk for SDC panel in Lenovo G50 Another panel that needs 6BPC quirk. BugLink: https://bugs.launchpad.net/bugs/1819968 Cc: # v4.8+ Reviewed-by: Alex Deucher Signed-off-by: Kai-Heng Feng Signed-off-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/20190402033037.21877-1-kai.heng.feng@canonical.com --- drivers/gpu/drm/drm_edid.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 82a4ceed3fcf..6b0177112e18 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -159,6 +159,9 @@ static const struct edid_quirk { /* Medion MD 30217 PG */ { "MED", 0x7b8, EDID_QUIRK_PREFER_LARGE_75 }, + /* Lenovo G50 */ + { "SDC", 18514, EDID_QUIRK_FORCE_6BPC }, + /* Panel in Samsung NP700G7A-S01PL notebook reports 6bpc */ { "SEC", 0xd033, EDID_QUIRK_FORCE_8BPC }, From c3d79a83ca10d2026fd1cce096cde2d8e316592e Mon Sep 17 00:00:00 2001 From: Qiang Yu Date: Sun, 22 Sep 2019 15:49:00 +0800 Subject: [PATCH 181/572] dma-buf/resv: fix exclusive fence get MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This causes kernel crash when testing lima driver. Cc: Christian König Fixes: b8c036dfc66f ("dma-buf: simplify reservation_object_get_fences_rcu a bit") Signed-off-by: Qiang Yu Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190922074900.853-1-yuq825@gmail.com Signed-off-by: Christian König --- drivers/dma-buf/dma-resv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 42a8f3f11681..709002515550 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -471,7 +471,7 @@ unlock: if (pfence_excl) *pfence_excl = fence_excl; else if (fence_excl) - shared[++shared_count] = fence_excl; + shared[shared_count++] = fence_excl; if (!shared_count) { kfree(shared); From f1e5aa6c13feb8ee47ff5d116bed0a10007686f1 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sun, 21 Jul 2019 18:10:31 +0200 Subject: [PATCH 182/572] MAINTAINERS: Add BCM2711 to BCM2835 ARCH Clarify that BCM2711 belongs to the BCM2835 ARCH. Signed-off-by: Stefan Wahren Acked-by: Eric Anholt --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 296de2b51c83..14a939a9e1e7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3187,7 +3187,7 @@ N: bcm216* N: kona F: arch/arm/mach-bcm/ -BROADCOM BCM2835 ARM ARCHITECTURE +BROADCOM BCM2711/BCM2835 ARM ARCHITECTURE M: Eric Anholt M: Stefan Wahren L: bcm-kernel-feedback-list@broadcom.com @@ -3195,6 +3195,7 @@ L: linux-rpi-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) T: git git://github.com/anholt/linux S: Maintained +N: bcm2711 N: bcm2835 F: drivers/staging/vc04_services From 851140ab0d083c78e5723a8b1cbd258f567a7aff Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 2 Oct 2019 11:28:02 +0100 Subject: [PATCH 183/572] ARM: 8908/1: add __always_inline to functions called from __get_user_check() KernelCI reports that bcm2835_defconfig is no longer booting since commit ac7c3e4ff401 ("compiler: enable CONFIG_OPTIMIZE_INLINING forcibly") (https://lkml.org/lkml/2019/9/26/825). I also received a regression report from Nicolas Saenz Julienne (https://lkml.org/lkml/2019/9/27/263). This problem has cropped up on bcm2835_defconfig because it enables CONFIG_CC_OPTIMIZE_FOR_SIZE. The compiler tends to prefer not inlining functions with -Os. I was able to reproduce it with other boards and defconfig files by manually enabling CONFIG_CC_OPTIMIZE_FOR_SIZE. The __get_user_check() specifically uses r0, r1, r2 registers. So, uaccess_save_and_enable() and uaccess_restore() must be inlined. Otherwise, those register assignments would be entirely dropped, according to my analysis of the disassembly. Prior to commit 9012d011660e ("compiler: allow all arches to enable CONFIG_OPTIMIZE_INLINING"), the 'inline' marker was always enough for inlining functions, except on x86. Since that commit, all architectures can enable CONFIG_OPTIMIZE_INLINING. So, __always_inline is now the only guaranteed way of forcible inlining. I added __always_inline to 4 functions in the call-graph from the __get_user_check() macro. Fixes: 9012d011660e ("compiler: allow all arches to enable CONFIG_OPTIMIZE_INLINING") Reported-by: "kernelci.org bot" Reported-by: Nicolas Saenz Julienne Signed-off-by: Masahiro Yamada Tested-by: Nicolas Saenz Julienne Signed-off-by: Russell King --- arch/arm/include/asm/domain.h | 8 ++++---- arch/arm/include/asm/uaccess.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index 567dbede4785..f1d0a7807cd0 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -82,7 +82,7 @@ #ifndef __ASSEMBLY__ #ifdef CONFIG_CPU_CP15_MMU -static inline unsigned int get_domain(void) +static __always_inline unsigned int get_domain(void) { unsigned int domain; @@ -94,7 +94,7 @@ static inline unsigned int get_domain(void) return domain; } -static inline void set_domain(unsigned val) +static __always_inline void set_domain(unsigned int val) { asm volatile( "mcr p15, 0, %0, c3, c0 @ set domain" @@ -102,12 +102,12 @@ static inline void set_domain(unsigned val) isb(); } #else -static inline unsigned int get_domain(void) +static __always_inline unsigned int get_domain(void) { return 0; } -static inline void set_domain(unsigned val) +static __always_inline void set_domain(unsigned int val) { } #endif diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 303248e5b990..98c6b91be4a8 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -22,7 +22,7 @@ * perform such accesses (eg, via list poison values) which could then * be exploited for priviledge escalation. */ -static inline unsigned int uaccess_save_and_enable(void) +static __always_inline unsigned int uaccess_save_and_enable(void) { #ifdef CONFIG_CPU_SW_DOMAIN_PAN unsigned int old_domain = get_domain(); @@ -37,7 +37,7 @@ static inline unsigned int uaccess_save_and_enable(void) #endif } -static inline void uaccess_restore(unsigned int flags) +static __always_inline void uaccess_restore(unsigned int flags) { #ifdef CONFIG_CPU_SW_DOMAIN_PAN /* Restore the user access mask */ From 4c0742f65b4ee466546fd24b71b56516cacd4613 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Thu, 10 Oct 2019 10:12:20 +0100 Subject: [PATCH 184/572] ARM: 8914/1: NOMMU: Fix exc_ret for XIP It was reported that 72cd4064fcca "NOMMU: Toggle only bits in EXC_RETURN we are really care of" breaks NOMMU+XIP combination. It happens because saved EXC_RETURN gets overwritten when data section is relocated. The fix is to propagate EXC_RETURN via register and let relocation code to commit that value into memory. Fixes: 72cd4064fcca ("ARM: 8830/1: NOMMU: Toggle only bits in EXC_RETURN we are really care of") Reported-by: afzal mohammed Tested-by: afzal mohammed Signed-off-by: Vladimir Murzin Signed-off-by: Russell King --- arch/arm/kernel/head-common.S | 5 +++-- arch/arm/kernel/head-nommu.S | 2 ++ arch/arm/mm/proc-v7m.S | 5 ++--- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S index a7810be07da1..4a3982812a40 100644 --- a/arch/arm/kernel/head-common.S +++ b/arch/arm/kernel/head-common.S @@ -68,7 +68,7 @@ ENDPROC(__vet_atags) * The following fragment of code is executed with the MMU on in MMU mode, * and uses absolute addresses; this is not position independent. * - * r0 = cp#15 control register + * r0 = cp#15 control register (exc_ret for M-class) * r1 = machine ID * r2 = atags/dtb pointer * r9 = processor ID @@ -137,7 +137,8 @@ __mmap_switched_data: #ifdef CONFIG_CPU_CP15 .long cr_alignment @ r3 #else - .long 0 @ r3 +M_CLASS(.long exc_ret) @ r3 +AR_CLASS(.long 0) @ r3 #endif .size __mmap_switched_data, . - __mmap_switched_data diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index afa350f44dea..0fc814bbc34b 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -201,6 +201,8 @@ M_CLASS(streq r3, [r12, #PMSAv8_MAIR1]) bic r0, r0, #V7M_SCB_CCR_IC #endif str r0, [r12, V7M_SCB_CCR] + /* Pass exc_ret to __mmap_switched */ + mov r0, r10 #endif /* CONFIG_CPU_CP15 elif CONFIG_CPU_V7M */ ret lr ENDPROC(__after_proc_init) diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S index 1448f144e7fb..efebf4120a0c 100644 --- a/arch/arm/mm/proc-v7m.S +++ b/arch/arm/mm/proc-v7m.S @@ -136,9 +136,8 @@ __v7m_setup_cont: cpsie i svc #0 1: cpsid i - ldr r0, =exc_ret - orr lr, lr, #EXC_RET_THREADMODE_PROCESSSTACK - str lr, [r0] + /* Calculate exc_ret */ + orr r10, lr, #EXC_RET_THREADMODE_PROCESSSTACK ldmia sp, {r0-r3, r12} str r5, [r12, #11 * 4] @ restore the original SVC vector entry mov lr, r6 @ restore LR From 5234c14531152702a9f3e575cb552b7e9cea9f94 Mon Sep 17 00:00:00 2001 From: Soeren Moch Date: Fri, 4 Oct 2019 22:32:13 +0200 Subject: [PATCH 185/572] arm64: dts: rockchip: fix RockPro64 sdmmc settings According to the RockPro64 schematic [1] the rk3399 sdmmc controller is connected to a microSD (TF card) slot. Remove the cap-mmc-highspeed property of the sdmmc controller, since no mmc card can be connected here. [1] http://files.pine64.org/doc/rockpro64/rockpro64_v21-SCH.pdf Fixes: e4f3fb490967 ("arm64: dts: rockchip: add initial dts support for Rockpro64") Signed-off-by: Soeren Moch Link: https://lore.kernel.org/r/20191004203213.4995-1-smoch@web.de Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts b/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts index aa06a6587a11..e544deb61d28 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts @@ -624,7 +624,6 @@ &sdmmc { bus-width = <4>; - cap-mmc-highspeed; cap-sd-highspeed; cd-gpios = <&gpio0 7 GPIO_ACTIVE_LOW>; disable-wp; From 389206e806d892d36de0df6e7b07721432957801 Mon Sep 17 00:00:00 2001 From: Vivek Unune Date: Sat, 28 Sep 2019 23:22:30 -0400 Subject: [PATCH 186/572] arm64: dts: rockchip: Fix usb-c on Hugsun X99 TV Box Fix usb-c on X99 TV Box. Tested with armbian w/ kernel 5.3 Signed-off-by: Vivek Unune Link: https://lore.kernel.org/r/20190929032230.24628-1-npcomplete13@gmail.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-hugsun-x99.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-hugsun-x99.dts b/arch/arm64/boot/dts/rockchip/rk3399-hugsun-x99.dts index 0d1f5f9a0de9..c133e8d64b2a 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-hugsun-x99.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-hugsun-x99.dts @@ -644,7 +644,7 @@ status = "okay"; u2phy0_host: host-port { - phy-supply = <&vcc5v0_host>; + phy-supply = <&vcc5v0_typec>; status = "okay"; }; @@ -712,7 +712,7 @@ &usbdrd_dwc3_0 { status = "okay"; - dr_mode = "otg"; + dr_mode = "host"; }; &usbdrd3_1 { From a9082575f8d09a3a55f300d04f46ef5243e99688 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 8 Oct 2019 12:49:54 -0700 Subject: [PATCH 187/572] arm64: dts: rockchip: Fix override mode for rk3399-kevin panel When I re-posted Sean's original commit to add the override mode for the kevin panel, for some reason I didn't notice that the pixel clock wasn't quite right. Looking at /sys/kernel/debug/clk/clk_summary on downstream kernels it can be seen that the VOP clock is supposed to be 266,666,667 Hz achieved by dividing the 800 MHz PLL by 3. Looking at history, it seems that even Sean's first patch [1] had this funny clock rate. I'm not sure where it came from since the commit message specifically mentioned 26666 kHz and the Chrome OS tree [2] can be seen to request 266667 kHz. In any case, let's fix it up. This together with my patch [3] to do the proper rounding when setting the clock rate makes the VOP clock more proper as seen in /sys/kernel/debug/clk/clk_summary. [1] https://lore.kernel.org/r/20180206165626.37692-4-seanpaul@chromium.org [2] https://chromium.googlesource.com/chromiumos/third_party/kernel/+/chromeos-4.4/drivers/gpu/drm/panel/panel-simple.c#1172 [3] https://lkml.kernel.org/r/20191003114726.v2.1.Ib233b3e706cf6317858384264d5b0ed35657456e@changeid Fixes: 84ebd2da6d04 ("arm64: dts: rockchip: Specify override mode for kevin panel") Cc: Sean Paul Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20191008124949.1.I674acd441997dd0690c86c9003743aacda1cf5dd@changeid Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts b/arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts index e152b0ca0290..b8066868a3fe 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts @@ -44,7 +44,7 @@ power-supply = <&pp3300_disp>; panel-timing { - clock-frequency = <266604720>; + clock-frequency = <266666667>; hactive = <2400>; hfront-porch = <48>; hback-porch = <84>; From 4ebcb113edcc498888394821bca2e60ef89c6ff3 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 9 Oct 2019 20:55:48 +0200 Subject: [PATCH 188/572] r8169: fix jumbo packet handling on resume from suspend Mariusz reported that invalid packets are sent after resume from suspend if jumbo packets are active. It turned out that his BIOS resets chip settings to non-jumbo on resume. Most chip settings are re-initialized on resume from suspend by calling rtl_hw_start(), so let's add configuring jumbo to this function. There's nothing wrong with the commit marked as fixed, it's just the first one where the patch applies cleanly. Fixes: 7366016d2d4c ("r8169: read common register for PCI commit") Reported-by: Mariusz Bialonczyk Tested-by: Mariusz Bialonczyk Signed-off-by: Heiner Kallweit Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 35 +++++++---------------- 1 file changed, 11 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 74f81fe03810..350b0d949611 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4146,6 +4146,14 @@ static void rtl_hw_jumbo_disable(struct rtl8169_private *tp) rtl_lock_config_regs(tp); } +static void rtl_jumbo_config(struct rtl8169_private *tp, int mtu) +{ + if (mtu > ETH_DATA_LEN) + rtl_hw_jumbo_enable(tp); + else + rtl_hw_jumbo_disable(tp); +} + DECLARE_RTL_COND(rtl_chipcmd_cond) { return RTL_R8(tp, ChipCmd) & CmdReset; @@ -4442,11 +4450,6 @@ static void rtl8168g_set_pause_thresholds(struct rtl8169_private *tp, static void rtl_hw_start_8168bb(struct rtl8169_private *tp) { RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en); - - if (tp->dev->mtu <= ETH_DATA_LEN) { - rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B | - PCI_EXP_DEVCTL_NOSNOOP_EN); - } } static void rtl_hw_start_8168bef(struct rtl8169_private *tp) @@ -4462,9 +4465,6 @@ static void __rtl_hw_start_8168cp(struct rtl8169_private *tp) RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en); - if (tp->dev->mtu <= ETH_DATA_LEN) - rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); - rtl_disable_clock_request(tp); } @@ -4490,9 +4490,6 @@ static void rtl_hw_start_8168cp_2(struct rtl8169_private *tp) rtl_set_def_aspm_entry_latency(tp); RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en); - - if (tp->dev->mtu <= ETH_DATA_LEN) - rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); } static void rtl_hw_start_8168cp_3(struct rtl8169_private *tp) @@ -4503,9 +4500,6 @@ static void rtl_hw_start_8168cp_3(struct rtl8169_private *tp) /* Magic. */ RTL_W8(tp, DBG_REG, 0x20); - - if (tp->dev->mtu <= ETH_DATA_LEN) - rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); } static void rtl_hw_start_8168c_1(struct rtl8169_private *tp) @@ -4611,9 +4605,6 @@ static void rtl_hw_start_8168e_1(struct rtl8169_private *tp) rtl_ephy_init(tp, e_info_8168e_1); - if (tp->dev->mtu <= ETH_DATA_LEN) - rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); - rtl_disable_clock_request(tp); /* Reset tx FIFO pointer */ @@ -4636,9 +4627,6 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp) rtl_ephy_init(tp, e_info_8168e_2); - if (tp->dev->mtu <= ETH_DATA_LEN) - rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); - rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000); rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000); rtl_set_fifo_size(tp, 0x10, 0x10, 0x02, 0x06); @@ -5485,6 +5473,8 @@ static void rtl_hw_start(struct rtl8169_private *tp) rtl_set_rx_tx_desc_registers(tp); rtl_lock_config_regs(tp); + rtl_jumbo_config(tp, tp->dev->mtu); + /* Initially a 10 us delay. Turned it into a PCI commit. - FR */ RTL_R16(tp, CPlusCmd); RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb); @@ -5498,10 +5488,7 @@ static int rtl8169_change_mtu(struct net_device *dev, int new_mtu) { struct rtl8169_private *tp = netdev_priv(dev); - if (new_mtu > ETH_DATA_LEN) - rtl_hw_jumbo_enable(tp); - else - rtl_hw_jumbo_disable(tp); + rtl_jumbo_config(tp, new_mtu); dev->mtu = new_mtu; netdev_update_features(dev); From 2168da4594040529f6a6cb413c22668a4402f83c Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 9 Oct 2019 12:18:31 -0700 Subject: [PATCH 189/572] net: update net_dim documentation after rename Commit 8960b38932be ("linux/dim: Rename externally used net_dim members") renamed the net_dim API, removing the "net_" prefix from the structures and functions. The patch didn't update the net_dim.txt documentation file. Fix the documentation so that its examples match the current code. Fixes: 8960b38932be ("linux/dim: Rename externally used net_dim members", 2019-06-25) Fixes: c002bd529d71 ("linux/dim: Rename externally exposed macros", 2019-06-25) Fixes: 4f75da3666c0 ("linux/dim: Move implementation to .c files") Cc: Tal Gilboa Signed-off-by: Jacob Keller Signed-off-by: Jakub Kicinski --- Documentation/networking/net_dim.txt | 36 ++++++++++++++-------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/Documentation/networking/net_dim.txt b/Documentation/networking/net_dim.txt index 9cb31c5e2dcd..9bdb7d5a3ba3 100644 --- a/Documentation/networking/net_dim.txt +++ b/Documentation/networking/net_dim.txt @@ -92,16 +92,16 @@ under some conditions. Part III: Registering a Network Device to DIM ============================================== -Net DIM API exposes the main function net_dim(struct net_dim *dim, -struct net_dim_sample end_sample). This function is the entry point to the Net +Net DIM API exposes the main function net_dim(struct dim *dim, +struct dim_sample end_sample). This function is the entry point to the Net DIM algorithm and has to be called every time the driver would like to check if it should change interrupt moderation parameters. The driver should provide two -data structures: struct net_dim and struct net_dim_sample. Struct net_dim +data structures: struct dim and struct dim_sample. Struct dim describes the state of DIM for a specific object (RX queue, TX queue, other queues, etc.). This includes the current selected profile, previous data samples, the callback function provided by the driver and more. -Struct net_dim_sample describes a data sample, which will be compared to the -data sample stored in struct net_dim in order to decide on the algorithm's next +Struct dim_sample describes a data sample, which will be compared to the +data sample stored in struct dim in order to decide on the algorithm's next step. The sample should include bytes, packets and interrupts, measured by the driver. @@ -110,9 +110,9 @@ main net_dim() function. The recommended method is to call net_dim() on each interrupt. Since Net DIM has a built-in moderation and it might decide to skip iterations under certain conditions, there is no need to moderate the net_dim() calls as well. As mentioned above, the driver needs to provide an object of type -struct net_dim to the net_dim() function call. It is advised for each entity -using Net DIM to hold a struct net_dim as part of its data structure and use it -as the main Net DIM API object. The struct net_dim_sample should hold the latest +struct dim to the net_dim() function call. It is advised for each entity +using Net DIM to hold a struct dim as part of its data structure and use it +as the main Net DIM API object. The struct dim_sample should hold the latest bytes, packets and interrupts count. No need to perform any calculations, just include the raw data. @@ -132,19 +132,19 @@ usage is not complete but it should make the outline of the usage clear. my_driver.c: -#include +#include /* Callback for net DIM to schedule on a decision to change moderation */ void my_driver_do_dim_work(struct work_struct *work) { - /* Get struct net_dim from struct work_struct */ - struct net_dim *dim = container_of(work, struct net_dim, - work); + /* Get struct dim from struct work_struct */ + struct dim *dim = container_of(work, struct dim, + work); /* Do interrupt moderation related stuff */ ... /* Signal net DIM work is done and it should move to next iteration */ - dim->state = NET_DIM_START_MEASURE; + dim->state = DIM_START_MEASURE; } /* My driver's interrupt handler */ @@ -152,13 +152,13 @@ int my_driver_handle_interrupt(struct my_driver_entity *my_entity, ...) { ... /* A struct to hold current measured data */ - struct net_dim_sample dim_sample; + struct dim_sample dim_sample; ... /* Initiate data sample struct with current data */ - net_dim_sample(my_entity->events, - my_entity->packets, - my_entity->bytes, - &dim_sample); + dim_update_sample(my_entity->events, + my_entity->packets, + my_entity->bytes, + &dim_sample); /* Call net DIM */ net_dim(&my_entity->dim, dim_sample); ... From 29ee2701529e1905c0e948688f9688c68c8d4ea4 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 10 Oct 2019 10:16:09 +0200 Subject: [PATCH 190/572] net/smc: fix SMCD link group creation with VLAN id If creation of an SMCD link group with VLAN id fails, the initial smc_ism_get_vlan() step has to be reverted as well. Fixes: c6ba7c9ba43d ("net/smc: add base infrastructure for SMC-D and ISM") Signed-off-by: Ursula Braun Signed-off-by: Karsten Graul Signed-off-by: Jakub Kicinski --- net/smc/smc_core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 4ca50ddf8d16..88556f0251ab 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -213,7 +213,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) lgr = kzalloc(sizeof(*lgr), GFP_KERNEL); if (!lgr) { rc = SMC_CLC_DECL_MEM; - goto out; + goto ism_put_vlan; } lgr->is_smcd = ini->is_smcd; lgr->sync_err = 0; @@ -289,6 +289,9 @@ clear_llc_lnk: smc_llc_link_clear(lnk); free_lgr: kfree(lgr); +ism_put_vlan: + if (ini->is_smcd && ini->vlan_id) + smc_ism_put_vlan(ini->ism_dev, ini->vlan_id); out: if (rc < 0) { if (rc == -ENOMEM) From 882dcfe5a1785c20f45820cbe6fec4b8b647c946 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Thu, 10 Oct 2019 10:16:10 +0200 Subject: [PATCH 191/572] net/smc: receive returns without data smc_cdc_rxed_any_close_or_senddone() is used as an end condition for the receive loop. This conflicts with smc_cdc_msg_recv_action() which could run in parallel and set the bits checked by smc_cdc_rxed_any_close_or_senddone() before the receive is processed. In that case we could return from receive with no data, although data is available. The same applies to smc_rx_wait(). Fix this by checking for RCV_SHUTDOWN only, which is set in smc_cdc_msg_recv_action() after the receive was actually processed. Fixes: 952310ccf2d8 ("smc: receive data from RMBE") Reviewed-by: Ursula Braun Signed-off-by: Karsten Graul Signed-off-by: Jakub Kicinski --- net/smc/smc_rx.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c index 413a6abf227e..000002642288 100644 --- a/net/smc/smc_rx.c +++ b/net/smc/smc_rx.c @@ -211,8 +211,7 @@ int smc_rx_wait(struct smc_sock *smc, long *timeo, rc = sk_wait_event(sk, timeo, sk->sk_err || sk->sk_shutdown & RCV_SHUTDOWN || - fcrit(conn) || - smc_cdc_rxed_any_close_or_senddone(conn), + fcrit(conn), &wait); remove_wait_queue(sk_sleep(sk), &wait); sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); @@ -310,7 +309,6 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, smc_rx_update_cons(smc, 0); if (sk->sk_shutdown & RCV_SHUTDOWN || - smc_cdc_rxed_any_close_or_senddone(conn) || conn->local_tx_ctrl.conn_state_flags.peer_conn_abort) break; From 107529e31a87acd475ff6a0f82745821b8f70fec Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Thu, 10 Oct 2019 10:16:11 +0200 Subject: [PATCH 192/572] net/smc: receive pending data after RCV_SHUTDOWN smc_rx_recvmsg() first checks if data is available, and then if RCV_SHUTDOWN is set. There is a race when smc_cdc_msg_recv_action() runs in between these 2 checks, receives data and sets RCV_SHUTDOWN. In that case smc_rx_recvmsg() would return from receive without to process the available data. Fix that with a final check for data available if RCV_SHUTDOWN is set. Move the check for data into a function and call it twice. And use the existing helper smc_rx_data_available(). Fixes: 952310ccf2d8 ("smc: receive data from RMBE") Reviewed-by: Ursula Braun Signed-off-by: Karsten Graul Signed-off-by: Jakub Kicinski --- net/smc/smc_rx.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c index 000002642288..97e8369002d7 100644 --- a/net/smc/smc_rx.c +++ b/net/smc/smc_rx.c @@ -261,6 +261,18 @@ static int smc_rx_recv_urg(struct smc_sock *smc, struct msghdr *msg, int len, return -EAGAIN; } +static bool smc_rx_recvmsg_data_available(struct smc_sock *smc) +{ + struct smc_connection *conn = &smc->conn; + + if (smc_rx_data_available(conn)) + return true; + else if (conn->urg_state == SMC_URG_VALID) + /* we received a single urgent Byte - skip */ + smc_rx_update_cons(smc, 0); + return false; +} + /* smc_rx_recvmsg - receive data from RMBE * @msg: copy data to receive buffer * @pipe: copy data to pipe if set - indicates splice() call @@ -302,15 +314,18 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, if (read_done >= target || (pipe && read_done)) break; - if (atomic_read(&conn->bytes_to_rcv)) + if (smc_rx_recvmsg_data_available(smc)) goto copy; - else if (conn->urg_state == SMC_URG_VALID) - /* we received a single urgent Byte - skip */ - smc_rx_update_cons(smc, 0); if (sk->sk_shutdown & RCV_SHUTDOWN || - conn->local_tx_ctrl.conn_state_flags.peer_conn_abort) + conn->local_tx_ctrl.conn_state_flags.peer_conn_abort) { + /* smc_cdc_msg_recv_action() could have run after + * above smc_rx_recvmsg_data_available() + */ + if (smc_rx_recvmsg_data_available(smc)) + goto copy; break; + } if (read_done) { if (sk->sk_err || From c461e8df0c9bc46b3696dfb5d1df2f09d755af53 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 26 Sep 2019 13:43:10 -0400 Subject: [PATCH 193/572] tools/virtio: more stubs fix test module build. Signed-off-by: Michael S. Tsirkin --- tools/virtio/crypto/hash.h | 0 tools/virtio/linux/dma-mapping.h | 2 ++ 2 files changed, 2 insertions(+) create mode 100644 tools/virtio/crypto/hash.h diff --git a/tools/virtio/crypto/hash.h b/tools/virtio/crypto/hash.h new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tools/virtio/linux/dma-mapping.h b/tools/virtio/linux/dma-mapping.h index f91aeb5fe571..8f41cd6bd5c0 100644 --- a/tools/virtio/linux/dma-mapping.h +++ b/tools/virtio/linux/dma-mapping.h @@ -29,4 +29,6 @@ enum dma_data_direction { #define dma_unmap_single(...) do { } while (0) #define dma_unmap_page(...) do { } while (0) +#define dma_max_mapping_size(...) SIZE_MAX + #endif From 86109a691a454e08cbe0356400268cb2a81f1997 Mon Sep 17 00:00:00 2001 From: Chris von Recklinghausen Date: Thu, 10 Oct 2019 13:22:47 -0400 Subject: [PATCH 194/572] arm64: Fix kcore macros after 52-bit virtual addressing fallout We export the entire kernel address space (i.e. the whole of the TTBR1 address range) via /proc/kcore. The kc_vaddr_to_offset() and kc_offset_to_vaddr() macros are intended to convert between a kernel virtual address and its offset relative to the start of the TTBR1 address space. Prior to commit: 14c127c957c1c607 ("arm64: mm: Flip kernel VA space") ... the offset was calculated relative to VA_START, which at the time was the start of the TTBR1 address space. At this time, PAGE_OFFSET pointed to the high half of the TTBR1 address space where arm64's linear map lived. That commit swapped the position of VA_START and PAGE_OFFSET, but failed to update kc_vaddr_to_offset() or kc_offset_to_vaddr(), so since then the two macros behave incorrectly. Note that VA_START was subsequently renamed to PAGE_END in commit: 77ad4ce69321abbe ("arm64: memory: rename VA_START to PAGE_END") As the generic implementations of the two macros calculate the offset relative to PAGE_OFFSET (which is now the start of the TTBR1 address space), we can delete the arm64 implementation and use those. Fixes: 14c127c957c1c607 ("arm64: mm: Flip kernel VA space") Reviewed-by: James Morse Reviewed-by: Mark Rutland Signed-off-by: Chris von Recklinghausen Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 7576df00eb50..8330810f699e 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -876,9 +876,6 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, #define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) -#define kc_vaddr_to_offset(v) ((v) & ~PAGE_END) -#define kc_offset_to_vaddr(o) ((o) | PAGE_END) - #ifdef CONFIG_ARM64_PA_BITS_52 #define phys_to_ttbr(addr) (((addr) | ((addr) >> 46)) & TTBR_BADDR_MASK_52) #else From 2aa85f246c181b1fa89f27e8e20c5636426be624 Mon Sep 17 00:00:00 2001 From: Steve Wahl Date: Tue, 24 Sep 2019 16:03:55 -0500 Subject: [PATCH 195/572] x86/boot/64: Make level2_kernel_pgt pages invalid outside kernel area Our hardware (UV aka Superdome Flex) has address ranges marked reserved by the BIOS. Access to these ranges is caught as an error, causing the BIOS to halt the system. Initial page tables mapped a large range of physical addresses that were not checked against the list of BIOS reserved addresses, and sometimes included reserved addresses in part of the mapped range. Including the reserved range in the map allowed processor speculative accesses to the reserved range, triggering a BIOS halt. Used early in booting, the page table level2_kernel_pgt addresses 1 GiB divided into 2 MiB pages, and it was set up to linearly map a full 1 GiB of physical addresses that included the physical address range of the kernel image, as chosen by KASLR. But this also included a large range of unused addresses on either side of the kernel image. And unlike the kernel image's physical address range, this extra mapped space was not checked against the BIOS tables of usable RAM addresses. So there were times when the addresses chosen by KASLR would result in processor accessible mappings of BIOS reserved physical addresses. The kernel code did not directly access any of this extra mapped space, but having it mapped allowed the processor to issue speculative accesses into reserved memory, causing system halts. This was encountered somewhat rarely on a normal system boot, and much more often when starting the crash kernel if "crashkernel=512M,high" was specified on the command line (this heavily restricts the physical address of the crash kernel, in our case usually within 1 GiB of reserved space). The solution is to invalidate the pages of this table outside the kernel image's space before the page table is activated. It fixes this problem on our hardware. [ bp: Touchups. ] Signed-off-by: Steve Wahl Signed-off-by: Borislav Petkov Acked-by: Dave Hansen Acked-by: Kirill A. Shutemov Cc: Baoquan He Cc: Brijesh Singh Cc: dimitri.sivanich@hpe.com Cc: Feng Tang Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jordan Borgner Cc: Juergen Gross Cc: mike.travis@hpe.com Cc: russ.anderson@hpe.com Cc: stable@vger.kernel.org Cc: Thomas Gleixner Cc: x86-ml Cc: Zhenzhong Duan Link: https://lkml.kernel.org/r/9c011ee51b081534a7a15065b1681d200298b530.1569358539.git.steve.wahl@hpe.com --- arch/x86/kernel/head64.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 29ffa495bd1c..206a4b6144c2 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -222,13 +222,31 @@ unsigned long __head __startup_64(unsigned long physaddr, * we might write invalid pmds, when the kernel is relocated * cleanup_highmap() fixes this up along with the mappings * beyond _end. + * + * Only the region occupied by the kernel image has so far + * been checked against the table of usable memory regions + * provided by the firmware, so invalidate pages outside that + * region. A page table entry that maps to a reserved area of + * memory would allow processor speculation into that area, + * and on some hardware (particularly the UV platform) even + * speculative access to some reserved areas is caught as an + * error, causing the BIOS to halt the system. */ pmd = fixup_pointer(level2_kernel_pgt, physaddr); - for (i = 0; i < PTRS_PER_PMD; i++) { + + /* invalidate pages before the kernel image */ + for (i = 0; i < pmd_index((unsigned long)_text); i++) + pmd[i] &= ~_PAGE_PRESENT; + + /* fixup pages that are part of the kernel image */ + for (; i <= pmd_index((unsigned long)_end); i++) if (pmd[i] & _PAGE_PRESENT) pmd[i] += load_delta; - } + + /* invalidate pages after the kernel image */ + for (; i < PTRS_PER_PMD; i++) + pmd[i] &= ~_PAGE_PRESENT; /* * Fixup phys_base - remove the memory encryption mask to obtain From 1869dbe87cb94dc9a218ae1d9301dea3678bd4ff Mon Sep 17 00:00:00 2001 From: Steve Wahl Date: Tue, 24 Sep 2019 16:04:31 -0500 Subject: [PATCH 196/572] x86/boot/64: Round memory hole size up to next PMD page The kernel image map is created using PMD pages, which can include some extra space beyond what's actually needed. Round the size of the memory hole we search for up to the next PMD boundary, to be certain all of the space to be mapped is usable RAM and includes no reserved areas. Signed-off-by: Steve Wahl Signed-off-by: Borislav Petkov Acked-by: Dave Hansen Acked-by: Kirill A. Shutemov Cc: Baoquan He Cc: Brijesh Singh Cc: dimitri.sivanich@hpe.com Cc: Feng Tang Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jordan Borgner Cc: Juergen Gross Cc: mike.travis@hpe.com Cc: russ.anderson@hpe.com Cc: Thomas Gleixner Cc: x86-ml Cc: Zhenzhong Duan Link: https://lkml.kernel.org/r/df4f49f05c0c27f108234eb93db5c613d09ea62e.1569358539.git.steve.wahl@hpe.com --- arch/x86/boot/compressed/misc.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 53ac0cb2396d..9652d5c2afda 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -345,6 +345,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, { const unsigned long kernel_total_size = VO__end - VO__text; unsigned long virt_addr = LOAD_PHYSICAL_ADDR; + unsigned long needed_size; /* Retain x86 boot parameters pointer passed from startup_32/64. */ boot_params = rmode; @@ -379,26 +380,38 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, free_mem_ptr = heap; /* Heap */ free_mem_end_ptr = heap + BOOT_HEAP_SIZE; + /* + * The memory hole needed for the kernel is the larger of either + * the entire decompressed kernel plus relocation table, or the + * entire decompressed kernel plus .bss and .brk sections. + * + * On X86_64, the memory is mapped with PMD pages. Round the + * size up so that the full extent of PMD pages mapped is + * included in the check against the valid memory table + * entries. This ensures the full mapped area is usable RAM + * and doesn't include any reserved areas. + */ + needed_size = max(output_len, kernel_total_size); +#ifdef CONFIG_X86_64 + needed_size = ALIGN(needed_size, MIN_KERNEL_ALIGN); +#endif + /* Report initial kernel position details. */ debug_putaddr(input_data); debug_putaddr(input_len); debug_putaddr(output); debug_putaddr(output_len); debug_putaddr(kernel_total_size); + debug_putaddr(needed_size); #ifdef CONFIG_X86_64 /* Report address of 32-bit trampoline */ debug_putaddr(trampoline_32bit); #endif - /* - * The memory hole needed for the kernel is the larger of either - * the entire decompressed kernel plus relocation table, or the - * entire decompressed kernel plus .bss and .brk sections. - */ choose_random_location((unsigned long)input_data, input_len, (unsigned long *)&output, - max(output_len, kernel_total_size), + needed_size, &virt_addr); /* Validate memory location choices. */ From 44db1216efe37bf670f8d1019cdc41658d84baf5 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 9 Oct 2019 17:43:45 +0100 Subject: [PATCH 197/572] Btrfs: add missing extents release on file extent cluster relocation error If we error out when finding a page at relocate_file_extent_cluster(), we need to release the outstanding extents counter on the relocation inode, set by the previous call to btrfs_delalloc_reserve_metadata(), otherwise the inode's block reserve size can never decrease to zero and metadata space is leaked. Therefore add a call to btrfs_delalloc_release_extents() in case we can't find the target page. Fixes: 8b62f87bad9c ("Btrfs: rework outstanding_extents") CC: stable@vger.kernel.org # 4.19+ Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/relocation.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 00504657b602..7b883239fa7e 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3277,6 +3277,8 @@ static int relocate_file_extent_cluster(struct inode *inode, if (!page) { btrfs_delalloc_release_metadata(BTRFS_I(inode), PAGE_SIZE, true); + btrfs_delalloc_release_extents(BTRFS_I(inode), + PAGE_SIZE, true); ret = -ENOMEM; goto out; } From 78e31c42261779a01bc73472d0f65f15378e9de3 Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Fri, 11 Oct 2019 06:39:39 -0700 Subject: [PATCH 198/572] drm/msm/dsi: Implement reset correctly On msm8998, vblank timeouts are observed because the DSI controller is not reset properly, which ends up stalling the MDP. This is because the reset logic is not correct per the hardware documentation. The documentation states that after asserting reset, software should wait some time (no indication of how long), or poll the status register until it returns 0 before deasserting reset. wmb() is insufficient for this purpose since it just ensures ordering, not timing between writes. Since asserting and deasserting reset occurs on the same register, ordering is already guaranteed by the architecture, making the wmb extraneous. Since we would define a timeout for polling the status register to avoid a possible infinite loop, lets just use a static delay of 20 ms, since 16.666 ms is the time available to process one frame at 60 fps. Fixes: a689554ba6ed ("drm/msm: Initial add DSI connector support") Cc: Hai Li Cc: Rob Clark Signed-off-by: Jeffrey Hugo Reviewed-by: Sean Paul [seanpaul renamed RESET_DELAY to DSI_RESET_TOGGLE_DELAY_MS] Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20191011133939.16551-1-jeffrey.l.hugo@gmail.com --- drivers/gpu/drm/msm/dsi/dsi_host.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index 663ff9f4fac9..1e7b1be25bb0 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -26,6 +26,8 @@ #include "dsi_cfg.h" #include "msm_kms.h" +#define DSI_RESET_TOGGLE_DELAY_MS 20 + static int dsi_get_version(const void __iomem *base, u32 *major, u32 *minor) { u32 ver; @@ -986,7 +988,7 @@ static void dsi_sw_reset(struct msm_dsi_host *msm_host) wmb(); /* clocks need to be enabled before reset */ dsi_write(msm_host, REG_DSI_RESET, 1); - wmb(); /* make sure reset happen */ + msleep(DSI_RESET_TOGGLE_DELAY_MS); /* make sure reset happen */ dsi_write(msm_host, REG_DSI_RESET, 0); } @@ -1396,7 +1398,7 @@ static void dsi_sw_reset_restore(struct msm_dsi_host *msm_host) /* dsi controller can only be reset while clocks are running */ dsi_write(msm_host, REG_DSI_RESET, 1); - wmb(); /* make sure reset happen */ + msleep(DSI_RESET_TOGGLE_DELAY_MS); /* make sure reset happen */ dsi_write(msm_host, REG_DSI_RESET, 0); wmb(); /* controller out of reset */ dsi_write(msm_host, REG_DSI_CTRL, data0); From 4b654acdae850f48b8250b9a578a4eaa518c7a6f Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 10 Oct 2019 10:39:26 +0800 Subject: [PATCH 199/572] btrfs: block-group: Fix a memory leak due to missing btrfs_put_block_group() In btrfs_read_block_groups(), if we have an invalid block group which has mixed type (DATA|METADATA) while the fs doesn't have MIXED_GROUPS feature, we error out without freeing the block group cache. This patch will add the missing btrfs_put_block_group() to prevent memory leak. Note for stable backports: the file to patch in versions <= 5.3 is fs/btrfs/extent-tree.c Fixes: 49303381f19a ("Btrfs: bail out if block group has different mixed flag") CC: stable@vger.kernel.org # 4.9+ Reviewed-by: Anand Jain Reviewed-by: Johannes Thumshirn Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index bf7e3f23bba7..670700cb1110 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1761,6 +1761,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info) btrfs_err(info, "bg %llu is a mixed block group but filesystem hasn't enabled mixed block groups", cache->key.objectid); + btrfs_put_block_group(cache); ret = -EINVAL; goto error; } From 363c53875aef8fce69d4a2d0873919ccc7d9e2ad Mon Sep 17 00:00:00 2001 From: Evan Green Date: Fri, 11 Oct 2019 17:22:09 -0700 Subject: [PATCH 200/572] Input: synaptics-rmi4 - avoid processing unknown IRQs rmi_process_interrupt_requests() calls handle_nested_irq() for each interrupt status bit it finds. If the irq domain mapping for this bit had not yet been set up, then it ends up calling handle_nested_irq(0), which causes a NULL pointer dereference. There's already code that masks the irq_status bits coming out of the hardware with current_irq_mask, presumably to avoid this situation. However current_irq_mask seems to more reflect the actual mask set in the hardware rather than the IRQs software has set up and registered for. For example, in rmi_driver_reset_handler(), the current_irq_mask is initialized based on what is read from the hardware. If the reset value of this mask enables IRQs that Linux has not set up yet, then we end up in this situation. There appears to be a third unused bitmask that used to serve this purpose, fn_irq_bits. Use that bitmask instead of current_irq_mask to avoid calling handle_nested_irq() on IRQs that have not yet been set up. Signed-off-by: Evan Green Reviewed-by: Andrew Duggan Link: https://lore.kernel.org/r/20191008223657.163366-1-evgreen@chromium.org Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/rmi4/rmi_driver.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c index 772493b1f665..190b9974526b 100644 --- a/drivers/input/rmi4/rmi_driver.c +++ b/drivers/input/rmi4/rmi_driver.c @@ -146,7 +146,7 @@ static int rmi_process_interrupt_requests(struct rmi_device *rmi_dev) } mutex_lock(&data->irq_mutex); - bitmap_and(data->irq_status, data->irq_status, data->current_irq_mask, + bitmap_and(data->irq_status, data->irq_status, data->fn_irq_bits, data->irq_count); /* * At this point, irq_status has all bits that are set in the @@ -385,6 +385,8 @@ static int rmi_driver_set_irq_bits(struct rmi_device *rmi_dev, bitmap_copy(data->current_irq_mask, data->new_irq_mask, data->num_of_irq_regs); + bitmap_or(data->fn_irq_bits, data->fn_irq_bits, mask, data->irq_count); + error_unlock: mutex_unlock(&data->irq_mutex); return error; @@ -398,6 +400,8 @@ static int rmi_driver_clear_irq_bits(struct rmi_device *rmi_dev, struct device *dev = &rmi_dev->dev; mutex_lock(&data->irq_mutex); + bitmap_andnot(data->fn_irq_bits, + data->fn_irq_bits, mask, data->irq_count); bitmap_andnot(data->new_irq_mask, data->current_irq_mask, mask, data->irq_count); From 8d13c187c42e110625d60094668a8f778c092879 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 9 Oct 2019 13:12:37 -0500 Subject: [PATCH 201/572] Revert "drm/radeon: Fix EEH during kexec" This reverts commit 6f7fe9a93e6c09bf988c5059403f5f88e17e21e6. This breaks some boards. Maybe just enable this on PPC for now? Bug: https://bugzilla.kernel.org/show_bug.cgi?id=205147 Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_drv.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index d0bc91ed7c90..9e55076578c6 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -379,19 +379,11 @@ radeon_pci_remove(struct pci_dev *pdev) static void radeon_pci_shutdown(struct pci_dev *pdev) { - struct drm_device *ddev = pci_get_drvdata(pdev); - /* if we are running in a VM, make sure the device * torn down properly on reboot/shutdown */ if (radeon_device_is_virtual()) radeon_pci_remove(pdev); - - /* Some adapters need to be suspended before a - * shutdown occurs in order to prevent an error - * during kexec. - */ - radeon_suspend_kms(ddev, true, true, false); } static int radeon_pmops_suspend(struct device *dev) From 984d7a929ad68b7be9990fc9c5cfa5d5c9fc7942 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 10 Oct 2019 18:28:17 +0200 Subject: [PATCH 202/572] drm/amdgpu: Bail earlier when amdgpu.cik_/si_support is not set to 1 Bail from the pci_driver probe function instead of from the drm_driver load function. This avoid /dev/dri/card0 temporarily getting registered and then unregistered again, sending unwanted add / remove udev events to userspace. Specifically this avoids triggering the (userspace) bug fixed by this plymouth merge-request: https://gitlab.freedesktop.org/plymouth/plymouth/merge_requests/59 Note that despite that being a userspace bug, not sending unnecessary udev events is a good idea in general. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1490490 Reviewed-by: Daniel Vetter Signed-off-by: Hans de Goede Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 35 +++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 35 ------------------------- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 6f8aaf655a9f..2a00a36106b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1048,6 +1048,41 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, return -ENODEV; } +#ifdef CONFIG_DRM_AMDGPU_SI + if (!amdgpu_si_support) { + switch (flags & AMD_ASIC_MASK) { + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_VERDE: + case CHIP_OLAND: + case CHIP_HAINAN: + dev_info(&pdev->dev, + "SI support provided by radeon.\n"); + dev_info(&pdev->dev, + "Use radeon.si_support=0 amdgpu.si_support=1 to override.\n" + ); + return -ENODEV; + } + } +#endif +#ifdef CONFIG_DRM_AMDGPU_CIK + if (!amdgpu_cik_support) { + switch (flags & AMD_ASIC_MASK) { + case CHIP_KAVERI: + case CHIP_BONAIRE: + case CHIP_HAWAII: + case CHIP_KABINI: + case CHIP_MULLINS: + dev_info(&pdev->dev, + "CIK support provided by radeon.\n"); + dev_info(&pdev->dev, + "Use radeon.cik_support=0 amdgpu.cik_support=1 to override.\n" + ); + return -ENODEV; + } + } +#endif + /* Get rid of things like offb */ ret = drm_fb_helper_remove_conflicting_pci_framebuffers(pdev, 0, "amdgpudrmfb"); if (ret) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index f2c097983f48..d55f5baa83d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -144,41 +144,6 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) struct amdgpu_device *adev; int r, acpi_status; -#ifdef CONFIG_DRM_AMDGPU_SI - if (!amdgpu_si_support) { - switch (flags & AMD_ASIC_MASK) { - case CHIP_TAHITI: - case CHIP_PITCAIRN: - case CHIP_VERDE: - case CHIP_OLAND: - case CHIP_HAINAN: - dev_info(dev->dev, - "SI support provided by radeon.\n"); - dev_info(dev->dev, - "Use radeon.si_support=0 amdgpu.si_support=1 to override.\n" - ); - return -ENODEV; - } - } -#endif -#ifdef CONFIG_DRM_AMDGPU_CIK - if (!amdgpu_cik_support) { - switch (flags & AMD_ASIC_MASK) { - case CHIP_KAVERI: - case CHIP_BONAIRE: - case CHIP_HAWAII: - case CHIP_KABINI: - case CHIP_MULLINS: - dev_info(dev->dev, - "CIK support provided by radeon.\n"); - dev_info(dev->dev, - "Use radeon.cik_support=0 amdgpu.cik_support=1 to override.\n" - ); - return -ENODEV; - } - } -#endif - adev = kzalloc(sizeof(struct amdgpu_device), GFP_KERNEL); if (adev == NULL) { return -ENOMEM; From d12c50857c6edc1d18aa7a60c5a4d6d943137bc0 Mon Sep 17 00:00:00 2001 From: Xiaojie Yuan Date: Thu, 10 Oct 2019 01:01:23 +0800 Subject: [PATCH 203/572] drm/amdgpu/sdma5: fix mask value of POLL_REGMEM packet for pipe sync MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sdma will hang once sequence number to be polled reaches 0x1000_0000 Reviewed-by: Christian König Signed-off-by: Xiaojie Yuan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index fa2f70ce2e2b..f6e81680dd7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1129,7 +1129,7 @@ static void sdma_v5_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) amdgpu_ring_write(ring, addr & 0xfffffffc); amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); amdgpu_ring_write(ring, seq); /* reference */ - amdgpu_ring_write(ring, 0xfffffff); /* mask */ + amdgpu_ring_write(ring, 0xffffffff); /* mask */ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */ } From f0308fb0708078d6c1d8a4d533941a7a191af634 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 10 Oct 2019 15:52:34 +0100 Subject: [PATCH 204/572] rxrpc: Fix possible NULL pointer access in ICMP handling If an ICMP packet comes in on the UDP socket backing an AF_RXRPC socket as the UDP socket is being shut down, rxrpc_error_report() may get called to deal with it after sk_user_data on the UDP socket has been cleared, leading to a NULL pointer access when this local endpoint record gets accessed. Fix this by just returning immediately if sk_user_data was NULL. The oops looks like the following: #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page ... RIP: 0010:rxrpc_error_report+0x1bd/0x6a9 ... Call Trace: ? sock_queue_err_skb+0xbd/0xde ? __udp4_lib_err+0x313/0x34d __udp4_lib_err+0x313/0x34d icmp_unreach+0x1ee/0x207 icmp_rcv+0x25b/0x28f ip_protocol_deliver_rcu+0x95/0x10e ip_local_deliver+0xe9/0x148 __netif_receive_skb_one_core+0x52/0x6e process_backlog+0xdc/0x177 net_rx_action+0xf9/0x270 __do_softirq+0x1b6/0x39a ? smpboot_register_percpu_thread+0xce/0xce run_ksoftirqd+0x1d/0x42 smpboot_thread_fn+0x19e/0x1b3 kthread+0xf1/0xf6 ? kthread_delayed_work_timer_fn+0x83/0x83 ret_from_fork+0x24/0x30 Fixes: 17926a79320a ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both") Reported-by: syzbot+611164843bd48cc2190c@syzkaller.appspotmail.com Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/peer_event.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index c97ebdc043e4..61451281d74a 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -151,6 +151,9 @@ void rxrpc_error_report(struct sock *sk) struct rxrpc_peer *peer; struct sk_buff *skb; + if (unlikely(!local)) + return; + _enter("%p{%d}", sk, local->debug_id); /* Clear the outstanding error value on the socket so that it doesn't From a91f757bda1a48317f692487addf832ebf8e93aa Mon Sep 17 00:00:00 2001 From: Simon Arlott Date: Thu, 22 Aug 2019 20:01:56 +0100 Subject: [PATCH 205/572] mailmap: Add Simon Arlott (replacement for expired email address) Add replacement email address for the one on my expired domain. Signed-off-by: Simon Arlott Signed-off-by: Stefan Wahren --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index edcac87e76c8..f652f3725772 100644 --- a/.mailmap +++ b/.mailmap @@ -229,6 +229,7 @@ Shuah Khan Shuah Khan Shuah Khan Shuah Khan +Simon Arlott Simon Kelley Stéphane Witzmann Stephen Hemminger From edc5774c097f6463e9fb2373832e7db726247809 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 13 Oct 2019 09:37:16 -0400 Subject: [PATCH 206/572] tools/virtio: xen stub Fixes test module build. Reported-by: Jan Kiszka Signed-off-by: Michael S. Tsirkin --- tools/virtio/xen/xen.h | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 tools/virtio/xen/xen.h diff --git a/tools/virtio/xen/xen.h b/tools/virtio/xen/xen.h new file mode 100644 index 000000000000..f569387d1403 --- /dev/null +++ b/tools/virtio/xen/xen.h @@ -0,0 +1,6 @@ +#ifndef XEN_XEN_STUB_H +#define XEN_XEN_STUB_H + +#define xen_domain() 0 + +#endif From 245cdd9fbd396483d501db83047116e2530f245f Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 7 Oct 2019 13:56:59 -0400 Subject: [PATCH 207/572] vhost/test: stop device before reset When device stop was moved out of reset, test device wasn't updated to stop before reset, this resulted in a use after free. Fix by invoking stop appropriately. Fixes: b211616d7125 ("vhost: move -net specific code out") Signed-off-by: Michael S. Tsirkin --- drivers/vhost/test.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index 7804869c6a31..056308008288 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -161,6 +161,7 @@ static int vhost_test_release(struct inode *inode, struct file *f) vhost_test_stop(n, &private); vhost_test_flush(n); + vhost_dev_stop(&n->dev); vhost_dev_cleanup(&n->dev); /* We do an extra flush before freeing memory, * since jobs can re-queue themselves. */ @@ -237,6 +238,7 @@ static long vhost_test_reset_owner(struct vhost_test *n) } vhost_test_stop(n, &priv); vhost_test_flush(n); + vhost_dev_stop(&n->dev); vhost_dev_reset_owner(&n->dev, umem); done: mutex_unlock(&n->dev.mutex); From d983ea6f16b835dcde2ee9a58a1e764ce68bfccc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 10 Oct 2019 20:17:38 -0700 Subject: [PATCH 208/572] tcp: add rcu protection around tp->fastopen_rsk Both tcp_v4_err() and tcp_v6_err() do the following operations while they do not own the socket lock : fastopen = tp->fastopen_rsk; snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; The problem is that without appropriate barrier, the compiler might reload tp->fastopen_rsk and trigger a NULL deref. request sockets are protected by RCU, we can simply add the missing annotations and barriers to solve the issue. Fixes: 168a8f58059a ("tcp: TCP Fast Open Server - main code path") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 6 +++--- net/core/request_sock.c | 2 +- net/ipv4/inet_connection_sock.c | 4 ++-- net/ipv4/tcp.c | 11 ++++++++--- net/ipv4/tcp_fastopen.c | 2 +- net/ipv4/tcp_input.c | 13 +++++++++---- net/ipv4/tcp_ipv4.c | 4 ++-- net/ipv4/tcp_minisocks.c | 2 +- net/ipv4/tcp_output.c | 2 +- net/ipv4/tcp_timer.c | 11 ++++++----- net/ipv6/tcp_ipv6.c | 2 +- 11 files changed, 35 insertions(+), 24 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 99617e528ea2..668e25a76d69 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -393,7 +393,7 @@ struct tcp_sock { /* fastopen_rsk points to request_sock that resulted in this big * socket. Used to retransmit SYNACKs etc. */ - struct request_sock *fastopen_rsk; + struct request_sock __rcu *fastopen_rsk; u32 *saved_syn; }; @@ -447,8 +447,8 @@ static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk) static inline bool tcp_passive_fastopen(const struct sock *sk) { - return (sk->sk_state == TCP_SYN_RECV && - tcp_sk(sk)->fastopen_rsk != NULL); + return sk->sk_state == TCP_SYN_RECV && + rcu_access_pointer(tcp_sk(sk)->fastopen_rsk) != NULL; } static inline void fastopen_queue_tune(struct sock *sk, int backlog) diff --git a/net/core/request_sock.c b/net/core/request_sock.c index c9bb00008528..f35c2e998406 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -96,7 +96,7 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, fastopenq = &inet_csk(lsk)->icsk_accept_queue.fastopenq; - tcp_sk(sk)->fastopen_rsk = NULL; + RCU_INIT_POINTER(tcp_sk(sk)->fastopen_rsk, NULL); spin_lock_bh(&fastopenq->lock); fastopenq->qlen--; tcp_rsk(req)->tfo_listener = false; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index dbcf34ec8dd2..eb30fc1770de 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -906,7 +906,7 @@ static void inet_child_forget(struct sock *sk, struct request_sock *req, percpu_counter_inc(sk->sk_prot->orphan_count); if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) { - BUG_ON(tcp_sk(child)->fastopen_rsk != req); + BUG_ON(rcu_access_pointer(tcp_sk(child)->fastopen_rsk) != req); BUG_ON(sk != req->rsk_listener); /* Paranoid, to prevent race condition if @@ -915,7 +915,7 @@ static void inet_child_forget(struct sock *sk, struct request_sock *req, * Also to satisfy an assertion in * tcp_v4_destroy_sock(). */ - tcp_sk(child)->fastopen_rsk = NULL; + RCU_INIT_POINTER(tcp_sk(child)->fastopen_rsk, NULL); } inet_csk_destroy_sock(child); } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8781a92ea4b6..c59d0bd29c5c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -543,7 +543,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) /* Connected or passive Fast Open socket? */ if (state != TCP_SYN_SENT && - (state != TCP_SYN_RECV || tp->fastopen_rsk)) { + (state != TCP_SYN_RECV || rcu_access_pointer(tp->fastopen_rsk))) { int target = sock_rcvlowat(sk, 0, INT_MAX); if (tp->urg_seq == tp->copied_seq && @@ -2487,7 +2487,10 @@ adjudge_to_death: } if (sk->sk_state == TCP_CLOSE) { - struct request_sock *req = tcp_sk(sk)->fastopen_rsk; + struct request_sock *req; + + req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk, + lockdep_sock_is_held(sk)); /* We could get here with a non-NULL req if the socket is * aborted (e.g., closed with unread data) before 3WHS * finishes. @@ -3831,8 +3834,10 @@ EXPORT_SYMBOL(tcp_md5_hash_key); void tcp_done(struct sock *sk) { - struct request_sock *req = tcp_sk(sk)->fastopen_rsk; + struct request_sock *req; + req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk, + lockdep_sock_is_held(sk)); if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS); diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 3fd451271a70..a915ade0c818 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -253,7 +253,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, */ tp = tcp_sk(child); - tp->fastopen_rsk = req; + rcu_assign_pointer(tp->fastopen_rsk, req); tcp_rsk(req)->tfo_listener = true; /* RFC1323: The window in SYN & SYN/ACK segments is never diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3578357abe30..5f9b102c3b55 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2666,7 +2666,7 @@ static void tcp_process_loss(struct sock *sk, int flag, int num_dupack, struct tcp_sock *tp = tcp_sk(sk); bool recovered = !before(tp->snd_una, tp->high_seq); - if ((flag & FLAG_SND_UNA_ADVANCED || tp->fastopen_rsk) && + if ((flag & FLAG_SND_UNA_ADVANCED || rcu_access_pointer(tp->fastopen_rsk)) && tcp_try_undo_loss(sk, false)) return; @@ -2990,7 +2990,7 @@ void tcp_rearm_rto(struct sock *sk) /* If the retrans timer is currently being used by Fast Open * for SYN-ACK retrans purpose, stay put. */ - if (tp->fastopen_rsk) + if (rcu_access_pointer(tp->fastopen_rsk)) return; if (!tp->packets_out) { @@ -6087,6 +6087,8 @@ reset_and_undo: static void tcp_rcv_synrecv_state_fastopen(struct sock *sk) { + struct request_sock *req; + tcp_try_undo_loss(sk, false); /* Reset rtx states to prevent spurious retransmits_timed_out() */ @@ -6096,7 +6098,9 @@ static void tcp_rcv_synrecv_state_fastopen(struct sock *sk) /* Once we leave TCP_SYN_RECV or TCP_FIN_WAIT_1, * we no longer need req so release it. */ - reqsk_fastopen_remove(sk, tcp_sk(sk)->fastopen_rsk, false); + req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk, + lockdep_sock_is_held(sk)); + reqsk_fastopen_remove(sk, req, false); /* Re-arm the timer because data may have been sent out. * This is similar to the regular data transmission case @@ -6171,7 +6175,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) tcp_mstamp_refresh(tp); tp->rx_opt.saw_tstamp = 0; - req = tp->fastopen_rsk; + req = rcu_dereference_protected(tp->fastopen_rsk, + lockdep_sock_is_held(sk)); if (req) { bool req_stolen; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 492bf6a6b023..ffa366099eb2 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -478,7 +478,7 @@ int tcp_v4_err(struct sk_buff *icmp_skb, u32 info) icsk = inet_csk(sk); tp = tcp_sk(sk); /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */ - fastopen = tp->fastopen_rsk; + fastopen = rcu_dereference(tp->fastopen_rsk); snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; if (sk->sk_state != TCP_LISTEN && !between(seq, snd_una, tp->snd_nxt)) { @@ -2121,7 +2121,7 @@ void tcp_v4_destroy_sock(struct sock *sk) if (inet_csk(sk)->icsk_bind_hash) inet_put_port(sk); - BUG_ON(tp->fastopen_rsk); + BUG_ON(rcu_access_pointer(tp->fastopen_rsk)); /* If socket is aborted during connect operation */ tcp_free_fastopen_req(tp); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index bb140a5db8c0..5401dbd39c8f 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -541,7 +541,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->rx_opt.mss_clamp = req->mss; tcp_ecn_openreq_child(newtp, req); newtp->fastopen_req = NULL; - newtp->fastopen_rsk = NULL; + RCU_INIT_POINTER(newtp->fastopen_rsk, NULL); __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index fec6d67bfd14..84ae4d1449ea 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2482,7 +2482,7 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto) /* Don't do any loss probe on a Fast Open connection before 3WHS * finishes. */ - if (tp->fastopen_rsk) + if (rcu_access_pointer(tp->fastopen_rsk)) return false; early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 05be564414e9..dd5a6317a801 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -386,15 +386,13 @@ abort: tcp_write_err(sk); * Timer for Fast Open socket to retransmit SYNACK. Note that the * sk here is the child socket, not the parent (listener) socket. */ -static void tcp_fastopen_synack_timer(struct sock *sk) +static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req) { struct inet_connection_sock *icsk = inet_csk(sk); int max_retries = icsk->icsk_syn_retries ? : sock_net(sk)->ipv4.sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */ struct tcp_sock *tp = tcp_sk(sk); - struct request_sock *req; - req = tcp_sk(sk)->fastopen_rsk; req->rsk_ops->syn_ack_timeout(req); if (req->num_timeout >= max_retries) { @@ -435,11 +433,14 @@ void tcp_retransmit_timer(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); struct inet_connection_sock *icsk = inet_csk(sk); + struct request_sock *req; - if (tp->fastopen_rsk) { + req = rcu_dereference_protected(tp->fastopen_rsk, + lockdep_sock_is_held(sk)); + if (req) { WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV && sk->sk_state != TCP_FIN_WAIT1); - tcp_fastopen_synack_timer(sk); + tcp_fastopen_synack_timer(sk, req); /* Before we receive ACK to our SYN-ACK don't retransmit * anything else (e.g., data or FIN segments). */ diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index e3d9f4559c99..45a95e032bdf 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -406,7 +406,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, tp = tcp_sk(sk); /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */ - fastopen = tp->fastopen_rsk; + fastopen = rcu_dereference(tp->fastopen_rsk); snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; if (sk->sk_state != TCP_LISTEN && !between(seq, snd_una, tp->snd_nxt)) { From dba7d9b8c739df27ff3a234c81d6c6b23e3986fa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 10 Oct 2019 20:17:39 -0700 Subject: [PATCH 209/572] tcp: annotate tp->rcv_nxt lockless reads There are few places where we fetch tp->rcv_nxt while this field can change from IRQ or other cpu. We need to add READ_ONCE() annotations, and also make sure write sides use corresponding WRITE_ONCE() to avoid store-tearing. Note that tcp_inq_hint() was already using READ_ONCE(tp->rcv_nxt) syzbot reported : BUG: KCSAN: data-race in tcp_poll / tcp_queue_rcv write to 0xffff888120425770 of 4 bytes by interrupt on cpu 0: tcp_rcv_nxt_update net/ipv4/tcp_input.c:3365 [inline] tcp_queue_rcv+0x180/0x380 net/ipv4/tcp_input.c:4638 tcp_rcv_established+0xbf1/0xf50 net/ipv4/tcp_input.c:5616 tcp_v4_do_rcv+0x381/0x4e0 net/ipv4/tcp_ipv4.c:1542 tcp_v4_rcv+0x1a03/0x1bf0 net/ipv4/tcp_ipv4.c:1923 ip_protocol_deliver_rcu+0x51/0x470 net/ipv4/ip_input.c:204 ip_local_deliver_finish+0x110/0x140 net/ipv4/ip_input.c:231 NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ip_local_deliver+0x133/0x210 net/ipv4/ip_input.c:252 dst_input include/net/dst.h:442 [inline] ip_rcv_finish+0x121/0x160 net/ipv4/ip_input.c:413 NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ip_rcv+0x18f/0x1a0 net/ipv4/ip_input.c:523 __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5004 __netif_receive_skb+0x37/0xf0 net/core/dev.c:5118 netif_receive_skb_internal+0x59/0x190 net/core/dev.c:5208 napi_skb_finish net/core/dev.c:5671 [inline] napi_gro_receive+0x28f/0x330 net/core/dev.c:5704 receive_buf+0x284/0x30b0 drivers/net/virtio_net.c:1061 read to 0xffff888120425770 of 4 bytes by task 7254 on cpu 1: tcp_stream_is_readable net/ipv4/tcp.c:480 [inline] tcp_poll+0x204/0x6b0 net/ipv4/tcp.c:554 sock_poll+0xed/0x250 net/socket.c:1256 vfs_poll include/linux/poll.h:90 [inline] ep_item_poll.isra.0+0x90/0x190 fs/eventpoll.c:892 ep_send_events_proc+0x113/0x5c0 fs/eventpoll.c:1749 ep_scan_ready_list.constprop.0+0x189/0x500 fs/eventpoll.c:704 ep_send_events fs/eventpoll.c:1793 [inline] ep_poll+0xe3/0x900 fs/eventpoll.c:1930 do_epoll_wait+0x162/0x180 fs/eventpoll.c:2294 __do_sys_epoll_pwait fs/eventpoll.c:2325 [inline] __se_sys_epoll_pwait fs/eventpoll.c:2311 [inline] __x64_sys_epoll_pwait+0xcd/0x170 fs/eventpoll.c:2311 do_syscall_64+0xcf/0x2f0 arch/x86/entry/common.c:296 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 7254 Comm: syz-fuzzer Not tainted 5.3.0+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 4 ++-- net/ipv4/tcp_diag.c | 2 +- net/ipv4/tcp_input.c | 6 +++--- net/ipv4/tcp_ipv4.c | 3 ++- net/ipv4/tcp_minisocks.c | 7 +++++-- net/ipv6/tcp_ipv6.c | 3 ++- 6 files changed, 15 insertions(+), 10 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c59d0bd29c5c..883ee863db43 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -477,7 +477,7 @@ static void tcp_tx_timestamp(struct sock *sk, u16 tsflags) static inline bool tcp_stream_is_readable(const struct tcp_sock *tp, int target, struct sock *sk) { - return (tp->rcv_nxt - tp->copied_seq >= target) || + return (READ_ONCE(tp->rcv_nxt) - tp->copied_seq >= target) || (sk->sk_prot->stream_memory_read ? sk->sk_prot->stream_memory_read(sk) : false); } @@ -2935,7 +2935,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, else if (tp->repair_queue == TCP_SEND_QUEUE) tp->write_seq = val; else if (tp->repair_queue == TCP_RECV_QUEUE) - tp->rcv_nxt = val; + WRITE_ONCE(tp->rcv_nxt, val); else err = -EINVAL; break; diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 81a8221d650a..cd219161f106 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -26,7 +26,7 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, } else if (sk->sk_type == SOCK_STREAM) { const struct tcp_sock *tp = tcp_sk(sk); - r->idiag_rqueue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); + r->idiag_rqueue = max_t(int, READ_ONCE(tp->rcv_nxt) - tp->copied_seq, 0); r->idiag_wqueue = tp->write_seq - tp->snd_una; } if (info) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5f9b102c3b55..5b7c8768ed5f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3362,7 +3362,7 @@ static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq) sock_owned_by_me((struct sock *)tp); tp->bytes_received += delta; - tp->rcv_nxt = seq; + WRITE_ONCE(tp->rcv_nxt, seq); } /* Update our send window. @@ -5932,7 +5932,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, /* Ok.. it's good. Set up sequence numbers and * move to established. */ - tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; + WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1); tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1; /* RFC1323: The window in SYN & SYN/ACK segments is @@ -6035,7 +6035,7 @@ discard: tp->tcp_header_len = sizeof(struct tcphdr); } - tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; + WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1); tp->copied_seq = tp->rcv_nxt; tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ffa366099eb2..5089dd6bee0f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2455,7 +2455,8 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) /* Because we don't lock the socket, * we might find a transient negative value. */ - rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); + rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) - + tp->copied_seq, 0); seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d", diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 5401dbd39c8f..adc6ce486a38 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -462,6 +462,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, struct tcp_request_sock *treq = tcp_rsk(req); struct inet_connection_sock *newicsk; struct tcp_sock *oldtp, *newtp; + u32 seq; if (!newsk) return NULL; @@ -475,8 +476,10 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, /* Now setup tcp_sock */ newtp->pred_flags = 0; - newtp->rcv_wup = newtp->copied_seq = - newtp->rcv_nxt = treq->rcv_isn + 1; + seq = treq->rcv_isn + 1; + newtp->rcv_wup = seq; + newtp->copied_seq = seq; + WRITE_ONCE(newtp->rcv_nxt, seq); newtp->segs_in = 1; newtp->snd_sml = newtp->snd_una = diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 45a95e032bdf..89ea0a7018b5 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1895,7 +1895,8 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) /* Because we don't lock the socket, * we might find a transient negative value. */ - rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); + rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) - + tp->copied_seq, 0); seq_printf(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " From 7db48e983930285b765743ebd665aecf9850582b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 10 Oct 2019 20:17:40 -0700 Subject: [PATCH 210/572] tcp: annotate tp->copied_seq lockless reads There are few places where we fetch tp->copied_seq while this field can change from IRQ or other cpu. We need to add READ_ONCE() annotations, and also make sure write sides use corresponding WRITE_ONCE() to avoid store-tearing. Note that tcp_inq_hint() was already using READ_ONCE(tp->copied_seq) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 20 ++++++++++---------- net/ipv4/tcp_diag.c | 3 ++- net/ipv4/tcp_input.c | 6 +++--- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/tcp_minisocks.c | 2 +- net/ipv4/tcp_output.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- 7 files changed, 19 insertions(+), 18 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 883ee863db43..c322ad071e17 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -477,7 +477,7 @@ static void tcp_tx_timestamp(struct sock *sk, u16 tsflags) static inline bool tcp_stream_is_readable(const struct tcp_sock *tp, int target, struct sock *sk) { - return (READ_ONCE(tp->rcv_nxt) - tp->copied_seq >= target) || + return (READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq) >= target) || (sk->sk_prot->stream_memory_read ? sk->sk_prot->stream_memory_read(sk) : false); } @@ -546,7 +546,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) (state != TCP_SYN_RECV || rcu_access_pointer(tp->fastopen_rsk))) { int target = sock_rcvlowat(sk, 0, INT_MAX); - if (tp->urg_seq == tp->copied_seq && + if (tp->urg_seq == READ_ONCE(tp->copied_seq) && !sock_flag(sk, SOCK_URGINLINE) && tp->urg_data) target++; @@ -607,7 +607,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) unlock_sock_fast(sk, slow); break; case SIOCATMARK: - answ = tp->urg_data && tp->urg_seq == tp->copied_seq; + answ = tp->urg_data && tp->urg_seq == READ_ONCE(tp->copied_seq); break; case SIOCOUTQ: if (sk->sk_state == TCP_LISTEN) @@ -1668,9 +1668,9 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, sk_eat_skb(sk, skb); if (!desc->count) break; - tp->copied_seq = seq; + WRITE_ONCE(tp->copied_seq, seq); } - tp->copied_seq = seq; + WRITE_ONCE(tp->copied_seq, seq); tcp_rcv_space_adjust(sk); @@ -1819,7 +1819,7 @@ static int tcp_zerocopy_receive(struct sock *sk, out: up_read(¤t->mm->mmap_sem); if (length) { - tp->copied_seq = seq; + WRITE_ONCE(tp->copied_seq, seq); tcp_rcv_space_adjust(sk); /* Clean up data we have read: This will do ACK frames. */ @@ -2117,7 +2117,7 @@ found_ok_skb: if (urg_offset < used) { if (!urg_offset) { if (!sock_flag(sk, SOCK_URGINLINE)) { - ++*seq; + WRITE_ONCE(*seq, *seq + 1); urg_hole++; offset++; used--; @@ -2139,7 +2139,7 @@ found_ok_skb: } } - *seq += used; + WRITE_ONCE(*seq, *seq + used); copied += used; len -= used; @@ -2166,7 +2166,7 @@ skip_copy: found_fin_ok: /* Process the FIN. */ - ++*seq; + WRITE_ONCE(*seq, *seq + 1); if (!(flags & MSG_PEEK)) sk_eat_skb(sk, skb); break; @@ -2588,7 +2588,7 @@ int tcp_disconnect(struct sock *sk, int flags) __kfree_skb(sk->sk_rx_skb_cache); sk->sk_rx_skb_cache = NULL; } - tp->copied_seq = tp->rcv_nxt; + WRITE_ONCE(tp->copied_seq, tp->rcv_nxt); tp->urg_data = 0; tcp_write_queue_purge(sk); tcp_fastopen_active_disable_ofo_check(sk); diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index cd219161f106..66273c8a55c2 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -26,7 +26,8 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, } else if (sk->sk_type == SOCK_STREAM) { const struct tcp_sock *tp = tcp_sk(sk); - r->idiag_rqueue = max_t(int, READ_ONCE(tp->rcv_nxt) - tp->copied_seq, 0); + r->idiag_rqueue = max_t(int, READ_ONCE(tp->rcv_nxt) - + READ_ONCE(tp->copied_seq), 0); r->idiag_wqueue = tp->write_seq - tp->snd_una; } if (info) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5b7c8768ed5f..a30aae3a6a18 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5961,7 +5961,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, /* Remember, tcp_poll() does not lock socket! * Change state from SYN-SENT only after copied_seq * is initialized. */ - tp->copied_seq = tp->rcv_nxt; + WRITE_ONCE(tp->copied_seq, tp->rcv_nxt); smc_check_reset_syn(tp); @@ -6036,7 +6036,7 @@ discard: } WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1); - tp->copied_seq = tp->rcv_nxt; + WRITE_ONCE(tp->copied_seq, tp->rcv_nxt); tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1; /* RFC1323: The window in SYN & SYN/ACK segments is @@ -6216,7 +6216,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) tcp_try_undo_spurious_syn(sk); tp->retrans_stamp = 0; tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB); - tp->copied_seq = tp->rcv_nxt; + WRITE_ONCE(tp->copied_seq, tp->rcv_nxt); } smp_mb(); tcp_set_state(sk, TCP_ESTABLISHED); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5089dd6bee0f..39560f482e0b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2456,7 +2456,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) * we might find a transient negative value. */ rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) - - tp->copied_seq, 0); + READ_ONCE(tp->copied_seq), 0); seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d", diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index adc6ce486a38..c4731d26ab4a 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -478,7 +478,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, seq = treq->rcv_isn + 1; newtp->rcv_wup = seq; - newtp->copied_seq = seq; + WRITE_ONCE(newtp->copied_seq, seq); WRITE_ONCE(newtp->rcv_nxt, seq); newtp->segs_in = 1; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 84ae4d1449ea..7dda12720169 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3433,7 +3433,7 @@ static void tcp_connect_init(struct sock *sk) else tp->rcv_tstamp = tcp_jiffies32; tp->rcv_wup = tp->rcv_nxt; - tp->copied_seq = tp->rcv_nxt; + WRITE_ONCE(tp->copied_seq, tp->rcv_nxt); inet_csk(sk)->icsk_rto = tcp_timeout_init(sk); inet_csk(sk)->icsk_retransmits = 0; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 89ea0a7018b5..a62c7042fc4a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1896,7 +1896,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) * we might find a transient negative value. */ rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) - - tp->copied_seq, 0); + READ_ONCE(tp->copied_seq), 0); seq_printf(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " From 0f31746452e6793ad6271337438af8f4defb8940 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 10 Oct 2019 20:17:41 -0700 Subject: [PATCH 211/572] tcp: annotate tp->write_seq lockless reads There are few places where we fetch tp->write_seq while this field can change from IRQ or other cpu. We need to add READ_ONCE() annotations, and also make sure write sides use corresponding WRITE_ONCE() to avoid store-tearing. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- net/ipv4/tcp.c | 20 ++++++++++++-------- net/ipv4/tcp_diag.c | 2 +- net/ipv4/tcp_ipv4.c | 21 ++++++++++++--------- net/ipv4/tcp_minisocks.c | 2 +- net/ipv4/tcp_output.c | 4 ++-- net/ipv6/tcp_ipv6.c | 13 +++++++------ 7 files changed, 36 insertions(+), 28 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 35f6f7e0fdc2..8e7c3f6801a9 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1917,7 +1917,7 @@ static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp) static inline bool tcp_stream_memory_free(const struct sock *sk, int wake) { const struct tcp_sock *tp = tcp_sk(sk); - u32 notsent_bytes = tp->write_seq - tp->snd_nxt; + u32 notsent_bytes = READ_ONCE(tp->write_seq) - tp->snd_nxt; return (notsent_bytes << wake) < tcp_notsent_lowat(tp); } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c322ad071e17..96dd65cbeb85 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -616,7 +616,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) answ = 0; else - answ = tp->write_seq - tp->snd_una; + answ = READ_ONCE(tp->write_seq) - tp->snd_una; break; case SIOCOUTQNSD: if (sk->sk_state == TCP_LISTEN) @@ -625,7 +625,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) answ = 0; else - answ = tp->write_seq - tp->snd_nxt; + answ = READ_ONCE(tp->write_seq) - tp->snd_nxt; break; default: return -ENOIOCTLCMD; @@ -1035,7 +1035,7 @@ new_segment: sk->sk_wmem_queued += copy; sk_mem_charge(sk, copy); skb->ip_summed = CHECKSUM_PARTIAL; - tp->write_seq += copy; + WRITE_ONCE(tp->write_seq, tp->write_seq + copy); TCP_SKB_CB(skb)->end_seq += copy; tcp_skb_pcount_set(skb, 0); @@ -1362,7 +1362,7 @@ new_segment: if (!copied) TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH; - tp->write_seq += copy; + WRITE_ONCE(tp->write_seq, tp->write_seq + copy); TCP_SKB_CB(skb)->end_seq += copy; tcp_skb_pcount_set(skb, 0); @@ -2562,6 +2562,7 @@ int tcp_disconnect(struct sock *sk, int flags) struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); int old_state = sk->sk_state; + u32 seq; if (old_state != TCP_CLOSE) tcp_set_state(sk, TCP_CLOSE); @@ -2604,9 +2605,12 @@ int tcp_disconnect(struct sock *sk, int flags) tp->srtt_us = 0; tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); tp->rcv_rtt_last_tsecr = 0; - tp->write_seq += tp->max_window + 2; - if (tp->write_seq == 0) - tp->write_seq = 1; + + seq = tp->write_seq + tp->max_window + 2; + if (!seq) + seq = 1; + WRITE_ONCE(tp->write_seq, seq); + icsk->icsk_backoff = 0; tp->snd_cwnd = 2; icsk->icsk_probes_out = 0; @@ -2933,7 +2937,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, if (sk->sk_state != TCP_CLOSE) err = -EPERM; else if (tp->repair_queue == TCP_SEND_QUEUE) - tp->write_seq = val; + WRITE_ONCE(tp->write_seq, val); else if (tp->repair_queue == TCP_RECV_QUEUE) WRITE_ONCE(tp->rcv_nxt, val); else diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 66273c8a55c2..549506162dde 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -28,7 +28,7 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, r->idiag_rqueue = max_t(int, READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq), 0); - r->idiag_wqueue = tp->write_seq - tp->snd_una; + r->idiag_wqueue = READ_ONCE(tp->write_seq) - tp->snd_una; } if (info) tcp_get_info(sk, info); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 39560f482e0b..6be568334848 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -164,9 +164,11 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) * without appearing to create any others. */ if (likely(!tp->repair)) { - tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; - if (tp->write_seq == 0) - tp->write_seq = 1; + u32 seq = tcptw->tw_snd_nxt + 65535 + 2; + + if (!seq) + seq = 1; + WRITE_ONCE(tp->write_seq, seq); tp->rx_opt.ts_recent = tcptw->tw_ts_recent; tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; } @@ -253,7 +255,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) tp->rx_opt.ts_recent = 0; tp->rx_opt.ts_recent_stamp = 0; if (likely(!tp->repair)) - tp->write_seq = 0; + WRITE_ONCE(tp->write_seq, 0); } inet->inet_dport = usin->sin_port; @@ -291,10 +293,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (likely(!tp->repair)) { if (!tp->write_seq) - tp->write_seq = secure_tcp_seq(inet->inet_saddr, - inet->inet_daddr, - inet->inet_sport, - usin->sin_port); + WRITE_ONCE(tp->write_seq, + secure_tcp_seq(inet->inet_saddr, + inet->inet_daddr, + inet->inet_sport, + usin->sin_port)); tp->tsoffset = secure_tcp_ts_off(sock_net(sk), inet->inet_saddr, inet->inet_daddr); @@ -2461,7 +2464,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d", i, src, srcp, dest, destp, state, - tp->write_seq - tp->snd_una, + READ_ONCE(tp->write_seq) - tp->snd_una, rx_queue, timer_active, jiffies_delta_to_clock_t(timer_expires - jiffies), diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index c4731d26ab4a..339944690329 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -498,7 +498,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->total_retrans = req->num_retrans; tcp_init_xmit_timers(newsk); - newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1; + WRITE_ONCE(newtp->write_seq, newtp->pushed_seq = treq->snt_isn + 1); if (sock_flag(newsk, SOCK_KEEPOPEN)) inet_csk_reset_keepalive_timer(newsk, diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7dda12720169..c17c2a78809d 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1196,7 +1196,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) struct tcp_sock *tp = tcp_sk(sk); /* Advance write_seq and place onto the write_queue. */ - tp->write_seq = TCP_SKB_CB(skb)->end_seq; + WRITE_ONCE(tp->write_seq, TCP_SKB_CB(skb)->end_seq); __skb_header_release(skb); tcp_add_write_queue_tail(sk, skb); sk->sk_wmem_queued += skb->truesize; @@ -3449,7 +3449,7 @@ static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb) __skb_header_release(skb); sk->sk_wmem_queued += skb->truesize; sk_mem_charge(sk, skb->truesize); - tp->write_seq = tcb->end_seq; + WRITE_ONCE(tp->write_seq, tcb->end_seq); tp->packets_out += tcp_skb_pcount(skb); } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index a62c7042fc4a..4804b6dc5e65 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -215,7 +215,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) { tp->rx_opt.ts_recent = 0; tp->rx_opt.ts_recent_stamp = 0; - tp->write_seq = 0; + WRITE_ONCE(tp->write_seq, 0); } sk->sk_v6_daddr = usin->sin6_addr; @@ -311,10 +311,11 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (likely(!tp->repair)) { if (!tp->write_seq) - tp->write_seq = secure_tcpv6_seq(np->saddr.s6_addr32, - sk->sk_v6_daddr.s6_addr32, - inet->inet_sport, - inet->inet_dport); + WRITE_ONCE(tp->write_seq, + secure_tcpv6_seq(np->saddr.s6_addr32, + sk->sk_v6_daddr.s6_addr32, + inet->inet_sport, + inet->inet_dport)); tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk), np->saddr.s6_addr32, sk->sk_v6_daddr.s6_addr32); @@ -1907,7 +1908,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) dest->s6_addr32[0], dest->s6_addr32[1], dest->s6_addr32[2], dest->s6_addr32[3], destp, state, - tp->write_seq - tp->snd_una, + READ_ONCE(tp->write_seq) - tp->snd_una, rx_queue, timer_active, jiffies_delta_to_clock_t(timer_expires - jiffies), From e0d694d638dba768b47be31c22e1a9b4f862f561 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 10 Oct 2019 20:17:42 -0700 Subject: [PATCH 212/572] tcp: annotate tp->snd_nxt lockless reads There are few places where we fetch tp->snd_nxt while this field can change from IRQ or other cpu. We need to add READ_ONCE() annotations, and also make sure write sides use corresponding WRITE_ONCE() to avoid store-tearing. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 3 ++- net/ipv4/tcp.c | 3 ++- net/ipv4/tcp_minisocks.c | 6 ++++-- net/ipv4/tcp_output.c | 10 +++++----- 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 8e7c3f6801a9..e1d08f69fd39 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1917,7 +1917,8 @@ static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp) static inline bool tcp_stream_memory_free(const struct sock *sk, int wake) { const struct tcp_sock *tp = tcp_sk(sk); - u32 notsent_bytes = READ_ONCE(tp->write_seq) - tp->snd_nxt; + u32 notsent_bytes = READ_ONCE(tp->write_seq) - + READ_ONCE(tp->snd_nxt); return (notsent_bytes << wake) < tcp_notsent_lowat(tp); } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 96dd65cbeb85..652568750cb1 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -625,7 +625,8 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) answ = 0; else - answ = READ_ONCE(tp->write_seq) - tp->snd_nxt; + answ = READ_ONCE(tp->write_seq) - + READ_ONCE(tp->snd_nxt); break; default: return -ENOIOCTLCMD; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 339944690329..c802bc80c400 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -482,8 +482,10 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, WRITE_ONCE(newtp->rcv_nxt, seq); newtp->segs_in = 1; - newtp->snd_sml = newtp->snd_una = - newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1; + seq = treq->snt_isn + 1; + newtp->snd_sml = newtp->snd_una = seq; + WRITE_ONCE(newtp->snd_nxt, seq); + newtp->snd_up = seq; INIT_LIST_HEAD(&newtp->tsq_node); INIT_LIST_HEAD(&newtp->tsorted_sent_queue); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index c17c2a78809d..a115a991dfb5 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -67,7 +67,7 @@ static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb) struct tcp_sock *tp = tcp_sk(sk); unsigned int prior_packets = tp->packets_out; - tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; + WRITE_ONCE(tp->snd_nxt, TCP_SKB_CB(skb)->end_seq); __skb_unlink(skb, &sk->sk_write_queue); tcp_rbtree_insert(&sk->tcp_rtx_queue, skb); @@ -3142,7 +3142,7 @@ void tcp_send_fin(struct sock *sk) * if FIN had been sent. This is because retransmit path * does not change tp->snd_nxt. */ - tp->snd_nxt++; + WRITE_ONCE(tp->snd_nxt, tp->snd_nxt + 1); return; } } else { @@ -3426,7 +3426,7 @@ static void tcp_connect_init(struct sock *sk) tp->snd_una = tp->write_seq; tp->snd_sml = tp->write_seq; tp->snd_up = tp->write_seq; - tp->snd_nxt = tp->write_seq; + WRITE_ONCE(tp->snd_nxt, tp->write_seq); if (likely(!tp->repair)) tp->rcv_nxt = 0; @@ -3586,11 +3586,11 @@ int tcp_connect(struct sock *sk) /* We change tp->snd_nxt after the tcp_transmit_skb() call * in order to make this packet get counted in tcpOutSegs. */ - tp->snd_nxt = tp->write_seq; + WRITE_ONCE(tp->snd_nxt, tp->write_seq); tp->pushed_seq = tp->write_seq; buff = tcp_send_head(sk); if (unlikely(buff)) { - tp->snd_nxt = TCP_SKB_CB(buff)->seq; + WRITE_ONCE(tp->snd_nxt, TCP_SKB_CB(buff)->seq); tp->pushed_seq = TCP_SKB_CB(buff)->seq; } TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS); From d9b55bf7b6788ec0bd1db1acefbc4feb1399144a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 10 Oct 2019 20:17:43 -0700 Subject: [PATCH 213/572] tcp: annotate tp->urg_seq lockless reads There two places where we fetch tp->urg_seq while this field can change from IRQ or other cpu. We need to add READ_ONCE() annotations, and also make sure write side use corresponding WRITE_ONCE() to avoid store-tearing. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 5 +++-- net/ipv4/tcp_input.c | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 652568750cb1..577a8c6eef9f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -546,7 +546,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) (state != TCP_SYN_RECV || rcu_access_pointer(tp->fastopen_rsk))) { int target = sock_rcvlowat(sk, 0, INT_MAX); - if (tp->urg_seq == READ_ONCE(tp->copied_seq) && + if (READ_ONCE(tp->urg_seq) == READ_ONCE(tp->copied_seq) && !sock_flag(sk, SOCK_URGINLINE) && tp->urg_data) target++; @@ -607,7 +607,8 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) unlock_sock_fast(sk, slow); break; case SIOCATMARK: - answ = tp->urg_data && tp->urg_seq == READ_ONCE(tp->copied_seq); + answ = tp->urg_data && + READ_ONCE(tp->urg_seq) == READ_ONCE(tp->copied_seq); break; case SIOCOUTQ: if (sk->sk_state == TCP_LISTEN) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a30aae3a6a18..16342e043ab3 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5356,7 +5356,7 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th) } tp->urg_data = TCP_URG_NOTYET; - tp->urg_seq = ptr; + WRITE_ONCE(tp->urg_seq, ptr); /* Disable header prediction. */ tp->pred_flags = 0; From ebb3b78db7bf842270a46fd4fe7cc45c78fa5ed6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 10 Oct 2019 20:17:44 -0700 Subject: [PATCH 214/572] tcp: annotate sk->sk_rcvbuf lockless reads For the sake of tcp_poll(), there are few places where we fetch sk->sk_rcvbuf while this field can change from IRQ or other cpu. We need to add READ_ONCE() annotations, and also make sure write sides use corresponding WRITE_ONCE() to avoid store-tearing. Note that other transports probably need similar fixes. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 4 ++-- include/trace/events/sock.h | 2 +- net/core/filter.c | 3 ++- net/core/skbuff.c | 2 +- net/core/sock.c | 5 +++-- net/ipv4/tcp.c | 4 ++-- net/ipv4/tcp_input.c | 7 ++++--- 7 files changed, 15 insertions(+), 12 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index e1d08f69fd39..ab4eb5eb5d07 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1380,14 +1380,14 @@ static inline int tcp_win_from_space(const struct sock *sk, int space) /* Note: caller must be prepared to deal with negative returns */ static inline int tcp_space(const struct sock *sk) { - return tcp_win_from_space(sk, sk->sk_rcvbuf - + return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - READ_ONCE(sk->sk_backlog.len) - atomic_read(&sk->sk_rmem_alloc)); } static inline int tcp_full_space(const struct sock *sk) { - return tcp_win_from_space(sk, sk->sk_rcvbuf); + return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf)); } extern void tcp_openreq_init_rwin(struct request_sock *req, diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h index a0c4b8a30966..f720c32e7dfd 100644 --- a/include/trace/events/sock.h +++ b/include/trace/events/sock.h @@ -82,7 +82,7 @@ TRACE_EVENT(sock_rcvqueue_full, TP_fast_assign( __entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc); __entry->truesize = skb->truesize; - __entry->sk_rcvbuf = sk->sk_rcvbuf; + __entry->sk_rcvbuf = READ_ONCE(sk->sk_rcvbuf); ), TP_printk("rmem_alloc=%d truesize=%u sk_rcvbuf=%d", diff --git a/net/core/filter.c b/net/core/filter.c index a50c0b6846f2..7deceaeeed7b 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4252,7 +4252,8 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, case SO_RCVBUF: val = min_t(u32, val, sysctl_rmem_max); sk->sk_userlocks |= SOCK_RCVBUF_LOCK; - sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF); + WRITE_ONCE(sk->sk_rcvbuf, + max_t(int, val * 2, SOCK_MIN_RCVBUF)); break; case SO_SNDBUF: val = min_t(u32, val, sysctl_wmem_max); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 529133611ea2..8c178703467b 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4415,7 +4415,7 @@ static void skb_set_err_queue(struct sk_buff *skb) int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) { if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= - (unsigned int)sk->sk_rcvbuf) + (unsigned int)READ_ONCE(sk->sk_rcvbuf)) return -ENOMEM; skb_orphan(skb); diff --git a/net/core/sock.c b/net/core/sock.c index 2a053999df11..8c8f61e70141 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -831,7 +831,8 @@ set_rcvbuf: * returning the value we actually used in getsockopt * is the most desirable behavior. */ - sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF); + WRITE_ONCE(sk->sk_rcvbuf, + max_t(int, val * 2, SOCK_MIN_RCVBUF)); break; case SO_RCVBUFFORCE: @@ -3204,7 +3205,7 @@ void sk_get_meminfo(const struct sock *sk, u32 *mem) memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS); mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk); - mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf; + mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf); mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk); mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf; mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 577a8c6eef9f..bc0481aa6633 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -451,7 +451,7 @@ void tcp_init_sock(struct sock *sk) icsk->icsk_sync_mss = tcp_sync_mss; sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1]; - sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1]; + WRITE_ONCE(sk->sk_rcvbuf, sock_net(sk)->ipv4.sysctl_tcp_rmem[1]); sk_sockets_allocated_inc(sk); sk->sk_route_forced_caps = NETIF_F_GSO; @@ -1711,7 +1711,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val) val <<= 1; if (val > sk->sk_rcvbuf) { - sk->sk_rcvbuf = val; + WRITE_ONCE(sk->sk_rcvbuf, val); tcp_sk(sk)->window_clamp = tcp_win_from_space(sk, val); } return 0; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 16342e043ab3..6995df20710a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -483,8 +483,9 @@ static void tcp_clamp_window(struct sock *sk) !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && !tcp_under_memory_pressure(sk) && sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) { - sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc), - net->ipv4.sysctl_tcp_rmem[2]); + WRITE_ONCE(sk->sk_rcvbuf, + min(atomic_read(&sk->sk_rmem_alloc), + net->ipv4.sysctl_tcp_rmem[2])); } if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss); @@ -648,7 +649,7 @@ void tcp_rcv_space_adjust(struct sock *sk) rcvbuf = min_t(u64, rcvwin * rcvmem, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]); if (rcvbuf > sk->sk_rcvbuf) { - sk->sk_rcvbuf = rcvbuf; + WRITE_ONCE(sk->sk_rcvbuf, rcvbuf); /* Make the window clamp follow along. */ tp->window_clamp = tcp_win_from_space(sk, rcvbuf); From e292f05e0df73f9fcc93329663936e1ded97a988 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 10 Oct 2019 20:17:45 -0700 Subject: [PATCH 215/572] tcp: annotate sk->sk_sndbuf lockless reads For the sake of tcp_poll(), there are few places where we fetch sk->sk_sndbuf while this field can change from IRQ or other cpu. We need to add READ_ONCE() annotations, and also make sure write sides use corresponding WRITE_ONCE() to avoid store-tearing. Note that other transports probably need similar fixes. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 18 +++++++++++------- net/core/filter.c | 3 ++- net/core/sock.c | 15 +++++++++------ net/ipv4/tcp.c | 2 +- net/ipv4/tcp_input.c | 3 ++- 5 files changed, 25 insertions(+), 16 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 79f54e1f8827..3d1e7502333e 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -883,7 +883,7 @@ static inline int sk_stream_min_wspace(const struct sock *sk) static inline int sk_stream_wspace(const struct sock *sk) { - return sk->sk_sndbuf - sk->sk_wmem_queued; + return READ_ONCE(sk->sk_sndbuf) - sk->sk_wmem_queued; } void sk_stream_write_space(struct sock *sk); @@ -1207,7 +1207,7 @@ static inline void sk_refcnt_debug_release(const struct sock *sk) static inline bool __sk_stream_memory_free(const struct sock *sk, int wake) { - if (sk->sk_wmem_queued >= sk->sk_sndbuf) + if (sk->sk_wmem_queued >= READ_ONCE(sk->sk_sndbuf)) return false; return sk->sk_prot->stream_memory_free ? @@ -2220,10 +2220,14 @@ static inline void sk_wake_async(const struct sock *sk, int how, int band) static inline void sk_stream_moderate_sndbuf(struct sock *sk) { - if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK)) { - sk->sk_sndbuf = min(sk->sk_sndbuf, sk->sk_wmem_queued >> 1); - sk->sk_sndbuf = max_t(u32, sk->sk_sndbuf, SOCK_MIN_SNDBUF); - } + u32 val; + + if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) + return; + + val = min(sk->sk_sndbuf, sk->sk_wmem_queued >> 1); + + WRITE_ONCE(sk->sk_sndbuf, max_t(u32, val, SOCK_MIN_SNDBUF)); } struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, @@ -2251,7 +2255,7 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag); */ static inline bool sock_writeable(const struct sock *sk) { - return refcount_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1); + return refcount_read(&sk->sk_wmem_alloc) < (READ_ONCE(sk->sk_sndbuf) >> 1); } static inline gfp_t gfp_any(void) diff --git a/net/core/filter.c b/net/core/filter.c index 7deceaeeed7b..3fed5755494b 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4258,7 +4258,8 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, case SO_SNDBUF: val = min_t(u32, val, sysctl_wmem_max); sk->sk_userlocks |= SOCK_SNDBUF_LOCK; - sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF); + WRITE_ONCE(sk->sk_sndbuf, + max_t(int, val * 2, SOCK_MIN_SNDBUF)); break; case SO_MAX_PACING_RATE: /* 32bit version */ if (val != ~0U) diff --git a/net/core/sock.c b/net/core/sock.c index 8c8f61e70141..cd075bc86407 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -785,7 +785,8 @@ set_sndbuf: */ val = min_t(int, val, INT_MAX / 2); sk->sk_userlocks |= SOCK_SNDBUF_LOCK; - sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF); + WRITE_ONCE(sk->sk_sndbuf, + max_t(int, val * 2, SOCK_MIN_SNDBUF)); /* Wake up sending tasks if we upped the value. */ sk->sk_write_space(sk); break; @@ -2089,8 +2090,10 @@ EXPORT_SYMBOL(sock_i_ino); struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, gfp_t priority) { - if (force || refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { + if (force || + refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf)) { struct sk_buff *skb = alloc_skb(size, priority); + if (skb) { skb_set_owner_w(skb, sk); return skb; @@ -2191,7 +2194,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo) break; set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - if (refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) + if (refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf)) break; if (sk->sk_shutdown & SEND_SHUTDOWN) break; @@ -2226,7 +2229,7 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, if (sk->sk_shutdown & SEND_SHUTDOWN) goto failure; - if (sk_wmem_alloc_get(sk) < sk->sk_sndbuf) + if (sk_wmem_alloc_get(sk) < READ_ONCE(sk->sk_sndbuf)) break; sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); @@ -2807,7 +2810,7 @@ static void sock_def_write_space(struct sock *sk) /* Do not wake up a writer until he can make "significant" * progress. --DaveM */ - if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { + if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= READ_ONCE(sk->sk_sndbuf)) { wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT | @@ -3207,7 +3210,7 @@ void sk_get_meminfo(const struct sock *sk, u32 *mem) mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk); mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf); mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk); - mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf; + mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf); mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc; mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued; mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index bc0481aa6633..111853262972 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -450,7 +450,7 @@ void tcp_init_sock(struct sock *sk) icsk->icsk_sync_mss = tcp_sync_mss; - sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1]; + WRITE_ONCE(sk->sk_sndbuf, sock_net(sk)->ipv4.sysctl_tcp_wmem[1]); WRITE_ONCE(sk->sk_rcvbuf, sock_net(sk)->ipv4.sysctl_tcp_rmem[1]); sk_sockets_allocated_inc(sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6995df20710a..a2e52ad7cdab 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -359,7 +359,8 @@ static void tcp_sndbuf_expand(struct sock *sk) sndmem *= nr_segs * per_mss; if (sk->sk_sndbuf < sndmem) - sk->sk_sndbuf = min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]); + WRITE_ONCE(sk->sk_sndbuf, + min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2])); } /* 2. Tuning advertised window (window_clamp, rcv_ssthresh) From ab4e846a82d0ae00176de19f2db3c5c64f8eb5f2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 10 Oct 2019 20:17:46 -0700 Subject: [PATCH 216/572] tcp: annotate sk->sk_wmem_queued lockless reads For the sake of tcp_poll(), there are few places where we fetch sk->sk_wmem_queued while this field can change from IRQ or other cpu. We need to add READ_ONCE() annotations, and also make sure write sides use corresponding WRITE_ONCE() to avoid store-tearing. sk_wmem_queued_add() helper is added so that we can in the future convert to ADD_ONCE() or equivalent if/when available. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 15 ++++++++++----- include/trace/events/sock.h | 2 +- net/core/datagram.c | 2 +- net/core/sock.c | 2 +- net/ipv4/inet_diag.c | 2 +- net/ipv4/tcp.c | 4 ++-- net/ipv4/tcp_output.c | 14 +++++++------- net/sched/em_meta.c | 2 +- 8 files changed, 24 insertions(+), 19 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 3d1e7502333e..f69b58bff7e5 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -878,12 +878,17 @@ static inline bool sk_acceptq_is_full(const struct sock *sk) */ static inline int sk_stream_min_wspace(const struct sock *sk) { - return sk->sk_wmem_queued >> 1; + return READ_ONCE(sk->sk_wmem_queued) >> 1; } static inline int sk_stream_wspace(const struct sock *sk) { - return READ_ONCE(sk->sk_sndbuf) - sk->sk_wmem_queued; + return READ_ONCE(sk->sk_sndbuf) - READ_ONCE(sk->sk_wmem_queued); +} + +static inline void sk_wmem_queued_add(struct sock *sk, int val) +{ + WRITE_ONCE(sk->sk_wmem_queued, sk->sk_wmem_queued + val); } void sk_stream_write_space(struct sock *sk); @@ -1207,7 +1212,7 @@ static inline void sk_refcnt_debug_release(const struct sock *sk) static inline bool __sk_stream_memory_free(const struct sock *sk, int wake) { - if (sk->sk_wmem_queued >= READ_ONCE(sk->sk_sndbuf)) + if (READ_ONCE(sk->sk_wmem_queued) >= READ_ONCE(sk->sk_sndbuf)) return false; return sk->sk_prot->stream_memory_free ? @@ -1467,7 +1472,7 @@ DECLARE_STATIC_KEY_FALSE(tcp_tx_skb_cache_key); static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb) { sock_set_flag(sk, SOCK_QUEUE_SHRUNK); - sk->sk_wmem_queued -= skb->truesize; + sk_wmem_queued_add(sk, -skb->truesize); sk_mem_uncharge(sk, skb->truesize); if (static_branch_unlikely(&tcp_tx_skb_cache_key) && !sk->sk_tx_skb_cache && !skb_cloned(skb)) { @@ -2014,7 +2019,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro skb->len += copy; skb->data_len += copy; skb->truesize += copy; - sk->sk_wmem_queued += copy; + sk_wmem_queued_add(sk, copy); sk_mem_charge(sk, copy); return 0; } diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h index f720c32e7dfd..51fe9f6719eb 100644 --- a/include/trace/events/sock.h +++ b/include/trace/events/sock.h @@ -115,7 +115,7 @@ TRACE_EVENT(sock_exceed_buf_limit, __entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc); __entry->sysctl_wmem = sk_get_wmem0(sk, prot); __entry->wmem_alloc = refcount_read(&sk->sk_wmem_alloc); - __entry->wmem_queued = sk->sk_wmem_queued; + __entry->wmem_queued = READ_ONCE(sk->sk_wmem_queued); __entry->kind = kind; ), diff --git a/net/core/datagram.c b/net/core/datagram.c index 4cc8dc5db2b7..c210fc116103 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -640,7 +640,7 @@ int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb, skb->len += copied; skb->truesize += truesize; if (sk && sk->sk_type == SOCK_STREAM) { - sk->sk_wmem_queued += truesize; + sk_wmem_queued_add(sk, truesize); sk_mem_charge(sk, truesize); } else { refcount_add(truesize, &skb->sk->sk_wmem_alloc); diff --git a/net/core/sock.c b/net/core/sock.c index cd075bc86407..a515392ba84b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3212,7 +3212,7 @@ void sk_get_meminfo(const struct sock *sk, u32 *mem) mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk); mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf); mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc; - mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued; + mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued); mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc); mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len); mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index bbb005eb5218..7dc79b973e6e 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -193,7 +193,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, if (ext & (1 << (INET_DIAG_MEMINFO - 1))) { struct inet_diag_meminfo minfo = { .idiag_rmem = sk_rmem_alloc_get(sk), - .idiag_wmem = sk->sk_wmem_queued, + .idiag_wmem = READ_ONCE(sk->sk_wmem_queued), .idiag_fmem = sk->sk_forward_alloc, .idiag_tmem = sk_wmem_alloc_get(sk), }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 111853262972..b2ac4f074e2d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -659,7 +659,7 @@ static void skb_entail(struct sock *sk, struct sk_buff *skb) tcb->sacked = 0; __skb_header_release(skb); tcp_add_write_queue_tail(sk, skb); - sk->sk_wmem_queued += skb->truesize; + sk_wmem_queued_add(sk, skb->truesize); sk_mem_charge(sk, skb->truesize); if (tp->nonagle & TCP_NAGLE_PUSH) tp->nonagle &= ~TCP_NAGLE_PUSH; @@ -1034,7 +1034,7 @@ new_segment: skb->len += copy; skb->data_len += copy; skb->truesize += copy; - sk->sk_wmem_queued += copy; + sk_wmem_queued_add(sk, copy); sk_mem_charge(sk, copy); skb->ip_summed = CHECKSUM_PARTIAL; WRITE_ONCE(tp->write_seq, tp->write_seq + copy); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a115a991dfb5..0488607c5cd3 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1199,7 +1199,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) WRITE_ONCE(tp->write_seq, TCP_SKB_CB(skb)->end_seq); __skb_header_release(skb); tcp_add_write_queue_tail(sk, skb); - sk->sk_wmem_queued += skb->truesize; + sk_wmem_queued_add(sk, skb->truesize); sk_mem_charge(sk, skb->truesize); } @@ -1333,7 +1333,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, return -ENOMEM; /* We'll just try again later. */ skb_copy_decrypted(buff, skb); - sk->sk_wmem_queued += buff->truesize; + sk_wmem_queued_add(sk, buff->truesize); sk_mem_charge(sk, buff->truesize); nlen = skb->len - len - nsize; buff->truesize += nlen; @@ -1443,7 +1443,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) if (delta_truesize) { skb->truesize -= delta_truesize; - sk->sk_wmem_queued -= delta_truesize; + sk_wmem_queued_add(sk, -delta_truesize); sk_mem_uncharge(sk, delta_truesize); sock_set_flag(sk, SOCK_QUEUE_SHRUNK); } @@ -1888,7 +1888,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, return -ENOMEM; skb_copy_decrypted(buff, skb); - sk->sk_wmem_queued += buff->truesize; + sk_wmem_queued_add(sk, buff->truesize); sk_mem_charge(sk, buff->truesize); buff->truesize += nlen; skb->truesize -= nlen; @@ -2152,7 +2152,7 @@ static int tcp_mtu_probe(struct sock *sk) nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false); if (!nskb) return -1; - sk->sk_wmem_queued += nskb->truesize; + sk_wmem_queued_add(sk, nskb->truesize); sk_mem_charge(sk, nskb->truesize); skb = tcp_send_head(sk); @@ -3222,7 +3222,7 @@ int tcp_send_synack(struct sock *sk) tcp_rtx_queue_unlink_and_free(skb, sk); __skb_header_release(nskb); tcp_rbtree_insert(&sk->tcp_rtx_queue, nskb); - sk->sk_wmem_queued += nskb->truesize; + sk_wmem_queued_add(sk, nskb->truesize); sk_mem_charge(sk, nskb->truesize); skb = nskb; } @@ -3447,7 +3447,7 @@ static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb) tcb->end_seq += skb->len; __skb_header_release(skb); - sk->sk_wmem_queued += skb->truesize; + sk_wmem_queued_add(sk, skb->truesize); sk_mem_charge(sk, skb->truesize); WRITE_ONCE(tp->write_seq, tcb->end_seq); tp->packets_out += tcp_skb_pcount(skb); diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 4c9122fc35c9..3177dcb17316 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -446,7 +446,7 @@ META_COLLECTOR(int_sk_wmem_queued) *err = -1; return; } - dst->value = sk->sk_wmem_queued; + dst->value = READ_ONCE(sk->sk_wmem_queued); } META_COLLECTOR(int_sk_fwd_alloc) From c23936fad79ea2bea749db19b8921bb4dc524905 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 10 Oct 2019 22:46:06 +0200 Subject: [PATCH 217/572] net: lpc_eth: avoid resetting twice __lpc_eth_shutdown is called after __lpc_eth_reset but it is already calling __lpc_eth_reset. Avoid resetting the IP twice. Signed-off-by: Alexandre Belloni Signed-off-by: David S. Miller --- drivers/net/ethernet/nxp/lpc_eth.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index 141571e2ec11..544012a67221 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -1356,9 +1356,6 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) if (!is_valid_ether_addr(ndev->dev_addr)) eth_hw_addr_random(ndev); - /* Reset the ethernet controller */ - __lpc_eth_reset(pldat); - /* then shut everything down to save power */ __lpc_eth_shutdown(pldat); From 11d49ce9f7946dfed4dcf5dbde865c78058b50ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Fri, 11 Oct 2019 07:52:54 +0200 Subject: [PATCH 218/572] net/ibmvnic: Fix EOI when running in XIVE mode. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pSeries machines on POWER9 processors can run with the XICS (legacy) interrupt mode or with the XIVE exploitation interrupt mode. These interrupt contollers have different interfaces for interrupt management : XICS uses hcalls and XIVE loads and stores on a page. H_EOI being a XICS interface the enable_scrq_irq() routine can fail when the machine runs in XIVE mode. Fix that by calling the EOI handler of the interrupt chip. Fixes: f23e0643cd0b ("ibmvnic: Clear pending interrupt after device reset") Signed-off-by: Cédric Le Goater Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 2b073a3c0b84..f59d9a8e35e2 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2878,12 +2878,10 @@ static int enable_scrq_irq(struct ibmvnic_adapter *adapter, if (test_bit(0, &adapter->resetting) && adapter->reset_reason == VNIC_RESET_MOBILITY) { - u64 val = (0xff000000) | scrq->hw_irq; + struct irq_desc *desc = irq_to_desc(scrq->irq); + struct irq_chip *chip = irq_desc_get_chip(desc); - rc = plpar_hcall_norets(H_EOI, val); - if (rc) - dev_err(dev, "H_EOI FAILED irq 0x%llx. rc=%ld\n", - val, rc); + chip->irq_eoi(&desc->irq_data); } rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address, From 33902b4a4227877896dd9368ac10f4ca0d100de5 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 11 Oct 2019 17:46:53 +0800 Subject: [PATCH 219/572] netdevsim: Fix error handling in nsim_fib_init and nsim_fib_exit In nsim_fib_init(), if register_fib_notifier failed, nsim_fib_net_ops should be unregistered before return. In nsim_fib_exit(), unregister_fib_notifier should be called before nsim_fib_net_ops be unregistered, otherwise may cause use-after-free: BUG: KASAN: use-after-free in nsim_fib_event_nb+0x342/0x570 [netdevsim] Read of size 8 at addr ffff8881daaf4388 by task kworker/0:3/3499 CPU: 0 PID: 3499 Comm: kworker/0:3 Not tainted 5.3.0-rc7+ #30 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 Workqueue: ipv6_addrconf addrconf_dad_work [ipv6] Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xa9/0x10e lib/dump_stack.c:113 print_address_description+0x65/0x380 mm/kasan/report.c:351 __kasan_report+0x149/0x18d mm/kasan/report.c:482 kasan_report+0xe/0x20 mm/kasan/common.c:618 nsim_fib_event_nb+0x342/0x570 [netdevsim] notifier_call_chain+0x52/0xf0 kernel/notifier.c:95 __atomic_notifier_call_chain+0x78/0x140 kernel/notifier.c:185 call_fib_notifiers+0x30/0x60 net/core/fib_notifier.c:30 call_fib6_entry_notifiers+0xc1/0x100 [ipv6] fib6_add+0x92e/0x1b10 [ipv6] __ip6_ins_rt+0x40/0x60 [ipv6] ip6_ins_rt+0x84/0xb0 [ipv6] __ipv6_ifa_notify+0x4b6/0x550 [ipv6] ipv6_ifa_notify+0xa5/0x180 [ipv6] addrconf_dad_completed+0xca/0x640 [ipv6] addrconf_dad_work+0x296/0x960 [ipv6] process_one_work+0x5c0/0xc00 kernel/workqueue.c:2269 worker_thread+0x5c/0x670 kernel/workqueue.c:2415 kthread+0x1d7/0x200 kernel/kthread.c:255 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:352 Allocated by task 3388: save_stack+0x19/0x80 mm/kasan/common.c:69 set_track mm/kasan/common.c:77 [inline] __kasan_kmalloc.constprop.3+0xa0/0xd0 mm/kasan/common.c:493 kmalloc include/linux/slab.h:557 [inline] kzalloc include/linux/slab.h:748 [inline] ops_init+0xa9/0x220 net/core/net_namespace.c:127 __register_pernet_operations net/core/net_namespace.c:1135 [inline] register_pernet_operations+0x1d4/0x420 net/core/net_namespace.c:1212 register_pernet_subsys+0x24/0x40 net/core/net_namespace.c:1253 nsim_fib_init+0x12/0x70 [netdevsim] veth_get_link_ksettings+0x2b/0x50 [veth] do_one_initcall+0xd4/0x454 init/main.c:939 do_init_module+0xe0/0x330 kernel/module.c:3490 load_module+0x3c2f/0x4620 kernel/module.c:3841 __do_sys_finit_module+0x163/0x190 kernel/module.c:3931 do_syscall_64+0x72/0x2e0 arch/x86/entry/common.c:296 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 3534: save_stack+0x19/0x80 mm/kasan/common.c:69 set_track mm/kasan/common.c:77 [inline] __kasan_slab_free+0x130/0x180 mm/kasan/common.c:455 slab_free_hook mm/slub.c:1423 [inline] slab_free_freelist_hook mm/slub.c:1474 [inline] slab_free mm/slub.c:3016 [inline] kfree+0xe9/0x2d0 mm/slub.c:3957 ops_free net/core/net_namespace.c:151 [inline] ops_free_list.part.7+0x156/0x220 net/core/net_namespace.c:184 ops_free_list net/core/net_namespace.c:182 [inline] __unregister_pernet_operations net/core/net_namespace.c:1165 [inline] unregister_pernet_operations+0x221/0x2a0 net/core/net_namespace.c:1224 unregister_pernet_subsys+0x1d/0x30 net/core/net_namespace.c:1271 nsim_fib_exit+0x11/0x20 [netdevsim] nsim_module_exit+0x16/0x21 [netdevsim] __do_sys_delete_module kernel/module.c:1015 [inline] __se_sys_delete_module kernel/module.c:958 [inline] __x64_sys_delete_module+0x244/0x330 kernel/module.c:958 do_syscall_64+0x72/0x2e0 arch/x86/entry/common.c:296 entry_SYSCALL_64_after_hwframe+0x49/0xbe Reported-by: Hulk Robot Fixes: 59c84b9fcf42 ("netdevsim: Restore per-network namespace accounting for fib entries") Signed-off-by: YueHaibing Acked-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/netdevsim/fib.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c index f61d094746c0..1a251f76d09b 100644 --- a/drivers/net/netdevsim/fib.c +++ b/drivers/net/netdevsim/fib.c @@ -241,8 +241,8 @@ static struct pernet_operations nsim_fib_net_ops = { void nsim_fib_exit(void) { - unregister_pernet_subsys(&nsim_fib_net_ops); unregister_fib_notifier(&nsim_fib_nb); + unregister_pernet_subsys(&nsim_fib_net_ops); } int nsim_fib_init(void) @@ -258,6 +258,7 @@ int nsim_fib_init(void) err = register_fib_notifier(&nsim_fib_nb, nsim_fib_dump_inconsistent); if (err < 0) { pr_err("Failed to register fib notifier\n"); + unregister_pernet_subsys(&nsim_fib_net_ops); goto err_out; } From 07159f67c77134dfdfdbdf3d8f657f5890de5b7f Mon Sep 17 00:00:00 2001 From: Ran Wang Date: Tue, 17 Sep 2019 15:33:56 +0800 Subject: [PATCH 220/572] arm64: dts: lx2160a: Correct CPU core idle state name lx2160a support PW15 but not PW20, correct name to avoid confusing. Signed-off-by: Ran Wang Fixes: 00c5ce8ac023 ("arm64: dts: lx2160a: add cpu idle support") Acked-by: Li Yang Signed-off-by: Shawn Guo --- .../arm64/boot/dts/freescale/fsl-lx2160a.dtsi | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi index 408e0ecdce6a..b032f3890c8c 100644 --- a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi @@ -33,7 +33,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster0_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@1 { @@ -49,7 +49,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster0_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@100 { @@ -65,7 +65,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster1_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@101 { @@ -81,7 +81,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster1_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@200 { @@ -97,7 +97,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster2_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@201 { @@ -113,7 +113,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster2_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@300 { @@ -129,7 +129,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster3_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@301 { @@ -145,7 +145,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster3_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@400 { @@ -161,7 +161,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster4_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@401 { @@ -177,7 +177,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster4_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@500 { @@ -193,7 +193,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster5_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@501 { @@ -209,7 +209,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster5_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@600 { @@ -225,7 +225,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster6_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@601 { @@ -241,7 +241,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster6_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@700 { @@ -257,7 +257,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster7_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@701 { @@ -273,7 +273,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster7_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cluster0_l2: l2-cache0 { @@ -340,9 +340,9 @@ cache-level = <2>; }; - cpu_pw20: cpu-pw20 { + cpu_pw15: cpu-pw15 { compatible = "arm,idle-state"; - idle-state-name = "PW20"; + idle-state-name = "PW15"; arm,psci-suspend-param = <0x0>; entry-latency-us = <2000>; exit-latency-us = <2000>; From d79749f7716d9dc32fa2d5075f6ec29aac63c76d Mon Sep 17 00:00:00 2001 From: Miaoqing Pan Date: Thu, 29 Aug 2019 10:45:12 +0800 Subject: [PATCH 221/572] ath10k: fix latency issue for QCA988x (kvalo: cherry picked from commit 1340cc631bd00431e2f174525c971f119df9efa1 in wireless-drivers-next to wireless-drivers as this a frequently reported regression) Bad latency is found on QCA988x, the issue was introduced by commit 4504f0e5b571 ("ath10k: sdio: workaround firmware UART pin configuration bug"). If uart_pin_workaround is false, this change will set uart pin even if uart_print is false. Tested HW: QCA9880 Tested FW: 10.2.4-1.0-00037 Fixes: 4504f0e5b571 ("ath10k: sdio: workaround firmware UART pin configuration bug") Signed-off-by: Miaoqing Pan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index dc45d16e8d21..383d4fa555a8 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -2118,12 +2118,15 @@ static int ath10k_init_uart(struct ath10k *ar) return ret; } - if (!uart_print && ar->hw_params.uart_pin_workaround) { - ret = ath10k_bmi_write32(ar, hi_dbg_uart_txpin, - ar->hw_params.uart_pin); - if (ret) { - ath10k_warn(ar, "failed to set UART TX pin: %d", ret); - return ret; + if (!uart_print) { + if (ar->hw_params.uart_pin_workaround) { + ret = ath10k_bmi_write32(ar, hi_dbg_uart_txpin, + ar->hw_params.uart_pin); + if (ret) { + ath10k_warn(ar, "failed to set UART TX pin: %d", + ret); + return ret; + } } return 0; From 52f4d4043d1edc4e9e66ec79cae3e32cfe0e44d6 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Tue, 1 Oct 2019 19:20:29 -0500 Subject: [PATCH 222/572] ARM: dts: imx6q-logicpd: Re-Enable SNVS power key A previous patch disabled the SNVS power key by default which breaks the ability for the imx6q-logicpd board to wake from sleep. This patch re-enables this feature for this board. Fixes: 770856f0da5d ("ARM: dts: imx6qdl: Enable SNVS power key according to board design") Signed-off-by: Adam Ford Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6-logicpd-som.dtsi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/boot/dts/imx6-logicpd-som.dtsi b/arch/arm/boot/dts/imx6-logicpd-som.dtsi index 7ceae3573248..547fb141ec0c 100644 --- a/arch/arm/boot/dts/imx6-logicpd-som.dtsi +++ b/arch/arm/boot/dts/imx6-logicpd-som.dtsi @@ -207,6 +207,10 @@ vin-supply = <&sw1c_reg>; }; +&snvs_poweroff { + status = "okay"; +}; + &iomuxc { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_hog>; From 73a88e4ce31055c415f1ddb55e3f08c9393cf4e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 23 Sep 2019 10:47:22 +0200 Subject: [PATCH 223/572] drm/ttm: fix busy reference in ttm_mem_evict_first MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The busy BO might actually be already deleted, so grab only a list reference. Signed-off-by: Christian König Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/332877/ --- drivers/gpu/drm/ttm/ttm_bo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 20ff56f27aa4..ea59459c4128 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -878,11 +878,11 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev, if (!bo) { if (busy_bo) - ttm_bo_get(busy_bo); + kref_get(&busy_bo->list_kref); spin_unlock(&glob->lru_lock); ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket); if (busy_bo) - ttm_bo_put(busy_bo); + kref_put(&busy_bo->list_kref, ttm_bo_release_list); return ret; } From 941f2f72dbbe0cf8c2d6e0b180a8021a0ec477fa Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Thu, 12 Sep 2019 20:38:54 +0200 Subject: [PATCH 224/572] drm/ttm: Restore ttm prefaulting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 4daa4fba3a38 ("gpu: drm: ttm: Adding new return type vm_fault_t") broke TTM prefaulting. Since vmf_insert_mixed() typically always returns VM_FAULT_NOPAGE, prefaulting stops after the second PTE. Restore (almost) the original behaviour. Unfortunately we can no longer with the new vm_fault_t return type determine whether a prefaulting PTE insertion hit an already populated PTE, and terminate the insertion loop. Instead we continue with the pre-determined number of prefaults. Fixes: 4daa4fba3a38 ("gpu: drm: ttm: Adding new return type vm_fault_t") Cc: Souptick Joarder Cc: Christian König Signed-off-by: Thomas Hellstrom Reviewed-by: Christian König Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Christian König Link: https://patchwork.freedesktop.org/patch/330387/ --- drivers/gpu/drm/ttm/ttm_bo_vm.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 76eedb963693..46dc3de7e81b 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -278,15 +278,13 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) else ret = vmf_insert_pfn(&cvma, address, pfn); - /* - * Somebody beat us to this PTE or prefaulting to - * an already populated PTE, or prefaulting error. - */ - - if (unlikely((ret == VM_FAULT_NOPAGE && i > 0))) - break; - else if (unlikely(ret & VM_FAULT_ERROR)) - goto out_io_unlock; + /* Never error on prefaulted PTEs */ + if (unlikely((ret & VM_FAULT_ERROR))) { + if (i == 0) + goto out_io_unlock; + else + break; + } address += PAGE_SIZE; if (unlikely(++page_offset >= page_last)) From 7fbc899ddeaa9aaef0ba646529089149572c84ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 10 Oct 2019 13:24:17 +0200 Subject: [PATCH 225/572] drm/ttm: fix handling in ttm_bo_add_mem_to_lru MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We should not add the BO to the swap LRU when the new mem is fixed and the TTM object about to be destroyed. Signed-off-by: Christian König Reviewed-by: Kevin Wang Link: https://patchwork.freedesktop.org/patch/335246/ --- drivers/gpu/drm/ttm/ttm_bo.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index ea59459c4128..98819462f025 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -185,8 +185,9 @@ static void ttm_bo_add_mem_to_lru(struct ttm_buffer_object *bo, list_add_tail(&bo->lru, &man->lru[bo->priority]); kref_get(&bo->list_kref); - if (bo->ttm && !(bo->ttm->page_flags & - (TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SWAPPED))) { + if (!(man->flags & TTM_MEMTYPE_FLAG_FIXED) && bo->ttm && + !(bo->ttm->page_flags & (TTM_PAGE_FLAG_SG | + TTM_PAGE_FLAG_SWAPPED))) { list_add_tail(&bo->swap, &bdev->glob->swap_lru[bo->priority]); kref_get(&bo->list_kref); } From 71936a6d18c33c63b4e9e0359fb987306cbe9fae Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Thu, 3 Oct 2019 22:41:15 -0700 Subject: [PATCH 226/572] ARM: dts: vf610-zii-scu4-aib: Specify 'i2c-mux-idle-disconnect' Specify 'i2c-mux-idle-disconnect' for both I2C switches present on the board, since both are connected to the same parent bus and all of their children have the same I2C address. Fixes: ca4b4d373fcc ("ARM: dts: vf610: Add ZII SCU4 AIB board") Signed-off-by: Andrey Smirnov Cc: Shawn Guo Cc: Chris Healy Cc: Cory Tusar Cc: Jeff White Cc: Rick Ramstetter Cc: Lucas Stach Cc: Fabio Estevam Cc: linux-arm-kernel@lists.infradead.org Cc: devicetree@vger.kernel.org Cc: linux-kernel@vger.kernel.org Tested-by: Chris Healy Signed-off-by: Shawn Guo --- arch/arm/boot/dts/vf610-zii-scu4-aib.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/vf610-zii-scu4-aib.dts b/arch/arm/boot/dts/vf610-zii-scu4-aib.dts index dc8a5f37a1ef..c8ebb23c4e02 100644 --- a/arch/arm/boot/dts/vf610-zii-scu4-aib.dts +++ b/arch/arm/boot/dts/vf610-zii-scu4-aib.dts @@ -602,6 +602,7 @@ #address-cells = <1>; #size-cells = <0>; reg = <0x70>; + i2c-mux-idle-disconnect; sff0_i2c: i2c@1 { #address-cells = <1>; @@ -640,6 +641,7 @@ reg = <0x71>; #address-cells = <1>; #size-cells = <0>; + i2c-mux-idle-disconnect; sff5_i2c: i2c@1 { #address-cells = <1>; From 252b9e21bcf46b0d16f733f2e42b21fdc60addee Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Mon, 7 Oct 2019 08:43:42 +0800 Subject: [PATCH 227/572] ARM: dts: imx7s: Correct GPT's ipg clock source i.MX7S/D's GPT ipg clock should be from GPT clock root and controlled by CCM's GPT CCGR, using correct clock source for GPT ipg clock instead of IMX7D_CLK_DUMMY. Fixes: 3ef79ca6bd1d ("ARM: dts: imx7d: use imx7s.dtsi as base device tree") Signed-off-by: Anson Huang Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx7s.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi index 710f850e785c..e2e604d6ba0b 100644 --- a/arch/arm/boot/dts/imx7s.dtsi +++ b/arch/arm/boot/dts/imx7s.dtsi @@ -448,7 +448,7 @@ compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; reg = <0x302d0000 0x10000>; interrupts = ; - clocks = <&clks IMX7D_CLK_DUMMY>, + clocks = <&clks IMX7D_GPT1_ROOT_CLK>, <&clks IMX7D_GPT1_ROOT_CLK>; clock-names = "ipg", "per"; }; @@ -457,7 +457,7 @@ compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; reg = <0x302e0000 0x10000>; interrupts = ; - clocks = <&clks IMX7D_CLK_DUMMY>, + clocks = <&clks IMX7D_GPT2_ROOT_CLK>, <&clks IMX7D_GPT2_ROOT_CLK>; clock-names = "ipg", "per"; status = "disabled"; @@ -467,7 +467,7 @@ compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; reg = <0x302f0000 0x10000>; interrupts = ; - clocks = <&clks IMX7D_CLK_DUMMY>, + clocks = <&clks IMX7D_GPT3_ROOT_CLK>, <&clks IMX7D_GPT3_ROOT_CLK>; clock-names = "ipg", "per"; status = "disabled"; @@ -477,7 +477,7 @@ compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; reg = <0x30300000 0x10000>; interrupts = ; - clocks = <&clks IMX7D_CLK_DUMMY>, + clocks = <&clks IMX7D_GPT4_ROOT_CLK>, <&clks IMX7D_GPT4_ROOT_CLK>; clock-names = "ipg", "per"; status = "disabled"; From 832392db9747b9c95724d37fc6a5dadd3d4ec514 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 11 Oct 2019 13:22:58 +0300 Subject: [PATCH 228/572] platform/x86: i2c-multi-instantiate: Fail the probe if no IRQ provided For APIC case of interrupt we don't fail a ->probe() of the driver, which makes kernel to print a lot of warnings from the children. We have two options here: - switch to platform_get_irq_optional(), though it won't stop children to be probed and failed - fail the ->probe() of i2c-multi-instantiate Since the in reality we never had devices in the wild where IRQ resource is optional, the latter solution suits the best. Fixes: 799d3379a672 ("platform/x86: i2c-multi-instantiate: Introduce IOAPIC IRQ support") Reported-by: Ammy Yi Cc: Heikki Krogerus Cc: Hans de Goede Signed-off-by: Andy Shevchenko Reviewed-by: Heikki Krogerus --- drivers/platform/x86/i2c-multi-instantiate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/i2c-multi-instantiate.c b/drivers/platform/x86/i2c-multi-instantiate.c index ea68f6ed66ae..ffb8d5d1eb5f 100644 --- a/drivers/platform/x86/i2c-multi-instantiate.c +++ b/drivers/platform/x86/i2c-multi-instantiate.c @@ -108,6 +108,7 @@ static int i2c_multi_inst_probe(struct platform_device *pdev) if (ret < 0) { dev_dbg(dev, "Error requesting irq at index %d: %d\n", inst_data[i].irq_idx, ret); + goto error; } board_info.irq = ret; break; From b0759297f2c8dda455ff78a1d1ac95e261300ae3 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Tue, 8 Oct 2019 08:55:43 +0800 Subject: [PATCH 229/572] arm64: dts: imx8mq: Use correct clock for usdhc's ipg clk On i.MX8MQ, usdhc's ipg clock is from IMX8MQ_CLK_IPG_ROOT, assign it explicitly instead of using IMX8MQ_CLK_DUMMY. Fixes: 748f908cc882 ("arm64: add basic DTS for i.MX8MQ") Signed-off-by: Anson Huang Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mq.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq.dtsi index 04115ca6bfb5..55a3d1c4bdf0 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mq.dtsi @@ -850,7 +850,7 @@ "fsl,imx7d-usdhc"; reg = <0x30b40000 0x10000>; interrupts = ; - clocks = <&clk IMX8MQ_CLK_DUMMY>, + clocks = <&clk IMX8MQ_CLK_IPG_ROOT>, <&clk IMX8MQ_CLK_NAND_USDHC_BUS>, <&clk IMX8MQ_CLK_USDHC1_ROOT>; clock-names = "ipg", "ahb", "per"; @@ -867,7 +867,7 @@ "fsl,imx7d-usdhc"; reg = <0x30b50000 0x10000>; interrupts = ; - clocks = <&clk IMX8MQ_CLK_DUMMY>, + clocks = <&clk IMX8MQ_CLK_IPG_ROOT>, <&clk IMX8MQ_CLK_NAND_USDHC_BUS>, <&clk IMX8MQ_CLK_USDHC2_ROOT>; clock-names = "ipg", "ahb", "per"; From a6a40d5688f2264afd40574ee1c92e5f824b34ba Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Tue, 8 Oct 2019 08:55:44 +0800 Subject: [PATCH 230/572] arm64: dts: imx8mm: Use correct clock for usdhc's ipg clk On i.MX8MM, usdhc's ipg clock is from IMX8MM_CLK_IPG_ROOT, assign it explicitly instead of using IMX8MM_CLK_DUMMY. Fixes: a05ea40eb384 ("arm64: dts: imx: Add i.mx8mm dtsi support") Signed-off-by: Anson Huang Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm.dtsi b/arch/arm64/boot/dts/freescale/imx8mm.dtsi index 5f9d0da196e1..58b8cd06cae7 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm.dtsi @@ -694,7 +694,7 @@ compatible = "fsl,imx8mm-usdhc", "fsl,imx7d-usdhc"; reg = <0x30b40000 0x10000>; interrupts = ; - clocks = <&clk IMX8MM_CLK_DUMMY>, + clocks = <&clk IMX8MM_CLK_IPG_ROOT>, <&clk IMX8MM_CLK_NAND_USDHC_BUS>, <&clk IMX8MM_CLK_USDHC1_ROOT>; clock-names = "ipg", "ahb", "per"; @@ -710,7 +710,7 @@ compatible = "fsl,imx8mm-usdhc", "fsl,imx7d-usdhc"; reg = <0x30b50000 0x10000>; interrupts = ; - clocks = <&clk IMX8MM_CLK_DUMMY>, + clocks = <&clk IMX8MM_CLK_IPG_ROOT>, <&clk IMX8MM_CLK_NAND_USDHC_BUS>, <&clk IMX8MM_CLK_USDHC2_ROOT>; clock-names = "ipg", "ahb", "per"; @@ -724,7 +724,7 @@ compatible = "fsl,imx8mm-usdhc", "fsl,imx7d-usdhc"; reg = <0x30b60000 0x10000>; interrupts = ; - clocks = <&clk IMX8MM_CLK_DUMMY>, + clocks = <&clk IMX8MM_CLK_IPG_ROOT>, <&clk IMX8MM_CLK_NAND_USDHC_BUS>, <&clk IMX8MM_CLK_USDHC3_ROOT>; clock-names = "ipg", "ahb", "per"; From ea65aba85e8143c3854b66f40be09b6b79215006 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Tue, 8 Oct 2019 08:55:45 +0800 Subject: [PATCH 231/572] arm64: dts: imx8mn: Use correct clock for usdhc's ipg clk On i.MX8MN, usdhc's ipg clock is from IMX8MN_CLK_IPG_ROOT, assign it explicitly instead of using IMX8MN_CLK_DUMMY. Fixes: 6c3debcbae47 ("arm64: dts: freescale: Add i.MX8MN dtsi support") Signed-off-by: Anson Huang Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mn.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mn.dtsi b/arch/arm64/boot/dts/freescale/imx8mn.dtsi index 785f4c420fa4..98496f570720 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn.dtsi @@ -569,7 +569,7 @@ compatible = "fsl,imx8mn-usdhc", "fsl,imx7d-usdhc"; reg = <0x30b40000 0x10000>; interrupts = ; - clocks = <&clk IMX8MN_CLK_DUMMY>, + clocks = <&clk IMX8MN_CLK_IPG_ROOT>, <&clk IMX8MN_CLK_NAND_USDHC_BUS>, <&clk IMX8MN_CLK_USDHC1_ROOT>; clock-names = "ipg", "ahb", "per"; @@ -585,7 +585,7 @@ compatible = "fsl,imx8mn-usdhc", "fsl,imx7d-usdhc"; reg = <0x30b50000 0x10000>; interrupts = ; - clocks = <&clk IMX8MN_CLK_DUMMY>, + clocks = <&clk IMX8MN_CLK_IPG_ROOT>, <&clk IMX8MN_CLK_NAND_USDHC_BUS>, <&clk IMX8MN_CLK_USDHC2_ROOT>; clock-names = "ipg", "ahb", "per"; @@ -599,7 +599,7 @@ compatible = "fsl,imx8mn-usdhc", "fsl,imx7d-usdhc"; reg = <0x30b60000 0x10000>; interrupts = ; - clocks = <&clk IMX8MN_CLK_DUMMY>, + clocks = <&clk IMX8MN_CLK_IPG_ROOT>, <&clk IMX8MN_CLK_NAND_USDHC_BUS>, <&clk IMX8MN_CLK_USDHC3_ROOT>; clock-names = "ipg", "ahb", "per"; From 95993238b29b3f0f9a5eb9db84e0e38e5bfe76d8 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 8 Oct 2019 14:38:13 -0300 Subject: [PATCH 232/572] ARM: imx_v6_v7_defconfig: Enable CONFIG_DRM_MSM Since commit 2eba69071b4b ("drm/msm: Remove Kconfig default") the CONFIG_DRM_MSM option is no longer selected by default on i.MX5. Explicitly select CONFIG_DRM_MSM so that we can get GPU support by default on i.MX51 and i.MX53. Fixes: 2eba69071b4b ("drm/msm: Remove Kconfig default") Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/configs/imx_v6_v7_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig index 9bfffbe22d53..0f7381ee0c37 100644 --- a/arch/arm/configs/imx_v6_v7_defconfig +++ b/arch/arm/configs/imx_v6_v7_defconfig @@ -276,6 +276,7 @@ CONFIG_VIDEO_OV5640=m CONFIG_VIDEO_OV5645=m CONFIG_IMX_IPUV3_CORE=y CONFIG_DRM=y +CONFIG_DRM_MSM=y CONFIG_DRM_PANEL_LVDS=y CONFIG_DRM_PANEL_SIMPLE=y CONFIG_DRM_PANEL_SEIKO_43WVF1G=y From 153c5d8191c26165dbbd2646448ca7207f7796d0 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 14 Oct 2019 12:02:01 +0100 Subject: [PATCH 233/572] staging: wlan-ng: fix exit return when sme->key_idx >= NUM_WEPKEYS Currently the exit return path when sme->key_idx >= NUM_WEPKEYS is via label 'exit' and this checks if result is non-zero, however result has not been initialized and contains garbage. Fix this by replacing the goto with a return with the error code. Addresses-Coverity: ("Uninitialized scalar variable") Fixes: 0ca6d8e74489 ("Staging: wlan-ng: replace switch-case statements with macro") Signed-off-by: Colin Ian King Cc: stable Link: https://lore.kernel.org/r/20191014110201.9874-1-colin.king@canonical.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/wlan-ng/cfg80211.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/staging/wlan-ng/cfg80211.c b/drivers/staging/wlan-ng/cfg80211.c index eee1998c4b18..fac38c842ac5 100644 --- a/drivers/staging/wlan-ng/cfg80211.c +++ b/drivers/staging/wlan-ng/cfg80211.c @@ -469,10 +469,8 @@ static int prism2_connect(struct wiphy *wiphy, struct net_device *dev, /* Set the encryption - we only support wep */ if (is_wep) { if (sme->key) { - if (sme->key_idx >= NUM_WEPKEYS) { - err = -EINVAL; - goto exit; - } + if (sme->key_idx >= NUM_WEPKEYS) + return -EINVAL; result = prism2_domibset_uint32(wlandev, DIDMIB_DOT11SMT_PRIVACYTABLE_WEPDEFAULTKEYID, From ff229eee3d897f52bd001c841f2d3cce8853ecdc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 8 Oct 2019 10:32:04 -0700 Subject: [PATCH 234/572] hrtimer: Annotate lockless access to timer->base Followup to commit dd2261ed45aa ("hrtimer: Protect lockless access to timer->base") lock_hrtimer_base() fetches timer->base without lock exclusion. Compiler is allowed to read timer->base twice (even if considered dumb) which could end up trying to lock migration_base and return &migration_base. base = timer->base; if (likely(base != &migration_base)) { /* compiler reads timer->base again, and now (base == &migration_base) raw_spin_lock_irqsave(&base->cpu_base->lock, *flags); if (likely(base == timer->base)) return base; /* == &migration_base ! */ Similarly the write sides must use WRITE_ONCE() to avoid store tearing. Signed-off-by: Eric Dumazet Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20191008173204.180879-1-edumazet@google.com --- kernel/time/hrtimer.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 0d4dc241c0fb..65605530ee34 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -164,7 +164,7 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, struct hrtimer_clock_base *base; for (;;) { - base = timer->base; + base = READ_ONCE(timer->base); if (likely(base != &migration_base)) { raw_spin_lock_irqsave(&base->cpu_base->lock, *flags); if (likely(base == timer->base)) @@ -244,7 +244,7 @@ again: return base; /* See the comment in lock_hrtimer_base() */ - timer->base = &migration_base; + WRITE_ONCE(timer->base, &migration_base); raw_spin_unlock(&base->cpu_base->lock); raw_spin_lock(&new_base->cpu_base->lock); @@ -253,10 +253,10 @@ again: raw_spin_unlock(&new_base->cpu_base->lock); raw_spin_lock(&base->cpu_base->lock); new_cpu_base = this_cpu_base; - timer->base = base; + WRITE_ONCE(timer->base, base); goto again; } - timer->base = new_base; + WRITE_ONCE(timer->base, new_base); } else { if (new_cpu_base != this_cpu_base && hrtimer_check_target(timer, new_base)) { From 770597ecb2075390c01c425b8b1f551347f1bd70 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 5 Sep 2019 07:52:33 -0600 Subject: [PATCH 235/572] nvme-pci: Free tagset if no IO queues If a controller becomes degraded after a reset, we will not be able to perform any IO. We currently teardown previously created request queues and namespaces, but we had kept the unusable tagset. Free it after all queues using it have been released. Tested-by: Edmund Nadolski Reviewed-by: James Smart Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 78e403823646..5c04581899f4 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2490,14 +2490,20 @@ static void nvme_release_prp_pools(struct nvme_dev *dev) dma_pool_destroy(dev->prp_small_pool); } +static void nvme_free_tagset(struct nvme_dev *dev) +{ + if (dev->tagset.tags) + blk_mq_free_tag_set(&dev->tagset); + dev->ctrl.tagset = NULL; +} + static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) { struct nvme_dev *dev = to_nvme_dev(ctrl); nvme_dbbuf_dma_free(dev); put_device(dev->dev); - if (dev->tagset.tags) - blk_mq_free_tag_set(&dev->tagset); + nvme_free_tagset(dev); if (dev->ctrl.admin_q) blk_put_queue(dev->ctrl.admin_q); kfree(dev->queues); @@ -2616,6 +2622,7 @@ static void nvme_reset_work(struct work_struct *work) nvme_kill_queues(&dev->ctrl); nvme_remove_namespaces(&dev->ctrl); new_state = NVME_CTRL_ADMIN_ONLY; + nvme_free_tagset(dev); } else { nvme_start_queues(&dev->ctrl); nvme_wait_freeze(&dev->ctrl); From 5d02a5c1d6e14534ca4729b055c89a2cd022ca00 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 3 Sep 2019 09:22:24 -0600 Subject: [PATCH 236/572] nvme: Remove ADMIN_ONLY state The admin only state was intended to fence off actions that don't apply to a non-IO capable controller. The only actual user of this is the scan_work, and pci was the only transport to ever set this state. The consequence of having this state is placing an additional burden on every other action that applies to both live and admin only controllers. Remove the admin only state and place the admin only burden on the only place that actually cares: scan_work. This also prepares to make it easier to temporarily pause a LIVE state so that we don't need to remember which state the controller had been in prior to the pause. Tested-by: Edmund Nadolski Reviewed-by: James Smart Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 23 ++++------------------- drivers/nvme/host/fabrics.h | 3 +-- drivers/nvme/host/nvme.h | 1 - drivers/nvme/host/pci.c | 21 ++++++--------------- 4 files changed, 11 insertions(+), 37 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index ef1d8f81f69e..e451e77005dc 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -116,7 +116,7 @@ static void nvme_queue_scan(struct nvme_ctrl *ctrl) /* * Only new queue scan work when admin and IO queues are both alive */ - if (ctrl->state == NVME_CTRL_LIVE) + if (ctrl->state == NVME_CTRL_LIVE && ctrl->tagset) queue_work(nvme_wq, &ctrl->scan_work); } @@ -137,8 +137,7 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl) ret = nvme_reset_ctrl(ctrl); if (!ret) { flush_work(&ctrl->reset_work); - if (ctrl->state != NVME_CTRL_LIVE && - ctrl->state != NVME_CTRL_ADMIN_ONLY) + if (ctrl->state != NVME_CTRL_LIVE) ret = -ENETRESET; } @@ -315,15 +314,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, old_state = ctrl->state; switch (new_state) { - case NVME_CTRL_ADMIN_ONLY: - switch (old_state) { - case NVME_CTRL_CONNECTING: - changed = true; - /* FALLTHRU */ - default: - break; - } - break; case NVME_CTRL_LIVE: switch (old_state) { case NVME_CTRL_NEW: @@ -339,7 +329,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, switch (old_state) { case NVME_CTRL_NEW: case NVME_CTRL_LIVE: - case NVME_CTRL_ADMIN_ONLY: changed = true; /* FALLTHRU */ default: @@ -359,7 +348,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, case NVME_CTRL_DELETING: switch (old_state) { case NVME_CTRL_LIVE: - case NVME_CTRL_ADMIN_ONLY: case NVME_CTRL_RESETTING: case NVME_CTRL_CONNECTING: changed = true; @@ -2873,7 +2861,6 @@ static int nvme_dev_open(struct inode *inode, struct file *file) switch (ctrl->state) { case NVME_CTRL_LIVE: - case NVME_CTRL_ADMIN_ONLY: break; default: return -EWOULDBLOCK; @@ -3167,7 +3154,6 @@ static ssize_t nvme_sysfs_show_state(struct device *dev, static const char *const state_name[] = { [NVME_CTRL_NEW] = "new", [NVME_CTRL_LIVE] = "live", - [NVME_CTRL_ADMIN_ONLY] = "only-admin", [NVME_CTRL_RESETTING] = "resetting", [NVME_CTRL_CONNECTING] = "connecting", [NVME_CTRL_DELETING] = "deleting", @@ -3678,11 +3664,10 @@ static void nvme_scan_work(struct work_struct *work) struct nvme_id_ctrl *id; unsigned nn; - if (ctrl->state != NVME_CTRL_LIVE) + /* No tagset on a live ctrl means IO queues could not created */ + if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset) return; - WARN_ON_ONCE(!ctrl->tagset); - if (test_and_clear_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) { dev_info(ctrl->device, "rescanning namespaces.\n"); nvme_clear_changed_ns_log(ctrl); diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index 93f08d77c896..a0ec40ab62ee 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -182,8 +182,7 @@ bool nvmf_ip_options_match(struct nvme_ctrl *ctrl, static inline bool nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq, bool queue_live) { - if (likely(ctrl->state == NVME_CTRL_LIVE || - ctrl->state == NVME_CTRL_ADMIN_ONLY)) + if (likely(ctrl->state == NVME_CTRL_LIVE)) return true; return __nvmf_check_ready(ctrl, rq, queue_live); } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 38a83ef5bcd3..2ba577271ada 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -161,7 +161,6 @@ static inline u16 nvme_req_qid(struct request *req) enum nvme_ctrl_state { NVME_CTRL_NEW, NVME_CTRL_LIVE, - NVME_CTRL_ADMIN_ONLY, /* Only admin queue live */ NVME_CTRL_RESETTING, NVME_CTRL_CONNECTING, NVME_CTRL_DELETING, diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 5c04581899f4..e7e79d6af9ba 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2263,10 +2263,7 @@ static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode) return true; } -/* - * return error value only when tagset allocation failed - */ -static int nvme_dev_add(struct nvme_dev *dev) +static void nvme_dev_add(struct nvme_dev *dev) { int ret; @@ -2296,7 +2293,7 @@ static int nvme_dev_add(struct nvme_dev *dev) if (ret) { dev_warn(dev->ctrl.device, "IO queues tagset allocation failed %d\n", ret); - return ret; + return; } dev->ctrl.tagset = &dev->tagset; } else { @@ -2307,7 +2304,6 @@ static int nvme_dev_add(struct nvme_dev *dev) } nvme_dbbuf_set(dev); - return 0; } static int nvme_pci_enable(struct nvme_dev *dev) @@ -2527,7 +2523,6 @@ static void nvme_reset_work(struct work_struct *work) container_of(work, struct nvme_dev, ctrl.reset_work); bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL); int result; - enum nvme_ctrl_state new_state = NVME_CTRL_LIVE; if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING)) { result = -ENODEV; @@ -2621,14 +2616,11 @@ static void nvme_reset_work(struct work_struct *work) dev_warn(dev->ctrl.device, "IO queues not created\n"); nvme_kill_queues(&dev->ctrl); nvme_remove_namespaces(&dev->ctrl); - new_state = NVME_CTRL_ADMIN_ONLY; nvme_free_tagset(dev); } else { nvme_start_queues(&dev->ctrl); nvme_wait_freeze(&dev->ctrl); - /* hit this only when allocate tagset fails */ - if (nvme_dev_add(dev)) - new_state = NVME_CTRL_ADMIN_ONLY; + nvme_dev_add(dev); nvme_unfreeze(&dev->ctrl); } @@ -2636,9 +2628,9 @@ static void nvme_reset_work(struct work_struct *work) * If only admin queue live, keep it to do further investigation or * recovery. */ - if (!nvme_change_ctrl_state(&dev->ctrl, new_state)) { + if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) { dev_warn(dev->ctrl.device, - "failed to mark controller state %d\n", new_state); + "failed to mark controller live state\n"); result = -ENODEV; goto out; } @@ -2945,8 +2937,7 @@ static int nvme_suspend(struct device *dev) nvme_wait_freeze(ctrl); nvme_sync_queues(ctrl); - if (ctrl->state != NVME_CTRL_LIVE && - ctrl->state != NVME_CTRL_ADMIN_ONLY) + if (ctrl->state != NVME_CTRL_LIVE) goto unfreeze; ret = nvme_get_power_state(ctrl, &ndev->last_ps); From 92b98e88d59ab17e65f261fbb5db272143ccf414 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 5 Sep 2019 08:09:33 -0600 Subject: [PATCH 237/572] nvme: Restart request timers in resetting state A controller in the resetting state has not yet completed its recovery actions. The pci and fc transports were already handling this, so update the remaining transports to not attempt additional recovery in this state. Instead, just restart the request timer. Tested-by: Edmund Nadolski Reviewed-by: James Smart Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/rdma.c | 8 ++++++++ drivers/nvme/host/tcp.c | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 4d280160dd3f..f19a28b4e997 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1701,6 +1701,14 @@ nvme_rdma_timeout(struct request *rq, bool reserved) dev_warn(ctrl->ctrl.device, "I/O %d QID %d timeout\n", rq->tag, nvme_rdma_queue_idx(queue)); + /* + * Restart the timer if a controller reset is already scheduled. Any + * timed out commands would be handled before entering the connecting + * state. + */ + if (ctrl->ctrl.state == NVME_CTRL_RESETTING) + return BLK_EH_RESET_TIMER; + if (ctrl->ctrl.state != NVME_CTRL_LIVE) { /* * Teardown immediately if controller times out while starting diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 385a5212c10f..33de2fddfbb2 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2044,6 +2044,14 @@ nvme_tcp_timeout(struct request *rq, bool reserved) struct nvme_tcp_ctrl *ctrl = req->queue->ctrl; struct nvme_tcp_cmd_pdu *pdu = req->pdu; + /* + * Restart the timer if a controller reset is already scheduled. Any + * timed out commands would be handled before entering the connecting + * state. + */ + if (ctrl->ctrl.state == NVME_CTRL_RESETTING) + return BLK_EH_RESET_TIMER; + dev_warn(ctrl->ctrl.device, "queue %d: timeout request %#x type %d\n", nvme_tcp_queue_id(req->queue), rq->tag, pdu->hdr.type); From 4c75f877853cfa81b12374a07208e07b077f39b8 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 6 Sep 2019 11:23:08 -0600 Subject: [PATCH 238/572] nvme: Prevent resets during paused controller state A paused controller is doing critical internal activation work in the background. Prevent subsequent controller resets from occurring during this period by setting the controller state to RESETTING first. A helper function, nvme_try_sched_reset_work(), is introduced for these paths so they may continue with scheduling the reset_work after they've completed their uninterruptible critical section. Tested-by: Edmund Nadolski Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index e451e77005dc..e94fa693dd4b 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -120,6 +120,21 @@ static void nvme_queue_scan(struct nvme_ctrl *ctrl) queue_work(nvme_wq, &ctrl->scan_work); } +/* + * Use this function to proceed with scheduling reset_work for a controller + * that had previously been set to the resetting state. This is intended for + * code paths that can't be interrupted by other reset attempts. A hot removal + * may prevent this from succeeding. + */ +static int nvme_try_sched_reset(struct nvme_ctrl *ctrl) +{ + if (ctrl->state != NVME_CTRL_RESETTING) + return -EBUSY; + if (!queue_work(nvme_reset_wq, &ctrl->reset_work)) + return -EBUSY; + return 0; +} + int nvme_reset_ctrl(struct nvme_ctrl *ctrl) { if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) @@ -3828,13 +3843,13 @@ static void nvme_fw_act_work(struct work_struct *work) if (time_after(jiffies, fw_act_timeout)) { dev_warn(ctrl->device, "Fw activation timeout, reset controller\n"); - nvme_reset_ctrl(ctrl); - break; + nvme_try_sched_reset(ctrl); + return; } msleep(100); } - if (ctrl->state != NVME_CTRL_LIVE) + if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE)) return; nvme_start_queues(ctrl); @@ -3854,7 +3869,13 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result) nvme_queue_scan(ctrl); break; case NVME_AER_NOTICE_FW_ACT_STARTING: - queue_work(nvme_wq, &ctrl->fw_act_work); + /* + * We are (ab)using the RESETTING state to prevent subsequent + * recovery actions from interfering with the controller's + * firmware activation. + */ + if (nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) + queue_work(nvme_wq, &ctrl->fw_act_work); break; #ifdef CONFIG_NVME_MULTIPATH case NVME_AER_NOTICE_ANA: From c1ac9a4b0797ca8bb4470f863a5f78ef1ab13bed Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 4 Sep 2019 10:06:11 -0600 Subject: [PATCH 239/572] nvme: Wait for reset state when required Prevent simultaneous controller disabling/enabling tasks from interfering with each other through a function to wait until the task successfully transitioned the controller to the RESETTING state. This ensures disabling the controller will not be interrupted by another reset path, otherwise a concurrent reset may leave the controller in the wrong state. Tested-by: Edmund Nadolski Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 41 +++++++++++++++++++++++++++++++++-- drivers/nvme/host/nvme.h | 4 ++++ drivers/nvme/host/pci.c | 46 +++++++++++++++++++++++++++------------- 3 files changed, 74 insertions(+), 17 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index e94fa693dd4b..fa7ba09dca77 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -126,7 +126,7 @@ static void nvme_queue_scan(struct nvme_ctrl *ctrl) * code paths that can't be interrupted by other reset attempts. A hot removal * may prevent this from succeeding. */ -static int nvme_try_sched_reset(struct nvme_ctrl *ctrl) +int nvme_try_sched_reset(struct nvme_ctrl *ctrl) { if (ctrl->state != NVME_CTRL_RESETTING) return -EBUSY; @@ -134,6 +134,7 @@ static int nvme_try_sched_reset(struct nvme_ctrl *ctrl) return -EBUSY; return 0; } +EXPORT_SYMBOL_GPL(nvme_try_sched_reset); int nvme_reset_ctrl(struct nvme_ctrl *ctrl) { @@ -384,8 +385,10 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, break; } - if (changed) + if (changed) { ctrl->state = new_state; + wake_up_all(&ctrl->state_wq); + } spin_unlock_irqrestore(&ctrl->lock, flags); if (changed && ctrl->state == NVME_CTRL_LIVE) @@ -394,6 +397,39 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, } EXPORT_SYMBOL_GPL(nvme_change_ctrl_state); +/* + * Returns true for sink states that can't ever transition back to live. + */ +static bool nvme_state_terminal(struct nvme_ctrl *ctrl) +{ + switch (ctrl->state) { + case NVME_CTRL_NEW: + case NVME_CTRL_LIVE: + case NVME_CTRL_RESETTING: + case NVME_CTRL_CONNECTING: + return false; + case NVME_CTRL_DELETING: + case NVME_CTRL_DEAD: + return true; + default: + WARN_ONCE(1, "Unhandled ctrl state:%d", ctrl->state); + return true; + } +} + +/* + * Waits for the controller state to be resetting, or returns false if it is + * not possible to ever transition to that state. + */ +bool nvme_wait_reset(struct nvme_ctrl *ctrl) +{ + wait_event(ctrl->state_wq, + nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING) || + nvme_state_terminal(ctrl)); + return ctrl->state == NVME_CTRL_RESETTING; +} +EXPORT_SYMBOL_GPL(nvme_wait_reset); + static void nvme_free_ns_head(struct kref *ref) { struct nvme_ns_head *head = @@ -3998,6 +4034,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, INIT_WORK(&ctrl->async_event_work, nvme_async_event_work); INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work); INIT_WORK(&ctrl->delete_work, nvme_delete_ctrl_work); + init_waitqueue_head(&ctrl->state_wq); INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work); memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd)); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 2ba577271ada..22e8401352c2 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -15,6 +15,7 @@ #include #include #include +#include #include @@ -198,6 +199,7 @@ struct nvme_ctrl { struct cdev cdev; struct work_struct reset_work; struct work_struct delete_work; + wait_queue_head_t state_wq; struct nvme_subsystem *subsys; struct list_head subsys_entry; @@ -448,6 +450,7 @@ void nvme_complete_rq(struct request *req); bool nvme_cancel_request(struct request *req, void *data, bool reserved); bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, enum nvme_ctrl_state new_state); +bool nvme_wait_reset(struct nvme_ctrl *ctrl); int nvme_disable_ctrl(struct nvme_ctrl *ctrl); int nvme_enable_ctrl(struct nvme_ctrl *ctrl); int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl); @@ -498,6 +501,7 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count); void nvme_stop_keep_alive(struct nvme_ctrl *ctrl); int nvme_reset_ctrl(struct nvme_ctrl *ctrl); int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl); +int nvme_try_sched_reset(struct nvme_ctrl *ctrl); int nvme_delete_ctrl(struct nvme_ctrl *ctrl); int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index e7e79d6af9ba..4b181969c432 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2463,6 +2463,14 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) mutex_unlock(&dev->shutdown_lock); } +static int nvme_disable_prepare_reset(struct nvme_dev *dev, bool shutdown) +{ + if (!nvme_wait_reset(&dev->ctrl)) + return -EBUSY; + nvme_dev_disable(dev, shutdown); + return 0; +} + static int nvme_setup_prp_pools(struct nvme_dev *dev) { dev->prp_page_pool = dma_pool_create("prp list page", dev->dev, @@ -2510,6 +2518,11 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) static void nvme_remove_dead_ctrl(struct nvme_dev *dev) { + /* + * Set state to deleting now to avoid blocking nvme_wait_reset(), which + * may be holding this pci_dev's device lock. + */ + nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING); nvme_get_ctrl(&dev->ctrl); nvme_dev_disable(dev, false); nvme_kill_queues(&dev->ctrl); @@ -2835,19 +2848,28 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) static void nvme_reset_prepare(struct pci_dev *pdev) { struct nvme_dev *dev = pci_get_drvdata(pdev); - nvme_dev_disable(dev, false); + + /* + * We don't need to check the return value from waiting for the reset + * state as pci_dev device lock is held, making it impossible to race + * with ->remove(). + */ + nvme_disable_prepare_reset(dev, false); + nvme_sync_queues(&dev->ctrl); } static void nvme_reset_done(struct pci_dev *pdev) { struct nvme_dev *dev = pci_get_drvdata(pdev); - nvme_reset_ctrl_sync(&dev->ctrl); + + if (!nvme_try_sched_reset(&dev->ctrl)) + flush_work(&dev->ctrl.reset_work); } static void nvme_shutdown(struct pci_dev *pdev) { struct nvme_dev *dev = pci_get_drvdata(pdev); - nvme_dev_disable(dev, true); + nvme_disable_prepare_reset(dev, true); } /* @@ -2900,7 +2922,7 @@ static int nvme_resume(struct device *dev) if (ndev->last_ps == U32_MAX || nvme_set_power_state(ctrl, ndev->last_ps) != 0) - nvme_reset_ctrl(ctrl); + return nvme_try_sched_reset(&ndev->ctrl); return 0; } @@ -2928,10 +2950,8 @@ static int nvme_suspend(struct device *dev) */ if (pm_suspend_via_firmware() || !ctrl->npss || !pcie_aspm_enabled(pdev) || - (ndev->ctrl.quirks & NVME_QUIRK_SIMPLE_SUSPEND)) { - nvme_dev_disable(ndev, true); - return 0; - } + (ndev->ctrl.quirks & NVME_QUIRK_SIMPLE_SUSPEND)) + return nvme_disable_prepare_reset(ndev, true); nvme_start_freeze(ctrl); nvme_wait_freeze(ctrl); @@ -2963,9 +2983,8 @@ static int nvme_suspend(struct device *dev) * Clearing npss forces a controller reset on resume. The * correct value will be resdicovered then. */ - nvme_dev_disable(ndev, true); + ret = nvme_disable_prepare_reset(ndev, true); ctrl->npss = 0; - ret = 0; } unfreeze: nvme_unfreeze(ctrl); @@ -2975,9 +2994,7 @@ unfreeze: static int nvme_simple_suspend(struct device *dev) { struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev)); - - nvme_dev_disable(ndev, true); - return 0; + return nvme_disable_prepare_reset(ndev, true); } static int nvme_simple_resume(struct device *dev) @@ -2985,8 +3002,7 @@ static int nvme_simple_resume(struct device *dev) struct pci_dev *pdev = to_pci_dev(dev); struct nvme_dev *ndev = pci_get_drvdata(pdev); - nvme_reset_ctrl(&ndev->ctrl); - return 0; + return nvme_try_sched_reset(&ndev->ctrl); } static const struct dev_pm_ops nvme_dev_pm_ops = { From ac1c4e18858cb1da6abe1932a7ca001c37f62372 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 10 Oct 2019 17:34:12 +0200 Subject: [PATCH 240/572] nvme-tcp: Initialize sk->sk_ll_usec only with NET_RX_BUSY_POLL The access to sk->sk_ll_usec should be hidden behind CONFIG_NET_RX_BUSY_POLL like the definition of sk_ll_usec. Put access to ->sk_ll_usec behind CONFIG_NET_RX_BUSY_POLL. Fixes: 1a9460cef5711 ("nvme-tcp: support simple polling") Reviewed-by: Christoph Hellwig Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 33de2fddfbb2..820dac10fa9e 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1386,7 +1386,9 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, queue->sock->sk->sk_data_ready = nvme_tcp_data_ready; queue->sock->sk->sk_state_change = nvme_tcp_state_change; queue->sock->sk->sk_write_space = nvme_tcp_write_space; +#ifdef CONFIG_NET_RX_BUSY_POLL queue->sock->sk->sk_ll_usec = 1; +#endif write_unlock_bh(&queue->sock->sk->sk_callback_lock); return 0; From 2abd839aa7e615f2bbc50c8ba7deb9e40d186768 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 4 Oct 2019 14:46:24 +0100 Subject: [PATCH 241/572] kmemleak: Do not corrupt the object_list during clean-up In case of an error (e.g. memory pool too small), kmemleak disables itself and cleans up the already allocated metadata objects. However, if this happens early before the RCU callback mechanism is available, put_object() skips call_rcu() and frees the object directly. This is not safe with the RCU list traversal in __kmemleak_do_cleanup(). Change the list traversal in __kmemleak_do_cleanup() to list_for_each_entry_safe() and remove the rcu_read_{lock,unlock} since the kmemleak is already disabled at this point. In addition, avoid an unnecessary metadata object rb-tree look-up since it already has the struct kmemleak_object pointer. Fixes: c5665868183f ("mm: kmemleak: use the memory pool for early allocations") Reported-by: Alexey Kardashevskiy Reported-by: Marc Dionne Reported-by: Ted Ts'o Cc: Andrew Morton Signed-off-by: Catalin Marinas Signed-off-by: Linus Torvalds --- mm/kmemleak.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 03a8d84badad..244607663363 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -526,6 +526,16 @@ static struct kmemleak_object *find_and_get_object(unsigned long ptr, int alias) return object; } +/* + * Remove an object from the object_tree_root and object_list. Must be called + * with the kmemleak_lock held _if_ kmemleak is still enabled. + */ +static void __remove_object(struct kmemleak_object *object) +{ + rb_erase(&object->rb_node, &object_tree_root); + list_del_rcu(&object->object_list); +} + /* * Look up an object in the object search tree and remove it from both * object_tree_root and object_list. The returned object's use_count should be @@ -538,10 +548,8 @@ static struct kmemleak_object *find_and_remove_object(unsigned long ptr, int ali write_lock_irqsave(&kmemleak_lock, flags); object = lookup_object(ptr, alias); - if (object) { - rb_erase(&object->rb_node, &object_tree_root); - list_del_rcu(&object->object_list); - } + if (object) + __remove_object(object); write_unlock_irqrestore(&kmemleak_lock, flags); return object; @@ -1834,12 +1842,16 @@ static const struct file_operations kmemleak_fops = { static void __kmemleak_do_cleanup(void) { - struct kmemleak_object *object; + struct kmemleak_object *object, *tmp; - rcu_read_lock(); - list_for_each_entry_rcu(object, &object_list, object_list) - delete_object_full(object->pointer); - rcu_read_unlock(); + /* + * Kmemleak has already been disabled, no need for RCU list traversal + * or kmemleak_lock held. + */ + list_for_each_entry_safe(object, tmp, &object_list, object_list) { + __remove_object(object); + __delete_object(object); + } } /* From 6595d144decec396bf2e2efee27e50634a4b627f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 9 Oct 2019 20:21:05 +0100 Subject: [PATCH 242/572] xtensa: fix {get,put}_user() for 64bit values First of all, on short copies __copy_{to,from}_user() return the amount of bytes left uncopied, *not* -EFAULT. get_user() and put_user() are expected to return -EFAULT on failure. Another problem is get_user(v32, (__u64 __user *)p); that should fetch 64bit value and the assign it to v32, truncating it in process. Current code, OTOH, reads 8 bytes of data and stores them at the address of v32, stomping on the 4 bytes that follow v32 itself. Signed-off-by: Al Viro Signed-off-by: Max Filippov --- arch/xtensa/include/asm/uaccess.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/xtensa/include/asm/uaccess.h b/arch/xtensa/include/asm/uaccess.h index 6792928ba84a..f568c00392ec 100644 --- a/arch/xtensa/include/asm/uaccess.h +++ b/arch/xtensa/include/asm/uaccess.h @@ -100,7 +100,7 @@ do { \ case 4: __put_user_asm(x, ptr, retval, 4, "s32i", __cb); break; \ case 8: { \ __typeof__(*ptr) __v64 = x; \ - retval = __copy_to_user(ptr, &__v64, 8); \ + retval = __copy_to_user(ptr, &__v64, 8) ? -EFAULT : 0; \ break; \ } \ default: __put_user_bad(); \ @@ -198,7 +198,16 @@ do { \ case 1: __get_user_asm(x, ptr, retval, 1, "l8ui", __cb); break;\ case 2: __get_user_asm(x, ptr, retval, 2, "l16ui", __cb); break;\ case 4: __get_user_asm(x, ptr, retval, 4, "l32i", __cb); break;\ - case 8: retval = __copy_from_user(&x, ptr, 8); break; \ + case 8: { \ + u64 __x; \ + if (unlikely(__copy_from_user(&__x, ptr, 8))) { \ + retval = -EFAULT; \ + (x) = 0; \ + } else { \ + (x) = *(__force __typeof__((ptr)))&__x; \ + } \ + break; \ + } \ default: (x) = __get_user_bad(); \ } \ } while (0) From eda6d764ac33d9ce8d656fa381f05b3feae09085 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 14 Oct 2019 16:15:15 +0100 Subject: [PATCH 243/572] drm/panfrost: Add missing GPU feature registers Three feature registers were declared but never actually read from the GPU. Add THREAD_MAX_THREADS, THREAD_MAX_WORKGROUP_SIZE and THREAD_MAX_BARRIER_SIZE so that the complete set are available. Fixes: 4bced8bea094 ("drm/panfrost: Export all GPU feature registers") Signed-off-by: Steven Price Signed-off-by: Rob Herring Link: https://patchwork.freedesktop.org/patch/msgid/20191014151515.13839-1-steven.price@arm.com --- drivers/gpu/drm/panfrost/panfrost_gpu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c index f67ed925c0ef..8822ec13a0d6 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gpu.c +++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c @@ -208,6 +208,9 @@ static void panfrost_gpu_init_features(struct panfrost_device *pfdev) pfdev->features.mem_features = gpu_read(pfdev, GPU_MEM_FEATURES); pfdev->features.mmu_features = gpu_read(pfdev, GPU_MMU_FEATURES); pfdev->features.thread_features = gpu_read(pfdev, GPU_THREAD_FEATURES); + pfdev->features.max_threads = gpu_read(pfdev, GPU_THREAD_MAX_THREADS); + pfdev->features.thread_max_workgroup_sz = gpu_read(pfdev, GPU_THREAD_MAX_WORKGROUP_SIZE); + pfdev->features.thread_max_barrier_sz = gpu_read(pfdev, GPU_THREAD_MAX_BARRIER_SIZE); pfdev->features.coherency_features = gpu_read(pfdev, GPU_COHERENCY_FEATURES); for (i = 0; i < 4; i++) pfdev->features.texture_features[i] = gpu_read(pfdev, GPU_TEXTURE_FEATURES(i)); From 2f01b7864188b895e0f18250b650328da4fe3cb2 Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Mon, 23 Sep 2019 08:45:17 +0800 Subject: [PATCH 244/572] riscv: remove the switch statement in do_trap_break() To make the code more straightforward, replace the switch statement with an if statement. Suggested-by: Paul Walmsley Signed-off-by: Vincent Chen [paul.walmsley@sifive.com: cleaned up patch description; updated to apply] Link: https://lore.kernel.org/linux-riscv/20190927224711.GI4700@infradead.org/ Link: https://lore.kernel.org/linux-riscv/CABvJ_xiHJSB7P5QekuLRP=LBPzXXghAfuUpPUYb=a_HbnOQ6BA@mail.gmail.com/ Link: https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org/thread/VDCU2WOB6KQISREO4V5DTXEI2M7VOV55/ Cc: Christoph Hellwig Signed-off-by: Paul Walmsley --- arch/riscv/kernel/traps.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 93742df9067f..1ac75f7d0bff 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -124,24 +124,24 @@ static inline unsigned long get_break_insn_length(unsigned long pc) asmlinkage void do_trap_break(struct pt_regs *regs) { - if (!user_mode(regs)) { + if (user_mode(regs)) { + force_sig_fault(SIGTRAP, TRAP_BRKPT, + (void __user *)(regs->sepc)); + return; + } +#ifdef CONFIG_GENERIC_BUG + { enum bug_trap_type type; type = report_bug(regs->sepc, regs); - switch (type) { -#ifdef CONFIG_GENERIC_BUG - case BUG_TRAP_TYPE_WARN: + if (type == BUG_TRAP_TYPE_WARN) { regs->sepc += get_break_insn_length(regs->sepc); return; - case BUG_TRAP_TYPE_BUG: -#endif /* CONFIG_GENERIC_BUG */ - default: - die(regs, "Kernel BUG"); } - } else { - force_sig_fault(SIGTRAP, TRAP_BRKPT, - (void __user *)(regs->sepc)); } +#endif /* CONFIG_GENERIC_BUG */ + + die(regs, "Kernel BUG"); } #ifdef CONFIG_GENERIC_BUG From 2993c9b04e616df0848b655d7202a707a70fc876 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Wed, 9 Oct 2019 12:42:44 -0700 Subject: [PATCH 245/572] riscv: dts: HiFive Unleashed: add default chosen/stdout-path Add a default "stdout-path" to the kernel DTS file, as is present in many of the board DTS files elsewhere in the kernel tree. With this line present, earlyconsole can be enabled by simply passing "earlycon" on the kernel command line. No specific device details are necessary, since the kernel will use the stdout-path as the default. Signed-off-by: Paul Walmsley Reviewed-by: Atish Patra --- arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts index 104d334511cd..88cfcb96bf23 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts @@ -13,6 +13,7 @@ compatible = "sifive,hifive-unleashed-a00", "sifive,fu540-c000"; chosen { + stdout-path = "serial0"; }; cpus { From 4c8eb19cf9dc5fcc489757acbf93be90baf25848 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Thu, 10 Oct 2019 15:57:58 -0700 Subject: [PATCH 246/572] riscv: tlbflush: remove confusing comment on local_flush_tlb_all() Remove a confusing comment on our local_flush_tlb_all() implementation. Per an internal discussion with Andrew, while it's true that the fence.i is not necessary, it's not the case that an sfence.vma implies a fence.i. We also drop the section about "flush[ing] the entire local TLB" to better align with the language in section 4.2.1 "Supervisor Memory-Management Fence Instruction" of the RISC-V Privileged Specification v20190608. Fixes: c901e45a999a1 ("RISC-V: `sfence.vma` orderes the instruction cache") Reported-by: Alan Kao Cc: Palmer Dabbelt Cc: Andrew Waterman Signed-off-by: Paul Walmsley --- arch/riscv/include/asm/tlbflush.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 37ae4e367ad2..f02188a5b0f4 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -10,10 +10,6 @@ #include #include -/* - * Flush entire local TLB. 'sfence.vma' implicitly fences with the instruction - * cache as well, so a 'fence.i' is not necessary. - */ static inline void local_flush_tlb_all(void) { __asm__ __volatile__ ("sfence.vma" : : : "memory"); From 81dde26de9c08bb04c4962a15608778aaffb3cf9 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 14 Oct 2019 21:41:24 +0200 Subject: [PATCH 247/572] firmware: dmi: Fix unlikely out-of-bounds read in save_mem_devices Before reading the Extended Size field, we should ensure it fits in the DMI record. There is already a record length check but it does not cover that field. It would take a seriously corrupted DMI table to hit that bug, so no need to worry, but we should still fix it. Signed-off-by: Jean Delvare Fixes: 6deae96b42eb ("firmware, DMI: Add function to look up a handle and return DIMM size") Cc: Tony Luck Cc: Borislav Petkov --- drivers/firmware/dmi_scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index 35ed56b9c34f..1e21fc3e9851 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -408,7 +408,7 @@ static void __init save_mem_devices(const struct dmi_header *dm, void *v) bytes = ~0ull; else if (size & 0x8000) bytes = (u64)(size & 0x7fff) << 10; - else if (size != 0x7fff) + else if (size != 0x7fff || dm->length < 0x20) bytes = (u64)size << 20; else bytes = (u64)get_unaligned((u32 *)&d[0x1C]) << 20; From b67114db64adcb0aaff0fe0bb2a84ede0a99aaf2 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 4 Oct 2019 13:10:09 +0200 Subject: [PATCH 248/572] parisc: sysctl.c: Use CONFIG_PARISC instead of __hppa_ define Signed-off-by: Helge Deller --- kernel/sysctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 00fcea236eba..b6f2f35d0bcf 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -163,7 +163,7 @@ static unsigned long hung_task_timeout_max = (LONG_MAX/HZ); #ifdef CONFIG_SPARC #endif -#ifdef __hppa__ +#ifdef CONFIG_PARISC extern int pwrsw_enabled; #endif @@ -620,7 +620,7 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif -#ifdef __hppa__ +#ifdef CONFIG_PARISC { .procname = "soft-power", .data = &pwrsw_enabled, From 0703ad217ebd441dd730af71f8d9cdbf144fbc03 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 12 Aug 2019 14:50:36 -0700 Subject: [PATCH 249/572] parisc: prefer __section from compiler_attributes.h Reported-by: Sedat Dilek Suggested-by: Josh Poimboeuf Signed-off-by: Nick Desaulniers Signed-off-by: Helge Deller --- arch/parisc/include/asm/cache.h | 2 +- arch/parisc/include/asm/ldcw.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/parisc/include/asm/cache.h b/arch/parisc/include/asm/cache.h index 73ca89a47f49..e5de3f897633 100644 --- a/arch/parisc/include/asm/cache.h +++ b/arch/parisc/include/asm/cache.h @@ -22,7 +22,7 @@ #define ARCH_DMA_MINALIGN L1_CACHE_BYTES -#define __read_mostly __attribute__((__section__(".data..read_mostly"))) +#define __read_mostly __section(.data..read_mostly) void parisc_cache_init(void); /* initializes cache-flushing */ void disable_sr_hashing_asm(int); /* low level support for above */ diff --git a/arch/parisc/include/asm/ldcw.h b/arch/parisc/include/asm/ldcw.h index 3eb4bfc1fb36..e080143e79a3 100644 --- a/arch/parisc/include/asm/ldcw.h +++ b/arch/parisc/include/asm/ldcw.h @@ -52,7 +52,7 @@ }) #ifdef CONFIG_SMP -# define __lock_aligned __attribute__((__section__(".data..lock_aligned"))) +# define __lock_aligned __section(.data..lock_aligned) #endif #endif /* __PARISC_LDCW_H */ From 513f7f747e1cba81f28a436911fba0b485878ebd Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 4 Oct 2019 19:23:37 +0200 Subject: [PATCH 250/572] parisc: Fix vmap memory leak in ioremap()/iounmap() Sven noticed that calling ioremap() and iounmap() multiple times leads to a vmap memory leak: vmap allocation for size 4198400 failed: use vmalloc= to increase size It seems we missed calling vunmap() in iounmap(). Signed-off-by: Helge Deller Noticed-by: Sven Schnelle Cc: # v3.16+ --- arch/parisc/mm/ioremap.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/parisc/mm/ioremap.c b/arch/parisc/mm/ioremap.c index 92a9b5f12f98..f29f682352f0 100644 --- a/arch/parisc/mm/ioremap.c +++ b/arch/parisc/mm/ioremap.c @@ -3,7 +3,7 @@ * arch/parisc/mm/ioremap.c * * (C) Copyright 1995 1996 Linus Torvalds - * (C) Copyright 2001-2006 Helge Deller + * (C) Copyright 2001-2019 Helge Deller * (C) Copyright 2005 Kyle McMartin */ @@ -84,7 +84,7 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l addr = (void __iomem *) area->addr; if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size, phys_addr, pgprot)) { - vfree(addr); + vunmap(addr); return NULL; } @@ -92,9 +92,11 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l } EXPORT_SYMBOL(__ioremap); -void iounmap(const volatile void __iomem *addr) +void iounmap(const volatile void __iomem *io_addr) { - if (addr > high_memory) - return vfree((void *) (PAGE_MASK & (unsigned long __force) addr)); + unsigned long addr = (unsigned long)io_addr & PAGE_MASK; + + if (is_vmalloc_addr((void *)addr)) + vunmap((void *)addr); } EXPORT_SYMBOL(iounmap); From c32c47aa364096124c9c69c1a44918433832562b Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Tue, 24 Sep 2019 17:01:31 +0200 Subject: [PATCH 251/572] parisc: Remove 32-bit DMA enforcement from sba_iommu This breaks booting from sata_sil24 with the recent DMA change. According to James Bottomley this was in to improve performance by kicking the device into 32 bit descriptors, which are usually more efficient, especially with older dual descriptor format cards like we have on parisc systems. Remove it for now to make DMA working again. Fixes: dcc02c19cc06 ("sata_sil24: use dma_set_mask_and_coherent") Signed-off-by: Sven Schnelle Signed-off-by: Helge Deller --- drivers/parisc/sba_iommu.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c index ed50502cc65a..de8e4e347249 100644 --- a/drivers/parisc/sba_iommu.c +++ b/drivers/parisc/sba_iommu.c @@ -678,14 +678,6 @@ static int sba_dma_supported( struct device *dev, u64 mask) return(0); } - /* Documentation/DMA-API-HOWTO.txt tells drivers to try 64-bit - * first, then fall back to 32-bit if that fails. - * We are just "encouraging" 32-bit DMA masks here since we can - * never allow IOMMU bypass unless we add special support for ZX1. - */ - if (mask > ~0U) - return 0; - ioc = GET_IOC(dev); if (!ioc) return 0; From 7a7c5e715e722c86d602c56a09e77f000364e263 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 9 Oct 2019 07:39:54 +0900 Subject: [PATCH 252/572] block: Fix elv_support_iosched() A BIO based request queue does not have a tag_set, which prevent testing for the flag BLK_MQ_F_NO_SCHED indicating that the queue does not require an elevator. This leads to an incorrect initialization of a default elevator in some cases such as BIO based null_blk (queue_mode == BIO) with zoned mode enabled as the default elevator in this case is mq-deadline instead of "none". Fix this by testing for a NULL queue mq_ops field which indicates that the queue is BIO based and should not have an elevator. Reported-by: Shinichiro Kawasaki Reviewed-by: Bob Liu Signed-off-by: Damien Le Moal Signed-off-by: Jens Axboe --- block/elevator.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/elevator.c b/block/elevator.c index 5437059c9261..076ba7308e65 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -616,7 +616,8 @@ out: static inline bool elv_support_iosched(struct request_queue *q) { - if (q->tag_set && (q->tag_set->flags & BLK_MQ_F_NO_SCHED)) + if (!q->mq_ops || + (q->tag_set && (q->tag_set->flags & BLK_MQ_F_NO_SCHED))) return false; return true; } From c0437642966fd32b827034af6f00eecd80b89325 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Wed, 9 Oct 2019 19:41:24 -0700 Subject: [PATCH 253/572] xtensa: clean up assembly arguments in uaccess macros Numeric assembly arguments are hard to understand and assembly code that uses them is hard to modify. Use named arguments in __check_align_*, __get_user_asm and __put_user_asm. Modify macro parameter names so that they don't affect argument names. Use '+' constraint for the [err] argument instead of having it as both input and output. Signed-off-by: Max Filippov --- arch/xtensa/include/asm/uaccess.h | 42 +++++++++++++++---------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/arch/xtensa/include/asm/uaccess.h b/arch/xtensa/include/asm/uaccess.h index f568c00392ec..da4d35445063 100644 --- a/arch/xtensa/include/asm/uaccess.h +++ b/arch/xtensa/include/asm/uaccess.h @@ -132,14 +132,14 @@ do { \ #define __check_align_1 "" #define __check_align_2 \ - " _bbci.l %3, 0, 1f \n" \ - " movi %0, %4 \n" \ + " _bbci.l %[addr], 0, 1f \n" \ + " movi %[err], %[efault] \n" \ " _j 2f \n" #define __check_align_4 \ - " _bbsi.l %3, 0, 0f \n" \ - " _bbci.l %3, 1, 1f \n" \ - "0: movi %0, %4 \n" \ + " _bbsi.l %[addr], 0, 0f \n" \ + " _bbci.l %[addr], 1, 1f \n" \ + "0: movi %[err], %[efault] \n" \ " _j 2f \n" @@ -151,24 +151,24 @@ do { \ * WARNING: If you modify this macro at all, verify that the * __check_align_* macros still work. */ -#define __put_user_asm(x, addr, err, align, insn, cb) \ +#define __put_user_asm(x_, addr_, err_, align, insn, cb)\ __asm__ __volatile__( \ __check_align_##align \ - "1: "insn" %2, %3, 0 \n" \ + "1: "insn" %[x], %[addr], 0 \n" \ "2: \n" \ " .section .fixup,\"ax\" \n" \ " .align 4 \n" \ " .literal_position \n" \ "5: \n" \ - " movi %1, 2b \n" \ - " movi %0, %4 \n" \ - " jx %1 \n" \ + " movi %[tmp], 2b \n" \ + " movi %[err], %[efault] \n" \ + " jx %[tmp] \n" \ " .previous \n" \ " .section __ex_table,\"a\" \n" \ " .long 1b, 5b \n" \ " .previous" \ - :"=r" (err), "=r" (cb) \ - :"r" ((int)(x)), "r" (addr), "i" (-EFAULT), "0" (err)) + :[err] "+r"(err_), [tmp] "=r"(cb) \ + :[x] "r"(x_), [addr] "r"(addr_), [efault] "i"(-EFAULT)) #define __get_user_nocheck(x, ptr, size) \ ({ \ @@ -217,25 +217,25 @@ do { \ * WARNING: If you modify this macro at all, verify that the * __check_align_* macros still work. */ -#define __get_user_asm(x, addr, err, align, insn, cb) \ -__asm__ __volatile__( \ +#define __get_user_asm(x_, addr_, err_, align, insn, cb) \ +__asm__ __volatile__( \ __check_align_##align \ - "1: "insn" %2, %3, 0 \n" \ + "1: "insn" %[x], %[addr], 0 \n" \ "2: \n" \ " .section .fixup,\"ax\" \n" \ " .align 4 \n" \ " .literal_position \n" \ "5: \n" \ - " movi %1, 2b \n" \ - " movi %2, 0 \n" \ - " movi %0, %4 \n" \ - " jx %1 \n" \ + " movi %[tmp], 2b \n" \ + " movi %[x], 0 \n" \ + " movi %[err], %[efault] \n" \ + " jx %[tmp] \n" \ " .previous \n" \ " .section __ex_table,\"a\" \n" \ " .long 1b, 5b \n" \ " .previous" \ - :"=r" (err), "=r" (cb), "=r" (x) \ - :"r" (addr), "i" (-EFAULT), "0" (err)) + :[err] "+r"(err_), [tmp] "=r"(cb), [x] "=r"(x_)\ + :[addr] "r"(addr_), [efault] "i"(-EFAULT)) /* From c9c63f3c7a9081e4768291514991d3208ae8a697 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Thu, 10 Oct 2019 20:55:35 -0700 Subject: [PATCH 254/572] xtensa: fix type conversion in __get_user_[no]check __get_user_[no]check uses temporary buffer of type long to store result of __get_user_size and do sign extension on it when necessary. This doesn't work correctly for 64-bit data. Fix it by moving temporary buffer/sign extension logic to __get_user_asm. Don't do assignment of __get_user_bad result to (x) as it may not always be integer-compatible now and issue warning even when it's going to be optimized. Instead do (x) = 0; and call __get_user_bad separately. Zero initialize __x in __get_user_asm and use '+' constraint for its assembly argument, so that its value is preserved in error cases. This may add at most 1 cycle to the fast path, but saves an instruction and two padding bytes in the fixup section for each use of this macro and works for both misaligned store and store exception. Signed-off-by: Max Filippov --- arch/xtensa/include/asm/uaccess.h | 55 ++++++++++++++++--------------- 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/arch/xtensa/include/asm/uaccess.h b/arch/xtensa/include/asm/uaccess.h index da4d35445063..3f80386f1883 100644 --- a/arch/xtensa/include/asm/uaccess.h +++ b/arch/xtensa/include/asm/uaccess.h @@ -172,19 +172,19 @@ __asm__ __volatile__( \ #define __get_user_nocheck(x, ptr, size) \ ({ \ - long __gu_err, __gu_val; \ - __get_user_size(__gu_val, (ptr), (size), __gu_err); \ - (x) = (__force __typeof__(*(ptr)))__gu_val; \ + long __gu_err; \ + __get_user_size((x), (ptr), (size), __gu_err); \ __gu_err; \ }) #define __get_user_check(x, ptr, size) \ ({ \ - long __gu_err = -EFAULT, __gu_val = 0; \ + long __gu_err = -EFAULT; \ const __typeof__(*(ptr)) *__gu_addr = (ptr); \ - if (access_ok(__gu_addr, size)) \ - __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ - (x) = (__force __typeof__(*(ptr)))__gu_val; \ + if (access_ok(__gu_addr, size)) \ + __get_user_size((x), __gu_addr, (size), __gu_err); \ + else \ + (x) = 0; \ __gu_err; \ }) @@ -208,7 +208,7 @@ do { \ } \ break; \ } \ - default: (x) = __get_user_bad(); \ + default: (x) = 0; __get_user_bad(); \ } \ } while (0) @@ -218,24 +218,27 @@ do { \ * __check_align_* macros still work. */ #define __get_user_asm(x_, addr_, err_, align, insn, cb) \ -__asm__ __volatile__( \ - __check_align_##align \ - "1: "insn" %[x], %[addr], 0 \n" \ - "2: \n" \ - " .section .fixup,\"ax\" \n" \ - " .align 4 \n" \ - " .literal_position \n" \ - "5: \n" \ - " movi %[tmp], 2b \n" \ - " movi %[x], 0 \n" \ - " movi %[err], %[efault] \n" \ - " jx %[tmp] \n" \ - " .previous \n" \ - " .section __ex_table,\"a\" \n" \ - " .long 1b, 5b \n" \ - " .previous" \ - :[err] "+r"(err_), [tmp] "=r"(cb), [x] "=r"(x_)\ - :[addr] "r"(addr_), [efault] "i"(-EFAULT)) +do { \ + u32 __x = 0; \ + __asm__ __volatile__( \ + __check_align_##align \ + "1: "insn" %[x], %[addr], 0 \n" \ + "2: \n" \ + " .section .fixup,\"ax\" \n" \ + " .align 4 \n" \ + " .literal_position \n" \ + "5: \n" \ + " movi %[tmp], 2b \n" \ + " movi %[err], %[efault] \n" \ + " jx %[tmp] \n" \ + " .previous \n" \ + " .section __ex_table,\"a\" \n" \ + " .long 1b, 5b \n" \ + " .previous" \ + :[err] "+r"(err_), [tmp] "=r"(cb), [x] "+r"(__x) \ + :[addr] "r"(addr_), [efault] "i"(-EFAULT)); \ + (x_) = (__force __typeof__(*(addr_)))__x; \ +} while (0) /* From 5556cfe8d994d5e7b4d50fd91597b8dc0b3a82fd Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 14 Oct 2019 14:11:40 -0700 Subject: [PATCH 255/572] mm, page_owner: fix off-by-one error in __set_page_owner_handle() Patch series "followups to debug_pagealloc improvements through page_owner", v3. These are followups to [1] which made it to Linus meanwhile. Patches 1 and 3 are based on Kirill's review, patch 2 on KASAN request [2]. It would be nice if all of this made it to 5.4 with [1] already there (or at least Patch 1). This patch (of 3): As noted by Kirill, commit 7e2f2a0cd17c ("mm, page_owner: record page owner for each subpage") has introduced an off-by-one error in __set_page_owner_handle() when looking up page_ext for subpages. As a result, the head page page_owner info is set twice, while for the last tail page, it's not set at all. Fix this and also make the code more efficient by advancing the page_ext pointer we already have, instead of calling lookup_page_ext() for each subpage. Since the full size of struct page_ext is not known at compile time, we can't use a simple page_ext++ statement, so introduce a page_ext_next() inline function for that. Link: http://lkml.kernel.org/r/20190930122916.14969-2-vbabka@suse.cz Fixes: 7e2f2a0cd17c ("mm, page_owner: record page owner for each subpage") Signed-off-by: Vlastimil Babka Reported-by: Kirill A. Shutemov Reported-by: Miles Chen Acked-by: Kirill A. Shutemov Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Walter Wu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_ext.h | 8 ++++++++ mm/page_ext.c | 23 +++++++++-------------- mm/page_owner.c | 15 +++++++-------- 3 files changed, 24 insertions(+), 22 deletions(-) diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h index 682fd465df06..5e856512bafb 100644 --- a/include/linux/page_ext.h +++ b/include/linux/page_ext.h @@ -36,6 +36,7 @@ struct page_ext { unsigned long flags; }; +extern unsigned long page_ext_size; extern void pgdat_page_ext_init(struct pglist_data *pgdat); #ifdef CONFIG_SPARSEMEM @@ -52,6 +53,13 @@ static inline void page_ext_init(void) struct page_ext *lookup_page_ext(const struct page *page); +static inline struct page_ext *page_ext_next(struct page_ext *curr) +{ + void *next = curr; + next += page_ext_size; + return next; +} + #else /* !CONFIG_PAGE_EXTENSION */ struct page_ext; diff --git a/mm/page_ext.c b/mm/page_ext.c index 5f5769c7db3b..4ade843ff588 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -67,8 +67,9 @@ static struct page_ext_operations *page_ext_ops[] = { #endif }; +unsigned long page_ext_size = sizeof(struct page_ext); + static unsigned long total_usage; -static unsigned long extra_mem; static bool __init invoke_need_callbacks(void) { @@ -78,9 +79,8 @@ static bool __init invoke_need_callbacks(void) for (i = 0; i < entries; i++) { if (page_ext_ops[i]->need && page_ext_ops[i]->need()) { - page_ext_ops[i]->offset = sizeof(struct page_ext) + - extra_mem; - extra_mem += page_ext_ops[i]->size; + page_ext_ops[i]->offset = page_ext_size; + page_ext_size += page_ext_ops[i]->size; need = true; } } @@ -99,14 +99,9 @@ static void __init invoke_init_callbacks(void) } } -static unsigned long get_entry_size(void) -{ - return sizeof(struct page_ext) + extra_mem; -} - static inline struct page_ext *get_entry(void *base, unsigned long index) { - return base + get_entry_size() * index; + return base + page_ext_size * index; } #if !defined(CONFIG_SPARSEMEM) @@ -156,7 +151,7 @@ static int __init alloc_node_page_ext(int nid) !IS_ALIGNED(node_end_pfn(nid), MAX_ORDER_NR_PAGES)) nr_pages += MAX_ORDER_NR_PAGES; - table_size = get_entry_size() * nr_pages; + table_size = page_ext_size * nr_pages; base = memblock_alloc_try_nid( table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS), @@ -234,7 +229,7 @@ static int __meminit init_section_page_ext(unsigned long pfn, int nid) if (section->page_ext) return 0; - table_size = get_entry_size() * PAGES_PER_SECTION; + table_size = page_ext_size * PAGES_PER_SECTION; base = alloc_page_ext(table_size, nid); /* @@ -254,7 +249,7 @@ static int __meminit init_section_page_ext(unsigned long pfn, int nid) * we need to apply a mask. */ pfn &= PAGE_SECTION_MASK; - section->page_ext = (void *)base - get_entry_size() * pfn; + section->page_ext = (void *)base - page_ext_size * pfn; total_usage += table_size; return 0; } @@ -267,7 +262,7 @@ static void free_page_ext(void *addr) struct page *page = virt_to_page(addr); size_t table_size; - table_size = get_entry_size() * PAGES_PER_SECTION; + table_size = page_ext_size * PAGES_PER_SECTION; BUG_ON(PageReserved(page)); kmemleak_free(addr); diff --git a/mm/page_owner.c b/mm/page_owner.c index dee931184788..d3cf5d336ccf 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -156,10 +156,10 @@ void __reset_page_owner(struct page *page, unsigned int order) handle = save_stack(GFP_NOWAIT | __GFP_NOWARN); #endif + page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + return; for (i = 0; i < (1 << order); i++) { - page_ext = lookup_page_ext(page + i); - if (unlikely(!page_ext)) - continue; __clear_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags); #ifdef CONFIG_DEBUG_PAGEALLOC if (debug_pagealloc_enabled()) { @@ -167,6 +167,7 @@ void __reset_page_owner(struct page *page, unsigned int order) page_owner->free_handle = handle; } #endif + page_ext = page_ext_next(page_ext); } } @@ -186,7 +187,7 @@ static inline void __set_page_owner_handle(struct page *page, __set_bit(PAGE_EXT_OWNER, &page_ext->flags); __set_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags); - page_ext = lookup_page_ext(page + i); + page_ext = page_ext_next(page_ext); } } @@ -224,12 +225,10 @@ void __split_page_owner(struct page *page, unsigned int order) if (unlikely(!page_ext)) return; - page_owner = get_page_owner(page_ext); - page_owner->order = 0; - for (i = 1; i < (1 << order); i++) { - page_ext = lookup_page_ext(page + i); + for (i = 0; i < (1 << order); i++) { page_owner = get_page_owner(page_ext); page_owner->order = 0; + page_ext = page_ext_next(page_ext); } } From 0fe9a448a029a11d7211fcc2ebe9023d7fd31792 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 14 Oct 2019 14:11:44 -0700 Subject: [PATCH 256/572] mm, page_owner: decouple freeing stack trace from debug_pagealloc Commit 8974558f49a6 ("mm, page_owner, debug_pagealloc: save and dump freeing stack trace") enhanced page_owner to also store freeing stack trace, when debug_pagealloc is also enabled. KASAN would also like to do this [1] to improve error reports to debug e.g. UAF issues. Kirill has suggested that the freeing stack trace saving should be also possible to be enabled separately from KASAN or debug_pagealloc, i.e. with an extra boot option. Qian argued that we have enough options already, and avoiding the extra overhead is not worth the complications in the case of a debugging option. Kirill noted that the extra stack handle in struct page_owner requires 0.1% of memory. This patch therefore enables free stack saving whenever page_owner is enabled, regardless of whether debug_pagealloc or KASAN is also enabled. KASAN kernels booted with page_owner=on will thus benefit from the improved error reports. [1] https://bugzilla.kernel.org/show_bug.cgi?id=203967 [vbabka@suse.cz: v3] Link: http://lkml.kernel.org/r/20191007091808.7096-3-vbabka@suse.cz Link: http://lkml.kernel.org/r/20190930122916.14969-3-vbabka@suse.cz Signed-off-by: Vlastimil Babka Reviewed-by: Qian Cai Suggested-by: Dmitry Vyukov Suggested-by: Walter Wu Suggested-by: Andrey Ryabinin Suggested-by: Kirill A. Shutemov Suggested-by: Qian Cai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/dev-tools/kasan.rst | 3 +++ mm/page_owner.c | 28 +++++++--------------------- 2 files changed, 10 insertions(+), 21 deletions(-) diff --git a/Documentation/dev-tools/kasan.rst b/Documentation/dev-tools/kasan.rst index b72d07d70239..525296121d89 100644 --- a/Documentation/dev-tools/kasan.rst +++ b/Documentation/dev-tools/kasan.rst @@ -41,6 +41,9 @@ smaller binary while the latter is 1.1 - 2 times faster. Both KASAN modes work with both SLUB and SLAB memory allocators. For better bug detection and nicer reporting, enable CONFIG_STACKTRACE. +To augment reports with last allocation and freeing stack of the physical page, +it is recommended to enable also CONFIG_PAGE_OWNER and boot with page_owner=on. + To disable instrumentation for specific files or directories, add a line similar to the following to the respective kernel Makefile: diff --git a/mm/page_owner.c b/mm/page_owner.c index d3cf5d336ccf..de1916ac3e24 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -24,12 +24,10 @@ struct page_owner { short last_migrate_reason; gfp_t gfp_mask; depot_stack_handle_t handle; -#ifdef CONFIG_DEBUG_PAGEALLOC depot_stack_handle_t free_handle; -#endif }; -static bool page_owner_disabled = true; +static bool page_owner_enabled = false; DEFINE_STATIC_KEY_FALSE(page_owner_inited); static depot_stack_handle_t dummy_handle; @@ -44,7 +42,7 @@ static int __init early_page_owner_param(char *buf) return -EINVAL; if (strcmp(buf, "on") == 0) - page_owner_disabled = false; + page_owner_enabled = true; return 0; } @@ -52,10 +50,7 @@ early_param("page_owner", early_page_owner_param); static bool need_page_owner(void) { - if (page_owner_disabled) - return false; - - return true; + return page_owner_enabled; } static __always_inline depot_stack_handle_t create_dummy_stack(void) @@ -84,7 +79,7 @@ static noinline void register_early_stack(void) static void init_page_owner(void) { - if (page_owner_disabled) + if (!page_owner_enabled) return; register_dummy_stack(); @@ -148,25 +143,18 @@ void __reset_page_owner(struct page *page, unsigned int order) { int i; struct page_ext *page_ext; -#ifdef CONFIG_DEBUG_PAGEALLOC depot_stack_handle_t handle = 0; struct page_owner *page_owner; - if (debug_pagealloc_enabled()) - handle = save_stack(GFP_NOWAIT | __GFP_NOWARN); -#endif + handle = save_stack(GFP_NOWAIT | __GFP_NOWARN); page_ext = lookup_page_ext(page); if (unlikely(!page_ext)) return; for (i = 0; i < (1 << order); i++) { __clear_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags); -#ifdef CONFIG_DEBUG_PAGEALLOC - if (debug_pagealloc_enabled()) { - page_owner = get_page_owner(page_ext); - page_owner->free_handle = handle; - } -#endif + page_owner = get_page_owner(page_ext); + page_owner->free_handle = handle; page_ext = page_ext_next(page_ext); } } @@ -450,7 +438,6 @@ void __dump_page_owner(struct page *page) stack_trace_print(entries, nr_entries, 0); } -#ifdef CONFIG_DEBUG_PAGEALLOC handle = READ_ONCE(page_owner->free_handle); if (!handle) { pr_alert("page_owner free stack trace missing\n"); @@ -459,7 +446,6 @@ void __dump_page_owner(struct page *page) pr_alert("page last free stack trace:\n"); stack_trace_print(entries, nr_entries, 0); } -#endif if (page_owner->last_migrate_reason != -1) pr_alert("page has been migrated, last migrate reason: %s\n", From fdf3bf809162592b54c278b9b0e84f3e126f8844 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 14 Oct 2019 14:11:47 -0700 Subject: [PATCH 257/572] mm, page_owner: rename flag indicating that page is allocated Commit 37389167a281 ("mm, page_owner: keep owner info when freeing the page") has introduced a flag PAGE_EXT_OWNER_ACTIVE to indicate that page is tracked as being allocated. Kirril suggested naming it PAGE_EXT_OWNER_ALLOCATED to make it more clear, as "active is somewhat loaded term for a page". Link: http://lkml.kernel.org/r/20190930122916.14969-4-vbabka@suse.cz Signed-off-by: Vlastimil Babka Suggested-by: Kirill A. Shutemov Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Kirill A. Shutemov Cc: Walter Wu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_ext.h | 2 +- mm/page_owner.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h index 5e856512bafb..cfce186f0c4e 100644 --- a/include/linux/page_ext.h +++ b/include/linux/page_ext.h @@ -18,7 +18,7 @@ struct page_ext_operations { enum page_ext_flags { PAGE_EXT_OWNER, - PAGE_EXT_OWNER_ACTIVE, + PAGE_EXT_OWNER_ALLOCATED, #if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT) PAGE_EXT_YOUNG, PAGE_EXT_IDLE, diff --git a/mm/page_owner.c b/mm/page_owner.c index de1916ac3e24..e327bcd0380e 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -152,7 +152,7 @@ void __reset_page_owner(struct page *page, unsigned int order) if (unlikely(!page_ext)) return; for (i = 0; i < (1 << order); i++) { - __clear_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags); + __clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags); page_owner = get_page_owner(page_ext); page_owner->free_handle = handle; page_ext = page_ext_next(page_ext); @@ -173,7 +173,7 @@ static inline void __set_page_owner_handle(struct page *page, page_owner->gfp_mask = gfp_mask; page_owner->last_migrate_reason = -1; __set_bit(PAGE_EXT_OWNER, &page_ext->flags); - __set_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags); + __set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags); page_ext = page_ext_next(page_ext); } @@ -247,7 +247,7 @@ void __copy_page_owner(struct page *oldpage, struct page *newpage) * the new page, which will be freed. */ __set_bit(PAGE_EXT_OWNER, &new_ext->flags); - __set_bit(PAGE_EXT_OWNER_ACTIVE, &new_ext->flags); + __set_bit(PAGE_EXT_OWNER_ALLOCATED, &new_ext->flags); } void pagetypeinfo_showmixedcount_print(struct seq_file *m, @@ -307,7 +307,7 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m, if (unlikely(!page_ext)) continue; - if (!test_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags)) + if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags)) continue; page_owner = get_page_owner(page_ext); @@ -422,7 +422,7 @@ void __dump_page_owner(struct page *page) return; } - if (test_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags)) + if (test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags)) pr_alert("page_owner tracks the page as allocated\n"); else pr_alert("page_owner tracks the page as freed\n"); @@ -512,7 +512,7 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos) * Although we do have the info about past allocation of free * pages, it's not relevant for current memory usage. */ - if (!test_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags)) + if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags)) continue; page_owner = get_page_owner(page_ext); From e4f8e513c3d353c134ad4eef9fd0bba12406c7c8 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Mon, 14 Oct 2019 14:11:51 -0700 Subject: [PATCH 258/572] mm/slub: fix a deadlock in show_slab_objects() A long time ago we fixed a similar deadlock in show_slab_objects() [1]. However, it is apparently due to the commits like 01fb58bcba63 ("slab: remove synchronous synchronize_sched() from memcg cache deactivation path") and 03afc0e25f7f ("slab: get_online_mems for kmem_cache_{create,destroy,shrink}"), this kind of deadlock is back by just reading files in /sys/kernel/slab which will generate a lockdep splat below. Since the "mem_hotplug_lock" here is only to obtain a stable online node mask while racing with NUMA node hotplug, in the worst case, the results may me miscalculated while doing NUMA node hotplug, but they shall be corrected by later reads of the same files. WARNING: possible circular locking dependency detected ------------------------------------------------------ cat/5224 is trying to acquire lock: ffff900012ac3120 (mem_hotplug_lock.rw_sem){++++}, at: show_slab_objects+0x94/0x3a8 but task is already holding lock: b8ff009693eee398 (kn->count#45){++++}, at: kernfs_seq_start+0x44/0xf0 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (kn->count#45){++++}: lock_acquire+0x31c/0x360 __kernfs_remove+0x290/0x490 kernfs_remove+0x30/0x44 sysfs_remove_dir+0x70/0x88 kobject_del+0x50/0xb0 sysfs_slab_unlink+0x2c/0x38 shutdown_cache+0xa0/0xf0 kmemcg_cache_shutdown_fn+0x1c/0x34 kmemcg_workfn+0x44/0x64 process_one_work+0x4f4/0x950 worker_thread+0x390/0x4bc kthread+0x1cc/0x1e8 ret_from_fork+0x10/0x18 -> #1 (slab_mutex){+.+.}: lock_acquire+0x31c/0x360 __mutex_lock_common+0x16c/0xf78 mutex_lock_nested+0x40/0x50 memcg_create_kmem_cache+0x38/0x16c memcg_kmem_cache_create_func+0x3c/0x70 process_one_work+0x4f4/0x950 worker_thread+0x390/0x4bc kthread+0x1cc/0x1e8 ret_from_fork+0x10/0x18 -> #0 (mem_hotplug_lock.rw_sem){++++}: validate_chain+0xd10/0x2bcc __lock_acquire+0x7f4/0xb8c lock_acquire+0x31c/0x360 get_online_mems+0x54/0x150 show_slab_objects+0x94/0x3a8 total_objects_show+0x28/0x34 slab_attr_show+0x38/0x54 sysfs_kf_seq_show+0x198/0x2d4 kernfs_seq_show+0xa4/0xcc seq_read+0x30c/0x8a8 kernfs_fop_read+0xa8/0x314 __vfs_read+0x88/0x20c vfs_read+0xd8/0x10c ksys_read+0xb0/0x120 __arm64_sys_read+0x54/0x88 el0_svc_handler+0x170/0x240 el0_svc+0x8/0xc other info that might help us debug this: Chain exists of: mem_hotplug_lock.rw_sem --> slab_mutex --> kn->count#45 Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(kn->count#45); lock(slab_mutex); lock(kn->count#45); lock(mem_hotplug_lock.rw_sem); *** DEADLOCK *** 3 locks held by cat/5224: #0: 9eff00095b14b2a0 (&p->lock){+.+.}, at: seq_read+0x4c/0x8a8 #1: 0eff008997041480 (&of->mutex){+.+.}, at: kernfs_seq_start+0x34/0xf0 #2: b8ff009693eee398 (kn->count#45){++++}, at: kernfs_seq_start+0x44/0xf0 stack backtrace: Call trace: dump_backtrace+0x0/0x248 show_stack+0x20/0x2c dump_stack+0xd0/0x140 print_circular_bug+0x368/0x380 check_noncircular+0x248/0x250 validate_chain+0xd10/0x2bcc __lock_acquire+0x7f4/0xb8c lock_acquire+0x31c/0x360 get_online_mems+0x54/0x150 show_slab_objects+0x94/0x3a8 total_objects_show+0x28/0x34 slab_attr_show+0x38/0x54 sysfs_kf_seq_show+0x198/0x2d4 kernfs_seq_show+0xa4/0xcc seq_read+0x30c/0x8a8 kernfs_fop_read+0xa8/0x314 __vfs_read+0x88/0x20c vfs_read+0xd8/0x10c ksys_read+0xb0/0x120 __arm64_sys_read+0x54/0x88 el0_svc_handler+0x170/0x240 el0_svc+0x8/0xc I think it is important to mention that this doesn't expose the show_slab_objects to use-after-free. There is only a single path that might really race here and that is the slab hotplug notifier callback __kmem_cache_shrink (via slab_mem_going_offline_callback) but that path doesn't really destroy kmem_cache_node data structures. [1] http://lkml.iu.edu/hypermail/linux/kernel/1101.0/02850.html [akpm@linux-foundation.org: add comment explaining why we don't need mem_hotplug_lock] Link: http://lkml.kernel.org/r/1570192309-10132-1-git-send-email-cai@lca.pw Fixes: 01fb58bcba63 ("slab: remove synchronous synchronize_sched() from memcg cache deactivation path") Fixes: 03afc0e25f7f ("slab: get_online_mems for kmem_cache_{create,destroy,shrink}") Signed-off-by: Qian Cai Acked-by: Michal Hocko Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Tejun Heo Cc: Vladimir Davydov Cc: Roman Gushchin Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 3d63ae320d31..442f111d1e98 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4846,7 +4846,17 @@ static ssize_t show_slab_objects(struct kmem_cache *s, } } - get_online_mems(); + /* + * It is impossible to take "mem_hotplug_lock" here with "kernfs_mutex" + * already held which will conflict with an existing lock order: + * + * mem_hotplug_lock->slab_mutex->kernfs_mutex + * + * We don't really need mem_hotplug_lock (to hold off + * slab_mem_going_offline_callback) here because slab's memory hot + * unplug code doesn't destroy the kmem_cache->node[] data. + */ + #ifdef CONFIG_SLUB_DEBUG if (flags & SO_ALL) { struct kmem_cache_node *n; @@ -4887,7 +4897,6 @@ static ssize_t show_slab_objects(struct kmem_cache *s, x += sprintf(buf + x, " N%d=%lu", node, nodes[node]); #endif - put_online_mems(); kfree(nodes); return x + sprintf(buf + x, "\n"); } From 3c52b0af059e11a063970aed1ad143b9284a79c7 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 14 Oct 2019 14:11:54 -0700 Subject: [PATCH 259/572] lib/generic-radix-tree.c: add kmemleak annotations Kmemleak is falsely reporting a leak of the slab allocation in sctp_stream_init_ext(): BUG: memory leak unreferenced object 0xffff8881114f5d80 (size 96): comm "syz-executor934", pid 7160, jiffies 4294993058 (age 31.950s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000ce7a1326>] kmemleak_alloc_recursive include/linux/kmemleak.h:55 [inline] [<00000000ce7a1326>] slab_post_alloc_hook mm/slab.h:439 [inline] [<00000000ce7a1326>] slab_alloc mm/slab.c:3326 [inline] [<00000000ce7a1326>] kmem_cache_alloc_trace+0x13d/0x280 mm/slab.c:3553 [<000000007abb7ac9>] kmalloc include/linux/slab.h:547 [inline] [<000000007abb7ac9>] kzalloc include/linux/slab.h:742 [inline] [<000000007abb7ac9>] sctp_stream_init_ext+0x2b/0xa0 net/sctp/stream.c:157 [<0000000048ecb9c1>] sctp_sendmsg_to_asoc+0x946/0xa00 net/sctp/socket.c:1882 [<000000004483ca2b>] sctp_sendmsg+0x2a8/0x990 net/sctp/socket.c:2102 [...] But it's freed later. Kmemleak misses the allocation because its pointer is stored in the generic radix tree sctp_stream::out, and the generic radix tree uses raw pages which aren't tracked by kmemleak. Fix this by adding the kmemleak hooks to the generic radix tree code. Link: http://lkml.kernel.org/r/20191004065039.727564-1-ebiggers@kernel.org Signed-off-by: Eric Biggers Reported-by: Reviewed-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Reviewed-by: Catalin Marinas Cc: Kent Overstreet Cc: Vlad Yasevich Cc: Xin Long Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/generic-radix-tree.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/lib/generic-radix-tree.c b/lib/generic-radix-tree.c index ae25e2fa2187..f25eb111c051 100644 --- a/lib/generic-radix-tree.c +++ b/lib/generic-radix-tree.c @@ -2,6 +2,7 @@ #include #include #include +#include #define GENRADIX_ARY (PAGE_SIZE / sizeof(struct genradix_node *)) #define GENRADIX_ARY_SHIFT ilog2(GENRADIX_ARY) @@ -75,6 +76,27 @@ void *__genradix_ptr(struct __genradix *radix, size_t offset) } EXPORT_SYMBOL(__genradix_ptr); +static inline struct genradix_node *genradix_alloc_node(gfp_t gfp_mask) +{ + struct genradix_node *node; + + node = (struct genradix_node *)__get_free_page(gfp_mask|__GFP_ZERO); + + /* + * We're using pages (not slab allocations) directly for kernel data + * structures, so we need to explicitly inform kmemleak of them in order + * to avoid false positive memory leak reports. + */ + kmemleak_alloc(node, PAGE_SIZE, 1, gfp_mask); + return node; +} + +static inline void genradix_free_node(struct genradix_node *node) +{ + kmemleak_free(node); + free_page((unsigned long)node); +} + /* * Returns pointer to the specified byte @offset within @radix, allocating it if * necessary - newly allocated slots are always zeroed out: @@ -97,8 +119,7 @@ void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset, break; if (!new_node) { - new_node = (void *) - __get_free_page(gfp_mask|__GFP_ZERO); + new_node = genradix_alloc_node(gfp_mask); if (!new_node) return NULL; } @@ -121,8 +142,7 @@ void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset, n = READ_ONCE(*p); if (!n) { if (!new_node) { - new_node = (void *) - __get_free_page(gfp_mask|__GFP_ZERO); + new_node = genradix_alloc_node(gfp_mask); if (!new_node) return NULL; } @@ -133,7 +153,7 @@ void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset, } if (new_node) - free_page((unsigned long) new_node); + genradix_free_node(new_node); return &n->data[offset]; } @@ -191,7 +211,7 @@ static void genradix_free_recurse(struct genradix_node *n, unsigned level) genradix_free_recurse(n->children[i], level - 1); } - free_page((unsigned long) n); + genradix_free_node(n); } int __genradix_prealloc(struct __genradix *radix, size_t size, From 0f181f9fbea8bc7ea2f7e13ae7f8c256b39e254c Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Mon, 14 Oct 2019 14:11:57 -0700 Subject: [PATCH 260/572] mm/slub.c: init_on_free=1 should wipe freelist ptr for bulk allocations slab_alloc_node() already zeroed out the freelist pointer if init_on_free was on. Thibaut Sautereau noticed that the same needs to be done for kmem_cache_alloc_bulk(), which performs the allocations separately. kmem_cache_alloc_bulk() is currently used in two places in the kernel, so this change is unlikely to have a major performance impact. SLAB doesn't require a similar change, as auto-initialization makes the allocator store the freelist pointers off-slab. Link: http://lkml.kernel.org/r/20191007091605.30530-1-glider@google.com Fixes: 6471384af2a6 ("mm: security: introduce init_on_alloc=1 and init_on_free=1 boot options") Signed-off-by: Alexander Potapenko Reported-by: Thibaut Sautereau Reported-by: Kees Cook Cc: Christoph Lameter Cc: Laura Abbott Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 442f111d1e98..b25c807a111f 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2671,6 +2671,17 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, return p; } +/* + * If the object has been wiped upon free, make sure it's fully initialized by + * zeroing out freelist pointer. + */ +static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s, + void *obj) +{ + if (unlikely(slab_want_init_on_free(s)) && obj) + memset((void *)((char *)obj + s->offset), 0, sizeof(void *)); +} + /* * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc) * have the fastpath folded into their functions. So no function call @@ -2759,12 +2770,8 @@ redo: prefetch_freepointer(s, next_object); stat(s, ALLOC_FASTPATH); } - /* - * If the object has been wiped upon free, make sure it's fully - * initialized by zeroing out freelist pointer. - */ - if (unlikely(slab_want_init_on_free(s)) && object) - memset(object + s->offset, 0, sizeof(void *)); + + maybe_wipe_obj_freeptr(s, object); if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object) memset(object, 0, s->object_size); @@ -3178,10 +3185,13 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, goto error; c = this_cpu_ptr(s->cpu_slab); + maybe_wipe_obj_freeptr(s, p[i]); + continue; /* goto for-loop */ } c->freelist = get_freepointer(s, object); p[i] = object; + maybe_wipe_obj_freeptr(s, p[i]); } c->tid = next_tid(c->tid); local_irq_enable(); From 03a9349ac0e095dea6ef8b5b7b14f9c23e5fabe6 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Mon, 14 Oct 2019 14:12:00 -0700 Subject: [PATCH 261/572] lib/test_meminit: add a kmem_cache_alloc_bulk() test Make sure allocations from kmem_cache_alloc_bulk() and kmem_cache_free_bulk() are properly initialized. Link: http://lkml.kernel.org/r/20191007091605.30530-2-glider@google.com Signed-off-by: Alexander Potapenko Cc: Kees Cook Cc: Christoph Lameter Cc: Laura Abbott Cc: Thibaut Sautereau Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_meminit.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/lib/test_meminit.c b/lib/test_meminit.c index 9729f271d150..9742e5cb853a 100644 --- a/lib/test_meminit.c +++ b/lib/test_meminit.c @@ -297,6 +297,32 @@ out: return 1; } +static int __init do_kmem_cache_size_bulk(int size, int *total_failures) +{ + struct kmem_cache *c; + int i, iter, maxiter = 1024; + int num, bytes; + bool fail = false; + void *objects[10]; + + c = kmem_cache_create("test_cache", size, size, 0, NULL); + for (iter = 0; (iter < maxiter) && !fail; iter++) { + num = kmem_cache_alloc_bulk(c, GFP_KERNEL, ARRAY_SIZE(objects), + objects); + for (i = 0; i < num; i++) { + bytes = count_nonzero_bytes(objects[i], size); + if (bytes) + fail = true; + fill_with_garbage(objects[i], size); + } + + if (num) + kmem_cache_free_bulk(c, num, objects); + } + *total_failures += fail; + return 1; +} + /* * Test kmem_cache allocation by creating caches of different sizes, with and * without constructors, with and without SLAB_TYPESAFE_BY_RCU. @@ -318,6 +344,7 @@ static int __init test_kmemcache(int *total_failures) num_tests += do_kmem_cache_size(size, ctor, rcu, zero, &failures); } + num_tests += do_kmem_cache_size_bulk(size, &failures); } REPORT_FAILURES_IN_FN(); *total_failures += failures; From 3f36d8669457605910cb7a40089b485949569c41 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 14 Oct 2019 14:12:04 -0700 Subject: [PATCH 262/572] mm, hugetlb: allow hugepage allocations to reclaim as needed Commit b39d0ee2632d ("mm, page_alloc: avoid expensive reclaim when compaction may not succeed") has chnaged the allocator to bail out from the allocator early to prevent from a potentially excessive memory reclaim. __GFP_RETRY_MAYFAIL is designed to retry the allocation, reclaim and compaction loop as long as there is a reasonable chance to make forward progress. Neither COMPACT_SKIPPED nor COMPACT_DEFERRED at the INIT_COMPACT_PRIORITY compaction attempt gives this feedback. The most obvious affected subsystem is hugetlbfs which allocates huge pages based on an admin request (or via admin configured overcommit). I have done a simple test which tries to allocate half of the memory for hugetlb pages while the memory is full of a clean page cache. This is not an unusual situation because we try to cache as much of the memory as possible and sysctl/sysfs interface to allocate huge pages is there for flexibility to allocate hugetlb pages at any time. System has 1GB of RAM and we are requesting 515MB worth of hugetlb pages after the memory is prefilled by a clean page cache: root@test1:~# cat hugetlb_test.sh set -x echo 0 > /proc/sys/vm/nr_hugepages echo 3 > /proc/sys/vm/drop_caches echo 1 > /proc/sys/vm/compact_memory dd if=/mnt/data/file-1G of=/dev/null bs=$((4<<10)) TS=$(date +%s) echo 256 > /proc/sys/vm/nr_hugepages cat /proc/sys/vm/nr_hugepages The results for 2 consecutive runs on clean 5.3 root@test1:~# sh hugetlb_test.sh + echo 0 + echo 3 + echo 1 + dd if=/mnt/data/file-1G of=/dev/null bs=4096 262144+0 records in 262144+0 records out 1073741824 bytes (1.1 GB) copied, 21.0694 s, 51.0 MB/s + date +%s + TS=1569905284 + echo 256 + cat /proc/sys/vm/nr_hugepages 256 root@test1:~# sh hugetlb_test.sh + echo 0 + echo 3 + echo 1 + dd if=/mnt/data/file-1G of=/dev/null bs=4096 262144+0 records in 262144+0 records out 1073741824 bytes (1.1 GB) copied, 21.7548 s, 49.4 MB/s + date +%s + TS=1569905311 + echo 256 + cat /proc/sys/vm/nr_hugepages 256 Now with b39d0ee2632d applied root@test1:~# sh hugetlb_test.sh + echo 0 + echo 3 + echo 1 + dd if=/mnt/data/file-1G of=/dev/null bs=4096 262144+0 records in 262144+0 records out 1073741824 bytes (1.1 GB) copied, 20.1815 s, 53.2 MB/s + date +%s + TS=1569905516 + echo 256 + cat /proc/sys/vm/nr_hugepages 11 root@test1:~# sh hugetlb_test.sh + echo 0 + echo 3 + echo 1 + dd if=/mnt/data/file-1G of=/dev/null bs=4096 262144+0 records in 262144+0 records out 1073741824 bytes (1.1 GB) copied, 21.9485 s, 48.9 MB/s + date +%s + TS=1569905541 + echo 256 + cat /proc/sys/vm/nr_hugepages 12 The success rate went down by factor of 20! Although hugetlb allocation requests might fail and it is reasonable to expect them to under extremely fragmented memory or when the memory is under a heavy pressure but the above situation is not that case. Fix the regression by reverting back to the previous behavior for __GFP_RETRY_MAYFAIL requests and disable the beail out heuristic for those requests. Mike said: : hugetlbfs allocations are commonly done via sysctl/sysfs shortly after : boot where this may not be as much of an issue. However, I am aware of at : least three use cases where allocations are made after the system has been : up and running for quite some time: : : - DB reconfiguration. If sysctl/sysfs fails to get required number of : huge pages, system is rebooted to perform allocation after boot. : : - VM provisioning. If unable get required number of huge pages, fall : back to base pages. : : - An application that does not preallocate pool, but rather allocates : pages at fault time for optimal NUMA locality. : : In all cases, I would expect b39d0ee2632d to cause regressions and : noticable behavior changes. : : My quick/limited testing in : https://lkml.kernel.org/r/3468b605-a3a9-6978-9699-57c52a90bd7e@oracle.com : was insufficient. It was also mentioned that if something like : b39d0ee2632d went forward, I would like exemptions for __GFP_RETRY_MAYFAIL : requests as in this patch. [mhocko@suse.com: reworded changelog] Link: http://lkml.kernel.org/r/20191007075548.12456-1-mhocko@kernel.org Fixes: b39d0ee2632d ("mm, page_alloc: avoid expensive reclaim when compaction may not succeed") Signed-off-by: David Rientjes Signed-off-by: Michal Hocko Reviewed-by: Mike Kravetz Acked-by: Vlastimil Babka Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c0b2e0306720..ecc3dbad606b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4473,12 +4473,14 @@ retry_cpuset: if (page) goto got_pg; - if (order >= pageblock_order && (gfp_mask & __GFP_IO)) { + if (order >= pageblock_order && (gfp_mask & __GFP_IO) && + !(gfp_mask & __GFP_RETRY_MAYFAIL)) { /* * If allocating entire pageblock(s) and compaction * failed because all zones are below low watermarks * or is prohibited because it recently failed at this - * order, fail immediately. + * order, fail immediately unless the allocator has + * requested compaction and reclaim retry. * * Reclaim is * - potentially very expensive because zones are far From a2e9a5afce080226edbf1882d63d99bf32070e9e Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 14 Oct 2019 14:12:07 -0700 Subject: [PATCH 263/572] mm, compaction: fix wrong pfn handling in __reset_isolation_pfn() Florian and Dave reported [1] a NULL pointer dereference in __reset_isolation_pfn(). While the exact cause is unclear, staring at the code revealed two bugs, which might be related. One bug is that if zone starts in the middle of pageblock, block_page might correspond to different pfn than block_pfn, and then the pfn_valid_within() checks will check different pfn's than those accessed via struct page. This might result in acessing an unitialized page in CONFIG_HOLES_IN_ZONE configs. The other bug is that end_page refers to the first page of next pageblock and not last page of current pageblock. The online and valid check is then wrong and with sections, the while (page < end_page) loop might wander off actual struct page arrays. [1] https://lore.kernel.org/linux-xfs/87o8z1fvqu.fsf@mid.deneb.enyo.de/ Link: http://lkml.kernel.org/r/20191008152915.24704-1-vbabka@suse.cz Fixes: 6b0868c820ff ("mm/compaction.c: correct zone boundary handling when resetting pageblock skip hints") Signed-off-by: Vlastimil Babka Reported-by: Florian Weimer Reported-by: Dave Chinner Acked-by: Mel Gorman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/compaction.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mm/compaction.c b/mm/compaction.c index ce08b39d85d4..672d3c78c6ab 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -270,14 +270,15 @@ __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source, /* Ensure the start of the pageblock or zone is online and valid */ block_pfn = pageblock_start_pfn(pfn); - block_page = pfn_to_online_page(max(block_pfn, zone->zone_start_pfn)); + block_pfn = max(block_pfn, zone->zone_start_pfn); + block_page = pfn_to_online_page(block_pfn); if (block_page) { page = block_page; pfn = block_pfn; } /* Ensure the end of the pageblock or zone is online and valid */ - block_pfn += pageblock_nr_pages; + block_pfn = pageblock_end_pfn(pfn) - 1; block_pfn = min(block_pfn, zone_end_pfn(zone) - 1); end_page = pfn_to_online_page(block_pfn); if (!end_page) @@ -303,7 +304,7 @@ __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source, page += (1 << PAGE_ALLOC_COSTLY_ORDER); pfn += (1 << PAGE_ALLOC_COSTLY_ORDER); - } while (page < end_page); + } while (page <= end_page); return false; } From c70d868f272befca09081190ae477c51fcbee5dd Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 14 Oct 2019 14:12:11 -0700 Subject: [PATCH 264/572] fs/direct-io.c: fix kernel-doc warning Fix kernel-doc warning in fs/direct-io.c: fs/direct-io.c:258: warning: Excess function parameter 'offset' description in 'dio_complete' Also, don't mark this function as having kernel-doc notation since it is not exported. Link: http://lkml.kernel.org/r/97908511-4328-4a56-17fe-f43a1d7aa470@infradead.org Fixes: 6d544bb4d901 ("dio: centralize completion in dio_complete()") Signed-off-by: Randy Dunlap Cc: Zach Brown Cc: Alexander Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/direct-io.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/direct-io.c b/fs/direct-io.c index ae196784f487..9329ced91f1d 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -241,9 +241,8 @@ void dio_warn_stale_pagecache(struct file *filp) } } -/** +/* * dio_complete() - called when all DIO BIO I/O has been completed - * @offset: the byte offset in the file of the completed operation * * This drops i_dio_count, lets interested parties know that a DIO operation * has completed, and calculates the resulting return code for the operation. From 8e88bfba77eec6231c6a72076c28bbb7634a0e8c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 14 Oct 2019 14:12:14 -0700 Subject: [PATCH 265/572] fs/libfs.c: fix kernel-doc warning Fix kernel-doc warning in fs/libfs.c: fs/libfs.c:496: warning: Excess function parameter 'available' description in 'simple_write_end' Link: http://lkml.kernel.org/r/5fc9d70b-e377-0ec9-066a-970d49579041@infradead.org Fixes: ad2a722f196d ("libfs: Open code simple_commit_write into only user") Signed-off-by: Randy Dunlap Cc: Boaz Harrosh Cc: Alexander Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/libfs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/libfs.c b/fs/libfs.c index 540611b99b9a..1463b038ffc4 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -473,8 +473,7 @@ EXPORT_SYMBOL(simple_write_begin); /** * simple_write_end - .write_end helper for non-block-device FSes - * @available: See .write_end of address_space_operations - * @file: " + * @file: See .write_end of address_space_operations * @mapping: " * @pos: " * @len: " From b46ec1da5eb7d728938c8d115c4c291c7c71a98d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 14 Oct 2019 14:12:17 -0700 Subject: [PATCH 266/572] fs/fs-writeback.c: fix kernel-doc warning Fix kernel-doc warning in fs/fs-writeback.c: fs/fs-writeback.c:913: warning: Excess function parameter 'nr_pages' description in 'cgroup_writeback_by_id' Link: http://lkml.kernel.org/r/756645ac-0ce8-d47e-d30a-04d9e4923a4f@infradead.org Fixes: d62241c7a406 ("writeback, memcg: Implement cgroup_writeback_by_id()") Signed-off-by: Randy Dunlap Cc: Tejun Heo Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fs-writeback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index e88421d9a48d..8461a6322039 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -905,7 +905,7 @@ restart: * cgroup_writeback_by_id - initiate cgroup writeback from bdi and memcg IDs * @bdi_id: target bdi id * @memcg_id: target memcg css id - * @nr_pages: number of pages to write, 0 for best-effort dirty flushing + * @nr: number of pages to write, 0 for best-effort dirty flushing * @reason: reason why some writeback work initiated * @done: target wb_completion * From 2a7e582f429bd983816ad366cf0f1fcf87ec6ba6 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 14 Oct 2019 14:12:20 -0700 Subject: [PATCH 267/572] bitmap.h: fix kernel-doc warning and typo Fix kernel-doc warning in : include/linux/bitmap.h:341: warning: Function parameter or member 'nbits' not described in 'bitmap_or_equal' Also fix small typo (bitnaps). Link: http://lkml.kernel.org/r/0729ea7a-2c0d-b2c5-7dd3-3629ee0803e2@infradead.org Fixes: b9fa6442f704 ("cpumask: Implement cpumask_or_equal()") Signed-off-by: Randy Dunlap Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 90528f12bdfa..29fc933df3bf 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -326,10 +326,11 @@ static inline int bitmap_equal(const unsigned long *src1, } /** - * bitmap_or_equal - Check whether the or of two bitnaps is equal to a third + * bitmap_or_equal - Check whether the or of two bitmaps is equal to a third * @src1: Pointer to bitmap 1 * @src2: Pointer to bitmap 2 will be or'ed with bitmap 1 * @src3: Pointer to bitmap 3. Compare to the result of *@src1 | *@src2 + * @nbits: number of bits in each of these bitmaps * * Returns: True if (*@src1 | *@src2) == *@src3, false otherwise */ From 13bea898cd9153be91baa92de68ec32ff0b99362 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 14 Oct 2019 14:12:23 -0700 Subject: [PATCH 268/572] xarray.h: fix kernel-doc warning Fix (Sphinx) kernel-doc warning in : include/linux/xarray.h:232: WARNING: Unexpected indentation. Link: http://lkml.kernel.org/r/89ba2134-ce23-7c10-5ee1-ef83b35aa984@infradead.org Fixes: a3e4d3f97ec8 ("XArray: Redesign xa_alloc API") Signed-off-by: Randy Dunlap Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/xarray.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 5921599b6dc4..86eecbd98e84 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -230,8 +230,8 @@ static inline int xa_err(void *entry) * This structure is used either directly or via the XA_LIMIT() macro * to communicate the range of IDs that are valid for allocation. * Two common ranges are predefined for you: - * * xa_limit_32b - [0 - UINT_MAX] - * * xa_limit_31b - [0 - INT_MAX] + * * xa_limit_32b - [0 - UINT_MAX] + * * xa_limit_31b - [0 - INT_MAX] */ struct xa_limit { u32 max; From 87bf4f71af4fb162033fbd98b4252ec11a715dbe Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 14 Oct 2019 14:12:26 -0700 Subject: [PATCH 269/572] mm/slab.c: fix kernel-doc warning for __ksize() Fix kernel-doc warning in mm/slab.c: mm/slab.c:4215: warning: Function parameter or member 'objp' not described in '__ksize' Also add Return: documentation section for this function. Link: http://lkml.kernel.org/r/68c9fd7d-f09e-d376-e292-c7b2bdf1774d@infradead.org Fixes: 10d1f8cb3965 ("mm/slab: refactor common ksize KASAN logic into slab_common.c") Signed-off-by: Randy Dunlap Acked-by: Marco Elver Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/slab.c b/mm/slab.c index 9df370558e5d..66e5d8032bae 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -4206,9 +4206,12 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page, /** * __ksize -- Uninstrumented ksize. + * @objp: pointer to the object * * Unlike ksize(), __ksize() is uninstrumented, and does not provide the same * safety checks as ksize() with KASAN instrumentation enabled. + * + * Return: size of the actual memory used by @objp in bytes */ size_t __ksize(const void *objp) { From 3d7fed4ad8ccb691d217efbb0f934e6a4df5ef91 Mon Sep 17 00:00:00 2001 From: Jane Chu Date: Mon, 14 Oct 2019 14:12:29 -0700 Subject: [PATCH 270/572] mm/memory-failure: poison read receives SIGKILL instead of SIGBUS if mmaped more than once Mmap /dev/dax more than once, then read the poison location using address from one of the mappings. The other mappings due to not having the page mapped in will cause SIGKILLs delivered to the process. SIGKILL succeeds over SIGBUS, so user process loses the opportunity to handle the UE. Although one may add MAP_POPULATE to mmap(2) to work around the issue, MAP_POPULATE makes mapping 128GB of pmem several magnitudes slower, so isn't always an option. Details - ndctl inject-error --block=10 --count=1 namespace6.0 ./read_poison -x dax6.0 -o 5120 -m 2 mmaped address 0x7f5bb6600000 mmaped address 0x7f3cf3600000 doing local read at address 0x7f3cf3601400 Killed Console messages in instrumented kernel - mce: Uncorrected hardware memory error in user-access at edbe201400 Memory failure: tk->addr = 7f5bb6601000 Memory failure: address edbe201: call dev_pagemap_mapping_shift dev_pagemap_mapping_shift: page edbe201: no PUD Memory failure: tk->size_shift == 0 Memory failure: Unable to find user space address edbe201 in read_poison Memory failure: tk->addr = 7f3cf3601000 Memory failure: address edbe201: call dev_pagemap_mapping_shift Memory failure: tk->size_shift = 21 Memory failure: 0xedbe201: forcibly killing read_poison:22434 because of failure to unmap corrupted page => to deliver SIGKILL Memory failure: 0xedbe201: Killing read_poison:22434 due to hardware memory corruption => to deliver SIGBUS Link: http://lkml.kernel.org/r/1565112345-28754-3-git-send-email-jane.chu@oracle.com Signed-off-by: Jane Chu Suggested-by: Naoya Horiguchi Reviewed-by: Dan Williams Acked-by: Naoya Horiguchi Cc: Michal Hocko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 7ef849da8278..0ae72b6acee7 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -199,7 +199,6 @@ struct to_kill { struct task_struct *tsk; unsigned long addr; short size_shift; - char addr_valid; }; /* @@ -324,22 +323,27 @@ static void add_to_kill(struct task_struct *tsk, struct page *p, } } tk->addr = page_address_in_vma(p, vma); - tk->addr_valid = 1; if (is_zone_device_page(p)) tk->size_shift = dev_pagemap_mapping_shift(p, vma); else tk->size_shift = compound_order(compound_head(p)) + PAGE_SHIFT; /* - * In theory we don't have to kill when the page was - * munmaped. But it could be also a mremap. Since that's - * likely very rare kill anyways just out of paranoia, but use - * a SIGKILL because the error is not contained anymore. + * Send SIGKILL if "tk->addr == -EFAULT". Also, as + * "tk->size_shift" is always non-zero for !is_zone_device_page(), + * so "tk->size_shift == 0" effectively checks no mapping on + * ZONE_DEVICE. Indeed, when a devdax page is mmapped N times + * to a process' address space, it's possible not all N VMAs + * contain mappings for the page, but at least one VMA does. + * Only deliver SIGBUS with payload derived from the VMA that + * has a mapping for the page. */ - if (tk->addr == -EFAULT || tk->size_shift == 0) { + if (tk->addr == -EFAULT) { pr_info("Memory failure: Unable to find user space address %lx in %s\n", page_to_pfn(p), tsk->comm); - tk->addr_valid = 0; + } else if (tk->size_shift == 0) { + kfree(tk); + return; } get_task_struct(tsk); tk->tsk = tsk; @@ -366,7 +370,7 @@ static void kill_procs(struct list_head *to_kill, int forcekill, bool fail, * make sure the process doesn't catch the * signal and then access the memory. Just kill it. */ - if (fail || tk->addr_valid == 0) { + if (fail || tk->addr == -EFAULT) { pr_err("Memory failure: %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n", pfn, tk->tsk->comm, tk->tsk->pid); do_send_sig_info(SIGKILL, SEND_SIG_PRIV, From 8b39da985194aac2998dd9e3a22d00b596cebf1e Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Mon, 14 Oct 2019 15:48:19 -0700 Subject: [PATCH 271/572] xtensa: drop EXPORT_SYMBOL for outs*/ins* Custom outs*/ins* implementations are long gone from the xtensa port, remove matching EXPORT_SYMBOLs. This fixes the following build warnings issued by modpost since commit 15bfc2348d54 ("modpost: check for static EXPORT_SYMBOL* functions"): WARNING: "insb" [vmlinux] is a static EXPORT_SYMBOL WARNING: "insw" [vmlinux] is a static EXPORT_SYMBOL WARNING: "insl" [vmlinux] is a static EXPORT_SYMBOL WARNING: "outsb" [vmlinux] is a static EXPORT_SYMBOL WARNING: "outsw" [vmlinux] is a static EXPORT_SYMBOL WARNING: "outsl" [vmlinux] is a static EXPORT_SYMBOL Cc: stable@vger.kernel.org Fixes: d38efc1f150f ("xtensa: adopt generic io routines") Signed-off-by: Max Filippov --- arch/xtensa/kernel/xtensa_ksyms.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/xtensa/kernel/xtensa_ksyms.c b/arch/xtensa/kernel/xtensa_ksyms.c index 04f19de46700..4092555828b1 100644 --- a/arch/xtensa/kernel/xtensa_ksyms.c +++ b/arch/xtensa/kernel/xtensa_ksyms.c @@ -119,13 +119,6 @@ EXPORT_SYMBOL(__invalidate_icache_range); // FIXME EXPORT_SYMBOL(screen_info); #endif -EXPORT_SYMBOL(outsb); -EXPORT_SYMBOL(outsw); -EXPORT_SYMBOL(outsl); -EXPORT_SYMBOL(insb); -EXPORT_SYMBOL(insw); -EXPORT_SYMBOL(insl); - extern long common_exception_return; EXPORT_SYMBOL(common_exception_return); From 6658f87f219427ee776c498e07c878eb5cad1be2 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 9 Oct 2019 18:23:27 +0300 Subject: [PATCH 272/572] gpio: merrifield: Restore use of irq_base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During conversion to internal IRQ chip initialization the commit 8f86a5b4ad67 ("gpio: merrifield: Pass irqchip when adding gpiochip") lost the irq_base assignment. drivers/gpio/gpio-merrifield.c: In function ‘mrfld_gpio_probe’: drivers/gpio/gpio-merrifield.c:405:17: warning: variable ‘irq_base’ set but not used [-Wunused-but-set-variable] Assign the girq->first to it. Fixes: 8f86a5b4ad67 ("gpio: merrifield: Pass irqchip when adding gpiochip") Signed-off-by: Andy Shevchenko Signed-off-by: Linus Walleij --- drivers/gpio/gpio-merrifield.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-merrifield.c b/drivers/gpio/gpio-merrifield.c index 4f27ddfe1e2f..9596024c9161 100644 --- a/drivers/gpio/gpio-merrifield.c +++ b/drivers/gpio/gpio-merrifield.c @@ -455,6 +455,7 @@ static int mrfld_gpio_probe(struct pci_dev *pdev, const struct pci_device_id *id if (!girq->parents) return -ENOMEM; girq->parents[0] = pdev->irq; + girq->first = irq_base; girq->default_type = IRQ_TYPE_NONE; girq->handler = handle_bad_irq; From 9411e3aaa6342eb730daa55cf3377463a37d2aa7 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 9 Oct 2019 17:34:44 +0300 Subject: [PATCH 273/572] gpiolib: Initialize the hardware with a callback After changing the drivers to use GPIO core to add an IRQ chip it appears that some of them requires a hardware initialization before adding the IRQ chip. Add an optional callback ->init_hw() to allow that drivers to initialize hardware if needed. This change is a part of the fix NULL pointer dereference brought to the several drivers recently. Cc: Hans de Goede Signed-off-by: Andy Shevchenko Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 22 +++++++++++++++++++++- include/linux/gpio/driver.h | 8 ++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 5833e4f380d6..104ed299d5ea 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -86,6 +86,7 @@ static int gpiochip_add_irqchip(struct gpio_chip *gpiochip, struct lock_class_key *lock_key, struct lock_class_key *request_key); static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip); +static int gpiochip_irqchip_init_hw(struct gpio_chip *gpiochip); static int gpiochip_irqchip_init_valid_mask(struct gpio_chip *gpiochip); static void gpiochip_irqchip_free_valid_mask(struct gpio_chip *gpiochip); @@ -1406,6 +1407,10 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, machine_gpiochip_add(chip); + ret = gpiochip_irqchip_init_hw(chip); + if (ret) + goto err_remove_acpi_chip; + ret = gpiochip_irqchip_init_valid_mask(chip); if (ret) goto err_remove_acpi_chip; @@ -1622,6 +1627,16 @@ static struct gpio_chip *find_chip_by_name(const char *name) * The following is irqchip helper code for gpiochips. */ +static int gpiochip_irqchip_init_hw(struct gpio_chip *gc) +{ + struct gpio_irq_chip *girq = &gc->irq; + + if (!girq->init_hw) + return 0; + + return girq->init_hw(gc); +} + static int gpiochip_irqchip_init_valid_mask(struct gpio_chip *gc) { struct gpio_irq_chip *girq = &gc->irq; @@ -2446,8 +2461,13 @@ static inline int gpiochip_add_irqchip(struct gpio_chip *gpiochip, { return 0; } - static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip) {} + +static inline int gpiochip_irqchip_init_hw(struct gpio_chip *gpiochip) +{ + return 0; +} + static inline int gpiochip_irqchip_init_valid_mask(struct gpio_chip *gpiochip) { return 0; diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index f8245d67f070..5dd9c982e2cb 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -201,6 +201,14 @@ struct gpio_irq_chip { */ bool threaded; + /** + * @init_hw: optional routine to initialize hardware before + * an IRQ chip will be added. This is quite useful when + * a particular driver wants to clear IRQ related registers + * in order to avoid undesired events. + */ + int (*init_hw)(struct gpio_chip *chip); + /** * @init_valid_mask: optional routine to initialize @valid_mask, to be * used if not all GPIO lines are valid interrupts. Sometimes some From a752fbb4b4644d09964e76c69ccd1acc700e907a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 9 Oct 2019 18:23:27 +0300 Subject: [PATCH 274/572] gpio: intel-mid: Move hardware initialization to callback The driver wants to initialize related registers before IRQ chip will be added. That's why move it to a corresponding callback. It also fixes the NULL pointer dereference. Fixes: 8069e69a9792 ("gpio: intel-mid: Pass irqchip when adding gpiochip") Signed-off-by: Andy Shevchenko Signed-off-by: Linus Walleij --- drivers/gpio/gpio-intel-mid.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpio-intel-mid.c b/drivers/gpio/gpio-intel-mid.c index 4d835f9089df..86a10c808ef6 100644 --- a/drivers/gpio/gpio-intel-mid.c +++ b/drivers/gpio/gpio-intel-mid.c @@ -293,8 +293,9 @@ static void intel_mid_irq_handler(struct irq_desc *desc) chip->irq_eoi(data); } -static void intel_mid_irq_init_hw(struct intel_mid_gpio *priv) +static int intel_mid_irq_init_hw(struct gpio_chip *chip) { + struct intel_mid_gpio *priv = gpiochip_get_data(chip); void __iomem *reg; unsigned base; @@ -309,6 +310,8 @@ static void intel_mid_irq_init_hw(struct intel_mid_gpio *priv) reg = gpio_reg(&priv->chip, base, GEDR); writel(~0, reg); } + + return 0; } static int __maybe_unused intel_gpio_runtime_idle(struct device *dev) @@ -372,6 +375,7 @@ static int intel_gpio_probe(struct pci_dev *pdev, girq = &priv->chip.irq; girq->chip = &intel_mid_irqchip; + girq->init_hw = intel_mid_irq_init_hw; girq->parent_handler = intel_mid_irq_handler; girq->num_parents = 1; girq->parents = devm_kcalloc(&pdev->dev, girq->num_parents, @@ -384,9 +388,8 @@ static int intel_gpio_probe(struct pci_dev *pdev, girq->default_type = IRQ_TYPE_NONE; girq->handler = handle_simple_irq; - intel_mid_irq_init_hw(priv); - pci_set_drvdata(pdev, priv); + retval = devm_gpiochip_add_data(&pdev->dev, &priv->chip, priv); if (retval) { dev_err(&pdev->dev, "gpiochip_add error %d\n", retval); From a33912061607979380e070af63a9adeef0a16246 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 9 Oct 2019 18:58:45 +0300 Subject: [PATCH 275/572] gpio: lynxpoint: Move hardware initialization to callback The driver wants to initialize related registers before IRQ chip will be added. That's why move it to a corresponding callback. It also fixes the NULL pointer dereference. Fixes: 7b1e889436a1 ("gpio: lynxpoint: Pass irqchip when adding gpiochip") Signed-off-by: Andy Shevchenko Signed-off-by: Linus Walleij --- drivers/gpio/gpio-lynxpoint.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpio-lynxpoint.c b/drivers/gpio/gpio-lynxpoint.c index 6bb9741ad036..082cd143f430 100644 --- a/drivers/gpio/gpio-lynxpoint.c +++ b/drivers/gpio/gpio-lynxpoint.c @@ -294,8 +294,9 @@ static struct irq_chip lp_irqchip = { .flags = IRQCHIP_SKIP_SET_WAKE, }; -static void lp_gpio_irq_init_hw(struct lp_gpio *lg) +static int lp_gpio_irq_init_hw(struct gpio_chip *chip) { + struct lp_gpio *lg = gpiochip_get_data(chip); unsigned long reg; unsigned base; @@ -307,6 +308,8 @@ static void lp_gpio_irq_init_hw(struct lp_gpio *lg) reg = lp_gpio_reg(&lg->chip, base, LP_INT_STAT); outl(0xffffffff, reg); } + + return 0; } static int lp_gpio_probe(struct platform_device *pdev) @@ -364,6 +367,7 @@ static int lp_gpio_probe(struct platform_device *pdev) girq = &gc->irq; girq->chip = &lp_irqchip; + girq->init_hw = lp_gpio_irq_init_hw; girq->parent_handler = lp_gpio_irq_handler; girq->num_parents = 1; girq->parents = devm_kcalloc(&pdev->dev, girq->num_parents, @@ -374,8 +378,6 @@ static int lp_gpio_probe(struct platform_device *pdev) girq->parents[0] = (unsigned)irq_rc->start; girq->default_type = IRQ_TYPE_NONE; girq->handler = handle_simple_irq; - - lp_gpio_irq_init_hw(lg); } ret = devm_gpiochip_add_data(dev, gc, lg); From 4c87540940cbc7ddbe9674087919c605fd5c2ef1 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 9 Oct 2019 18:23:27 +0300 Subject: [PATCH 276/572] gpio: merrifield: Move hardware initialization to callback The driver wants to initialize related registers before IRQ chip will be added. That's why move it to a corresponding callback. It also fixes the NULL pointer dereference. Fixes: 8f86a5b4ad67 ("gpio: merrifield: Pass irqchip when adding gpiochip") Signed-off-by: Andy Shevchenko Signed-off-by: Linus Walleij --- drivers/gpio/gpio-merrifield.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpio-merrifield.c b/drivers/gpio/gpio-merrifield.c index 9596024c9161..2f1e9da81c1e 100644 --- a/drivers/gpio/gpio-merrifield.c +++ b/drivers/gpio/gpio-merrifield.c @@ -362,8 +362,9 @@ static void mrfld_irq_handler(struct irq_desc *desc) chained_irq_exit(irqchip, desc); } -static void mrfld_irq_init_hw(struct mrfld_gpio *priv) +static int mrfld_irq_init_hw(struct gpio_chip *chip) { + struct mrfld_gpio *priv = gpiochip_get_data(chip); void __iomem *reg; unsigned int base; @@ -375,6 +376,8 @@ static void mrfld_irq_init_hw(struct mrfld_gpio *priv) reg = gpio_reg(&priv->chip, base, GFER); writel(0, reg); } + + return 0; } static const char *mrfld_gpio_get_pinctrl_dev_name(struct mrfld_gpio *priv) @@ -447,6 +450,7 @@ static int mrfld_gpio_probe(struct pci_dev *pdev, const struct pci_device_id *id girq = &priv->chip.irq; girq->chip = &mrfld_irqchip; + girq->init_hw = mrfld_irq_init_hw; girq->parent_handler = mrfld_irq_handler; girq->num_parents = 1; girq->parents = devm_kcalloc(&pdev->dev, girq->num_parents, @@ -459,8 +463,6 @@ static int mrfld_gpio_probe(struct pci_dev *pdev, const struct pci_device_id *id girq->default_type = IRQ_TYPE_NONE; girq->handler = handle_bad_irq; - mrfld_irq_init_hw(priv); - pci_set_drvdata(pdev, priv); retval = devm_gpiochip_add_data(&pdev->dev, &priv->chip, priv); if (retval) { From 75e99bf5ed8fa74bc80d693d8e0a24eeaa38202b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 9 Oct 2019 18:59:11 +0300 Subject: [PATCH 277/572] gpio: lynxpoint: set default handler to be handle_bad_irq() We switch the default handler to be handle_bad_irq() instead of handle_simple_irq() (which was not correct anyway). Signed-off-by: Andy Shevchenko Signed-off-by: Linus Walleij --- drivers/gpio/gpio-lynxpoint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-lynxpoint.c b/drivers/gpio/gpio-lynxpoint.c index 082cd143f430..e9e47c0d5be7 100644 --- a/drivers/gpio/gpio-lynxpoint.c +++ b/drivers/gpio/gpio-lynxpoint.c @@ -377,7 +377,7 @@ static int lp_gpio_probe(struct platform_device *pdev) return -ENOMEM; girq->parents[0] = (unsigned)irq_rc->start; girq->default_type = IRQ_TYPE_NONE; - girq->handler = handle_simple_irq; + girq->handler = handle_bad_irq; } ret = devm_gpiochip_add_data(dev, gc, lg); From 06b0d7fe7e5ff3ba4c7e265ef41135e8bcc232bb Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Fri, 11 Oct 2019 13:45:19 +0000 Subject: [PATCH 278/572] net: aquantia: temperature retrieval fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Chip temperature is a two byte word, colocated internally with cable length data. We do all readouts from HW memory by dwords, thus we should clear extra high bytes, otherwise temperature output gets weird as soon as we attach a cable to the NIC. Fixes: 8f8940118654 ("net: aquantia: add infrastructure to readout chip temperature") Tested-by: Holger Hoffstätte Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- .../net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c index da726489e3c8..7bc51f8d6f2f 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c @@ -337,7 +337,7 @@ static int aq_fw2x_get_phy_temp(struct aq_hw_s *self, int *temp) /* Convert PHY temperature from 1/256 degree Celsius * to 1/1000 degree Celsius. */ - *temp = temp_res * 1000 / 256; + *temp = (temp_res & 0xFFFF) * 1000 / 256; return 0; } From ed4d81c4b3f28ccf624f11fd66f67aec5b58859c Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Fri, 11 Oct 2019 13:45:20 +0000 Subject: [PATCH 279/572] net: aquantia: when cleaning hw cache it should be toggled >From HW specification to correctly reset HW caches (this is a required workaround when stopping the device), register bit should actually be toggled. It was previosly always just set. Due to the way driver stops HW this never actually caused any issues, but it still may, so cleaning this up. Fixes: 7a1bb49461b1 ("net: aquantia: fix potential IOMMU fault after driver unbind") Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- .../aquantia/atlantic/hw_atl/hw_atl_b0.c | 16 ++++++++++++++-- .../aquantia/atlantic/hw_atl/hw_atl_llh.c | 17 +++++++++++++++-- .../aquantia/atlantic/hw_atl/hw_atl_llh.h | 7 +++++-- .../atlantic/hw_atl/hw_atl_llh_internal.h | 19 +++++++++++++++++++ 4 files changed, 53 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 30f7fc4c97ff..3459fadb7ddd 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -968,14 +968,26 @@ static int hw_atl_b0_hw_interrupt_moderation_set(struct aq_hw_s *self) static int hw_atl_b0_hw_stop(struct aq_hw_s *self) { + int err; + u32 val; + hw_atl_b0_hw_irq_disable(self, HW_ATL_B0_INT_MASK); /* Invalidate Descriptor Cache to prevent writing to the cached * descriptors and to the data pointer of those descriptors */ - hw_atl_rdm_rx_dma_desc_cache_init_set(self, 1); + hw_atl_rdm_rx_dma_desc_cache_init_tgl(self); - return aq_hw_err_from_flags(self); + err = aq_hw_err_from_flags(self); + + if (err) + goto err_exit; + + readx_poll_timeout_atomic(hw_atl_rdm_rx_dma_desc_cache_init_done_get, + self, val, val == 1, 1000U, 10000U); + +err_exit: + return err; } static int hw_atl_b0_hw_ring_tx_stop(struct aq_hw_s *self, diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c index 1149812ae463..6f340695e6bd 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c @@ -606,12 +606,25 @@ void hw_atl_rpb_rx_flow_ctl_mode_set(struct aq_hw_s *aq_hw, u32 rx_flow_ctl_mode HW_ATL_RPB_RX_FC_MODE_SHIFT, rx_flow_ctl_mode); } -void hw_atl_rdm_rx_dma_desc_cache_init_set(struct aq_hw_s *aq_hw, u32 init) +void hw_atl_rdm_rx_dma_desc_cache_init_tgl(struct aq_hw_s *aq_hw) { + u32 val; + + val = aq_hw_read_reg_bit(aq_hw, HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_ADR, + HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_MSK, + HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_SHIFT); + aq_hw_write_reg_bit(aq_hw, HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_ADR, HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_MSK, HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_SHIFT, - init); + val ^ 1); +} + +u32 hw_atl_rdm_rx_dma_desc_cache_init_done_get(struct aq_hw_s *aq_hw) +{ + return aq_hw_read_reg_bit(aq_hw, RDM_RX_DMA_DESC_CACHE_INIT_DONE_ADR, + RDM_RX_DMA_DESC_CACHE_INIT_DONE_MSK, + RDM_RX_DMA_DESC_CACHE_INIT_DONE_SHIFT); } void hw_atl_rpb_rx_pkt_buff_size_per_tc_set(struct aq_hw_s *aq_hw, diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h index 0c37abbabca5..c3ee278c3747 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h @@ -313,8 +313,11 @@ void hw_atl_rpb_rx_pkt_buff_size_per_tc_set(struct aq_hw_s *aq_hw, u32 rx_pkt_buff_size_per_tc, u32 buffer); -/* set rdm rx dma descriptor cache init */ -void hw_atl_rdm_rx_dma_desc_cache_init_set(struct aq_hw_s *aq_hw, u32 init); +/* toggle rdm rx dma descriptor cache init */ +void hw_atl_rdm_rx_dma_desc_cache_init_tgl(struct aq_hw_s *aq_hw); + +/* get rdm rx dma descriptor cache init done */ +u32 hw_atl_rdm_rx_dma_desc_cache_init_done_get(struct aq_hw_s *aq_hw); /* set rx xoff enable (per tc) */ void hw_atl_rpb_rx_xoff_en_per_tc_set(struct aq_hw_s *aq_hw, u32 rx_xoff_en_per_tc, diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h index c3febcdfa92e..35887ad89025 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h @@ -318,6 +318,25 @@ /* default value of bitfield rdm_desc_init_i */ #define HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_DEFAULT 0x0 +/* rdm_desc_init_done_i bitfield definitions + * preprocessor definitions for the bitfield rdm_desc_init_done_i. + * port="pif_rdm_desc_init_done_i" + */ + +/* register address for bitfield rdm_desc_init_done_i */ +#define RDM_RX_DMA_DESC_CACHE_INIT_DONE_ADR 0x00005a10 +/* bitmask for bitfield rdm_desc_init_done_i */ +#define RDM_RX_DMA_DESC_CACHE_INIT_DONE_MSK 0x00000001U +/* inverted bitmask for bitfield rdm_desc_init_done_i */ +#define RDM_RX_DMA_DESC_CACHE_INIT_DONE_MSKN 0xfffffffe +/* lower bit position of bitfield rdm_desc_init_done_i */ +#define RDM_RX_DMA_DESC_CACHE_INIT_DONE_SHIFT 0U +/* width of bitfield rdm_desc_init_done_i */ +#define RDM_RX_DMA_DESC_CACHE_INIT_DONE_WIDTH 1 +/* default value of bitfield rdm_desc_init_done_i */ +#define RDM_RX_DMA_DESC_CACHE_INIT_DONE_DEFAULT 0x0 + + /* rx int_desc_wrb_en bitfield definitions * preprocessor definitions for the bitfield "int_desc_wrb_en". * port="pif_rdm_int_desc_wrb_en_i" From d08b9a0a3ebdf71b0aabe576c7dd48e57e80e0f0 Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Fri, 11 Oct 2019 13:45:22 +0000 Subject: [PATCH 280/572] net: aquantia: do not pass lro session with invalid tcp checksum Individual descriptors on LRO TCP session should be checked for CRC errors. It was discovered that HW recalculates L4 checksums on LRO session and does not break it up on bad L4 csum. Thus, driver should aggregate HW LRO L4 statuses from all individual buffers of LRO session and drop packet if one of the buffers has bad L4 checksum. Fixes: f38f1ee8aeb2 ("net: aquantia: check rx csum for all packets in LRO session") Signed-off-by: Dmitry Bogdanov Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 3901d7994ca1..76bdbe1596d6 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -313,6 +313,7 @@ int aq_ring_rx_clean(struct aq_ring_s *self, break; buff->is_error |= buff_->is_error; + buff->is_cso_err |= buff_->is_cso_err; } while (!buff_->is_eop); @@ -320,7 +321,7 @@ int aq_ring_rx_clean(struct aq_ring_s *self, err = 0; goto err_exit; } - if (buff->is_error) { + if (buff->is_error || buff->is_cso_err) { buff_ = buff; do { next_ = buff_->next, From 9f051db566da1e8110659ab4ab188af1c2510bb4 Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Fri, 11 Oct 2019 13:45:23 +0000 Subject: [PATCH 281/572] net: aquantia: correctly handle macvlan and multicast coexistence macvlan and multicast handling is now mixed up. The explicit issue is that macvlan interface gets broken (no traffic) after clearing MULTICAST flag on the real interface. We now do separate logic and consider both ALLMULTI and MULTICAST flags on the device. Fixes: 11ba961c9161 ("net: aquantia: Fix IFF_ALLMULTI flag functionality") Signed-off-by: Dmitry Bogdanov Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- .../net/ethernet/aquantia/atlantic/aq_main.c | 4 +-- .../net/ethernet/aquantia/atlantic/aq_nic.c | 32 +++++++++---------- .../aquantia/atlantic/hw_atl/hw_atl_b0.c | 7 ++-- 3 files changed, 21 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c index b4a0fb281e69..bb65dd39f847 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c @@ -194,9 +194,7 @@ static void aq_ndev_set_multicast_settings(struct net_device *ndev) { struct aq_nic_s *aq_nic = netdev_priv(ndev); - aq_nic_set_packet_filter(aq_nic, ndev->flags); - - aq_nic_set_multicast_list(aq_nic, ndev); + (void)aq_nic_set_multicast_list(aq_nic, ndev); } static int aq_ndo_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 8f66e7817811..2a18439b36fb 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -631,9 +631,12 @@ err_exit: int aq_nic_set_multicast_list(struct aq_nic_s *self, struct net_device *ndev) { - unsigned int packet_filter = self->packet_filter; + const struct aq_hw_ops *hw_ops = self->aq_hw_ops; + struct aq_nic_cfg_s *cfg = &self->aq_nic_cfg; + unsigned int packet_filter = ndev->flags; struct netdev_hw_addr *ha = NULL; unsigned int i = 0U; + int err = 0; self->mc_list.count = 0; if (netdev_uc_count(ndev) > AQ_HW_MULTICAST_ADDRESS_MAX) { @@ -641,29 +644,26 @@ int aq_nic_set_multicast_list(struct aq_nic_s *self, struct net_device *ndev) } else { netdev_for_each_uc_addr(ha, ndev) { ether_addr_copy(self->mc_list.ar[i++], ha->addr); - - if (i >= AQ_HW_MULTICAST_ADDRESS_MAX) - break; } } - if (i + netdev_mc_count(ndev) > AQ_HW_MULTICAST_ADDRESS_MAX) { - packet_filter |= IFF_ALLMULTI; - } else { - netdev_for_each_mc_addr(ha, ndev) { - ether_addr_copy(self->mc_list.ar[i++], ha->addr); - - if (i >= AQ_HW_MULTICAST_ADDRESS_MAX) - break; + cfg->is_mc_list_enabled = !!(packet_filter & IFF_MULTICAST); + if (cfg->is_mc_list_enabled) { + if (i + netdev_mc_count(ndev) > AQ_HW_MULTICAST_ADDRESS_MAX) { + packet_filter |= IFF_ALLMULTI; + } else { + netdev_for_each_mc_addr(ha, ndev) { + ether_addr_copy(self->mc_list.ar[i++], + ha->addr); + } } } if (i > 0 && i <= AQ_HW_MULTICAST_ADDRESS_MAX) { - packet_filter |= IFF_MULTICAST; self->mc_list.count = i; - self->aq_hw_ops->hw_multicast_list_set(self->aq_hw, - self->mc_list.ar, - self->mc_list.count); + err = hw_ops->hw_multicast_list_set(self->aq_hw, + self->mc_list.ar, + self->mc_list.count); } return aq_nic_set_packet_filter(self, packet_filter); } diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 3459fadb7ddd..2ad3fa6316ce 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -818,14 +818,15 @@ static int hw_atl_b0_hw_packet_filter_set(struct aq_hw_s *self, cfg->is_vlan_force_promisc); hw_atl_rpfl2multicast_flr_en_set(self, - IS_FILTER_ENABLED(IFF_ALLMULTI), 0); + IS_FILTER_ENABLED(IFF_ALLMULTI) && + IS_FILTER_ENABLED(IFF_MULTICAST), 0); hw_atl_rpfl2_accept_all_mc_packets_set(self, - IS_FILTER_ENABLED(IFF_ALLMULTI)); + IS_FILTER_ENABLED(IFF_ALLMULTI) && + IS_FILTER_ENABLED(IFF_MULTICAST)); hw_atl_rpfl2broadcast_en_set(self, IS_FILTER_ENABLED(IFF_BROADCAST)); - cfg->is_mc_list_enabled = IS_FILTER_ENABLED(IFF_MULTICAST); for (i = HW_ATL_B0_MAC_MIN; i < HW_ATL_B0_MAC_MAX; ++i) hw_atl_rpfl2_uc_flr_en_set(self, From ec52c7134b1fcef0edfc56d55072fd4f261ef198 Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Mon, 14 Oct 2019 11:21:13 +0100 Subject: [PATCH 282/572] arm64: cpufeature: Treat ID_AA64ZFR0_EL1 as RAZ when SVE is not enabled If CONFIG_ARM64_SVE=n then we fail to report ID_AA64ZFR0_EL1 as 0 when read by userspace, despite being required by the architecture. Although this is theoretically a change in ABI, userspace will first check for the presence of SVE via the HWCAP or the ID_AA64PFR0_EL1.SVE field before probing the ID_AA64ZFR0_EL1 register. Given that these are reported correctly for this configuration, we can safely tighten up the current behaviour. Ensure ID_AA64ZFR0_EL1 is treated as RAZ when CONFIG_ARM64_SVE=n. Signed-off-by: Julien Grall Reviewed-by: Suzuki K Poulose Reviewed-by: Mark Rutland Reviewed-by: Dave Martin Fixes: 06a916feca2b ("arm64: Expose SVE2 features for userspace") Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index cabebf1a7976..80f459ad0190 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -176,11 +176,16 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { }; static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = { - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SM4_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SHA3_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_BITPERM_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_AES_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SVEVER_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SM4_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SHA3_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_BITPERM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_AES_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SVEVER_SHIFT, 4, 0), ARM64_FTR_END, }; From 8c551f919a73c1dfa690a70a691be1da394145e8 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Mon, 14 Oct 2019 10:48:24 -0400 Subject: [PATCH 283/572] arm64: hibernate: check pgd table allocation There is a bug in create_safe_exec_page(), when page table is allocated it is not checked that table is allocated successfully: But it is dereferenced in: pgd_none(READ_ONCE(*pgdp)). Check that allocation was successful. Fixes: 82869ac57b5d ("arm64: kernel: Add support for hibernate/suspend-to-disk") Reviewed-by: James Morse Signed-off-by: Pavel Tatashin Signed-off-by: Will Deacon --- arch/arm64/kernel/hibernate.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index e0a7fce0e01c..a96b2921d22c 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -201,6 +201,7 @@ static int create_safe_exec_page(void *src_start, size_t length, gfp_t mask) { int rc = 0; + pgd_t *trans_pgd; pgd_t *pgdp; pud_t *pudp; pmd_t *pmdp; @@ -215,7 +216,13 @@ static int create_safe_exec_page(void *src_start, size_t length, memcpy((void *)dst, src_start, length); __flush_icache_range(dst, dst + length); - pgdp = pgd_offset_raw(allocator(mask), dst_addr); + trans_pgd = allocator(mask); + if (!trans_pgd) { + rc = -ENOMEM; + goto out; + } + + pgdp = pgd_offset_raw(trans_pgd, dst_addr); if (pgd_none(READ_ONCE(*pgdp))) { pudp = allocator(mask); if (!pudp) { From 12ade69c1eb9958b13374edf5ef742ea20ccffde Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 27 Sep 2019 13:53:43 +0200 Subject: [PATCH 284/572] KVM: PPC: Book3S HV: XIVE: Ensure VP isn't already in use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Connecting a vCPU to a XIVE KVM device means establishing a 1:1 association between a vCPU id and the offset (VP id) of a VP structure within a fixed size block of VPs. We currently try to enforce the 1:1 relationship by checking that a vCPU with the same id isn't already connected. This is good but unfortunately not enough because we don't map VP ids to raw vCPU ids but to packed vCPU ids, and the packing function kvmppc_pack_vcpu_id() isn't bijective by design. We got away with it because QEMU passes vCPU ids that fit well in the packing pattern. But nothing prevents userspace to come up with a forged vCPU id resulting in a packed id collision which causes the KVM device to associate two vCPUs to the same VP. This greatly confuses the irq layer and ultimately crashes the kernel, as shown below. Example: a guest with 1 guest thread per core, a core stride of 8 and 300 vCPUs has vCPU ids 0,8,16...2392. If QEMU is patched to inject at some point an invalid vCPU id 348, which is the packed version of itself and 2392, we get: genirq: Flags mismatch irq 199. 00010000 (kvm-2-2392) vs. 00010000 (kvm-2-348) CPU: 24 PID: 88176 Comm: qemu-system-ppc Not tainted 5.3.0-xive-nr-servers-5.3-gku+ #38 Call Trace: [c000003f7f9937e0] [c000000000c0110c] dump_stack+0xb0/0xf4 (unreliable) [c000003f7f993820] [c0000000001cb480] __setup_irq+0xa70/0xad0 [c000003f7f9938d0] [c0000000001cb75c] request_threaded_irq+0x13c/0x260 [c000003f7f993940] [c00800000d44e7ac] kvmppc_xive_attach_escalation+0x104/0x270 [kvm] [c000003f7f9939d0] [c00800000d45013c] kvmppc_xive_connect_vcpu+0x424/0x620 [kvm] [c000003f7f993ac0] [c00800000d444428] kvm_arch_vcpu_ioctl+0x260/0x448 [kvm] [c000003f7f993b90] [c00800000d43593c] kvm_vcpu_ioctl+0x154/0x7c8 [kvm] [c000003f7f993d00] [c0000000004840f0] do_vfs_ioctl+0xe0/0xc30 [c000003f7f993db0] [c000000000484d44] ksys_ioctl+0x104/0x120 [c000003f7f993e00] [c000000000484d88] sys_ioctl+0x28/0x80 [c000003f7f993e20] [c00000000000b278] system_call+0x5c/0x68 xive-kvm: Failed to request escalation interrupt for queue 0 of VCPU 2392 ------------[ cut here ]------------ remove_proc_entry: removing non-empty directory 'irq/199', leaking at least 'kvm-2-348' WARNING: CPU: 24 PID: 88176 at /home/greg/Work/linux/kernel-kvm-ppc/fs/proc/generic.c:684 remove_proc_entry+0x1ec/0x200 Modules linked in: kvm_hv kvm dm_mod vhost_net vhost tap xt_CHECKSUM iptable_mangle xt_MASQUERADE iptable_nat nf_nat xt_conntrack nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ipt_REJECT nf_reject_ipv4 tun bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter squashfs loop fuse i2c_dev sg ofpart ocxl powernv_flash at24 xts mtd uio_pdrv_genirq vmx_crypto opal_prd ipmi_powernv uio ipmi_devintf ipmi_msghandler ibmpowernv ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ip_tables ext4 mbcache jbd2 raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor xor async_tx raid6_pq libcrc32c raid1 raid0 linear sd_mod ast i2c_algo_bit drm_vram_helper ttm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm ahci libahci libata tg3 drm_panel_orientation_quirks [last unloaded: kvm] CPU: 24 PID: 88176 Comm: qemu-system-ppc Not tainted 5.3.0-xive-nr-servers-5.3-gku+ #38 NIP: c00000000053b0cc LR: c00000000053b0c8 CTR: c0000000000ba3b0 REGS: c000003f7f9934b0 TRAP: 0700 Not tainted (5.3.0-xive-nr-servers-5.3-gku+) MSR: 9000000000029033 CR: 48228222 XER: 20040000 CFAR: c000000000131a50 IRQMASK: 0 GPR00: c00000000053b0c8 c000003f7f993740 c0000000015ec500 0000000000000057 GPR04: 0000000000000001 0000000000000000 000049fb98484262 0000000000001bcf GPR08: 0000000000000007 0000000000000007 0000000000000001 9000000000001033 GPR12: 0000000000008000 c000003ffffeb800 0000000000000000 000000012f4ce5a1 GPR16: 000000012ef5a0c8 0000000000000000 000000012f113bb0 0000000000000000 GPR20: 000000012f45d918 c000003f863758b0 c000003f86375870 0000000000000006 GPR24: c000003f86375a30 0000000000000007 c0002039373d9020 c0000000014c4a48 GPR28: 0000000000000001 c000003fe62a4f6b c00020394b2e9fab c000003fe62a4ec0 NIP [c00000000053b0cc] remove_proc_entry+0x1ec/0x200 LR [c00000000053b0c8] remove_proc_entry+0x1e8/0x200 Call Trace: [c000003f7f993740] [c00000000053b0c8] remove_proc_entry+0x1e8/0x200 (unreliable) [c000003f7f9937e0] [c0000000001d3654] unregister_irq_proc+0x114/0x150 [c000003f7f993880] [c0000000001c6284] free_desc+0x54/0xb0 [c000003f7f9938c0] [c0000000001c65ec] irq_free_descs+0xac/0x100 [c000003f7f993910] [c0000000001d1ff8] irq_dispose_mapping+0x68/0x80 [c000003f7f993940] [c00800000d44e8a4] kvmppc_xive_attach_escalation+0x1fc/0x270 [kvm] [c000003f7f9939d0] [c00800000d45013c] kvmppc_xive_connect_vcpu+0x424/0x620 [kvm] [c000003f7f993ac0] [c00800000d444428] kvm_arch_vcpu_ioctl+0x260/0x448 [kvm] [c000003f7f993b90] [c00800000d43593c] kvm_vcpu_ioctl+0x154/0x7c8 [kvm] [c000003f7f993d00] [c0000000004840f0] do_vfs_ioctl+0xe0/0xc30 [c000003f7f993db0] [c000000000484d44] ksys_ioctl+0x104/0x120 [c000003f7f993e00] [c000000000484d88] sys_ioctl+0x28/0x80 [c000003f7f993e20] [c00000000000b278] system_call+0x5c/0x68 Instruction dump: 2c230000 41820008 3923ff78 e8e900a0 3c82ff69 3c62ff8d 7fa6eb78 7fc5f378 3884f080 3863b948 4bbf6925 60000000 <0fe00000> 4bffff7c fba10088 4bbf6e41 ---[ end trace b925b67a74a1d8d1 ]--- BUG: Kernel NULL pointer dereference at 0x00000010 Faulting instruction address: 0xc00800000d44fc04 Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Radix MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV Modules linked in: kvm_hv kvm dm_mod vhost_net vhost tap xt_CHECKSUM iptable_mangle xt_MASQUERADE iptable_nat nf_nat xt_conntrack nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ipt_REJECT nf_reject_ipv4 tun bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter squashfs loop fuse i2c_dev sg ofpart ocxl powernv_flash at24 xts mtd uio_pdrv_genirq vmx_crypto opal_prd ipmi_powernv uio ipmi_devintf ipmi_msghandler ibmpowernv ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ip_tables ext4 mbcache jbd2 raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor xor async_tx raid6_pq libcrc32c raid1 raid0 linear sd_mod ast i2c_algo_bit drm_vram_helper ttm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm ahci libahci libata tg3 drm_panel_orientation_quirks [last unloaded: kvm] CPU: 24 PID: 88176 Comm: qemu-system-ppc Tainted: G W 5.3.0-xive-nr-servers-5.3-gku+ #38 NIP: c00800000d44fc04 LR: c00800000d44fc00 CTR: c0000000001cd970 REGS: c000003f7f9938e0 TRAP: 0300 Tainted: G W (5.3.0-xive-nr-servers-5.3-gku+) MSR: 9000000000009033 CR: 24228882 XER: 20040000 CFAR: c0000000001cd9ac DAR: 0000000000000010 DSISR: 40000000 IRQMASK: 0 GPR00: c00800000d44fc00 c000003f7f993b70 c00800000d468300 0000000000000000 GPR04: 00000000000000c7 0000000000000000 0000000000000000 c000003ffacd06d8 GPR08: 0000000000000000 c000003ffacd0738 0000000000000000 fffffffffffffffd GPR12: 0000000000000040 c000003ffffeb800 0000000000000000 000000012f4ce5a1 GPR16: 000000012ef5a0c8 0000000000000000 000000012f113bb0 0000000000000000 GPR20: 000000012f45d918 00007ffffe0d9a80 000000012f4f5df0 000000012ef8c9f8 GPR24: 0000000000000001 0000000000000000 c000003fe4501ed0 c000003f8b1d0000 GPR28: c0000033314689c0 c000003fe4501c00 c000003fe4501e70 c000003fe4501e90 NIP [c00800000d44fc04] kvmppc_xive_cleanup_vcpu+0xfc/0x210 [kvm] LR [c00800000d44fc00] kvmppc_xive_cleanup_vcpu+0xf8/0x210 [kvm] Call Trace: [c000003f7f993b70] [c00800000d44fc00] kvmppc_xive_cleanup_vcpu+0xf8/0x210 [kvm] (unreliable) [c000003f7f993bd0] [c00800000d450bd4] kvmppc_xive_release+0xdc/0x1b0 [kvm] [c000003f7f993c30] [c00800000d436a98] kvm_device_release+0xb0/0x110 [kvm] [c000003f7f993c70] [c00000000046730c] __fput+0xec/0x320 [c000003f7f993cd0] [c000000000164ae0] task_work_run+0x150/0x1c0 [c000003f7f993d30] [c000000000025034] do_notify_resume+0x304/0x440 [c000003f7f993e20] [c00000000000dcc4] ret_from_except_lite+0x70/0x74 Instruction dump: 3bff0008 7fbfd040 419e0054 847e0004 2fa30000 419effec e93d0000 8929203c 2f890000 419effb8 4800821d e8410018 e9490008 9b2a0039 7c0004ac ---[ end trace b925b67a74a1d8d2 ]--- Kernel panic - not syncing: Fatal exception This affects both XIVE and XICS-on-XIVE devices since the beginning. Check the VP id instead of the vCPU id when a new vCPU is connected. The allocation of the XIVE CPU structure in kvmppc_xive_connect_vcpu() is moved after the check to avoid the need for rollback. Cc: stable@vger.kernel.org # v4.12+ Signed-off-by: Greg Kurz Reviewed-by: Cédric Le Goater Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_xive.c | 24 ++++++++++++++++-------- arch/powerpc/kvm/book3s_xive.h | 12 ++++++++++++ arch/powerpc/kvm/book3s_xive_native.c | 6 ++++-- 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 591bfb4bfd0f..a3f9c665bb5b 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -1217,6 +1217,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, struct kvmppc_xive *xive = dev->private; struct kvmppc_xive_vcpu *xc; int i, r = -EBUSY; + u32 vp_id; pr_devel("connect_vcpu(cpu=%d)\n", cpu); @@ -1228,25 +1229,32 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, return -EPERM; if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT) return -EBUSY; - if (kvmppc_xive_find_server(vcpu->kvm, cpu)) { - pr_devel("Duplicate !\n"); - return -EEXIST; - } if (cpu >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) { pr_devel("Out of bounds !\n"); return -EINVAL; } - xc = kzalloc(sizeof(*xc), GFP_KERNEL); - if (!xc) - return -ENOMEM; /* We need to synchronize with queue provisioning */ mutex_lock(&xive->lock); + + vp_id = kvmppc_xive_vp(xive, cpu); + if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) { + pr_devel("Duplicate !\n"); + r = -EEXIST; + goto bail; + } + + xc = kzalloc(sizeof(*xc), GFP_KERNEL); + if (!xc) { + r = -ENOMEM; + goto bail; + } + vcpu->arch.xive_vcpu = xc; xc->xive = xive; xc->vcpu = vcpu; xc->server_num = cpu; - xc->vp_id = kvmppc_xive_vp(xive, cpu); + xc->vp_id = vp_id; xc->mfrr = 0xff; xc->valid = true; diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h index 955b820ffd6d..fe3ed50e0818 100644 --- a/arch/powerpc/kvm/book3s_xive.h +++ b/arch/powerpc/kvm/book3s_xive.h @@ -220,6 +220,18 @@ static inline u32 kvmppc_xive_vp(struct kvmppc_xive *xive, u32 server) return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server); } +static inline bool kvmppc_xive_vp_in_use(struct kvm *kvm, u32 vp_id) +{ + struct kvm_vcpu *vcpu = NULL; + int i; + + kvm_for_each_vcpu(i, vcpu, kvm) { + if (vcpu->arch.xive_vcpu && vp_id == vcpu->arch.xive_vcpu->vp_id) + return true; + } + return false; +} + /* * Mapping between guest priorities and host priorities * is as follow. diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 248c1ea9e788..78b906ffa0d2 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -106,6 +106,7 @@ int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, struct kvmppc_xive *xive = dev->private; struct kvmppc_xive_vcpu *xc = NULL; int rc; + u32 vp_id; pr_devel("native_connect_vcpu(server=%d)\n", server_num); @@ -124,7 +125,8 @@ int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, mutex_lock(&xive->lock); - if (kvmppc_xive_find_server(vcpu->kvm, server_num)) { + vp_id = kvmppc_xive_vp(xive, server_num); + if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) { pr_devel("Duplicate !\n"); rc = -EEXIST; goto bail; @@ -141,7 +143,7 @@ int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, xc->vcpu = vcpu; xc->server_num = server_num; - xc->vp_id = kvmppc_xive_vp(xive, server_num); + xc->vp_id = vp_id; xc->valid = true; vcpu->arch.irq_type = KVMPPC_IRQ_XIVE; From e211288b72f15259da86eed6eca680758dbe9e74 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Thu, 10 Oct 2019 12:33:05 +0000 Subject: [PATCH 285/572] x86/hyperv: Make vapic support x2apic mode Now that there's Hyper-V IOMMU driver, Linux can switch to x2apic mode when supported by the vcpus. However, the apic access functions for Hyper-V enlightened apic assume xapic mode only. As a result, Linux fails to bring up secondary cpus when run as a guest in QEMU/KVM with both hv_apic and x2apic enabled. According to Michael Kelley, when in x2apic mode, the Hyper-V synthetic apic MSRs behave exactly the same as the corresponding architectural x2apic MSRs, so there's no need to override the apic accessors. The only exception is hv_apic_eoi_write, which benefits from lazy EOI when available; however, its implementation works for both xapic and x2apic modes. Fixes: 29217a474683 ("iommu/hyper-v: Add Hyper-V stub IOMMU driver") Fixes: 6b48cb5f8347 ("X86/Hyper-V: Enlighten APIC access") Suggested-by: Michael Kelley Signed-off-by: Roman Kagan Signed-off-by: Thomas Gleixner Reviewed-by: Vitaly Kuznetsov Reviewed-by: Michael Kelley Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20191010123258.16919-1-rkagan@virtuozzo.com --- arch/x86/hyperv/hv_apic.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index 5c056b8aebef..e01078e93dd3 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -260,11 +260,21 @@ void __init hv_apic_init(void) } if (ms_hyperv.hints & HV_X64_APIC_ACCESS_RECOMMENDED) { - pr_info("Hyper-V: Using MSR based APIC access\n"); + pr_info("Hyper-V: Using enlightened APIC (%s mode)", + x2apic_enabled() ? "x2apic" : "xapic"); + /* + * With x2apic, architectural x2apic MSRs are equivalent to the + * respective synthetic MSRs, so there's no need to override + * the apic accessors. The only exception is + * hv_apic_eoi_write, because it benefits from lazy EOI when + * available, but it works for both xapic and x2apic modes. + */ apic_set_eoi_write(hv_apic_eoi_write); - apic->read = hv_apic_read; - apic->write = hv_apic_write; - apic->icr_write = hv_apic_icr_write; - apic->icr_read = hv_apic_icr_read; + if (!x2apic_enabled()) { + apic->read = hv_apic_read; + apic->write = hv_apic_write; + apic->icr_write = hv_apic_icr_write; + apic->icr_read = hv_apic_icr_read; + } } } From 7a22e03b0c02988e91003c505b34d752a51de344 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 1 Oct 2019 13:50:19 -0700 Subject: [PATCH 286/572] x86/apic/x2apic: Fix a NULL pointer deref when handling a dying cpu Check that the per-cpu cluster mask pointer has been set prior to clearing a dying cpu's bit. The per-cpu pointer is not set until the target cpu reaches smp_callin() during CPUHP_BRINGUP_CPU, whereas the teardown function, x2apic_dead_cpu(), is associated with the earlier CPUHP_X2APIC_PREPARE. If an error occurs before the cpu is awakened, e.g. if do_boot_cpu() itself fails, x2apic_dead_cpu() will dereference the NULL pointer and cause a panic. smpboot: do_boot_cpu failed(-22) to wakeup CPU#1 BUG: kernel NULL pointer dereference, address: 0000000000000008 RIP: 0010:x2apic_dead_cpu+0x1a/0x30 Call Trace: cpuhp_invoke_callback+0x9a/0x580 _cpu_up+0x10d/0x140 do_cpu_up+0x69/0xb0 smp_init+0x63/0xa9 kernel_init_freeable+0xd7/0x229 ? rest_init+0xa0/0xa0 kernel_init+0xa/0x100 ret_from_fork+0x35/0x40 Fixes: 023a611748fd5 ("x86/apic/x2apic: Simplify cluster management") Signed-off-by: Sean Christopherson Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20191001205019.5789-1-sean.j.christopherson@intel.com --- arch/x86/kernel/apic/x2apic_cluster.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 45e92cba92f5..b0889c48a2ac 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -156,7 +156,8 @@ static int x2apic_dead_cpu(unsigned int dead_cpu) { struct cluster_mask *cmsk = per_cpu(cluster_masks, dead_cpu); - cpumask_clear_cpu(dead_cpu, &cmsk->mask); + if (cmsk) + cpumask_clear_cpu(dead_cpu, &cmsk->mask); free_cpumask_var(per_cpu(ipi_mask, dead_cpu)); return 0; } From f9258156c73c2b2aa96731cd78bc23c4c4aac83d Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Wed, 25 Sep 2019 20:43:46 +0200 Subject: [PATCH 287/572] iommu/rockchip: Don't use platform_get_irq to implicitly count irqs Till now the Rockchip iommu driver walked through the irq list via platform_get_irq() until it encountered an ENXIO error. With the recent change to add a central error message, this always results in such an error for each iommu on probe and shutdown. To not confuse people, switch to platform_count_irqs() to get the actual number of interrupts before walking through them. Fixes: 7723f4c5ecdb ("driver core: platform: Add an error message to platform_get_irq*()") Signed-off-by: Heiko Stuebner Tested-by: Enric Balletbo i Serra Signed-off-by: Joerg Roedel --- drivers/iommu/rockchip-iommu.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 26290f310f90..4dcbf68dfda4 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -100,6 +100,7 @@ struct rk_iommu { struct device *dev; void __iomem **bases; int num_mmu; + int num_irq; struct clk_bulk_data *clocks; int num_clocks; bool reset_disabled; @@ -1136,7 +1137,7 @@ static int rk_iommu_probe(struct platform_device *pdev) struct rk_iommu *iommu; struct resource *res; int num_res = pdev->num_resources; - int err, i, irq; + int err, i; iommu = devm_kzalloc(dev, sizeof(*iommu), GFP_KERNEL); if (!iommu) @@ -1163,6 +1164,10 @@ static int rk_iommu_probe(struct platform_device *pdev) if (iommu->num_mmu == 0) return PTR_ERR(iommu->bases[0]); + iommu->num_irq = platform_irq_count(pdev); + if (iommu->num_irq < 0) + return iommu->num_irq; + iommu->reset_disabled = device_property_read_bool(dev, "rockchip,disable-mmu-reset"); @@ -1219,8 +1224,9 @@ static int rk_iommu_probe(struct platform_device *pdev) pm_runtime_enable(dev); - i = 0; - while ((irq = platform_get_irq(pdev, i++)) != -ENXIO) { + for (i = 0; i < iommu->num_irq; i++) { + int irq = platform_get_irq(pdev, i); + if (irq < 0) return irq; @@ -1245,10 +1251,13 @@ err_unprepare_clocks: static void rk_iommu_shutdown(struct platform_device *pdev) { struct rk_iommu *iommu = platform_get_drvdata(pdev); - int i = 0, irq; + int i; + + for (i = 0; i < iommu->num_irq; i++) { + int irq = platform_get_irq(pdev, i); - while ((irq = platform_get_irq(pdev, i++)) != -ENXIO) devm_free_irq(iommu->dev, irq, iommu); + } pm_runtime_force_suspend(&pdev->dev); } From ec37d4e999041ae8eb507d8d444a28b338670336 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 1 Oct 2019 20:06:22 +0200 Subject: [PATCH 288/572] iommu/ipmmu-vmsa: Only call platform_get_irq() when interrupt is mandatory As platform_get_irq() now prints an error when the interrupt does not exist, calling it gratuitously causes scary messages like: ipmmu-vmsa e6740000.mmu: IRQ index 0 not found Fix this by moving the call to platform_get_irq() down, where the existence of the interrupt is mandatory. Fixes: 7723f4c5ecdb8d83 ("driver core: platform: Add an error message to platform_get_irq*()") Signed-off-by: Geert Uytterhoeven Reviewed-by: Yoshihiro Shimoda Tested-by: Yoshihiro Shimoda Reviewed-by: Stephen Boyd Signed-off-by: Joerg Roedel --- drivers/iommu/ipmmu-vmsa.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 9da8309f7170..237103465b82 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -1086,8 +1086,6 @@ static int ipmmu_probe(struct platform_device *pdev) mmu->num_ctx = min(IPMMU_CTX_MAX, mmu->features->number_of_contexts); - irq = platform_get_irq(pdev, 0); - /* * Determine if this IPMMU instance is a root device by checking for * the lack of has_cache_leaf_nodes flag or renesas,ipmmu-main property. @@ -1106,6 +1104,7 @@ static int ipmmu_probe(struct platform_device *pdev) /* Root devices have mandatory IRQs */ if (ipmmu_is_root(mmu)) { + irq = platform_get_irq(pdev, 0); if (irq < 0) { dev_err(&pdev->dev, "no IRQ found\n"); return irq; From ec21f17a9437e11bb29e5fa375aa31b472793c15 Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Mon, 14 Oct 2019 20:06:05 +0000 Subject: [PATCH 289/572] iommu/amd: Fix incorrect PASID decoding from event log IOMMU Event Log encodes 20-bit PASID for events: ILLEGAL_DEV_TABLE_ENTRY IO_PAGE_FAULT PAGE_TAB_HARDWARE_ERROR INVALID_DEVICE_REQUEST as: PASID[15:0] = bit 47:32 PASID[19:16] = bit 19:16 Note that INVALID_PPR_REQUEST event has different encoding from the rest of the events as the following: PASID[15:0] = bit 31:16 PASID[19:16] = bit 45:42 So, fixes the decoding logic. Fixes: d64c0486ed50 ("iommu/amd: Update the PASID information printed to the system log") Cc: Joerg Roedel Cc: Gary R Hook Signed-off-by: Suravee Suthikulpanit Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 5 +++-- drivers/iommu/amd_iommu_types.h | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 2369b8af81f3..bcd89ea50a8b 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -583,7 +583,8 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt) retry: type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK; devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK; - pasid = PPR_PASID(*(u64 *)&event[0]); + pasid = (event[0] & EVENT_DOMID_MASK_HI) | + (event[1] & EVENT_DOMID_MASK_LO); flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; address = (u64)(((u64)event[3]) << 32) | event[2]; @@ -616,7 +617,7 @@ retry: address, flags); break; case EVENT_TYPE_PAGE_TAB_ERR: - dev_err(dev, "Event logged [PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n", + dev_err(dev, "Event logged [PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x pasid=0x%04x address=0x%llx flags=0x%04x]\n", PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), pasid, address, flags); break; diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index c9c1612d52e0..17bd5a349119 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -130,8 +130,8 @@ #define EVENT_TYPE_INV_PPR_REQ 0x9 #define EVENT_DEVID_MASK 0xffff #define EVENT_DEVID_SHIFT 0 -#define EVENT_DOMID_MASK 0xffff -#define EVENT_DOMID_SHIFT 0 +#define EVENT_DOMID_MASK_LO 0xffff +#define EVENT_DOMID_MASK_HI 0xf0000 #define EVENT_FLAGS_MASK 0xfff #define EVENT_FLAGS_SHIFT 0x10 From 80ed4548d0711d15ca51be5dee0ff813051cfc90 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Sat, 12 Oct 2019 18:42:10 +0200 Subject: [PATCH 290/572] btrfs: don't needlessly create extent-refs kernel thread The patch 32b593bfcb58 ("Btrfs: remove no longer used function to run delayed refs asynchronously") removed the async delayed refs but the thread has been created, without any use. Remove it to avoid resource consumption. Fixes: 32b593bfcb58 ("Btrfs: remove no longer used function to run delayed refs asynchronously") CC: stable@vger.kernel.org # 5.2+ Reviewed-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 -- fs/btrfs/disk-io.c | 6 ------ 2 files changed, 8 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 19d669d12ca1..c1489c229694 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -734,8 +734,6 @@ struct btrfs_fs_info { struct btrfs_workqueue *fixup_workers; struct btrfs_workqueue *delayed_workers; - /* the extent workers do delayed refs on the extent allocation tree */ - struct btrfs_workqueue *extent_workers; struct task_struct *transaction_kthread; struct task_struct *cleaner_kthread; u32 thread_pool_size; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 044981cf6df9..402b61bf345c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2008,7 +2008,6 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) btrfs_destroy_workqueue(fs_info->readahead_workers); btrfs_destroy_workqueue(fs_info->flush_workers); btrfs_destroy_workqueue(fs_info->qgroup_rescan_workers); - btrfs_destroy_workqueue(fs_info->extent_workers); /* * Now that all other work queues are destroyed, we can safely destroy * the queues used for metadata I/O, since tasks from those other work @@ -2214,10 +2213,6 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info, max_active, 2); fs_info->qgroup_rescan_workers = btrfs_alloc_workqueue(fs_info, "qgroup-rescan", flags, 1, 0); - fs_info->extent_workers = - btrfs_alloc_workqueue(fs_info, "extent-refs", flags, - min_t(u64, fs_devices->num_devices, - max_active), 8); if (!(fs_info->workers && fs_info->delalloc_workers && fs_info->submit_workers && fs_info->flush_workers && @@ -2228,7 +2223,6 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info, fs_info->endio_freespace_worker && fs_info->rmw_workers && fs_info->caching_workers && fs_info->readahead_workers && fs_info->fixup_workers && fs_info->delayed_workers && - fs_info->extent_workers && fs_info->qgroup_rescan_workers)) { return -ENOMEM; } From 5812d04c4c7455627d8722e04ab99a737cfe9713 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Sun, 13 Oct 2019 19:57:37 +0300 Subject: [PATCH 291/572] nvmet-loop: fix possible leakage during error flow During nvme_loop_queue_rq error flow, one must call nvme_cleanup_cmd since it's symmetric to nvme_setup_cmd. Signed-off-by: Max Gurtovoy Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/target/loop.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 748a39fca771..11f5aea97d1b 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -157,8 +157,10 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, iod->sg_table.sgl = iod->first_sgl; if (sg_alloc_table_chained(&iod->sg_table, blk_rq_nr_phys_segments(req), - iod->sg_table.sgl, SG_CHUNK_SIZE)) + iod->sg_table.sgl, SG_CHUNK_SIZE)) { + nvme_cleanup_cmd(req); return BLK_STS_RESOURCE; + } iod->req.sg = iod->sg_table.sgl; iod->req.sg_cnt = blk_rq_map_sg(req->q, req, iod->sg_table.sgl); From 28a4cac48c7e897a0b4e7d79a53a8e4fe40337ae Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Sun, 13 Oct 2019 19:57:38 +0300 Subject: [PATCH 292/572] nvme-tcp: fix possible leakage during error flow During nvme_tcp_setup_cmd_pdu error flow, one must call nvme_cleanup_cmd since it's symmetric to nvme_setup_cmd. Signed-off-by: Max Gurtovoy Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 820dac10fa9e..770dbcbc999e 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2136,6 +2136,7 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns, ret = nvme_tcp_map_data(queue, rq); if (unlikely(ret)) { + nvme_cleanup_cmd(rq); dev_err(queue->ctrl->ctrl.device, "Failed to map data (%d)\n", ret); return ret; From 85f0ae7e435afb261af33f9a605b973c0c8c1139 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 21 Sep 2019 22:18:46 +0900 Subject: [PATCH 293/572] kbuild: update comment about KBUILD_ALLDIRS Commit 000ec95fbe75 ("kbuild: pkg: rename scripts/package/Makefile to scripts/Makefile.package") missed to update this comment. Fixes: 000ec95fbe75 ("kbuild: pkg: rename scripts/package/Makefile to scripts/Makefile.package") Signed-off-by: Masahiro Yamada --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ffd7a912fc46..19efe04b398c 100644 --- a/Makefile +++ b/Makefile @@ -1037,7 +1037,7 @@ export KBUILD_VMLINUX_OBJS := $(head-y) $(init-y) $(core-y) $(libs-y2) \ export KBUILD_VMLINUX_LIBS := $(libs-y1) export KBUILD_LDS := arch/$(SRCARCH)/kernel/vmlinux.lds export LDFLAGS_vmlinux -# used by scripts/package/Makefile +# used by scripts/Makefile.package export KBUILD_ALLDIRS := $(sort $(filter-out arch/%,$(vmlinux-alldirs)) LICENSES arch include scripts tools) vmlinux-deps := $(KBUILD_LDS) $(KBUILD_VMLINUX_OBJS) $(KBUILD_VMLINUX_LIBS) From 991b78fbd2231175293bd1d7c0768fb4e4382e14 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 5 Oct 2019 08:01:59 -0700 Subject: [PATCH 294/572] scripts: setlocalversion: fix a bashism Fix bashism reported by checkbashisms by using only one '=': possible bashism in scripts/setlocalversion line 96 (should be 'b = a'): if [ "`hg log -r . --template '{latesttagdistance}'`" == "1" ]; then Fixes: 38b3439d84f4 ("setlocalversion: update mercurial tag parsing") Signed-off-by: Randy Dunlap Cc: Mike Crowe Signed-off-by: Masahiro Yamada --- scripts/setlocalversion | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/setlocalversion b/scripts/setlocalversion index 220dae0db3f1..a2998b118ef9 100755 --- a/scripts/setlocalversion +++ b/scripts/setlocalversion @@ -93,7 +93,7 @@ scm_version() # Check for mercurial and a mercurial repo. if test -d .hg && hgid=`hg id 2>/dev/null`; then # Do we have an tagged version? If so, latesttagdistance == 1 - if [ "`hg log -r . --template '{latesttagdistance}'`" == "1" ]; then + if [ "`hg log -r . --template '{latesttagdistance}'`" = "1" ]; then id=`hg log -r . --template '{latesttag}'` printf '%s%s' -hg "$id" else From 6a6fac11b11299aa5bd8532ea863fc2f652af2b6 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Tue, 8 Oct 2019 11:38:41 +0200 Subject: [PATCH 295/572] perf jvmti: Link against tools/lib/ctype.h to have weak strlcpy() The build of file libperf-jvmti.so succeeds but the resulting object fails to load: # ~/linux/tools/perf/perf record -k mono -- java \ -XX:+PreserveFramePointer \ -agentpath:/root/linux/tools/perf/libperf-jvmti.so \ hog 100000 123450 Error occurred during initialization of VM Could not find agent library /root/linux/tools/perf/libperf-jvmti.so in absolute path, with error: /root/linux/tools/perf/libperf-jvmti.so: undefined symbol: _ctype Add the missing _ctype symbol into the build script. Fixes: 79743bc927f6 ("perf jvmti: Link against tools/lib/string.o to have weak strlcpy()") Signed-off-by: Thomas Richter Cc: Heiko Carstens Cc: Vasily Gorbik Link: http://lore.kernel.org/lkml/20191008093841.59387-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/jvmti/Build | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/jvmti/Build b/tools/perf/jvmti/Build index 1e148bbdf820..202cadaaf097 100644 --- a/tools/perf/jvmti/Build +++ b/tools/perf/jvmti/Build @@ -2,7 +2,7 @@ jvmti-y += libjvmti.o jvmti-y += jvmti_agent.o # For strlcpy -jvmti-y += libstring.o +jvmti-y += libstring.o libctype.o CFLAGS_jvmti = -fPIC -DPIC -I$(JDIR)/include -I$(JDIR)/include/linux CFLAGS_REMOVE_jvmti = -Wmissing-declarations @@ -15,3 +15,7 @@ CFLAGS_libstring.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PE $(OUTPUT)jvmti/libstring.o: ../lib/string.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) + +$(OUTPUT)jvmti/libctype.o: ../lib/ctype.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) From 98a8b2e60c69927b1f405c3b001a1de3f4e53901 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 11 Oct 2019 11:21:40 -0700 Subject: [PATCH 296/572] perf evlist: Fix fix for freed id arrays In the earlier fix for the memory overrun of id arrays I managed to typo the wrong event in the fix. Of course we need to close the current event in the loop, not the original failing event. The same test case as in the original patch still passes. Fixes: 7834fa948beb ("perf evlist: Fix access of freed id arrays") Signed-off-by: Andi Kleen Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20191011182140.8353-2-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index d277a98e62df..de79c735e441 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1659,7 +1659,7 @@ struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list, is_open = false; if (c2->leader == leader) { if (is_open) - perf_evsel__close(&evsel->core); + perf_evsel__close(&c2->core); c2->leader = c2; c2->core.nr_members = 0; } From 6080728ff8e9c9116e52e6f840152356ac2fea56 Mon Sep 17 00:00:00 2001 From: Yunfeng Ye Date: Tue, 15 Oct 2019 16:30:08 +0800 Subject: [PATCH 297/572] perf tools: Fix resource leak of closedir() on the error paths Both build_mem_topology() and rm_rf_depth_pat() have resource leaks of closedir() on the error paths. Fix this by calling closedir() before function returns. Fixes: e2091cedd51b ("perf tools: Add MEM_TOPOLOGY feature to perf data file") Fixes: cdb6b0235f17 ("perf tools: Add pattern name checking to rm_rf") Signed-off-by: Yunfeng Ye Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Alexey Budankov Cc: Andi Kleen Cc: Daniel Borkmann Cc: Feilong Lin Cc: Hu Shiyuan Cc: Igor Lubashev Cc: Kan Liang Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Cc: Yonghong Song Link: http://lore.kernel.org/lkml/cd5f7cd2-b80d-6add-20a1-32f4f43e0744@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 4 +++- tools/perf/util/util.c | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 86d9396cb131..becc2d109423 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1296,8 +1296,10 @@ static int build_mem_topology(struct memory_node *nodes, u64 size, u64 *cntp) continue; if (WARN_ONCE(cnt >= size, - "failed to write MEM_TOPOLOGY, way too many nodes\n")) + "failed to write MEM_TOPOLOGY, way too many nodes\n")) { + closedir(dir); return -1; + } ret = memory_node__read(&nodes[cnt++], idx); } diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 5eda6e19c947..ae56c766eda1 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -154,8 +154,10 @@ static int rm_rf_depth_pat(const char *path, int depth, const char **pat) if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, "..")) continue; - if (!match_pat(d->d_name, pat)) - return -2; + if (!match_pat(d->d_name, pat)) { + ret = -2; + break; + } scnprintf(namebuf, sizeof(namebuf), "%s/%s", path, d->d_name); From 5da0fb1ab34ccfe6d49210b4f5a739c59fcbf25e Mon Sep 17 00:00:00 2001 From: yangerkun Date: Tue, 15 Oct 2019 21:59:29 +0800 Subject: [PATCH 298/572] io_uring: consider the overflow of sequence for timeout req Now we recalculate the sequence of timeout with 'req->sequence = ctx->cached_sq_head + count - 1', judge the right place to insert for timeout_list by compare the number of request we still expected for completion. But we have not consider about the situation of overflow: 1. ctx->cached_sq_head + count - 1 may overflow. And a bigger count for the new timeout req can have a small req->sequence. 2. cached_sq_head of now may overflow compare with before req. And it will lead the timeout req with small req->sequence. This overflow will lead to the misorder of timeout_list, which can lead to the wrong order of the completion of timeout_list. Fix it by reuse req->submit.sequence to store the count, and change the logic of inserting sort in io_timeout. Signed-off-by: yangerkun Signed-off-by: Jens Axboe --- fs/io_uring.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 38d274fc0f25..d2cb277da2f4 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1884,7 +1884,7 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer) static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe) { - unsigned count, req_dist, tail_index; + unsigned count; struct io_ring_ctx *ctx = req->ctx; struct list_head *entry; struct timespec64 ts; @@ -1907,21 +1907,36 @@ static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe) count = 1; req->sequence = ctx->cached_sq_head + count - 1; + /* reuse it to store the count */ + req->submit.sequence = count; req->flags |= REQ_F_TIMEOUT; /* * Insertion sort, ensuring the first entry in the list is always * the one we need first. */ - tail_index = ctx->cached_cq_tail - ctx->rings->sq_dropped; - req_dist = req->sequence - tail_index; spin_lock_irq(&ctx->completion_lock); list_for_each_prev(entry, &ctx->timeout_list) { struct io_kiocb *nxt = list_entry(entry, struct io_kiocb, list); - unsigned dist; + unsigned nxt_sq_head; + long long tmp, tmp_nxt; - dist = nxt->sequence - tail_index; - if (req_dist >= dist) + /* + * Since cached_sq_head + count - 1 can overflow, use type long + * long to store it. + */ + tmp = (long long)ctx->cached_sq_head + count - 1; + nxt_sq_head = nxt->sequence - nxt->submit.sequence + 1; + tmp_nxt = (long long)nxt_sq_head + nxt->submit.sequence - 1; + + /* + * cached_sq_head may overflow, and it will never overflow twice + * once there is some timeout req still be valid. + */ + if (ctx->cached_sq_head < nxt_sq_head) + tmp_nxt += UINT_MAX; + + if (tmp >= tmp_nxt) break; } list_add(&req->list, entry); From f948eb45e3af9fb18a0487d0797a773897ef6929 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 14 Oct 2019 12:10:47 -0500 Subject: [PATCH 299/572] perf annotate: Fix multiple memory and file descriptor leaks Store SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF in variable *ret*, instead of returning in the middle of the function and leaking multiple resources: prog_linfo, btf, s and bfdf. Addresses-Coverity-ID: 1454832 ("Structurally dead code") Fixes: 11aad897f6d1 ("perf annotate: Don't return -1 for error when doing BPF disassembly") Signed-off-by: Gustavo A. R. Silva Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191014171047.GA30850@embeddedor Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 4036c7f7b0fb..e42bf572358c 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1758,7 +1758,7 @@ static int symbol__disassemble_bpf(struct symbol *sym, info_node = perf_env__find_bpf_prog_info(dso->bpf_prog.env, dso->bpf_prog.id); if (!info_node) { - return SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF; + ret = SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF; goto out; } info_linear = info_node->info_linear; From 5a0baf5123236362fda4e9772cc63b7faa16a0df Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 7 Oct 2019 10:02:21 +0300 Subject: [PATCH 300/572] perf tools: Fix mode setting in copyfile_mode_ns() slow_copyfile() opens the file by name, so "write" permissions must not be removed in copyfile_mode_ns() before calling slow_copyfile(). Example: Before: $ sudo chmod +r /proc/kcore $ sudo setcap "cap_sys_admin,cap_sys_ptrace,cap_syslog,cap_sys_rawio=ep" tools/perf/perf $ tools/perf/perf buildid-cache -k /proc/kcore Couldn't add /proc/kcore After: $ sudo chmod +r /proc/kcore $ sudo setcap "cap_sys_admin,cap_sys_ptrace,cap_syslog,cap_sys_rawio=ep" tools/perf/perf $ tools/perf/perf buildid-cache -v -k /proc/kcore kcore added to build-id cache directory /home/ahunter/.debug/[kernel.kcore]/37e340b1b5a7cf4f57ba8de2bc777359588a957f/2019100709562289 Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Link: http://lore.kernel.org/lkml/20191007070221.11158-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/copyfile.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/copyfile.c b/tools/perf/util/copyfile.c index 3fa0db136667..47e03de7c235 100644 --- a/tools/perf/util/copyfile.c +++ b/tools/perf/util/copyfile.c @@ -101,14 +101,16 @@ static int copyfile_mode_ns(const char *from, const char *to, mode_t mode, if (tofd < 0) goto out; - if (fchmod(tofd, mode)) - goto out_close_to; - if (st.st_size == 0) { /* /proc? do it slowly... */ err = slow_copyfile(from, tmp, nsi); + if (!err && fchmod(tofd, mode)) + err = -1; goto out_close_to; } + if (fchmod(tofd, mode)) + goto out_close_to; + nsinfo__mountns_enter(nsi, &nsc); fromfd = open(from, O_RDONLY); nsinfo__mountns_exit(&nsc); From ae199c580da1754a2b051321eeb76d6dacd8707b Mon Sep 17 00:00:00 2001 From: Yunfeng Ye Date: Tue, 15 Oct 2019 10:54:14 +0800 Subject: [PATCH 301/572] perf c2c: Fix memory leak in build_cl_output() There is a memory leak problem in the failure paths of build_cl_output(), so fix it. Signed-off-by: Yunfeng Ye Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Feilong Lin Cc: Hu Shiyuan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/4d3c0178-5482-c313-98e1-f82090d2d456@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 3542b6ab9813..e69f44941aad 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2635,6 +2635,7 @@ static int build_cl_output(char *cl_sort, bool no_source) bool add_sym = false; bool add_dso = false; bool add_src = false; + int ret = 0; if (!buf) return -ENOMEM; @@ -2653,7 +2654,8 @@ static int build_cl_output(char *cl_sort, bool no_source) add_dso = true; } else if (strcmp(tok, "offset")) { pr_err("unrecognized sort token: %s\n", tok); - return -EINVAL; + ret = -EINVAL; + goto err; } } @@ -2676,13 +2678,15 @@ static int build_cl_output(char *cl_sort, bool no_source) add_sym ? "symbol," : "", add_dso ? "dso," : "", add_src ? "cl_srcline," : "", - "node") < 0) - return -ENOMEM; + "node") < 0) { + ret = -ENOMEM; + goto err; + } c2c.show_src = add_src; - +err: free(buf); - return 0; + return ret; } static int setup_coalesce(const char *coalesce, bool no_source) From 7a12f514c408cc499e61211f234858d3d4b7f4ea Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 15 Oct 2019 12:22:37 -0300 Subject: [PATCH 302/572] tools headers kvm: Sync kvm headers with the kernel sources To pick the changes in: bf653b78f960 ("KVM: vmx: Introduce handle_unexpected_vmexit and handle WAITPKG vmexit") That trigger these changes in tooling: CC /tmp/build/perf/arch/x86/util/kvm-stat.o INSTALL GTK UI DESCEND plugins make[3]: Nothing to be done for '/tmp/build/perf/plugins/libtraceevent-dynamic-list'. INSTALL trace_plugins LD /tmp/build/perf/arch/x86/util/perf-in.o LD /tmp/build/perf/arch/x86/perf-in.o LD /tmp/build/perf/arch/perf-in.o LD /tmp/build/perf/perf-in.o LINK /tmp/build/perf/perf And this is not just because that header is included, kvm-stat.c uses the VMX_EXIT_REASONS define and it got changed by the above cset. And addresses this perf build warnings: Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/vmx.h' differs from latest version at 'arch/x86/include/uapi/asm/vmx.h' diff -u tools/arch/x86/include/uapi/asm/vmx.h arch/x86/include/uapi/asm/vmx.h Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paolo Bonzini Cc: Tao Xu Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-gr1eel0hckmi5l3p2ewdpfxh@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/uapi/asm/vmx.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h index f01950aa7fae..3eb8411ab60e 100644 --- a/tools/arch/x86/include/uapi/asm/vmx.h +++ b/tools/arch/x86/include/uapi/asm/vmx.h @@ -86,6 +86,8 @@ #define EXIT_REASON_PML_FULL 62 #define EXIT_REASON_XSAVES 63 #define EXIT_REASON_XRSTORS 64 +#define EXIT_REASON_UMWAIT 67 +#define EXIT_REASON_TPAUSE 68 #define VMX_EXIT_REASONS \ { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ @@ -144,7 +146,9 @@ { EXIT_REASON_RDSEED, "RDSEED" }, \ { EXIT_REASON_PML_FULL, "PML_FULL" }, \ { EXIT_REASON_XSAVES, "XSAVES" }, \ - { EXIT_REASON_XRSTORS, "XRSTORS" } + { EXIT_REASON_XRSTORS, "XRSTORS" }, \ + { EXIT_REASON_UMWAIT, "UMWAIT" }, \ + { EXIT_REASON_TPAUSE, "TPAUSE" } #define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1 #define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2 From 7cb3a2445705e45af9b58dd241d26598a35b1fdd Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 15 Oct 2019 12:30:08 -0300 Subject: [PATCH 303/572] tools headers kvm: Sync kvm headers with the kernel sources To pick the changes in: 0cb8410b90e7 ("kvm: svm: Intercept RDPRU") That trigger a rebuild in too in tooling: CC /tmp/build/perf/arch/x86/util/kvm-stat.o But this time around no changes in tooling results, as SVM_EXIT_RDPRU wasn't added to SVM_EXIT_REASONS, that is used in kvm-stat.c. And addresses this perf build warnings: Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/svm.h' differs from latest version at 'arch/x86/include/uapi/asm/svm.h' diff -u tools/arch/x86/include/uapi/asm/svm.h arch/x86/include/uapi/asm/svm.h Cc: Adrian Hunter Cc: Jim Mattson Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paolo Bonzini Link: https://lkml.kernel.org/n/tip-pqzkt1hmfpqph3ts8i6zzmim@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/uapi/asm/svm.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h index a9731f8a480f..2e8a30f06c74 100644 --- a/tools/arch/x86/include/uapi/asm/svm.h +++ b/tools/arch/x86/include/uapi/asm/svm.h @@ -75,6 +75,7 @@ #define SVM_EXIT_MWAIT 0x08b #define SVM_EXIT_MWAIT_COND 0x08c #define SVM_EXIT_XSETBV 0x08d +#define SVM_EXIT_RDPRU 0x08e #define SVM_EXIT_NPF 0x400 #define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402 From 8daf1fb73295b43fb2f624f709449b484532ba64 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 15 Oct 2019 12:35:02 -0300 Subject: [PATCH 304/572] tools headers kvm: Sync kvm.h headers with the kernel sources To pick the changes in: 344c6c804703 ("KVM/Hyper-V: Add new KVM capability KVM_CAP_HYPERV_DIRECT_TLBFLUSH") dee04eee9182 ("KVM: RISC-V: Add KVM_REG_RISCV for ONE_REG interface") These trigger the rebuild of this object: CC /tmp/build/perf/trace/beauty/ioctl.o But do not result in any change in tooling, as the additions are not being used in any table generatator. This silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/kvm.h' differs from latest version at 'include/uapi/linux/kvm.h' diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h Cc: Adrian Hunter Cc: Anup Patel Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paolo Bonzini Cc: Paul Walmsley Cc: Tianyu Lan Link: https://lkml.kernel.org/n/tip-d1v48a0qfoe98u5v9tn3mu5u@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/kvm.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 233efbb1c81c..52641d8ca9e8 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -999,6 +999,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ARM_PTRAUTH_GENERIC 172 #define KVM_CAP_PMU_EVENT_FILTER 173 #define KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 174 +#define KVM_CAP_HYPERV_DIRECT_TLBFLUSH 175 #ifdef KVM_CAP_IRQ_ROUTING @@ -1145,6 +1146,7 @@ struct kvm_dirty_tlb { #define KVM_REG_S390 0x5000000000000000ULL #define KVM_REG_ARM64 0x6000000000000000ULL #define KVM_REG_MIPS 0x7000000000000000ULL +#define KVM_REG_RISCV 0x8000000000000000ULL #define KVM_REG_SIZE_SHIFT 52 #define KVM_REG_SIZE_MASK 0x00f0000000000000ULL From 1d3f87233e26362fc3d4e59f0f31a71b570f90b9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 26 Sep 2019 16:05:11 -0400 Subject: [PATCH 305/572] ceph: just skip unrecognized info in ceph_reply_info_extra In the future, we're going to want to extend the ceph_reply_info_extra for create replies. Currently though, the kernel code doesn't accept an extra blob that is larger than the expected data. Change the code to skip over any unrecognized fields at the end of the extra blob, rather than returning -EIO. Cc: stable@vger.kernel.org Signed-off-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index a8a8f84f3bbf..a5163296d9d9 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -384,8 +384,8 @@ static int parse_reply_info_readdir(void **p, void *end, } done: - if (*p != end) - goto bad; + /* Skip over any unrecognized fields */ + *p = end; return 0; bad: @@ -406,12 +406,10 @@ static int parse_reply_info_filelock(void **p, void *end, goto bad; info->filelock_reply = *p; - *p += sizeof(*info->filelock_reply); - if (unlikely(*p != end)) - goto bad; + /* Skip over any unrecognized fields */ + *p = end; return 0; - bad: return -EIO; } @@ -425,18 +423,21 @@ static int parse_reply_info_create(void **p, void *end, { if (features == (u64)-1 || (features & CEPH_FEATURE_REPLY_CREATE_INODE)) { + /* Malformed reply? */ if (*p == end) { info->has_create_ino = false; } else { info->has_create_ino = true; - info->ino = ceph_decode_64(p); + ceph_decode_64_safe(p, end, info->ino, bad); } + } else { + if (*p != end) + goto bad; } - if (unlikely(*p != end)) - goto bad; + /* Skip over any unrecognized fields */ + *p = end; return 0; - bad: return -EIO; } From 25e6be21230d3208d687dad90b6e43419013c351 Mon Sep 17 00:00:00 2001 From: Dongsheng Yang Date: Fri, 27 Sep 2019 15:33:22 +0000 Subject: [PATCH 306/572] rbd: cancel lock_dwork if the wait is interrupted There is a warning message in my test with below steps: # rbd bench --io-type write --io-size 4K --io-threads 1 --io-pattern rand test & # sleep 5 # pkill -9 rbd # rbd map test & # sleep 5 # pkill rbd The reason is that the rbd_add_acquire_lock() is interruptable, that means, when we kill the waiting on ->acquire_wait, the lock_dwork could be still running. 1. do_rbd_add() 2. lock_dwork rbd_add_acquire_lock() - queue_delayed_work() lock_dwork queued - wait_for_completion_killable_timeout() <-- kill happen rbd_dev_image_unlock() <-- UNLOCKED now, nothing to do. rbd_dev_device_release() rbd_dev_image_release() - ... lock successed here - cancel_delayed_work_sync(&rbd_dev->lock_dwork) Then when we reach the rbd_dev_free(), WARN_ON is triggered because lock_state is not RBD_LOCK_STATE_UNLOCKED. To fix it, this commit make sure the lock_dwork was finished before calling rbd_dev_image_unlock(). On the other hand, this would not happend in do_rbd_remove(), because after rbd mapped, lock_dwork will only be queued for IO request, and request will continue unless lock_dwork finished. when we call rbd_dev_image_unlock() in do_rbd_remove(), all requests are done. That means, lock_state should not be locked again after rbd_dev_image_unlock(). [ Cancel lock_dwork in rbd_add_acquire_lock(), only if the wait is interrupted. ] Fixes: 637cd060537d ("rbd: new exclusive lock wait/wake code") Signed-off-by: Dongsheng Yang Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- drivers/block/rbd.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 7c4350c0fb77..39136675dae5 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -6639,10 +6639,13 @@ static int rbd_add_acquire_lock(struct rbd_device *rbd_dev) queue_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0); ret = wait_for_completion_killable_timeout(&rbd_dev->acquire_wait, ceph_timeout_jiffies(rbd_dev->opts->lock_timeout)); - if (ret > 0) + if (ret > 0) { ret = rbd_dev->acquire_err; - else if (!ret) - ret = -ETIMEDOUT; + } else { + cancel_delayed_work_sync(&rbd_dev->lock_dwork); + if (!ret) + ret = -ETIMEDOUT; + } if (ret) { rbd_warn(rbd_dev, "failed to acquire exclusive lock: %ld", ret); From 5eca1379c0eb06e3908179665230a86d19bd2df7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 15 Oct 2019 12:44:00 -0300 Subject: [PATCH 307/572] tools headers UAPI: Sync sched.h with the kernel To get the changes in: 78f6face5af3 ("sched: add kernel-doc for struct clone_args") f14c234b4bc5 ("clone3: switch to copy_struct_from_user()") This file gets rebuilt, but no changes ensues: CC /tmp/build/perf/trace/beauty/clone.o This addresses this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/sched.h' differs from latest version at 'include/uapi/linux/sched.h' diff -u tools/include/uapi/linux/sched.h include/uapi/linux/sched.h Cc: Adrian Hunter Cc: Aleksa Sarai Cc: Christian Brauner Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-xqruu8wohwlbc57udg1g0xzx@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/sched.h | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/tools/include/uapi/linux/sched.h b/tools/include/uapi/linux/sched.h index b3105ac1381a..99335e1f4a27 100644 --- a/tools/include/uapi/linux/sched.h +++ b/tools/include/uapi/linux/sched.h @@ -33,8 +33,31 @@ #define CLONE_NEWNET 0x40000000 /* New network namespace */ #define CLONE_IO 0x80000000 /* Clone io context */ -/* - * Arguments for the clone3 syscall +#ifndef __ASSEMBLY__ +/** + * struct clone_args - arguments for the clone3 syscall + * @flags: Flags for the new process as listed above. + * All flags are valid except for CSIGNAL and + * CLONE_DETACHED. + * @pidfd: If CLONE_PIDFD is set, a pidfd will be + * returned in this argument. + * @child_tid: If CLONE_CHILD_SETTID is set, the TID of the + * child process will be returned in the child's + * memory. + * @parent_tid: If CLONE_PARENT_SETTID is set, the TID of + * the child process will be returned in the + * parent's memory. + * @exit_signal: The exit_signal the parent process will be + * sent when the child exits. + * @stack: Specify the location of the stack for the + * child process. + * @stack_size: The size of the stack for the child process. + * @tls: If CLONE_SETTLS is set, the tls descriptor + * is set to tls. + * + * The structure is versioned by size and thus extensible. + * New struct members must go at the end of the struct and + * must be properly 64bit aligned. */ struct clone_args { __aligned_u64 flags; @@ -46,6 +69,9 @@ struct clone_args { __aligned_u64 stack_size; __aligned_u64 tls; }; +#endif + +#define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */ /* * Scheduling policies From 5e0cd1ef64744e41e029dfca7d0ae285c486f386 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 15 Oct 2019 08:46:07 -0700 Subject: [PATCH 308/572] xfs: change the seconds fields in xfs_bulkstat to signed 64-bit time is a signed quantity in the kernel, so the bulkstat structure should reflect that. Note that the structure size stays the same and that we have not yet published userspace headers for this new ioctl so there are no users to break. Fixes: 7035f9724f84 ("xfs: introduce new v5 bulkstat structure") Signed-off-by: Darrick J. Wong Reviewed-by: Carlos Maiolino Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_fs.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 39dd2b908106..e9371a8e0e26 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -366,11 +366,11 @@ struct xfs_bulkstat { uint64_t bs_blocks; /* number of blocks */ uint64_t bs_xflags; /* extended flags */ - uint64_t bs_atime; /* access time, seconds */ - uint64_t bs_mtime; /* modify time, seconds */ + int64_t bs_atime; /* access time, seconds */ + int64_t bs_mtime; /* modify time, seconds */ - uint64_t bs_ctime; /* inode change time, seconds */ - uint64_t bs_btime; /* creation time, seconds */ + int64_t bs_ctime; /* inode change time, seconds */ + int64_t bs_btime; /* creation time, seconds */ uint32_t bs_gen; /* generation count */ uint32_t bs_uid; /* user id */ From 9d179b865449b351ad5cb76dbea480c9170d4a27 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 15 Oct 2019 09:03:47 -0700 Subject: [PATCH 309/572] blkcg: Fix multiple bugs in blkcg_activate_policy() blkcg_activate_policy() has the following bugs. * cf09a8ee19ad ("blkcg: pass @q and @blkcg into blkcg_pol_alloc_pd_fn()") added @blkcg to ->pd_alloc_fn(); however, blkcg_activate_policy() ends up using pd's allocated for the root blkcg for all preallocations, so ->pd_init_fn() for non-root blkcgs can be passed in pd's which are allocated for the root blkcg. For blk-iocost, this means that ->pd_init_fn() can write beyond the end of the allocated object as it determines the length of the flex array at the end based on the blkcg's nesting level. * Each pd is initialized as they get allocated. If alloc fails, the policy will get freed with pd's initialized on it. * After the above partial failure, the partial pds are not freed. This patch fixes all the above issues by * Restructuring blkcg_activate_policy() so that alloc and init passes are separate. Init takes place only after all allocs succeeded and on failure all allocated pds are freed. * Unifying and fixing the cleanup of the remaining pd_prealloc. Signed-off-by: Tejun Heo Fixes: cf09a8ee19ad ("blkcg: pass @q and @blkcg into blkcg_pol_alloc_pd_fn()") Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 69 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 51 insertions(+), 18 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index b6f20be0fc78..5d21027b1faf 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1362,7 +1362,7 @@ int blkcg_activate_policy(struct request_queue *q, const struct blkcg_policy *pol) { struct blkg_policy_data *pd_prealloc = NULL; - struct blkcg_gq *blkg; + struct blkcg_gq *blkg, *pinned_blkg = NULL; int ret; if (blkcg_policy_enabled(q, pol)) @@ -1370,49 +1370,82 @@ int blkcg_activate_policy(struct request_queue *q, if (queue_is_mq(q)) blk_mq_freeze_queue(q); -pd_prealloc: - if (!pd_prealloc) { - pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q, &blkcg_root); - if (!pd_prealloc) { - ret = -ENOMEM; - goto out_bypass_end; - } - } - +retry: spin_lock_irq(&q->queue_lock); - /* blkg_list is pushed at the head, reverse walk to init parents first */ + /* blkg_list is pushed at the head, reverse walk to allocate parents first */ list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) { struct blkg_policy_data *pd; if (blkg->pd[pol->plid]) continue; - pd = pol->pd_alloc_fn(GFP_NOWAIT | __GFP_NOWARN, q, &blkcg_root); - if (!pd) - swap(pd, pd_prealloc); + /* If prealloc matches, use it; otherwise try GFP_NOWAIT */ + if (blkg == pinned_blkg) { + pd = pd_prealloc; + pd_prealloc = NULL; + } else { + pd = pol->pd_alloc_fn(GFP_NOWAIT | __GFP_NOWARN, q, + blkg->blkcg); + } + if (!pd) { + /* + * GFP_NOWAIT failed. Free the existing one and + * prealloc for @blkg w/ GFP_KERNEL. + */ + if (pinned_blkg) + blkg_put(pinned_blkg); + blkg_get(blkg); + pinned_blkg = blkg; + spin_unlock_irq(&q->queue_lock); - goto pd_prealloc; + + if (pd_prealloc) + pol->pd_free_fn(pd_prealloc); + pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q, + blkg->blkcg); + if (pd_prealloc) + goto retry; + else + goto enomem; } blkg->pd[pol->plid] = pd; pd->blkg = blkg; pd->plid = pol->plid; - if (pol->pd_init_fn) - pol->pd_init_fn(pd); } + /* all allocated, init in the same order */ + if (pol->pd_init_fn) + list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) + pol->pd_init_fn(blkg->pd[pol->plid]); + __set_bit(pol->plid, q->blkcg_pols); ret = 0; spin_unlock_irq(&q->queue_lock); -out_bypass_end: +out: if (queue_is_mq(q)) blk_mq_unfreeze_queue(q); + if (pinned_blkg) + blkg_put(pinned_blkg); if (pd_prealloc) pol->pd_free_fn(pd_prealloc); return ret; + +enomem: + /* alloc failed, nothing's initialized yet, free everything */ + spin_lock_irq(&q->queue_lock); + list_for_each_entry(blkg, &q->blkg_list, q_node) { + if (blkg->pd[pol->plid]) { + pol->pd_free_fn(blkg->pd[pol->plid]); + blkg->pd[pol->plid] = NULL; + } + } + spin_unlock_irq(&q->queue_lock); + ret = -ENOMEM; + goto out; } EXPORT_SYMBOL_GPL(blkcg_activate_policy); From 307f4065b9d7c1e887e8bdfb2487e4638559fea1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 15 Oct 2019 08:49:27 -0700 Subject: [PATCH 310/572] blk-rq-qos: fix first node deletion of rq_qos_del() rq_qos_del() incorrectly assigns the node being deleted to the head if it was the first on the list in the !prev path. Fix it by iterating with ** instead. Signed-off-by: Tejun Heo Cc: Josef Bacik Fixes: a79050434b45 ("blk-rq-qos: refactor out common elements of blk-wbt") Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Jens Axboe --- block/blk-rq-qos.h | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h index e8cb68f6958a..2bc43e94f4c4 100644 --- a/block/blk-rq-qos.h +++ b/block/blk-rq-qos.h @@ -108,16 +108,13 @@ static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos) static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos) { - struct rq_qos *cur, *prev = NULL; - for (cur = q->rq_qos; cur; cur = cur->next) { - if (cur == rqos) { - if (prev) - prev->next = rqos->next; - else - q->rq_qos = cur; + struct rq_qos **cur; + + for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) { + if (*cur == rqos) { + *cur = rqos->next; break; } - prev = cur; } blk_mq_debugfs_unregister_rqos(rqos); From 5b3ec8134f5f9fa1ed0a538441a495521078bbee Mon Sep 17 00:00:00 2001 From: Steven Price Date: Wed, 9 Oct 2019 10:44:55 +0100 Subject: [PATCH 311/572] drm/panfrost: Handle resetting on timeout better Panfrost uses multiple schedulers (one for each slot, so 2 in reality), and on a timeout has to stop all the schedulers to safely perform a reset. However more than one scheduler can trigger a timeout at the same time. This race condition results in jobs being freed while they are still in use. When stopping other slots use cancel_delayed_work_sync() to ensure that any timeout started for that slot has completed. Also use mutex_trylock() to obtain reset_lock. This means that only one thread attempts the reset, the other threads will simply complete without doing anything (the first thread will wait for this in the call to cancel_delayed_work_sync()). While we're here and since the function is already dependent on sched_job not being NULL, let's remove the unnecessary checks. Fixes: aa20236784ab ("drm/panfrost: Prevent concurrent resets") Tested-by: Neil Armstrong Signed-off-by: Steven Price Cc: stable@vger.kernel.org Signed-off-by: Rob Herring Link: https://patchwork.freedesktop.org/patch/msgid/20191009094456.9704-1-steven.price@arm.com --- drivers/gpu/drm/panfrost/panfrost_job.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c index a58551668d9a..21f34d44aac2 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.c +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -381,13 +381,19 @@ static void panfrost_job_timedout(struct drm_sched_job *sched_job) job_read(pfdev, JS_TAIL_LO(js)), sched_job); - mutex_lock(&pfdev->reset_lock); + if (!mutex_trylock(&pfdev->reset_lock)) + return; - for (i = 0; i < NUM_JOB_SLOTS; i++) - drm_sched_stop(&pfdev->js->queue[i].sched, sched_job); + for (i = 0; i < NUM_JOB_SLOTS; i++) { + struct drm_gpu_scheduler *sched = &pfdev->js->queue[i].sched; - if (sched_job) - drm_sched_increase_karma(sched_job); + drm_sched_stop(sched, sched_job); + if (js != i) + /* Ensure any timeouts on other slots have finished */ + cancel_delayed_work_sync(&sched->work_tdr); + } + + drm_sched_increase_karma(sched_job); spin_lock_irqsave(&pfdev->js->job_lock, flags); for (i = 0; i < NUM_JOB_SLOTS; i++) { From 8702ba9396bf7bbae2ab93c94acd4bd37cfa4f09 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 14 Oct 2019 14:34:51 +0800 Subject: [PATCH 312/572] btrfs: qgroup: Always free PREALLOC META reserve in btrfs_delalloc_release_extents() [Background] Btrfs qgroup uses two types of reserved space for METADATA space, PERTRANS and PREALLOC. PERTRANS is metadata space reserved for each transaction started by btrfs_start_transaction(). While PREALLOC is for delalloc, where we reserve space before joining a transaction, and finally it will be converted to PERTRANS after the writeback is done. [Inconsistency] However there is inconsistency in how we handle PREALLOC metadata space. The most obvious one is: In btrfs_buffered_write(): btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes, true); We always free qgroup PREALLOC meta space. While in btrfs_truncate_block(): btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, (ret != 0)); We only free qgroup PREALLOC meta space when something went wrong. [The Correct Behavior] The correct behavior should be the one in btrfs_buffered_write(), we should always free PREALLOC metadata space. The reason is, the btrfs_delalloc_* mechanism works by: - Reserve metadata first, even it's not necessary In btrfs_delalloc_reserve_metadata() - Free the unused metadata space Normally in: btrfs_delalloc_release_extents() |- btrfs_inode_rsv_release() Here we do calculation on whether we should release or not. E.g. for 64K buffered write, the metadata rsv works like: /* The first page */ reserve_meta: num_bytes=calc_inode_reservations() free_meta: num_bytes=0 total: num_bytes=calc_inode_reservations() /* The first page caused one outstanding extent, thus needs metadata rsv */ /* The 2nd page */ reserve_meta: num_bytes=calc_inode_reservations() free_meta: num_bytes=calc_inode_reservations() total: not changed /* The 2nd page doesn't cause new outstanding extent, needs no new meta rsv, so we free what we have reserved */ /* The 3rd~16th pages */ reserve_meta: num_bytes=calc_inode_reservations() free_meta: num_bytes=calc_inode_reservations() total: not changed (still space for one outstanding extent) This means, if btrfs_delalloc_release_extents() determines to free some space, then those space should be freed NOW. So for qgroup, we should call btrfs_qgroup_free_meta_prealloc() other than btrfs_qgroup_convert_reserved_meta(). The good news is: - The callers are not that hot The hottest caller is in btrfs_buffered_write(), which is already fixed by commit 336a8bb8e36a ("btrfs: Fix wrong btrfs_delalloc_release_extents parameter"). Thus it's not that easy to cause false EDQUOT. - The trans commit in advance for qgroup would hide the bug Since commit f5fef4593653 ("btrfs: qgroup: Make qgroup async transaction commit more aggressive"), when btrfs qgroup metadata free space is slow, it will try to commit transaction and free the wrongly converted PERTRANS space, so it's not that easy to hit such bug. [FIX] So to fix the problem, remove the @qgroup_free parameter for btrfs_delalloc_release_extents(), and always pass true to btrfs_inode_rsv_release(). Reported-by: Filipe Manana Fixes: 43b18595d660 ("btrfs: qgroup: Use separate meta reservation type for delalloc") CC: stable@vger.kernel.org # 4.19+ Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 3 +-- fs/btrfs/delalloc-space.c | 6 ++---- fs/btrfs/file.c | 7 +++---- fs/btrfs/inode-map.c | 4 ++-- fs/btrfs/inode.c | 12 ++++++------ fs/btrfs/ioctl.c | 6 ++---- fs/btrfs/relocation.c | 9 ++++----- 7 files changed, 20 insertions(+), 27 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c1489c229694..fe2b8765d9e6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2487,8 +2487,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, int nitems, bool use_global_rsv); void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *rsv); -void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes, - bool qgroup_free); +void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes); int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes); u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo); diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c index d949d7d2abed..571e7b31ea2f 100644 --- a/fs/btrfs/delalloc-space.c +++ b/fs/btrfs/delalloc-space.c @@ -418,7 +418,6 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes, * btrfs_delalloc_release_extents - release our outstanding_extents * @inode: the inode to balance the reservation for. * @num_bytes: the number of bytes we originally reserved with - * @qgroup_free: do we need to free qgroup meta reservation or convert them. * * When we reserve space we increase outstanding_extents for the extents we may * add. Once we've set the range as delalloc or created our ordered extents we @@ -426,8 +425,7 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes, * temporarily tracked outstanding_extents. This _must_ be used in conjunction * with btrfs_delalloc_reserve_metadata. */ -void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes, - bool qgroup_free) +void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes) { struct btrfs_fs_info *fs_info = inode->root->fs_info; unsigned num_extents; @@ -441,7 +439,7 @@ void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes, if (btrfs_is_testing(fs_info)) return; - btrfs_inode_rsv_release(inode, qgroup_free); + btrfs_inode_rsv_release(inode, true); } /** diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 27e5b269e729..e955e7fa9201 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1692,7 +1692,7 @@ again: force_page_uptodate); if (ret) { btrfs_delalloc_release_extents(BTRFS_I(inode), - reserve_bytes, true); + reserve_bytes); break; } @@ -1704,7 +1704,7 @@ again: if (extents_locked == -EAGAIN) goto again; btrfs_delalloc_release_extents(BTRFS_I(inode), - reserve_bytes, true); + reserve_bytes); ret = extents_locked; break; } @@ -1772,8 +1772,7 @@ again: else free_extent_state(cached_state); - btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes, - true); + btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes); if (ret) { btrfs_drop_pages(pages, num_pages); break; diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 63cad7865d75..37345fb6191d 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -501,13 +501,13 @@ again: ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc, prealloc, prealloc, &alloc_hint); if (ret) { - btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc, true); + btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc); btrfs_delalloc_release_metadata(BTRFS_I(inode), prealloc, true); goto out_put; } ret = btrfs_write_out_ino_cache(root, trans, path, inode); - btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc, false); + btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc); out_put: iput(inode); out_release: diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0f2754eaa05b..c3f386b7cc0b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2206,7 +2206,7 @@ again: ClearPageChecked(page); set_page_dirty(page); - btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, false); + btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); out: unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end, &cached_state); @@ -4951,7 +4951,7 @@ again: if (!page) { btrfs_delalloc_release_space(inode, data_reserved, block_start, blocksize, true); - btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, true); + btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize); ret = -ENOMEM; goto out; } @@ -5018,7 +5018,7 @@ out_unlock: if (ret) btrfs_delalloc_release_space(inode, data_reserved, block_start, blocksize, true); - btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, (ret != 0)); + btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize); unlock_page(page); put_page(page); out: @@ -8709,7 +8709,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) } else if (ret >= 0 && (size_t)ret < count) btrfs_delalloc_release_space(inode, data_reserved, offset, count - (size_t)ret, true); - btrfs_delalloc_release_extents(BTRFS_I(inode), count, false); + btrfs_delalloc_release_extents(BTRFS_I(inode), count); } out: if (wakeup) @@ -9059,7 +9059,7 @@ again: unlock_extent_cached(io_tree, page_start, page_end, &cached_state); if (!ret2) { - btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, true); + btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); sb_end_pagefault(inode->i_sb); extent_changeset_free(data_reserved); return VM_FAULT_LOCKED; @@ -9068,7 +9068,7 @@ again: out_unlock: unlock_page(page); out: - btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, (ret != 0)); + btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); btrfs_delalloc_release_space(inode, data_reserved, page_start, reserved_space, (ret != 0)); out_noreserve: diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index de730e56d3f5..7c145a41decd 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1360,8 +1360,7 @@ again: unlock_page(pages[i]); put_page(pages[i]); } - btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT, - false); + btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT); extent_changeset_free(data_reserved); return i_done; out: @@ -1372,8 +1371,7 @@ out: btrfs_delalloc_release_space(inode, data_reserved, start_index << PAGE_SHIFT, page_cnt << PAGE_SHIFT, true); - btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT, - true); + btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT); extent_changeset_free(data_reserved); return ret; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 7b883239fa7e..5cd42b66818c 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3278,7 +3278,7 @@ static int relocate_file_extent_cluster(struct inode *inode, btrfs_delalloc_release_metadata(BTRFS_I(inode), PAGE_SIZE, true); btrfs_delalloc_release_extents(BTRFS_I(inode), - PAGE_SIZE, true); + PAGE_SIZE); ret = -ENOMEM; goto out; } @@ -3299,7 +3299,7 @@ static int relocate_file_extent_cluster(struct inode *inode, btrfs_delalloc_release_metadata(BTRFS_I(inode), PAGE_SIZE, true); btrfs_delalloc_release_extents(BTRFS_I(inode), - PAGE_SIZE, true); + PAGE_SIZE); ret = -EIO; goto out; } @@ -3328,7 +3328,7 @@ static int relocate_file_extent_cluster(struct inode *inode, btrfs_delalloc_release_metadata(BTRFS_I(inode), PAGE_SIZE, true); btrfs_delalloc_release_extents(BTRFS_I(inode), - PAGE_SIZE, true); + PAGE_SIZE); clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, @@ -3344,8 +3344,7 @@ static int relocate_file_extent_cluster(struct inode *inode, put_page(page); index++; - btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, - false); + btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); balance_dirty_pages_ratelimited(inode->i_mapping); btrfs_throttle(fs_info); } From 8e0d0ad206f08506c893326ca7c9c3d9cc042cef Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 15 Oct 2019 09:56:36 -0700 Subject: [PATCH 313/572] sparc64: disable fast-GUP due to unexplained oopses HAVE_FAST_GUP enables the lockless quick page table walker for simple cases, and is a nice optimization for some random loads that can then use get_user_pages_fast() rather than the more careful page walker. However, for some unexplained reason, it seems to be subtly broken on sparc64. The breakage is only with some compiler versions and some hardware, and nobody seems to have figured out what triggers it, although there's a simple reprodicer for the problem when it does trigger. The problem was introduced with the conversion to the generic GUP code in commit 7b9afb86b632 ("sparc64: use the generic get_user_pages_fast code"), but nothing looks obviously wrong in that conversion. It may be a compiler bug that just hits us with the code reorganization. Or it may be something very specific to sparc64. This disables HAVE_FAST_GUP entirely. That makes things like futexes a bit slower, but at least they work. If we can figure out the trigger, that would be lovely, but it's been three months already.. Link: https://lore.kernel.org/lkml/20190717215956.GA30369@altlinux.org/ Fixes: 7b9afb86b632 ("sparc64: use the generic get_user_pages_fast code") Reported-by: Dmitry V Levin Reported-by: Anatoly Pugachev Requested-by: Meelis Roos Suggested-by: Christoph Hellwig Cc: David Miller Signed-off-by: Linus Torvalds --- arch/sparc/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index fbc1aecf0f94..eb24cb1afc11 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -29,7 +29,6 @@ config SPARC select RTC_DRV_M48T59 select RTC_SYSTOHC select HAVE_ARCH_JUMP_LABEL if SPARC64 - select HAVE_FAST_GUP if SPARC64 select GENERIC_IRQ_SHOW select ARCH_WANT_IPC_PARSE_VERSION select GENERIC_PCI_IOMAP From 9786340acaa33b3722925125e2df358d14b8ee12 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Mon, 23 Sep 2019 20:21:22 +0200 Subject: [PATCH 314/572] ARM: dts: bcm2835-rpi-zero-w: Fix bus-width of sdhci The commit e7774049ff25 ("ARM: dts: bcm283x: Define MMC interfaces at board level") accidently dropped the bus width for the sdhci on the RPi Zero W, because the board file was relying on the defaults from bcm2835-rpi.dtsi. So fix this performance regression by adding the bus width to the board file. Fixes: e7774049ff25 ("ARM: dts: bcm283x: Define MMC interfaces at board level") Reported-by: Phil Elwell Signed-off-by: Stefan Wahren Signed-off-by: Florian Fainelli --- arch/arm/boot/dts/bcm2835-rpi-zero-w.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts b/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts index 09a088f98566..b75af21069f9 100644 --- a/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts +++ b/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts @@ -113,6 +113,7 @@ #address-cells = <1>; #size-cells = <0>; pinctrl-0 = <&emmc_gpio34 &gpclk2_gpio43>; + bus-width = <4>; mmc-pwrseq = <&wifi_pwrseq>; non-removable; status = "okay"; From b0818f80c8c1bc215bba276bd61c216014fab23b Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Fri, 11 Oct 2019 18:14:55 -0700 Subject: [PATCH 315/572] blackhole_netdev: fix syzkaller reported issue While invalidating the dst, we assign backhole_netdev instead of loopback device. However, this device does not have idev pointer and hence no ip6_ptr even if IPv6 is enabled. Possibly this has triggered the syzbot reported crash. The syzbot report does not have reproducer, however, this is the only device that doesn't have matching idev created. Crash instruction is : static inline bool ip6_ignore_linkdown(const struct net_device *dev) { const struct inet6_dev *idev = __in6_dev_get(dev); return !!idev->cnf.ignore_routes_with_linkdown; <= crash } Also ipv6 always assumes presence of idev and never checks for it being NULL (as does the above referenced code). So adding a idev for the blackhole_netdev to avoid this class of crashes in the future. Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 7 ++++++- net/ipv6/route.c | 15 ++++++--------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 34ccef18b40e..4c87594d1389 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -6996,7 +6996,7 @@ static struct rtnl_af_ops inet6_ops __read_mostly = { int __init addrconf_init(void) { - struct inet6_dev *idev; + struct inet6_dev *idev, *bdev; int i, err; err = ipv6_addr_label_init(); @@ -7036,10 +7036,14 @@ int __init addrconf_init(void) */ rtnl_lock(); idev = ipv6_add_dev(init_net.loopback_dev); + bdev = ipv6_add_dev(blackhole_netdev); rtnl_unlock(); if (IS_ERR(idev)) { err = PTR_ERR(idev); goto errlo; + } else if (IS_ERR(bdev)) { + err = PTR_ERR(bdev); + goto errlo; } ip6_route_init_special_entries(); @@ -7124,6 +7128,7 @@ void addrconf_cleanup(void) addrconf_ifdown(dev, 1); } addrconf_ifdown(init_net.loopback_dev, 2); + addrconf_ifdown(blackhole_netdev, 2); /* * Check hash table. diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a63ff85fe141..742120728869 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -155,10 +155,9 @@ void rt6_uncached_list_del(struct rt6_info *rt) static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev) { - struct net_device *loopback_dev = net->loopback_dev; int cpu; - if (dev == loopback_dev) + if (dev == net->loopback_dev) return; for_each_possible_cpu(cpu) { @@ -171,7 +170,7 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev) struct net_device *rt_dev = rt->dst.dev; if (rt_idev->dev == dev) { - rt->rt6i_idev = in6_dev_get(loopback_dev); + rt->rt6i_idev = in6_dev_get(blackhole_netdev); in6_dev_put(rt_idev); } @@ -386,13 +385,11 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, { struct rt6_info *rt = (struct rt6_info *)dst; struct inet6_dev *idev = rt->rt6i_idev; - struct net_device *loopback_dev = - dev_net(dev)->loopback_dev; - if (idev && idev->dev != loopback_dev) { - struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev); - if (loopback_idev) { - rt->rt6i_idev = loopback_idev; + if (idev && idev->dev != dev_net(dev)->loopback_dev) { + struct inet6_dev *ibdev = in6_dev_get(blackhole_netdev); + if (ibdev) { + rt->rt6i_idev = ibdev; in6_dev_put(idev); } } From 92696286f3bb37ba50e4bd8d1beb24afb759a799 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 11 Oct 2019 12:53:49 -0700 Subject: [PATCH 316/572] net: bcmgenet: Set phydev->dev_flags only for internal PHYs phydev->dev_flags is entirely dependent on the PHY device driver which is going to be used, setting the internal GENET PHY revision in those bits only makes sense when drivers/net/phy/bcm7xxx.c is the PHY driver being used. Fixes: 487320c54143 ("net: bcmgenet: communicate integrated PHY revision to PHY driver") Signed-off-by: Florian Fainelli Acked-by: Doug Berger Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmmii.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 970e478a9017..94d1dd5d56bf 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -273,11 +273,12 @@ int bcmgenet_mii_probe(struct net_device *dev) struct bcmgenet_priv *priv = netdev_priv(dev); struct device_node *dn = priv->pdev->dev.of_node; struct phy_device *phydev; - u32 phy_flags; + u32 phy_flags = 0; int ret; /* Communicate the integrated PHY revision */ - phy_flags = priv->gphy_rev; + if (priv->internal_phy) + phy_flags = priv->gphy_rev; /* Initialize link state variables that bcmgenet_mii_setup() uses */ priv->old_link = -1; From f913eac8e555bc776565a21ce84bf2bcc3715cbb Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 11 Oct 2019 20:43:03 -0600 Subject: [PATCH 317/572] net: Update address for vrf and l3mdev in MAINTAINERS Use my kernel.org address for all entries in MAINTAINERS. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 55199ef7fa74..49e4b004917b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9126,7 +9126,7 @@ F: drivers/auxdisplay/ks0108.c F: include/linux/ks0108.h L3MDEV -M: David Ahern +M: David Ahern L: netdev@vger.kernel.org S: Maintained F: net/l3mdev @@ -17439,7 +17439,7 @@ F: include/linux/regulator/ K: regulator_get_optional VRF -M: David Ahern +M: David Ahern M: Shrijeet Mukherjee L: netdev@vger.kernel.org S: Maintained From ddc790e92b3afa4e366ffb41818cfcd19015031e Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 11 Oct 2019 21:03:33 -0700 Subject: [PATCH 318/572] net: ethernet: broadcom: have drivers select DIMLIB as needed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NET_VENDOR_BROADCOM is intended to control a kconfig menu only. It should not have anything to do with code generation. As such, it should not select DIMLIB for all drivers under NET_VENDOR_BROADCOM. Instead each driver that needs DIMLIB should select it (being the symbols SYSTEMPORT, BNXT, and BCMGENET). Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1907021810220.13058@ramsan.of.borg/ Fixes: 4f75da3666c0 ("linux/dim: Move implementation to .c files") Reported-by: Geert Uytterhoeven Signed-off-by: Randy Dunlap Cc: Uwe Kleine-König Cc: Tal Gilboa Cc: Saeed Mahameed Cc: netdev@vger.kernel.org Cc: linux-rdma@vger.kernel.org Cc: "David S. Miller" Cc: Jakub Kicinski Cc: Doug Ledford Cc: Jason Gunthorpe Cc: Leon Romanovsky Cc: Or Gerlitz Cc: Sagi Grimberg Acked-by: Florian Fainelli Reviewed-by: Leon Romanovsky Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/Kconfig | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index e24f5d2b6afe..53055ce5dfd6 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -8,7 +8,6 @@ config NET_VENDOR_BROADCOM default y depends on (SSB_POSSIBLE && HAS_DMA) || PCI || BCM63XX || \ SIBYTE_SB1xxx_SOC - select DIMLIB ---help--- If you have a network (Ethernet) chipset belonging to this class, say Y. @@ -69,6 +68,7 @@ config BCMGENET select FIXED_PHY select BCM7XXX_PHY select MDIO_BCM_UNIMAC + select DIMLIB help This driver supports the built-in Ethernet MACs found in the Broadcom BCM7xxx Set Top Box family chipset. @@ -188,6 +188,7 @@ config SYSTEMPORT select MII select PHYLIB select FIXED_PHY + select DIMLIB help This driver supports the built-in Ethernet MACs found in the Broadcom BCM7xxx Set Top Box family chipset using an internal @@ -200,6 +201,7 @@ config BNXT select LIBCRC32C select NET_DEVLINK select PAGE_POOL + select DIMLIB ---help--- This driver supports Broadcom NetXtreme-C/E 10/25/40/50 gigabit Ethernet cards. To compile this driver as a module, choose M here: From b14a39048c1156cfee76228bf449852da2f14df8 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 10 Oct 2019 14:58:34 +0200 Subject: [PATCH 319/572] USB: ldusb: fix memleak on disconnect If disconnect() races with release() after a process has been interrupted, release() could end up returning early and the driver would fail to free its driver data. Fixes: 2824bd250f0b ("[PATCH] USB: add ldusb driver") Cc: stable # 2.6.13 Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20191010125835.27031-2-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/ldusb.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/usb/misc/ldusb.c b/drivers/usb/misc/ldusb.c index f3108d85e768..147c90c2a4e5 100644 --- a/drivers/usb/misc/ldusb.c +++ b/drivers/usb/misc/ldusb.c @@ -380,10 +380,7 @@ static int ld_usb_release(struct inode *inode, struct file *file) goto exit; } - if (mutex_lock_interruptible(&dev->mutex)) { - retval = -ERESTARTSYS; - goto exit; - } + mutex_lock(&dev->mutex); if (dev->open_count != 1) { retval = -ENODEV; From b6c03e5f7b463efcafd1ce141bd5a8fc4e583ae2 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 10 Oct 2019 14:58:35 +0200 Subject: [PATCH 320/572] USB: legousbtower: fix memleak on disconnect If disconnect() races with release() after a process has been interrupted, release() could end up returning early and the driver would fail to free its driver data. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20191010125835.27031-3-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/legousbtower.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/usb/misc/legousbtower.c b/drivers/usb/misc/legousbtower.c index 9d4c52a7ebe0..62dab2441ec4 100644 --- a/drivers/usb/misc/legousbtower.c +++ b/drivers/usb/misc/legousbtower.c @@ -419,10 +419,7 @@ static int tower_release (struct inode *inode, struct file *file) goto exit; } - if (mutex_lock_interruptible(&dev->lock)) { - retval = -ERESTARTSYS; - goto exit; - } + mutex_lock(&dev->lock); if (dev->open_count != 1) { dev_dbg(&dev->udev->dev, "%s: device not opened exactly once\n", From fd47a417e75e2506eb3672ae569b1c87e3774155 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 11 Oct 2019 17:11:15 +0300 Subject: [PATCH 321/572] USB: legousbtower: fix a signedness bug in tower_probe() The problem is that sizeof() is unsigned long so negative error codes are type promoted to high positive values and the condition becomes false. Fixes: 1d427be4a39d ("USB: legousbtower: fix slab info leak at probe") Signed-off-by: Dan Carpenter Acked-by: Johan Hovold Link: https://lore.kernel.org/r/20191011141115.GA4521@mwanda Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/legousbtower.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/misc/legousbtower.c b/drivers/usb/misc/legousbtower.c index 62dab2441ec4..23061f1526b4 100644 --- a/drivers/usb/misc/legousbtower.c +++ b/drivers/usb/misc/legousbtower.c @@ -878,7 +878,7 @@ static int tower_probe (struct usb_interface *interface, const struct usb_device get_version_reply, sizeof(*get_version_reply), 1000); - if (result < sizeof(*get_version_reply)) { + if (result != sizeof(*get_version_reply)) { if (result >= 0) result = -EIO; dev_err(idev, "get version request failed: %d\n", result); From f616c3bda47e314ddb797e969f27081f3c33e3b3 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Sun, 13 Oct 2019 10:20:20 +0100 Subject: [PATCH 322/572] usb: cdns3: Fix dequeue implementation. Dequeuing implementation in cdns3_gadget_ep_dequeue gets first request from deferred_req_list and changed TRB associated with it to LINK TRB. This approach is incorrect because deferred_req_list contains requests that have not been placed on hardware RING. In this case driver should just giveback this request to gadget driver. The patch implements new approach that first checks where dequeuing request is located and only when it's on Transfer Ring then changes TRB associated with it to LINK TRB. During processing completed transfers such LINK TRB will be ignored. Reported-by: Peter Chen Signed-off-by: Pawel Laszczak Fixes: 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver") Reviewed-by: Peter Chen Link: https://lore.kernel.org/r/1570958420-22196-1-git-send-email-pawell@cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/gadget.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/drivers/usb/cdns3/gadget.c b/drivers/usb/cdns3/gadget.c index 2ca280f4c054..9050b380ab83 100644 --- a/drivers/usb/cdns3/gadget.c +++ b/drivers/usb/cdns3/gadget.c @@ -1145,6 +1145,14 @@ static void cdns3_transfer_completed(struct cdns3_device *priv_dev, request = cdns3_next_request(&priv_ep->pending_req_list); priv_req = to_cdns3_request(request); + trb = priv_ep->trb_pool + priv_ep->dequeue; + + /* Request was dequeued and TRB was changed to TRB_LINK. */ + if (TRB_FIELD_TO_TYPE(trb->control) == TRB_LINK) { + trace_cdns3_complete_trb(priv_ep, trb); + cdns3_move_deq_to_next_trb(priv_req); + } + /* Re-select endpoint. It could be changed by other CPU during * handling usb_gadget_giveback_request. */ @@ -2067,6 +2075,7 @@ int cdns3_gadget_ep_dequeue(struct usb_ep *ep, struct usb_request *req, *req_temp; struct cdns3_request *priv_req; struct cdns3_trb *link_trb; + u8 req_on_hw_ring = 0; unsigned long flags; int ret = 0; @@ -2083,8 +2092,10 @@ int cdns3_gadget_ep_dequeue(struct usb_ep *ep, list_for_each_entry_safe(req, req_temp, &priv_ep->pending_req_list, list) { - if (request == req) + if (request == req) { + req_on_hw_ring = 1; goto found; + } } list_for_each_entry_safe(req, req_temp, &priv_ep->deferred_req_list, @@ -2096,27 +2107,21 @@ int cdns3_gadget_ep_dequeue(struct usb_ep *ep, goto not_found; found: - - if (priv_ep->wa1_trb == priv_req->trb) - cdns3_wa1_restore_cycle_bit(priv_ep); - link_trb = priv_req->trb; - cdns3_move_deq_to_next_trb(priv_req); - cdns3_gadget_giveback(priv_ep, priv_req, -ECONNRESET); - - /* Update ring */ - request = cdns3_next_request(&priv_ep->deferred_req_list); - if (request) { - priv_req = to_cdns3_request(request); + /* Update ring only if removed request is on pending_req_list list */ + if (req_on_hw_ring) { link_trb->buffer = TRB_BUFFER(priv_ep->trb_pool_dma + (priv_req->start_trb * TRB_SIZE)); link_trb->control = (link_trb->control & TRB_CYCLE) | - TRB_TYPE(TRB_LINK) | TRB_CHAIN | TRB_TOGGLE; - } else { - priv_ep->flags |= EP_UPDATE_EP_TRBADDR; + TRB_TYPE(TRB_LINK) | TRB_CHAIN; + + if (priv_ep->wa1_trb == priv_req->trb) + cdns3_wa1_restore_cycle_bit(priv_ep); } + cdns3_gadget_giveback(priv_ep, priv_req, -ECONNRESET); + not_found: spin_unlock_irqrestore(&priv_dev->lock, flags); return ret; From b987b66ac3a2bc2f7b03a0ba48a07dc553100c07 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 14 Oct 2019 14:18:30 -0500 Subject: [PATCH 323/572] usb: udc: lpc32xx: fix bad bit shift operation It seems that the right variable to use in this case is *i*, instead of *n*, otherwise there is an undefined behavior when right shifiting by more than 31 bits when multiplying n by 8; notice that *n* can take values equal or greater than 4 (4, 8, 16, ...). Also, notice that under the current conditions (bl = 3), we are skiping the handling of bytes 3, 7, 31... So, fix this by updating this logic and limit *bl* up to 4 instead of up to 3. This fix is based on function udc_stuff_fifo(). Addresses-Coverity-ID: 1454834 ("Bad bit shift operation") Fixes: 24a28e428351 ("USB: gadget driver for LPC32xx") Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20191014191830.GA10721@embeddedor Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/lpc32xx_udc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/udc/lpc32xx_udc.c b/drivers/usb/gadget/udc/lpc32xx_udc.c index 2b1f3cc7819b..bf6c81e2f8cc 100644 --- a/drivers/usb/gadget/udc/lpc32xx_udc.c +++ b/drivers/usb/gadget/udc/lpc32xx_udc.c @@ -1177,11 +1177,11 @@ static void udc_pop_fifo(struct lpc32xx_udc *udc, u8 *data, u32 bytes) tmp = readl(USBD_RXDATA(udc->udp_baseaddr)); bl = bytes - n; - if (bl > 3) - bl = 3; + if (bl > 4) + bl = 4; for (i = 0; i < bl; i++) - data[n + i] = (u8) ((tmp >> (n * 8)) & 0xFF); + data[n + i] = (u8) ((tmp >> (i * 8)) & 0xFF); } break; From 7a759197974894213621aa65f0571b51904733d6 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 15 Oct 2019 19:55:22 +0200 Subject: [PATCH 324/572] USB: usblp: fix use-after-free on disconnect A recent commit addressing a runtime PM use-count regression, introduced a use-after-free by not making sure we held a reference to the struct usb_interface for the lifetime of the driver data. Fixes: 9a31535859bf ("USB: usblp: fix runtime PM after driver unbind") Cc: stable Reported-by: syzbot+cd24df4d075c319ebfc5@syzkaller.appspotmail.com Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20191015175522.18490-1-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/usblp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c index fb8bd60c83f4..0d8e3f3804a3 100644 --- a/drivers/usb/class/usblp.c +++ b/drivers/usb/class/usblp.c @@ -445,6 +445,7 @@ static void usblp_cleanup(struct usblp *usblp) kfree(usblp->readbuf); kfree(usblp->device_id_string); kfree(usblp->statusbuf); + usb_put_intf(usblp->intf); kfree(usblp); } @@ -1113,7 +1114,7 @@ static int usblp_probe(struct usb_interface *intf, init_waitqueue_head(&usblp->wwait); init_usb_anchor(&usblp->urbs); usblp->ifnum = intf->cur_altsetting->desc.bInterfaceNumber; - usblp->intf = intf; + usblp->intf = usb_get_intf(intf); /* Malloc device ID string buffer to the largest expected length, * since we can re-query it on an ioctl and a dynamic string @@ -1198,6 +1199,7 @@ abort: kfree(usblp->readbuf); kfree(usblp->statusbuf); kfree(usblp->device_id_string); + usb_put_intf(usblp->intf); kfree(usblp); abort_ret: return retval; From f50b6805dbb993152025ec04dea094c40cc93a0c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 13 Oct 2019 23:00:16 +0100 Subject: [PATCH 325/572] 8250-men-mcb: fix error checking when get_num_ports returns -ENODEV The current checking for failure on the number of ports fails when -ENODEV is returned from the call to get_num_ports. Fix this by making num_ports and loop counter i signed rather than unsigned ints. Also add check for num_ports being less than zero to check for -ve error returns. Addresses-Coverity: ("Unsigned compared against 0") Fixes: e2fea54e4592 ("8250-men-mcb: add support for 16z025 and 16z057") Signed-off-by: Colin Ian King Reviewed-by: Michael Moese Link: https://lore.kernel.org/r/20191013220016.9369-1-colin.king@canonical.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_men_mcb.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/8250/8250_men_mcb.c b/drivers/tty/serial/8250/8250_men_mcb.c index 02c5aff58a74..8df89e9cd254 100644 --- a/drivers/tty/serial/8250/8250_men_mcb.c +++ b/drivers/tty/serial/8250/8250_men_mcb.c @@ -72,8 +72,8 @@ static int serial_8250_men_mcb_probe(struct mcb_device *mdev, { struct serial_8250_men_mcb_data *data; struct resource *mem; - unsigned int num_ports; - unsigned int i; + int num_ports; + int i; void __iomem *membase; mem = mcb_get_resource(mdev, IORESOURCE_MEM); @@ -88,7 +88,7 @@ static int serial_8250_men_mcb_probe(struct mcb_device *mdev, dev_dbg(&mdev->dev, "found a 16z%03u with %u ports\n", mdev->id, num_ports); - if (num_ports == 0 || num_ports > 4) { + if (num_ports <= 0 || num_ports > 4) { dev_err(&mdev->dev, "unexpected number of ports: %u\n", num_ports); return -ENODEV; @@ -133,7 +133,7 @@ static int serial_8250_men_mcb_probe(struct mcb_device *mdev, static void serial_8250_men_mcb_remove(struct mcb_device *mdev) { - unsigned int num_ports, i; + int num_ports, i; struct serial_8250_men_mcb_data *data = mcb_get_drvdata(mdev); if (!data) From 95f89e090618efca63918b658c2002e57d393036 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 15 Oct 2019 17:16:50 +0200 Subject: [PATCH 326/572] vfio/type1: Initialize resv_msi_base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After enabling CONFIG_IOMMU_DMA on X86 a new warning appears when compiling vfio: drivers/vfio/vfio_iommu_type1.c: In function ‘vfio_iommu_type1_attach_group’: drivers/vfio/vfio_iommu_type1.c:1827:7: warning: ‘resv_msi_base’ may be used uninitialized in this function [-Wmaybe-uninitialized] ret = iommu_get_msi_cookie(domain->domain, resv_msi_base); ~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The warning is a false positive, because the call to iommu_get_msi_cookie() only happens when vfio_iommu_has_sw_msi() returned true. And that only happens when it also set resv_msi_base. But initialize the variable anyway to get rid of the warning. Signed-off-by: Joerg Roedel Reviewed-by: Cornelia Huck Reviewed-by: Eric Auger Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 96fddc1dafc3..d864277ea16f 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1658,7 +1658,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, struct bus_type *bus = NULL; int ret; bool resv_msi, msi_remap; - phys_addr_t resv_msi_base; + phys_addr_t resv_msi_base = 0; struct iommu_domain_geometry geo; LIST_HEAD(iova_copy); LIST_HEAD(group_resv_regions); From 09d6ac8dc51a033ae0043c1fe40b4d02563c2496 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 15 Oct 2019 12:54:17 -0700 Subject: [PATCH 327/572] libata/ahci: Fix PCS quirk application Commit c312ef176399 "libata/ahci: Drop PCS quirk for Denverton and beyond" got the polarity wrong on the check for which board-ids should have the quirk applied. The board type board_ahci_pcs7 is defined at the end of the list such that "pcs7" boards can be special cased in the future if they need the quirk. All prior Intel board ids "< board_ahci_pcs7" should proceed with applying the quirk. Reported-by: Andreas Friedrich Reported-by: Stephen Douthit Fixes: c312ef176399 ("libata/ahci: Drop PCS quirk for Denverton and beyond") Cc: Signed-off-by: Dan Williams Signed-off-by: Jens Axboe --- drivers/ata/ahci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index dd92faf197d5..05c2b32dcc4d 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1600,7 +1600,9 @@ static void ahci_intel_pcs_quirk(struct pci_dev *pdev, struct ahci_host_priv *hp */ if (!id || id->vendor != PCI_VENDOR_ID_INTEL) return; - if (((enum board_ids) id->driver_data) < board_ahci_pcs7) + + /* Skip applying the quirk on Denverton and beyond */ + if (((enum board_ids) id->driver_data) >= board_ahci_pcs7) return; /* From 0c401fdf27ba52830c39670dc81cd8379756bd65 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Tue, 15 Oct 2019 13:52:03 -0700 Subject: [PATCH 328/572] xtensa: virt: fix PCI IO ports mapping virt device tree incorrectly uses 0xf0000000 on both sides of PCI IO ports address space mapping. This results in incorrect port address assignment in PCI IO BARs and subsequent crash on attempt to access them. Use 0 as base address in PCI IO ports address space. Signed-off-by: Max Filippov --- arch/xtensa/boot/dts/virt.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/xtensa/boot/dts/virt.dts b/arch/xtensa/boot/dts/virt.dts index a9dcd87b6eb1..611b98a02a65 100644 --- a/arch/xtensa/boot/dts/virt.dts +++ b/arch/xtensa/boot/dts/virt.dts @@ -56,7 +56,7 @@ reg = <0xf0100000 0x03f00000>; // BUS_ADDRESS(3) CPU_PHYSICAL(1) SIZE(2) - ranges = <0x01000000 0x0 0xf0000000 0xf0000000 0x0 0x00010000>, + ranges = <0x01000000 0x0 0x00000000 0xf0000000 0x0 0x00010000>, <0x02000000 0x0 0xf4000000 0xf4000000 0x0 0x08000000>; // PCI_DEVICE(3) INT#(1) CONTROLLER(PHANDLE) CONTROLLER_DATA(2) From 45144d42f299455911cc29366656c7324a3a7c97 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 14 Oct 2019 13:25:00 +0200 Subject: [PATCH 329/572] PCI: PM: Fix pci_power_up() There is an arbitrary difference between the system resume and runtime resume code paths for PCI devices regarding the delay to apply when switching the devices from D3cold to D0. Namely, pci_restore_standard_config() used in the runtime resume code path calls pci_set_power_state() which in turn invokes __pci_start_power_transition() to power up the device through the platform firmware and that function applies the transition delay (as per PCI Express Base Specification Revision 2.0, Section 6.6.1). However, pci_pm_default_resume_early() used in the system resume code path calls pci_power_up() which doesn't apply the delay at all and that causes issues to occur during resume from suspend-to-idle on some systems where the delay is required. Since there is no reason for that difference to exist, modify pci_power_up() to follow pci_set_power_state() more closely and invoke __pci_start_power_transition() from there to call the platform firmware to power up the device (in case that's necessary). Fixes: db288c9c5f9d ("PCI / PM: restore the original behavior of pci_set_power_state()") Reported-by: Daniel Drake Tested-by: Daniel Drake Link: https://lore.kernel.org/linux-pm/CAD8Lp44TYxrMgPLkHCqF9hv6smEurMXvmmvmtyFhZ6Q4SE+dig@mail.gmail.com/T/#m21be74af263c6a34f36e0fc5c77c5449d9406925 Signed-off-by: Rafael J. Wysocki Acked-by: Bjorn Helgaas Cc: 3.10+ # 3.10+ --- drivers/pci/pci.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e7982af9a5d8..a97e2571a527 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -958,19 +958,6 @@ void pci_refresh_power_state(struct pci_dev *dev) pci_update_current_state(dev, dev->current_state); } -/** - * pci_power_up - Put the given device into D0 forcibly - * @dev: PCI device to power up - */ -void pci_power_up(struct pci_dev *dev) -{ - if (platform_pci_power_manageable(dev)) - platform_pci_set_power_state(dev, PCI_D0); - - pci_raw_set_power_state(dev, PCI_D0); - pci_update_current_state(dev, PCI_D0); -} - /** * pci_platform_power_transition - Use platform to change device power state * @dev: PCI device to handle. @@ -1153,6 +1140,17 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state) } EXPORT_SYMBOL(pci_set_power_state); +/** + * pci_power_up - Put the given device into D0 forcibly + * @dev: PCI device to power up + */ +void pci_power_up(struct pci_dev *dev) +{ + __pci_start_power_transition(dev, PCI_D0); + pci_raw_set_power_state(dev, PCI_D0); + pci_update_current_state(dev, PCI_D0); +} + /** * pci_choose_state - Choose the power state of a PCI device * @dev: PCI device to be suspended From b31141d390f1d47f1f3e1aef04909dc48e894611 Mon Sep 17 00:00:00 2001 From: Nishad Kamdar Date: Sat, 12 Oct 2019 17:48:56 +0530 Subject: [PATCH 330/572] net: dsa: microchip: Use the correct style for SPDX License Identifier This patch corrects the SPDX License Identifier style in header files related to Distributed Switch Architecture drivers for Microchip KSZ series switch support. For C header files Documentation/process/license-rules.rst mandates C-like comments (opposed to C source files where C++ style should be used) Changes made by using a script provided by Joe Perches here: https://lkml.org/lkml/2019/2/7/46. Suggested-by: Joe Perches Signed-off-by: Nishad Kamdar Signed-off-by: David S. Miller --- drivers/net/dsa/microchip/ksz9477_reg.h | 4 ++-- drivers/net/dsa/microchip/ksz_common.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz9477_reg.h b/drivers/net/dsa/microchip/ksz9477_reg.h index 2938e892b631..16939f29faa5 100644 --- a/drivers/net/dsa/microchip/ksz9477_reg.h +++ b/drivers/net/dsa/microchip/ksz9477_reg.h @@ -1,5 +1,5 @@ -/* SPDX-License-Identifier: GPL-2.0 - * +/* SPDX-License-Identifier: GPL-2.0 */ +/* * Microchip KSZ9477 register definitions * * Copyright (C) 2017-2018 Microchip Technology Inc. diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h index dd60d0837fc6..64c2677693d2 100644 --- a/drivers/net/dsa/microchip/ksz_common.h +++ b/drivers/net/dsa/microchip/ksz_common.h @@ -1,5 +1,5 @@ -/* SPDX-License-Identifier: GPL-2.0 - * Microchip switch driver common header +/* SPDX-License-Identifier: GPL-2.0 */ +/* Microchip switch driver common header * * Copyright (C) 2017-2019 Microchip Technology Inc. */ From a03681dd5d1bf7b0ed3aad3162f7810af4c4e05b Mon Sep 17 00:00:00 2001 From: Nishad Kamdar Date: Sat, 12 Oct 2019 18:42:28 +0530 Subject: [PATCH 331/572] net: cavium: Use the correct style for SPDX License Identifier This patch corrects the SPDX License Identifier style in header files related to Cavium Ethernet drivers. For C header files Documentation/process/license-rules.rst mandates C-like comments (opposed to C source files where C++ style should be used) Changes made by using a script provided by Joe Perches here: https://lkml.org/lkml/2019/2/7/46. Suggested-by: Joe Perches Signed-off-by: Nishad Kamdar Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/common/cavium_ptp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/common/cavium_ptp.h b/drivers/net/ethernet/cavium/common/cavium_ptp.h index be2bafc7beeb..a04eccbc78e8 100644 --- a/drivers/net/ethernet/cavium/common/cavium_ptp.h +++ b/drivers/net/ethernet/cavium/common/cavium_ptp.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* cavium_ptp.h - PTP 1588 clock on Cavium hardware * Copyright (c) 2003-2015, 2017 Cavium, Inc. */ From dedc5a08da07874c6e0d411e7f39c5c2cf137014 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Sat, 12 Oct 2019 13:55:06 +0200 Subject: [PATCH 332/572] net: avoid errors when trying to pop MLPS header on non-MPLS packets the following script: # tc qdisc add dev eth0 clsact # tc filter add dev eth0 egress matchall action mpls pop implicitly makes the kernel drop all packets transmitted by eth0, if they don't have a MPLS header. This behavior is uncommon: other encapsulations (like VLAN) just let the packet pass unmodified. Since the result of MPLS 'pop' operation would be the same regardless of the presence / absence of MPLS header(s) in the original packet, we can let skb_mpls_pop() return 0 when dealing with non-MPLS packets. For the OVS use-case, this is acceptable because __ovs_nla_copy_actions() already ensures that MPLS 'pop' operation only occurs with packets having an MPLS Ethernet type (and there are no other callers in current code, so the semantic change should be ok). v2: better documentation of use-cases for skb_mpls_pop(), thanks to Simon Horman Fixes: 2a2ea50870ba ("net: sched: add mpls manipulation actions to TC") Reviewed-by: Simon Horman Acked-by: John Hurley Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8c178703467b..03b6809ebde4 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5536,7 +5536,7 @@ int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto) int err; if (unlikely(!eth_p_mpls(skb->protocol))) - return -EINVAL; + return 0; err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN); if (unlikely(err)) From fa4e0f8855fcba600e0be2575ee29c69166f74bd Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Sat, 12 Oct 2019 13:55:07 +0200 Subject: [PATCH 333/572] net/sched: fix corrupted L2 header with MPLS 'push' and 'pop' actions the following script: # tc qdisc add dev eth0 clsact # tc filter add dev eth0 egress protocol ip matchall \ > action mpls push protocol mpls_uc label 0x355aa bos 1 causes corruption of all IP packets transmitted by eth0. On TC egress, we can't rely on the value of skb->mac_len, because it's 0 and a MPLS 'push' operation will result in an overwrite of the first 4 octets in the packet L2 header (e.g. the Destination Address if eth0 is an Ethernet); the same error pattern is present also in the MPLS 'pop' operation. Fix this error in act_mpls data plane, computing 'mac_len' as the difference between the network header and the mac header (when not at TC ingress), and use it in MPLS 'push'/'pop' core functions. v2: unbreak 'make htmldocs' because of missing documentation of 'mac_len' in skb_mpls_pop(), reported by kbuild test robot CC: Lorenzo Bianconi Fixes: 2a2ea50870ba ("net: sched: add mpls manipulation actions to TC") Reviewed-by: Simon Horman Acked-by: John Hurley Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 +++-- net/core/skbuff.c | 19 +++++++++++-------- net/openvswitch/actions.c | 5 +++-- net/sched/act_mpls.c | 12 ++++++++---- 4 files changed, 25 insertions(+), 16 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4351577b14d7..7914fdaf4226 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3510,8 +3510,9 @@ int skb_ensure_writable(struct sk_buff *skb, int write_len); int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci); int skb_vlan_pop(struct sk_buff *skb); int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci); -int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto); -int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto); +int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto, + int mac_len); +int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len); int skb_mpls_update_lse(struct sk_buff *skb, __be32 mpls_lse); int skb_mpls_dec_ttl(struct sk_buff *skb); struct sk_buff *pskb_extract(struct sk_buff *skb, int off, int to_copy, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 03b6809ebde4..867e61df00db 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5477,12 +5477,14 @@ static void skb_mod_eth_type(struct sk_buff *skb, struct ethhdr *hdr, * @skb: buffer * @mpls_lse: MPLS label stack entry to push * @mpls_proto: ethertype of the new MPLS header (expects 0x8847 or 0x8848) + * @mac_len: length of the MAC header * * Expects skb->data at mac header. * * Returns 0 on success, -errno otherwise. */ -int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto) +int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto, + int mac_len) { struct mpls_shim_hdr *lse; int err; @@ -5499,15 +5501,15 @@ int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto) return err; if (!skb->inner_protocol) { - skb_set_inner_network_header(skb, skb->mac_len); + skb_set_inner_network_header(skb, mac_len); skb_set_inner_protocol(skb, skb->protocol); } skb_push(skb, MPLS_HLEN); memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb), - skb->mac_len); + mac_len); skb_reset_mac_header(skb); - skb_set_network_header(skb, skb->mac_len); + skb_set_network_header(skb, mac_len); lse = mpls_hdr(skb); lse->label_stack_entry = mpls_lse; @@ -5526,29 +5528,30 @@ EXPORT_SYMBOL_GPL(skb_mpls_push); * * @skb: buffer * @next_proto: ethertype of header after popped MPLS header + * @mac_len: length of the MAC header * * Expects skb->data at mac header. * * Returns 0 on success, -errno otherwise. */ -int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto) +int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len) { int err; if (unlikely(!eth_p_mpls(skb->protocol))) return 0; - err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN); + err = skb_ensure_writable(skb, mac_len + MPLS_HLEN); if (unlikely(err)) return err; skb_postpull_rcsum(skb, mpls_hdr(skb), MPLS_HLEN); memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb), - skb->mac_len); + mac_len); __skb_pull(skb, MPLS_HLEN); skb_reset_mac_header(skb); - skb_set_network_header(skb, skb->mac_len); + skb_set_network_header(skb, mac_len); if (skb->dev && skb->dev->type == ARPHRD_ETHER) { struct ethhdr *hdr; diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 3572e11b6f21..1c77f520f474 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -165,7 +165,8 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key, { int err; - err = skb_mpls_push(skb, mpls->mpls_lse, mpls->mpls_ethertype); + err = skb_mpls_push(skb, mpls->mpls_lse, mpls->mpls_ethertype, + skb->mac_len); if (err) return err; @@ -178,7 +179,7 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key, { int err; - err = skb_mpls_pop(skb, ethertype); + err = skb_mpls_pop(skb, ethertype, skb->mac_len); if (err) return err; diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index e168df0e008a..4cf6c553bb0b 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -55,7 +55,7 @@ static int tcf_mpls_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_mpls *m = to_mpls(a); struct tcf_mpls_params *p; __be32 new_lse; - int ret; + int ret, mac_len; tcf_lastuse_update(&m->tcf_tm); bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb); @@ -63,8 +63,12 @@ static int tcf_mpls_act(struct sk_buff *skb, const struct tc_action *a, /* Ensure 'data' points at mac_header prior calling mpls manipulating * functions. */ - if (skb_at_tc_ingress(skb)) + if (skb_at_tc_ingress(skb)) { skb_push_rcsum(skb, skb->mac_len); + mac_len = skb->mac_len; + } else { + mac_len = skb_network_header(skb) - skb_mac_header(skb); + } ret = READ_ONCE(m->tcf_action); @@ -72,12 +76,12 @@ static int tcf_mpls_act(struct sk_buff *skb, const struct tc_action *a, switch (p->tcfm_action) { case TCA_MPLS_ACT_POP: - if (skb_mpls_pop(skb, p->tcfm_proto)) + if (skb_mpls_pop(skb, p->tcfm_proto, mac_len)) goto drop; break; case TCA_MPLS_ACT_PUSH: new_lse = tcf_mpls_get_lse(NULL, p, !eth_p_mpls(skb->protocol)); - if (skb_mpls_push(skb, new_lse, p->tcfm_proto)) + if (skb_mpls_push(skb, new_lse, p->tcfm_proto, mac_len)) goto drop; break; case TCA_MPLS_ACT_MODIFY: From c324345ce89c3cc50226372960619c7ee940f616 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 15 Oct 2019 17:37:37 -0700 Subject: [PATCH 334/572] Revert "Input: elantech - enable SMBus on new (2018+) systems" This reverts commit 883a2a80f79ca5c0c105605fafabd1f3df99b34c. Apparently use dmi_get_bios_year() as manufacturing date isn't accurate and this breaks older laptops with new BIOS update. So let's revert this patch. There are still new HP laptops still need to use SMBus to support all features, but it'll be enabled via a whitelist. Signed-off-by: Kai-Heng Feng Acked-by: Benjamin Tissoires Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20191001070845.9720-1-kai.heng.feng@canonical.com Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elantech.c | 55 ++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index 04fe43440a3c..2d8434b7b623 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -1827,31 +1827,6 @@ static int elantech_create_smbus(struct psmouse *psmouse, leave_breadcrumbs); } -static bool elantech_use_host_notify(struct psmouse *psmouse, - struct elantech_device_info *info) -{ - if (ETP_NEW_IC_SMBUS_HOST_NOTIFY(info->fw_version)) - return true; - - switch (info->bus) { - case ETP_BUS_PS2_ONLY: - /* expected case */ - break; - case ETP_BUS_SMB_HST_NTFY_ONLY: - case ETP_BUS_PS2_SMB_HST_NTFY: - /* SMbus implementation is stable since 2018 */ - if (dmi_get_bios_year() >= 2018) - return true; - /* fall through */ - default: - psmouse_dbg(psmouse, - "Ignoring SMBus bus provider %d\n", info->bus); - break; - } - - return false; -} - /** * elantech_setup_smbus - called once the PS/2 devices are enumerated * and decides to instantiate a SMBus InterTouch device. @@ -1871,7 +1846,7 @@ static int elantech_setup_smbus(struct psmouse *psmouse, * i2c_blacklist_pnp_ids. * Old ICs are up to the user to decide. */ - if (!elantech_use_host_notify(psmouse, info) || + if (!ETP_NEW_IC_SMBUS_HOST_NOTIFY(info->fw_version) || psmouse_matches_pnp_id(psmouse, i2c_blacklist_pnp_ids)) return -ENXIO; } @@ -1891,6 +1866,34 @@ static int elantech_setup_smbus(struct psmouse *psmouse, return 0; } +static bool elantech_use_host_notify(struct psmouse *psmouse, + struct elantech_device_info *info) +{ + if (ETP_NEW_IC_SMBUS_HOST_NOTIFY(info->fw_version)) + return true; + + switch (info->bus) { + case ETP_BUS_PS2_ONLY: + /* expected case */ + break; + case ETP_BUS_SMB_ALERT_ONLY: + /* fall-through */ + case ETP_BUS_PS2_SMB_ALERT: + psmouse_dbg(psmouse, "Ignoring SMBus provider through alert protocol.\n"); + break; + case ETP_BUS_SMB_HST_NTFY_ONLY: + /* fall-through */ + case ETP_BUS_PS2_SMB_HST_NTFY: + return true; + default: + psmouse_dbg(psmouse, + "Ignoring SMBus bus provider %d.\n", + info->bus); + } + + return false; +} + int elantech_init_smbus(struct psmouse *psmouse) { struct elantech_device_info info; From 4e3eff5beafa5c97cdf2af2181f46479941a7f45 Mon Sep 17 00:00:00 2001 From: MarkLee Date: Mon, 14 Oct 2019 15:15:17 +0800 Subject: [PATCH 335/572] net: ethernet: mediatek: Fix MT7629 missing GMII mode support In the original design, mtk_phy_connect function will set ge_mode=1 if phy-mode is GMII(PHY_INTERFACE_MODE_GMII) and then set the correct ge_mode to ETHSYS_SYSCFG0 register. This logic was broken after apply mediatek PHYLINK patch(Fixes tag), the new mtk_mac_config function will not set ge_mode=1 for GMII mode hence the final ETHSYS_SYSCFG0 setting will be incorrect for mt7629 GMII mode. This patch add the missing logic back to fix it. Fixes: b8fc9f30821e ("net: ethernet: mediatek: Add basic PHYLINK support") Signed-off-by: MarkLee Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index c61069340f4f..703adb96429e 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -261,6 +261,7 @@ static void mtk_mac_config(struct phylink_config *config, unsigned int mode, ge_mode = 0; switch (state->interface) { case PHY_INTERFACE_MODE_MII: + case PHY_INTERFACE_MODE_GMII: ge_mode = 1; break; case PHY_INTERFACE_MODE_REVMII: From 2618500dd370da413cb1f616111e1bd8d9f5f94f Mon Sep 17 00:00:00 2001 From: MarkLee Date: Mon, 14 Oct 2019 15:15:18 +0800 Subject: [PATCH 336/572] arm: dts: mediatek: Update mt7629 dts to reflect the latest dt-binding * Removes mediatek,physpeed property from dtsi that is useless in PHYLINK * Use the fixed-link property speed = <2500> to set the phy in 2.5Gbit. * Set gmac1 to gmii mode that connect to a internal gphy Signed-off-by: MarkLee Signed-off-by: David S. Miller --- arch/arm/boot/dts/mt7629-rfb.dts | 13 ++++++++++++- arch/arm/boot/dts/mt7629.dtsi | 2 -- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/mt7629-rfb.dts b/arch/arm/boot/dts/mt7629-rfb.dts index 3621b7d2b22a..9980c10c6e29 100644 --- a/arch/arm/boot/dts/mt7629-rfb.dts +++ b/arch/arm/boot/dts/mt7629-rfb.dts @@ -66,9 +66,21 @@ pinctrl-1 = <&ephy_leds_pins>; status = "okay"; + gmac0: mac@0 { + compatible = "mediatek,eth-mac"; + reg = <0>; + phy-mode = "2500base-x"; + fixed-link { + speed = <2500>; + full-duplex; + pause; + }; + }; + gmac1: mac@1 { compatible = "mediatek,eth-mac"; reg = <1>; + phy-mode = "gmii"; phy-handle = <&phy0>; }; @@ -78,7 +90,6 @@ phy0: ethernet-phy@0 { reg = <0>; - phy-mode = "gmii"; }; }; }; diff --git a/arch/arm/boot/dts/mt7629.dtsi b/arch/arm/boot/dts/mt7629.dtsi index 9608bc2ccb3f..867b88103b9d 100644 --- a/arch/arm/boot/dts/mt7629.dtsi +++ b/arch/arm/boot/dts/mt7629.dtsi @@ -468,14 +468,12 @@ compatible = "mediatek,mt7629-sgmiisys", "syscon"; reg = <0x1b128000 0x3000>; #clock-cells = <1>; - mediatek,physpeed = "2500"; }; sgmiisys1: syscon@1b130000 { compatible = "mediatek,mt7629-sgmiisys", "syscon"; reg = <0x1b130000 0x3000>; #clock-cells = <1>; - mediatek,physpeed = "2500"; }; }; }; From cab209e571a9375f7dc6db69a6c40d2d98e57e3b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 14 Oct 2019 06:47:57 -0700 Subject: [PATCH 337/572] tcp: fix a possible lockdep splat in tcp_done() syzbot found that if __inet_inherit_port() returns an error, we call tcp_done() after inet_csk_prepare_forced_close(), meaning the socket lock is no longer held. We might fix this in a different way in net-next, but for 5.4 it seems safer to relax the lockdep check. Fixes: d983ea6f16b8 ("tcp: add rcu protection around tp->fastopen_rsk") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b2ac4f074e2d..42187a3b82f4 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3842,8 +3842,12 @@ void tcp_done(struct sock *sk) { struct request_sock *req; - req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk, - lockdep_sock_is_held(sk)); + /* We might be called with a new socket, after + * inet_csk_prepare_forced_close() has been called + * so we can not use lockdep_sock_is_held(sk) + */ + req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk, 1); + if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS); From b790b5549bdf498ab0ecc5632610a9149532fa38 Mon Sep 17 00:00:00 2001 From: Nishad Kamdar Date: Mon, 14 Oct 2019 21:51:20 +0530 Subject: [PATCH 338/572] net: dsa: sja1105: Use the correct style for SPDX License Identifier This patch corrects the SPDX License Identifier style in header files related to Distributed Switch Architecture drivers for NXP SJA1105 series Ethernet switch support. It uses an expilict block comment for the SPDX License Identifier. Changes made by using a script provided by Joe Perches here: https://lkml.org/lkml/2019/2/7/46. Suggested-by: Joe Perches Signed-off-by: Nishad Kamdar Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105.h | 4 ++-- drivers/net/dsa/sja1105/sja1105_dynamic_config.h | 4 ++-- drivers/net/dsa/sja1105/sja1105_ptp.h | 4 ++-- drivers/net/dsa/sja1105/sja1105_static_config.h | 4 ++-- drivers/net/dsa/sja1105/sja1105_tas.h | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h index e53e494c22e0..fbb564c3beb8 100644 --- a/drivers/net/dsa/sja1105/sja1105.h +++ b/drivers/net/dsa/sja1105/sja1105.h @@ -1,5 +1,5 @@ -/* SPDX-License-Identifier: GPL-2.0 - * Copyright (c) 2018, Sensor-Technik Wiedemann GmbH +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018, Sensor-Technik Wiedemann GmbH * Copyright (c) 2018-2019, Vladimir Oltean */ #ifndef _SJA1105_H diff --git a/drivers/net/dsa/sja1105/sja1105_dynamic_config.h b/drivers/net/dsa/sja1105/sja1105_dynamic_config.h index 740dadf43f01..1fc0d13dc623 100644 --- a/drivers/net/dsa/sja1105/sja1105_dynamic_config.h +++ b/drivers/net/dsa/sja1105/sja1105_dynamic_config.h @@ -1,5 +1,5 @@ -/* SPDX-License-Identifier: GPL-2.0 - * Copyright (c) 2019, Vladimir Oltean +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019, Vladimir Oltean */ #ifndef _SJA1105_DYNAMIC_CONFIG_H #define _SJA1105_DYNAMIC_CONFIG_H diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.h b/drivers/net/dsa/sja1105/sja1105_ptp.h index af456b0a4d27..394e12a6ad59 100644 --- a/drivers/net/dsa/sja1105/sja1105_ptp.h +++ b/drivers/net/dsa/sja1105/sja1105_ptp.h @@ -1,5 +1,5 @@ -/* SPDX-License-Identifier: GPL-2.0 - * Copyright (c) 2019, Vladimir Oltean +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019, Vladimir Oltean */ #ifndef _SJA1105_PTP_H #define _SJA1105_PTP_H diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.h b/drivers/net/dsa/sja1105/sja1105_static_config.h index 7f87022a2d61..f4a5c5c04311 100644 --- a/drivers/net/dsa/sja1105/sja1105_static_config.h +++ b/drivers/net/dsa/sja1105/sja1105_static_config.h @@ -1,5 +1,5 @@ -/* SPDX-License-Identifier: BSD-3-Clause - * Copyright (c) 2016-2018, NXP Semiconductors +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright (c) 2016-2018, NXP Semiconductors * Copyright (c) 2018-2019, Vladimir Oltean */ #ifndef _SJA1105_STATIC_CONFIG_H diff --git a/drivers/net/dsa/sja1105/sja1105_tas.h b/drivers/net/dsa/sja1105/sja1105_tas.h index 0b803c30e640..0aad212d88b2 100644 --- a/drivers/net/dsa/sja1105/sja1105_tas.h +++ b/drivers/net/dsa/sja1105/sja1105_tas.h @@ -1,5 +1,5 @@ -/* SPDX-License-Identifier: GPL-2.0 - * Copyright (c) 2019, Vladimir Oltean +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019, Vladimir Oltean */ #ifndef _SJA1105_TAS_H #define _SJA1105_TAS_H From 39f13ea2f61b439ebe0060393e9c39925c9ee28c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 14 Oct 2019 11:22:30 -0700 Subject: [PATCH 339/572] net: avoid potential infinite loop in tc_ctl_action() tc_ctl_action() has the ability to loop forever if tcf_action_add() returns -EAGAIN. This special case has been done in case a module needed to be loaded, but it turns out that tcf_add_notify() could also return -EAGAIN if the socket sk_rcvbuf limit is hit. We need to separate the two cases, and only loop for the module loading case. While we are at it, add a limit of 10 attempts since unbounded loops are always scary. syzbot repro was something like : socket(PF_NETLINK, SOCK_RAW|SOCK_NONBLOCK, NETLINK_ROUTE) = 3 write(3, ..., 38) = 38 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [0], 4) = 0 sendmsg(3, {msg_name(0)=NULL, msg_iov(1)=[{..., 388}], msg_controllen=0, msg_flags=0x10}, ...) NMI backtrace for cpu 0 CPU: 0 PID: 1054 Comm: khungtaskd Not tainted 5.4.0-rc1+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 nmi_cpu_backtrace.cold+0x70/0xb2 lib/nmi_backtrace.c:101 nmi_trigger_cpumask_backtrace+0x23b/0x28b lib/nmi_backtrace.c:62 arch_trigger_cpumask_backtrace+0x14/0x20 arch/x86/kernel/apic/hw_nmi.c:38 trigger_all_cpu_backtrace include/linux/nmi.h:146 [inline] check_hung_uninterruptible_tasks kernel/hung_task.c:205 [inline] watchdog+0x9d0/0xef0 kernel/hung_task.c:289 kthread+0x361/0x430 kernel/kthread.c:255 ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:352 Sending NMI from CPU 0 to CPUs 1: NMI backtrace for cpu 1 CPU: 1 PID: 8859 Comm: syz-executor910 Not tainted 5.4.0-rc1+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:arch_local_save_flags arch/x86/include/asm/paravirt.h:751 [inline] RIP: 0010:lockdep_hardirqs_off+0x1df/0x2e0 kernel/locking/lockdep.c:3453 Code: 5c 08 00 00 5b 41 5c 41 5d 5d c3 48 c7 c0 58 1d f3 88 48 ba 00 00 00 00 00 fc ff df 48 c1 e8 03 80 3c 10 00 0f 85 d3 00 00 00 <48> 83 3d 21 9e 99 07 00 0f 84 b9 00 00 00 9c 58 0f 1f 44 00 00 f6 RSP: 0018:ffff8880a6f3f1b8 EFLAGS: 00000046 RAX: 1ffffffff11e63ab RBX: ffff88808c9c6080 RCX: 0000000000000000 RDX: dffffc0000000000 RSI: 0000000000000000 RDI: ffff88808c9c6914 RBP: ffff8880a6f3f1d0 R08: ffff88808c9c6080 R09: fffffbfff16be5d1 R10: fffffbfff16be5d0 R11: 0000000000000003 R12: ffffffff8746591f R13: ffff88808c9c6080 R14: ffffffff8746591f R15: 0000000000000003 FS: 00000000011e4880(0000) GS:ffff8880ae900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffff600400 CR3: 00000000a8920000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: trace_hardirqs_off+0x62/0x240 kernel/trace/trace_preemptirq.c:45 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:108 [inline] _raw_spin_lock_irqsave+0x6f/0xcd kernel/locking/spinlock.c:159 __wake_up_common_lock+0xc8/0x150 kernel/sched/wait.c:122 __wake_up+0xe/0x10 kernel/sched/wait.c:142 netlink_unlock_table net/netlink/af_netlink.c:466 [inline] netlink_unlock_table net/netlink/af_netlink.c:463 [inline] netlink_broadcast_filtered+0x705/0xb80 net/netlink/af_netlink.c:1514 netlink_broadcast+0x3a/0x50 net/netlink/af_netlink.c:1534 rtnetlink_send+0xdd/0x110 net/core/rtnetlink.c:714 tcf_add_notify net/sched/act_api.c:1343 [inline] tcf_action_add+0x243/0x370 net/sched/act_api.c:1362 tc_ctl_action+0x3b5/0x4bc net/sched/act_api.c:1410 rtnetlink_rcv_msg+0x463/0xb00 net/core/rtnetlink.c:5386 netlink_rcv_skb+0x177/0x450 net/netlink/af_netlink.c:2477 rtnetlink_rcv+0x1d/0x30 net/core/rtnetlink.c:5404 netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] netlink_unicast+0x531/0x710 net/netlink/af_netlink.c:1328 netlink_sendmsg+0x8a5/0xd60 net/netlink/af_netlink.c:1917 sock_sendmsg_nosec net/socket.c:637 [inline] sock_sendmsg+0xd7/0x130 net/socket.c:657 ___sys_sendmsg+0x803/0x920 net/socket.c:2311 __sys_sendmsg+0x105/0x1d0 net/socket.c:2356 __do_sys_sendmsg net/socket.c:2365 [inline] __se_sys_sendmsg net/socket.c:2363 [inline] __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2363 do_syscall_64+0xfa/0x760 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x440939 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Reported-by: syzbot+cf0adbb9c28c8866c788@syzkaller.appspotmail.com Signed-off-by: David S. Miller --- net/sched/act_api.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 4e7429c6f864..69d4676a402f 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1353,11 +1353,16 @@ static int tcf_action_add(struct net *net, struct nlattr *nla, struct netlink_ext_ack *extack) { size_t attr_size = 0; - int ret = 0; + int loop, ret; struct tc_action *actions[TCA_ACT_MAX_PRIO] = {}; - ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, actions, - &attr_size, true, extack); + for (loop = 0; loop < 10; loop++) { + ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, + actions, &attr_size, true, extack); + if (ret != -EAGAIN) + break; + } + if (ret < 0) return ret; ret = tcf_add_notify(net, n, actions, portid, attr_size, extack); @@ -1407,11 +1412,8 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, */ if (n->nlmsg_flags & NLM_F_REPLACE) ovr = 1; -replay: ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr, extack); - if (ret == -EAGAIN) - goto replay; break; case RTM_DELACTION: ret = tca_action_gd(net, tca[TCA_ACT_TAB], n, From 28aa7c86c2b49f659c8460a89e53b506c45979bb Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Mon, 14 Oct 2019 13:38:22 -0700 Subject: [PATCH 340/572] sched: etf: Fix ordering of packets with same txtime When a application sends many packets with the same txtime, they may be transmitted out of order (different from the order in which they were enqueued). This happens because when inserting elements into the tree, when the txtime of two packets are the same, the new packet is inserted at the left side of the tree, causing the reordering. The only effect of this change should be that packets with the same txtime will be transmitted in the order they are enqueued. The application in question (the AVTP GStreamer plugin, still in development) is sending video traffic, in which each video frame have a single presentation time, the problem is that when packetizing, multiple packets end up with the same txtime. The receiving side was rejecting packets because they were being received out of order. Fixes: 25db26a91364 ("net/sched: Introduce the ETF Qdisc") Reported-by: Ederson de Souza Signed-off-by: Vinicius Costa Gomes Signed-off-by: David S. Miller --- net/sched/sch_etf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c index cebfb65d8556..b1da5589a0c6 100644 --- a/net/sched/sch_etf.c +++ b/net/sched/sch_etf.c @@ -177,7 +177,7 @@ static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch, parent = *p; skb = rb_to_skb(parent); - if (ktime_after(txtime, skb->tstamp)) { + if (ktime_compare(txtime, skb->tstamp) >= 0) { p = &parent->rb_right; leftmost = false; } else { From 63dfb7938b13fa2c2fbcb45f34d065769eb09414 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 15 Oct 2019 15:24:38 +0800 Subject: [PATCH 341/572] sctp: change sctp_prot .no_autobind with true syzbot reported a memory leak: BUG: memory leak, unreferenced object 0xffff888120b3d380 (size 64): backtrace: [...] slab_alloc mm/slab.c:3319 [inline] [...] kmem_cache_alloc+0x13f/0x2c0 mm/slab.c:3483 [...] sctp_bucket_create net/sctp/socket.c:8523 [inline] [...] sctp_get_port_local+0x189/0x5a0 net/sctp/socket.c:8270 [...] sctp_do_bind+0xcc/0x200 net/sctp/socket.c:402 [...] sctp_bindx_add+0x4b/0xd0 net/sctp/socket.c:497 [...] sctp_setsockopt_bindx+0x156/0x1b0 net/sctp/socket.c:1022 [...] sctp_setsockopt net/sctp/socket.c:4641 [inline] [...] sctp_setsockopt+0xaea/0x2dc0 net/sctp/socket.c:4611 [...] sock_common_setsockopt+0x38/0x50 net/core/sock.c:3147 [...] __sys_setsockopt+0x10f/0x220 net/socket.c:2084 [...] __do_sys_setsockopt net/socket.c:2100 [inline] It was caused by when sending msgs without binding a port, in the path: inet_sendmsg() -> inet_send_prepare() -> inet_autobind() -> .get_port/sctp_get_port(), sp->bind_hash will be set while bp->port is not. Later when binding another port by sctp_setsockopt_bindx(), a new bucket will be created as bp->port is not set. sctp's autobind is supposed to call sctp_autobind() where it does all things including setting bp->port. Since sctp_autobind() is called in sctp_sendmsg() if the sk is not yet bound, it should have skipped the auto bind. THis patch is to avoid calling inet_autobind() in inet_send_prepare() by changing sctp_prot .no_autobind with true, also remove the unused .get_port. Reported-by: syzbot+d44f7bbebdea49dbc84a@syzkaller.appspotmail.com Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 939b8d2595bc..5ca0ec0e823c 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -9500,7 +9500,7 @@ struct proto sctp_prot = { .backlog_rcv = sctp_backlog_rcv, .hash = sctp_hash, .unhash = sctp_unhash, - .get_port = sctp_get_port, + .no_autobind = true, .obj_size = sizeof(struct sctp_sock), .useroffset = offsetof(struct sctp_sock, subscribe), .usersize = offsetof(struct sctp_sock, initmsg) - @@ -9542,7 +9542,7 @@ struct proto sctpv6_prot = { .backlog_rcv = sctp_backlog_rcv, .hash = sctp_hash, .unhash = sctp_unhash, - .get_port = sctp_get_port, + .no_autobind = true, .obj_size = sizeof(struct sctp6_sock), .useroffset = offsetof(struct sctp6_sock, sctp.subscribe), .usersize = offsetof(struct sctp6_sock, sctp.initmsg) - From 61c1d33daf7b5146f44d4363b3322f8cda6a6c43 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Tue, 15 Oct 2019 16:42:45 +0200 Subject: [PATCH 342/572] net: i82596: fix dma_alloc_attr for sni_82596 Commit 7f683b920479 ("i825xx: switch to switch to dma_alloc_attrs") switched dma allocation over to dma_alloc_attr, but didn't convert the SNI part to request consistent DMA memory. This broke sni_82596 since driver doesn't do dma_cache_sync for performance reasons. Fix this by using different DMA_ATTRs for lasi_82596 and sni_82596. Fixes: 7f683b920479 ("i825xx: switch to switch to dma_alloc_attrs") Signed-off-by: Thomas Bogendoerfer Signed-off-by: David S. Miller --- drivers/net/ethernet/i825xx/lasi_82596.c | 4 +++- drivers/net/ethernet/i825xx/lib82596.c | 4 ++-- drivers/net/ethernet/i825xx/sni_82596.c | 4 +++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/i825xx/lasi_82596.c b/drivers/net/ethernet/i825xx/lasi_82596.c index 211c5f74b4c8..aec7e98bcc85 100644 --- a/drivers/net/ethernet/i825xx/lasi_82596.c +++ b/drivers/net/ethernet/i825xx/lasi_82596.c @@ -96,6 +96,8 @@ #define OPT_SWAP_PORT 0x0001 /* Need to wordswp on the MPU port */ +#define LIB82596_DMA_ATTR DMA_ATTR_NON_CONSISTENT + #define DMA_WBACK(ndev, addr, len) \ do { dma_cache_sync((ndev)->dev.parent, (void *)addr, len, DMA_TO_DEVICE); } while (0) @@ -200,7 +202,7 @@ static int __exit lan_remove_chip(struct parisc_device *pdev) unregister_netdev (dev); dma_free_attrs(&pdev->dev, sizeof(struct i596_private), lp->dma, - lp->dma_addr, DMA_ATTR_NON_CONSISTENT); + lp->dma_addr, LIB82596_DMA_ATTR); free_netdev (dev); return 0; } diff --git a/drivers/net/ethernet/i825xx/lib82596.c b/drivers/net/ethernet/i825xx/lib82596.c index 1274ad24d6af..f9742af7f142 100644 --- a/drivers/net/ethernet/i825xx/lib82596.c +++ b/drivers/net/ethernet/i825xx/lib82596.c @@ -1065,7 +1065,7 @@ static int i82596_probe(struct net_device *dev) dma = dma_alloc_attrs(dev->dev.parent, sizeof(struct i596_dma), &lp->dma_addr, GFP_KERNEL, - DMA_ATTR_NON_CONSISTENT); + LIB82596_DMA_ATTR); if (!dma) { printk(KERN_ERR "%s: Couldn't get shared memory\n", __FILE__); return -ENOMEM; @@ -1087,7 +1087,7 @@ static int i82596_probe(struct net_device *dev) i = register_netdev(dev); if (i) { dma_free_attrs(dev->dev.parent, sizeof(struct i596_dma), - dma, lp->dma_addr, DMA_ATTR_NON_CONSISTENT); + dma, lp->dma_addr, LIB82596_DMA_ATTR); return i; } diff --git a/drivers/net/ethernet/i825xx/sni_82596.c b/drivers/net/ethernet/i825xx/sni_82596.c index 6eb6c2ff7f09..6436a98c5953 100644 --- a/drivers/net/ethernet/i825xx/sni_82596.c +++ b/drivers/net/ethernet/i825xx/sni_82596.c @@ -24,6 +24,8 @@ static const char sni_82596_string[] = "snirm_82596"; +#define LIB82596_DMA_ATTR 0 + #define DMA_WBACK(priv, addr, len) do { } while (0) #define DMA_INV(priv, addr, len) do { } while (0) #define DMA_WBACK_INV(priv, addr, len) do { } while (0) @@ -152,7 +154,7 @@ static int sni_82596_driver_remove(struct platform_device *pdev) unregister_netdev(dev); dma_free_attrs(dev->dev.parent, sizeof(struct i596_private), lp->dma, - lp->dma_addr, DMA_ATTR_NON_CONSISTENT); + lp->dma_addr, LIB82596_DMA_ATTR); iounmap(lp->ca); iounmap(lp->mpu_port); free_netdev (dev); From 539825a536040e09a2f26eb54bef206e8e8086b5 Mon Sep 17 00:00:00 2001 From: "Ben Dooks (Codethink)" Date: Tue, 15 Oct 2019 17:15:58 +0100 Subject: [PATCH 343/572] davinci_cpdma: make cpdma_chan_split_pool static The cpdma_chan_split_pool() function is not used outside of the driver, so make it static to avoid the following sparse warning: drivers/net/ethernet/ti/davinci_cpdma.c:725:5: warning: symbol 'cpdma_chan_split_pool' was not declared. Should it be static? Signed-off-by: Ben Dooks Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/davinci_cpdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c index a65edd2770e6..37ba708ac781 100644 --- a/drivers/net/ethernet/ti/davinci_cpdma.c +++ b/drivers/net/ethernet/ti/davinci_cpdma.c @@ -722,7 +722,7 @@ static void cpdma_chan_set_descs(struct cpdma_ctlr *ctlr, * cpdma_chan_split_pool - Splits ctrl pool between all channels. * Has to be called under ctlr lock */ -int cpdma_chan_split_pool(struct cpdma_ctlr *ctlr) +static int cpdma_chan_split_pool(struct cpdma_ctlr *ctlr) { int tx_per_ch_desc = 0, rx_per_ch_desc = 0; int free_rx_num = 0, free_tx_num = 0; From bad28d889caea00bc9415c59fa6a433a72eeaf5e Mon Sep 17 00:00:00 2001 From: "Ben Dooks (Codethink)" Date: Tue, 15 Oct 2019 17:17:48 +0100 Subject: [PATCH 344/572] net: stmmac: make tc_flow_parsers static The tc_flow_parsers is not used outside of the driver, so make it static to avoid the following sparse warning: drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c:516:3: warning: symbol 'tc_flow_parsers' was not declared. Should it be static? Signed-off-by: Ben Dooks Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index e231098061b6..f9a9a9d82233 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -510,7 +510,7 @@ static struct stmmac_flow_entry *tc_find_flow(struct stmmac_priv *priv, return NULL; } -struct { +static struct { int (*fn)(struct stmmac_priv *priv, struct flow_cls_offload *cls, struct stmmac_flow_entry *entry); } tc_flow_parsers[] = { From efb86fede98cdc70b674692ff617b1162f642c49 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 15 Oct 2019 10:45:47 -0700 Subject: [PATCH 345/572] net: bcmgenet: Fix RGMII_MODE_EN value for GENET v1/2/3 The RGMII_MODE_EN bit value was 0 for GENET versions 1 through 3, and became 6 for GENET v4 and above, account for that difference. Fixes: aa09677cba42 ("net: bcmgenet: add MDIO routines") Signed-off-by: Florian Fainelli Acked-by: Doug Berger Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.h | 1 + drivers/net/ethernet/broadcom/genet/bcmmii.c | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 4a8fc03d82fd..dbc69d8fa05f 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -366,6 +366,7 @@ struct bcmgenet_mib_counters { #define EXT_PWR_DOWN_PHY_EN (1 << 20) #define EXT_RGMII_OOB_CTRL 0x0C +#define RGMII_MODE_EN_V123 (1 << 0) #define RGMII_LINK (1 << 4) #define OOB_DISABLE (1 << 5) #define RGMII_MODE_EN (1 << 6) diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 94d1dd5d56bf..e7c291bf4ed1 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -258,7 +258,11 @@ int bcmgenet_mii_config(struct net_device *dev, bool init) */ if (priv->ext_phy) { reg = bcmgenet_ext_readl(priv, EXT_RGMII_OOB_CTRL); - reg |= RGMII_MODE_EN | id_mode_dis; + reg |= id_mode_dis; + if (GENET_IS_V1(priv) || GENET_IS_V2(priv) || GENET_IS_V3(priv)) + reg |= RGMII_MODE_EN_V123; + else + reg |= RGMII_MODE_EN; bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL); } From 77b6d09f4ae66d42cd63b121af67780ae3d1a5e9 Mon Sep 17 00:00:00 2001 From: Valentin Vidic Date: Tue, 15 Oct 2019 22:20:20 +0200 Subject: [PATCH 346/572] net: usb: sr9800: fix uninitialized local variable Make sure res does not contain random value if the call to sr_read_cmd fails for some reason. Reported-by: syzbot+f1842130bbcfb335bac1@syzkaller.appspotmail.com Signed-off-by: Valentin Vidic Signed-off-by: David S. Miller --- drivers/net/usb/sr9800.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/sr9800.c b/drivers/net/usb/sr9800.c index c5d4a0060124..681e0def6356 100644 --- a/drivers/net/usb/sr9800.c +++ b/drivers/net/usb/sr9800.c @@ -335,7 +335,7 @@ static void sr_set_multicast(struct net_device *net) static int sr_mdio_read(struct net_device *net, int phy_id, int loc) { struct usbnet *dev = netdev_priv(net); - __le16 res; + __le16 res = 0; mutex_lock(&dev->phy_mutex); sr_set_sw_mii(dev); From 5bf4e52ff0317db083fafee010dc806f8d4cb0cb Mon Sep 17 00:00:00 2001 From: Greentime Hu Date: Tue, 8 Oct 2019 14:45:24 +0800 Subject: [PATCH 347/572] RISC-V: fix virtual address overlapped in FIXADDR_START and VMEMMAP_START This patch fixes the virtual address layout in pgtable.h. The virtual address of FIXADDR_START and VMEMMAP_START should not be overlapped. Fixes: d95f1a542c3d ("RISC-V: Implement sparsemem") Signed-off-by: Greentime Hu Reviewed-by: Anup Patel [paul.walmsley@sifive.com: fixed patch description] Signed-off-by: Paul Walmsley --- arch/riscv/include/asm/pgtable.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 7255f2d8395b..42292d99cc74 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -87,14 +87,6 @@ extern pgd_t swapper_pg_dir[]; #define VMALLOC_END (PAGE_OFFSET - 1) #define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE) -#define FIXADDR_TOP VMALLOC_START -#ifdef CONFIG_64BIT -#define FIXADDR_SIZE PMD_SIZE -#else -#define FIXADDR_SIZE PGDIR_SIZE -#endif -#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) - /* * Roughly size the vmemmap space to be large enough to fit enough * struct pages to map half the virtual address space. Then @@ -108,6 +100,14 @@ extern pgd_t swapper_pg_dir[]; #define vmemmap ((struct page *)VMEMMAP_START) +#define FIXADDR_TOP (VMEMMAP_START) +#ifdef CONFIG_64BIT +#define FIXADDR_SIZE PMD_SIZE +#else +#define FIXADDR_SIZE PGDIR_SIZE +#endif +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) + /* * ZERO_PAGE is a global shared page that is always zero, * used for zero-mapped memory areas, etc. From 775fd6bfefc66a8c33e91dd9687ed530643b954d Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Tue, 15 Oct 2019 21:51:43 -0700 Subject: [PATCH 348/572] xtensa: fix change_bit in exclusive access option change_bit implementation for XCHAL_HAVE_EXCLUSIVE case changes all bits except the one required due to copy-paste error from clear_bit. Cc: stable@vger.kernel.org # v5.2+ Fixes: f7c34874f04a ("xtensa: add exclusive atomics support") Signed-off-by: Max Filippov --- arch/xtensa/include/asm/bitops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/xtensa/include/asm/bitops.h b/arch/xtensa/include/asm/bitops.h index aeb15f4c755b..be8b2be5a98b 100644 --- a/arch/xtensa/include/asm/bitops.h +++ b/arch/xtensa/include/asm/bitops.h @@ -148,7 +148,7 @@ static inline void change_bit(unsigned int bit, volatile unsigned long *p) " getex %0\n" " beqz %0, 1b\n" : "=&a" (tmp) - : "a" (~mask), "a" (p) + : "a" (mask), "a" (p) : "memory"); } From 6f1d1dc8d540a9aa6e39b9cb86d3a67bbc1c8d8d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 11 Oct 2019 11:57:35 +0200 Subject: [PATCH 349/572] USB: serial: ti_usb_3410_5052: fix port-close races Fix races between closing a port and opening or closing another port on the same device which could lead to a failure to start or stop the shared interrupt URB. The latter could potentially cause a use-after-free or worse in the completion handler on driver unbind. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/ti_usb_3410_5052.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c index dd0ad67aa71e..9174ba2e06da 100644 --- a/drivers/usb/serial/ti_usb_3410_5052.c +++ b/drivers/usb/serial/ti_usb_3410_5052.c @@ -776,7 +776,6 @@ static void ti_close(struct usb_serial_port *port) struct ti_port *tport; int port_number; int status; - int do_unlock; unsigned long flags; tdev = usb_get_serial_data(port->serial); @@ -800,16 +799,13 @@ static void ti_close(struct usb_serial_port *port) "%s - cannot send close port command, %d\n" , __func__, status); - /* if mutex_lock is interrupted, continue anyway */ - do_unlock = !mutex_lock_interruptible(&tdev->td_open_close_lock); + mutex_lock(&tdev->td_open_close_lock); --tport->tp_tdev->td_open_port_count; - if (tport->tp_tdev->td_open_port_count <= 0) { + if (tport->tp_tdev->td_open_port_count == 0) { /* last port is closed, shut down interrupt urb */ usb_kill_urb(port->serial->port[0]->interrupt_in_urb); - tport->tp_tdev->td_open_port_count = 0; } - if (do_unlock) - mutex_unlock(&tdev->td_open_close_lock); + mutex_unlock(&tdev->td_open_close_lock); } From bc25770f00d3f4e7482278f9823c2c2793605484 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 11 Oct 2019 11:57:36 +0200 Subject: [PATCH 350/572] USB: serial: ti_usb_3410_5052: clean up serial data access Use the tdev pointer directly instead of going through the port data when accessing the serial data in close(). Signed-off-by: Johan Hovold --- drivers/usb/serial/ti_usb_3410_5052.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c index 9174ba2e06da..ef23acc9b9ce 100644 --- a/drivers/usb/serial/ti_usb_3410_5052.c +++ b/drivers/usb/serial/ti_usb_3410_5052.c @@ -800,8 +800,8 @@ static void ti_close(struct usb_serial_port *port) , __func__, status); mutex_lock(&tdev->td_open_close_lock); - --tport->tp_tdev->td_open_port_count; - if (tport->tp_tdev->td_open_port_count == 0) { + --tdev->td_open_port_count; + if (tdev->td_open_port_count == 0) { /* last port is closed, shut down interrupt urb */ usb_kill_urb(port->serial->port[0]->interrupt_in_urb); } From 2d8b39a62a5d386a73f339e46fe05354a3a4895b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 15 Oct 2019 19:35:20 +0200 Subject: [PATCH 351/572] ACPI: processor: Avoid NULL pointer dereferences at init time If there are neither processor objects nor processor device objects in the ACPI tables, the per-CPU processors table will not be initialized and attempting to dereference pointers from there will cause the kernel to crash. This happens in acpi_processor_ppc_init() and acpi_thermal_cpufreq_init() after commit d15ce412737a ("ACPI: cpufreq: Switch to QoS requests instead of cpufreq notifier") which didn't add the requisite NULL pointer checks in there. Add the NULL pointer checks to acpi_processor_ppc_init() and acpi_thermal_cpufreq_init(), and to the corresponding "exit" routines. While at it, drop redundant return instructions from acpi_processor_ppc_init() and acpi_thermal_cpufreq_init(). Fixes: d15ce412737a ("ACPI: cpufreq: Switch to QoS requests instead of cpufreq notifier") Reported-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/acpi/processor_perflib.c | 10 ++++++---- drivers/acpi/processor_thermal.c | 10 ++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index 2261713d1aec..930a49fa4dfc 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -162,21 +162,23 @@ void acpi_processor_ppc_init(int cpu) struct acpi_processor *pr = per_cpu(processors, cpu); int ret; + if (!pr) + return; + ret = dev_pm_qos_add_request(get_cpu_device(cpu), &pr->perflib_req, DEV_PM_QOS_MAX_FREQUENCY, INT_MAX); - if (ret < 0) { + if (ret < 0) pr_err("Failed to add freq constraint for CPU%d (%d)\n", cpu, ret); - return; - } } void acpi_processor_ppc_exit(int cpu) { struct acpi_processor *pr = per_cpu(processors, cpu); - dev_pm_qos_remove_request(&pr->perflib_req); + if (pr) + dev_pm_qos_remove_request(&pr->perflib_req); } static int acpi_processor_get_performance_control(struct acpi_processor *pr) diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c index ec2638f1df4f..8227c7dd75b1 100644 --- a/drivers/acpi/processor_thermal.c +++ b/drivers/acpi/processor_thermal.c @@ -130,21 +130,23 @@ void acpi_thermal_cpufreq_init(int cpu) struct acpi_processor *pr = per_cpu(processors, cpu); int ret; + if (!pr) + return; + ret = dev_pm_qos_add_request(get_cpu_device(cpu), &pr->thermal_req, DEV_PM_QOS_MAX_FREQUENCY, INT_MAX); - if (ret < 0) { + if (ret < 0) pr_err("Failed to add freq constraint for CPU%d (%d)\n", cpu, ret); - return; - } } void acpi_thermal_cpufreq_exit(int cpu) { struct acpi_processor *pr = per_cpu(processors, cpu); - dev_pm_qos_remove_request(&pr->thermal_req); + if (pr) + dev_pm_qos_remove_request(&pr->thermal_req); } #else /* ! CONFIG_CPU_FREQ */ static int cpufreq_get_max_state(unsigned int cpu) From 8f1c9dffe30b04219690f8e94a27b2ab977a66f2 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Fri, 11 Oct 2019 07:55:45 +0000 Subject: [PATCH 352/572] pinctrl: berlin: as370: fix a typo s/spififib/spdifib The function should be spdifib, fix this typo. Fixes: 423ddc580b13 ("pinctrl: berlin: add the as370 SoC pinctrl driver") Signed-off-by: Jisheng Zhang Link: https://lore.kernel.org/r/20191011154321.44f08f9a@xhacker.debian Signed-off-by: Linus Walleij --- drivers/pinctrl/berlin/pinctrl-as370.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/berlin/pinctrl-as370.c b/drivers/pinctrl/berlin/pinctrl-as370.c index 44f8ccdbeeff..9dfdc275ee33 100644 --- a/drivers/pinctrl/berlin/pinctrl-as370.c +++ b/drivers/pinctrl/berlin/pinctrl-as370.c @@ -43,7 +43,7 @@ static const struct berlin_desc_group as370_soc_pinctrl_groups[] = { BERLIN_PINCTRL_FUNCTION(0x0, "gpio"), /* GPIO5 */ BERLIN_PINCTRL_FUNCTION(0x1, "i2s1"), /* DO3 */ BERLIN_PINCTRL_FUNCTION(0x2, "pwm"), /* PWM5 */ - BERLIN_PINCTRL_FUNCTION(0x3, "spififib"), /* SPDIFIB */ + BERLIN_PINCTRL_FUNCTION(0x3, "spdifib"), /* SPDIFIB */ BERLIN_PINCTRL_FUNCTION(0x4, "spdifo"), /* SPDIFO */ BERLIN_PINCTRL_FUNCTION(0x5, "phy")), /* DBG5 */ BERLIN_PINCTRL_GROUP("I2S1_MCLK", 0x0, 0x3, 0x12, From f418dddffc8007945fd5962380ebde770a240cf5 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 16 Oct 2019 23:27:32 +1100 Subject: [PATCH 353/572] usercopy: Avoid soft lockups in test_check_nonzero_user() On a machine with a 64K PAGE_SIZE, the nested for loops in test_check_nonzero_user() can lead to soft lockups, eg: watchdog: BUG: soft lockup - CPU#4 stuck for 22s! [modprobe:611] Modules linked in: test_user_copy(+) vmx_crypto gf128mul crc32c_vpmsum virtio_balloon ip_tables x_tables autofs4 CPU: 4 PID: 611 Comm: modprobe Tainted: G L 5.4.0-rc1-gcc-8.2.0-00001-gf5a1a536fa14-dirty #1151 ... NIP __might_sleep+0x20/0xc0 LR __might_fault+0x40/0x60 Call Trace: check_zeroed_user+0x12c/0x200 test_user_copy_init+0x67c/0x1210 [test_user_copy] do_one_initcall+0x60/0x340 do_init_module+0x7c/0x2f0 load_module+0x2d94/0x30e0 __do_sys_finit_module+0xc8/0x150 system_call+0x5c/0x68 Even with a 4K PAGE_SIZE the test takes multiple seconds. Instead tweak it to only scan a 1024 byte region, but make it cross the page boundary. Fixes: f5a1a536fa14 ("lib: introduce copy_struct_from_user() helper") Suggested-by: Aleksa Sarai Signed-off-by: Michael Ellerman Reviewed-by: Aleksa Sarai Acked-by: Christian Brauner Link: https://lore.kernel.org/r/20191016122732.13467-1-mpe@ellerman.id.au Signed-off-by: Christian Brauner --- lib/test_user_copy.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/lib/test_user_copy.c b/lib/test_user_copy.c index ad2372727b1b..5ff04d8fe971 100644 --- a/lib/test_user_copy.c +++ b/lib/test_user_copy.c @@ -47,9 +47,25 @@ static bool is_zeroed(void *from, size_t size) static int test_check_nonzero_user(char *kmem, char __user *umem, size_t size) { int ret = 0; - size_t start, end, i; - size_t zero_start = size / 4; - size_t zero_end = size - zero_start; + size_t start, end, i, zero_start, zero_end; + + if (test(size < 2 * PAGE_SIZE, "buffer too small")) + return -EINVAL; + + /* + * We want to cross a page boundary to exercise the code more + * effectively. We also don't want to make the size we scan too large, + * otherwise the test can take a long time and cause soft lockups. So + * scan a 1024 byte region across the page boundary. + */ + size = 1024; + start = PAGE_SIZE - (size / 2); + + kmem += start; + umem += start; + + zero_start = size / 4; + zero_end = size - zero_start; /* * We conduct a series of check_nonzero_user() tests on a block of From 1abecfcaa7bba21c9985e0136fa49836164dd8fd Mon Sep 17 00:00:00 2001 From: Yunfeng Ye Date: Wed, 16 Oct 2019 16:38:45 +0800 Subject: [PATCH 354/572] perf kmem: Fix memory leak in compact_gfp_flags() The memory @orig_flags is allocated by strdup(), it is freed on the normal path, but leak to free on the error path. Fix this by adding free(orig_flags) on the error path. Fixes: 0e11115644b3 ("perf kmem: Print gfp flags in human readable string") Signed-off-by: Yunfeng Ye Cc: Alexander Shishkin Cc: Feilong Lin Cc: Hu Shiyuan Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/f9e9f458-96f3-4a97-a1d5-9feec2420e07@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kmem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 1e61e353f579..9661671cc26e 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -691,6 +691,7 @@ static char *compact_gfp_flags(char *gfp_flags) new = realloc(new_flags, len + strlen(cpt) + 2); if (new == NULL) { free(new_flags); + free(orig_flags); return NULL; } From 9091a0698be23561b2e1898139a62f8ba6d61d2d Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Tue, 8 Oct 2019 15:11:47 +1030 Subject: [PATCH 355/572] dt-bindings: pinctrl: aspeed-g6: Rework SD3 function and groups Rename SD3 functions and groups to EMMC to better reflect their intended use before the binding escapes too far into the wild. Also clean up the SD3 pin groups to eliminate some silliness that slipped through the cracks (SD3DAT[4-7]) by unifying them into three new groups: EMMCG1, EMMCG4 and EMMCG8 for 1, 4 and 8-bit data buses respectively. Signed-off-by: Andrew Jeffery Link: https://lore.kernel.org/r/20191008044153.12734-2-andrew@aj.id.au Reviewed-by: Rob Herring Reviewed-by: Joel Stanley Signed-off-by: Linus Walleij --- .../pinctrl/aspeed,ast2600-pinctrl.yaml | 86 +++++++++---------- 1 file changed, 42 insertions(+), 44 deletions(-) diff --git a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml index f83d888176cc..064b7dfc4252 100644 --- a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml @@ -33,13 +33,13 @@ patternProperties: allOf: - $ref: "/schemas/types.yaml#/definitions/string" - enum: [ ADC0, ADC1, ADC10, ADC11, ADC12, ADC13, ADC14, ADC15, - ADC2, ADC3, ADC4, ADC5, ADC6, ADC7, ADC8, ADC9, BMCINT, ESPI, - ESPIALT, FSI1, FSI2, FWSPIABR, FWSPID, FWSPIWP, GPIT0, GPIT1, - GPIT2, GPIT3, GPIT4, GPIT5, GPIT6, GPIT7, GPIU0, GPIU1, GPIU2, - GPIU3, GPIU4, GPIU5, GPIU6, GPIU7, I2C1, I2C10, I2C11, I2C12, - I2C13, I2C14, I2C15, I2C16, I2C2, I2C3, I2C4, I2C5, I2C6, I2C7, - I2C8, I2C9, I3C3, I3C4, I3C5, I3C6, JTAGM, LHPD, LHSIRQ, LPC, - LPCHC, LPCPD, LPCPME, LPCSMI, LSIRQ, MACLINK1, MACLINK2, + ADC2, ADC3, ADC4, ADC5, ADC6, ADC7, ADC8, ADC9, BMCINT, EMMC, + ESPI, ESPIALT, FSI1, FSI2, FWSPIABR, FWSPID, FWSPIWP, GPIT0, + GPIT1, GPIT2, GPIT3, GPIT4, GPIT5, GPIT6, GPIT7, GPIU0, GPIU1, + GPIU2, GPIU3, GPIU4, GPIU5, GPIU6, GPIU7, I2C1, I2C10, I2C11, + I2C12, I2C13, I2C14, I2C15, I2C16, I2C2, I2C3, I2C4, I2C5, I2C6, + I2C7, I2C8, I2C9, I3C3, I3C4, I3C5, I3C6, JTAGM, LHPD, LHSIRQ, + LPC, LPCHC, LPCPD, LPCPME, LPCSMI, LSIRQ, MACLINK1, MACLINK2, MACLINK3, MACLINK4, MDIO1, MDIO2, MDIO3, MDIO4, NCTS1, NCTS2, NCTS3, NCTS4, NDCD1, NDCD2, NDCD3, NDCD4, NDSR1, NDSR2, NDSR3, NDSR4, NDTR1, NDTR2, NDTR3, NDTR4, NRI1, NRI2, NRI3, NRI4, NRTS1, @@ -48,47 +48,45 @@ patternProperties: PWM8, PWM9, RGMII1, RGMII2, RGMII3, RGMII4, RMII1, RMII2, RMII3, RMII4, RXD1, RXD2, RXD3, RXD4, SALT1, SALT10, SALT11, SALT12, SALT13, SALT14, SALT15, SALT16, SALT2, SALT3, SALT4, SALT5, - SALT6, SALT7, SALT8, SALT9, SD1, SD2, SD3, SD3DAT4, SD3DAT5, - SD3DAT6, SD3DAT7, SGPM1, SGPS1, SIOONCTRL, SIOPBI, SIOPBO, - SIOPWREQ, SIOPWRGD, SIOS3, SIOS5, SIOSCI, SPI1, SPI1ABR, SPI1CS1, - SPI1WP, SPI2, SPI2CS1, SPI2CS2, TACH0, TACH1, TACH10, TACH11, - TACH12, TACH13, TACH14, TACH15, TACH2, TACH3, TACH4, TACH5, - TACH6, TACH7, TACH8, TACH9, THRU0, THRU1, THRU2, THRU3, TXD1, - TXD2, TXD3, TXD4, UART10, UART11, UART12, UART13, UART6, UART7, - UART8, UART9, VB, VGAHS, VGAVS, WDTRST1, WDTRST2, WDTRST3, - WDTRST4, ] + SALT6, SALT7, SALT8, SALT9, SD1, SD2, SGPM1, SGPS1, SIOONCTRL, + SIOPBI, SIOPBO, SIOPWREQ, SIOPWRGD, SIOS3, SIOS5, SIOSCI, SPI1, + SPI1ABR, SPI1CS1, SPI1WP, SPI2, SPI2CS1, SPI2CS2, TACH0, TACH1, + TACH10, TACH11, TACH12, TACH13, TACH14, TACH15, TACH2, TACH3, + TACH4, TACH5, TACH6, TACH7, TACH8, TACH9, THRU0, THRU1, THRU2, + THRU3, TXD1, TXD2, TXD3, TXD4, UART10, UART11, UART12, UART13, + UART6, UART7, UART8, UART9, VB, VGAHS, VGAVS, WDTRST1, WDTRST2, + WDTRST3, WDTRST4, ] groups: allOf: - $ref: "/schemas/types.yaml#/definitions/string" - enum: [ ADC0, ADC1, ADC10, ADC11, ADC12, ADC13, ADC14, ADC15, - ADC2, ADC3, ADC4, ADC5, ADC6, ADC7, ADC8, ADC9, BMCINT, ESPI, - ESPIALT, FSI1, FSI2, FWSPIABR, FWSPID, FWQSPID, FWSPIWP, GPIT0, - GPIT1, GPIT2, GPIT3, GPIT4, GPIT5, GPIT6, GPIT7, GPIU0, GPIU1, - GPIU2, GPIU3, GPIU4, GPIU5, GPIU6, GPIU7, HVI3C3, HVI3C4, I2C1, - I2C10, I2C11, I2C12, I2C13, I2C14, I2C15, I2C16, I2C2, I2C3, - I2C4, I2C5, I2C6, I2C7, I2C8, I2C9, I3C3, I3C4, I3C5, I3C6, - JTAGM, LHPD, LHSIRQ, LPC, LPCHC, LPCPD, LPCPME, LPCSMI, LSIRQ, - MACLINK1, MACLINK2, MACLINK3, MACLINK4, MDIO1, MDIO2, MDIO3, - MDIO4, NCTS1, NCTS2, NCTS3, NCTS4, NDCD1, NDCD2, NDCD3, NDCD4, - NDSR1, NDSR2, NDSR3, NDSR4, NDTR1, NDTR2, NDTR3, NDTR4, NRI1, - NRI2, NRI3, NRI4, NRTS1, NRTS2, NRTS3, NRTS4, OSCCLK, PEWAKE, - PWM0, PWM1, PWM10G0, PWM10G1, PWM11G0, PWM11G1, PWM12G0, PWM12G1, - PWM13G0, PWM13G1, PWM14G0, PWM14G1, PWM15G0, PWM15G1, PWM2, PWM3, - PWM4, PWM5, PWM6, PWM7, PWM8G0, PWM8G1, PWM9G0, PWM9G1, QSPI1, - QSPI2, RGMII1, RGMII2, RGMII3, RGMII4, RMII1, RMII2, RMII3, - RMII4, RXD1, RXD2, RXD3, RXD4, SALT1, SALT10G0, SALT10G1, - SALT11G0, SALT11G1, SALT12G0, SALT12G1, SALT13G0, SALT13G1, - SALT14G0, SALT14G1, SALT15G0, SALT15G1, SALT16G0, SALT16G1, - SALT2, SALT3, SALT4, SALT5, SALT6, SALT7, SALT8, SALT9G0, - SALT9G1, SD1, SD2, SD3, SD3DAT4, SD3DAT5, SD3DAT6, SD3DAT7, - SGPM1, SGPS1, SIOONCTRL, SIOPBI, SIOPBO, SIOPWREQ, SIOPWRGD, - SIOS3, SIOS5, SIOSCI, SPI1, SPI1ABR, SPI1CS1, SPI1WP, SPI2, - SPI2CS1, SPI2CS2, TACH0, TACH1, TACH10, TACH11, TACH12, TACH13, - TACH14, TACH15, TACH2, TACH3, TACH4, TACH5, TACH6, TACH7, TACH8, - TACH9, THRU0, THRU1, THRU2, THRU3, TXD1, TXD2, TXD3, TXD4, - UART10, UART11, UART12G0, UART12G1, UART13G0, UART13G1, UART6, - UART7, UART8, UART9, VB, VGAHS, VGAVS, WDTRST1, WDTRST2, WDTRST3, - WDTRST4, ] + ADC2, ADC3, ADC4, ADC5, ADC6, ADC7, ADC8, ADC9, BMCINT, EMMCG1, + EMMCG4, EMMCG8, ESPI, ESPIALT, FSI1, FSI2, FWSPIABR, FWSPID, + FWQSPID, FWSPIWP, GPIT0, GPIT1, GPIT2, GPIT3, GPIT4, GPIT5, + GPIT6, GPIT7, GPIU0, GPIU1, GPIU2, GPIU3, GPIU4, GPIU5, GPIU6, + GPIU7, HVI3C3, HVI3C4, I2C1, I2C10, I2C11, I2C12, I2C13, I2C14, + I2C15, I2C16, I2C2, I2C3, I2C4, I2C5, I2C6, I2C7, I2C8, I2C9, + I3C3, I3C4, I3C5, I3C6, JTAGM, LHPD, LHSIRQ, LPC, LPCHC, LPCPD, + LPCPME, LPCSMI, LSIRQ, MACLINK1, MACLINK2, MACLINK3, MACLINK4, + MDIO1, MDIO2, MDIO3, MDIO4, NCTS1, NCTS2, NCTS3, NCTS4, NDCD1, + NDCD2, NDCD3, NDCD4, NDSR1, NDSR2, NDSR3, NDSR4, NDTR1, NDTR2, + NDTR3, NDTR4, NRI1, NRI2, NRI3, NRI4, NRTS1, NRTS2, NRTS3, NRTS4, + OSCCLK, PEWAKE, PWM0, PWM1, PWM10G0, PWM10G1, PWM11G0, PWM11G1, + PWM12G0, PWM12G1, PWM13G0, PWM13G1, PWM14G0, PWM14G1, PWM15G0, + PWM15G1, PWM2, PWM3, PWM4, PWM5, PWM6, PWM7, PWM8G0, PWM8G1, + PWM9G0, PWM9G1, QSPI1, QSPI2, RGMII1, RGMII2, RGMII3, RGMII4, + RMII1, RMII2, RMII3, RMII4, RXD1, RXD2, RXD3, RXD4, SALT1, + SALT10G0, SALT10G1, SALT11G0, SALT11G1, SALT12G0, SALT12G1, + SALT13G0, SALT13G1, SALT14G0, SALT14G1, SALT15G0, SALT15G1, + SALT16G0, SALT16G1, SALT2, SALT3, SALT4, SALT5, SALT6, SALT7, + SALT8, SALT9G0, SALT9G1, SD1, SD2, SD3, SGPM1, SGPS1, SIOONCTRL, + SIOPBI, SIOPBO, SIOPWREQ, SIOPWRGD, SIOS3, SIOS5, SIOSCI, SPI1, + SPI1ABR, SPI1CS1, SPI1WP, SPI2, SPI2CS1, SPI2CS2, TACH0, TACH1, + TACH10, TACH11, TACH12, TACH13, TACH14, TACH15, TACH2, TACH3, + TACH4, TACH5, TACH6, TACH7, TACH8, TACH9, THRU0, THRU1, THRU2, + THRU3, TXD1, TXD2, TXD3, TXD4, UART10, UART11, UART12G0, + UART12G1, UART13G0, UART13G1, UART6, UART7, UART8, UART9, VB, + VGAHS, VGAVS, WDTRST1, WDTRST2, WDTRST3, WDTRST4, ] required: - compatible From 377dfcdcc0469b6b38f7943de63a13132b066679 Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Tue, 8 Oct 2019 15:11:48 +1030 Subject: [PATCH 356/572] pinctrl: aspeed-g6: Sort pins for sanity Some pins crept in that weren't ordered in the list. Signed-off-by: Andrew Jeffery Link: https://lore.kernel.org/r/20191008044153.12734-3-andrew@aj.id.au Reviewed-by: Joel Stanley Signed-off-by: Linus Walleij --- drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c index 648ddb7f038a..ff208b7c75a8 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c @@ -1574,6 +1574,8 @@ static struct pinctrl_pin_desc aspeed_g6_pins[ASPEED_G6_NR_PINS] = { ASPEED_PINCTRL_PIN(A3), ASPEED_PINCTRL_PIN(AA11), ASPEED_PINCTRL_PIN(AA12), + ASPEED_PINCTRL_PIN(AA16), + ASPEED_PINCTRL_PIN(AA17), ASPEED_PINCTRL_PIN(AA23), ASPEED_PINCTRL_PIN(AA24), ASPEED_PINCTRL_PIN(AA25), @@ -1585,6 +1587,8 @@ static struct pinctrl_pin_desc aspeed_g6_pins[ASPEED_G6_NR_PINS] = { ASPEED_PINCTRL_PIN(AB11), ASPEED_PINCTRL_PIN(AB12), ASPEED_PINCTRL_PIN(AB15), + ASPEED_PINCTRL_PIN(AB16), + ASPEED_PINCTRL_PIN(AB17), ASPEED_PINCTRL_PIN(AB18), ASPEED_PINCTRL_PIN(AB19), ASPEED_PINCTRL_PIN(AB22), @@ -1602,6 +1606,7 @@ static struct pinctrl_pin_desc aspeed_g6_pins[ASPEED_G6_NR_PINS] = { ASPEED_PINCTRL_PIN(AC11), ASPEED_PINCTRL_PIN(AC12), ASPEED_PINCTRL_PIN(AC15), + ASPEED_PINCTRL_PIN(AC16), ASPEED_PINCTRL_PIN(AC17), ASPEED_PINCTRL_PIN(AC18), ASPEED_PINCTRL_PIN(AC19), @@ -1619,6 +1624,7 @@ static struct pinctrl_pin_desc aspeed_g6_pins[ASPEED_G6_NR_PINS] = { ASPEED_PINCTRL_PIN(AD12), ASPEED_PINCTRL_PIN(AD14), ASPEED_PINCTRL_PIN(AD15), + ASPEED_PINCTRL_PIN(AD16), ASPEED_PINCTRL_PIN(AD19), ASPEED_PINCTRL_PIN(AD20), ASPEED_PINCTRL_PIN(AD22), @@ -1634,8 +1640,11 @@ static struct pinctrl_pin_desc aspeed_g6_pins[ASPEED_G6_NR_PINS] = { ASPEED_PINCTRL_PIN(AE12), ASPEED_PINCTRL_PIN(AE14), ASPEED_PINCTRL_PIN(AE15), + ASPEED_PINCTRL_PIN(AE16), ASPEED_PINCTRL_PIN(AE18), ASPEED_PINCTRL_PIN(AE19), + ASPEED_PINCTRL_PIN(AE25), + ASPEED_PINCTRL_PIN(AE26), ASPEED_PINCTRL_PIN(AE7), ASPEED_PINCTRL_PIN(AE8), ASPEED_PINCTRL_PIN(AF10), @@ -1643,6 +1652,8 @@ static struct pinctrl_pin_desc aspeed_g6_pins[ASPEED_G6_NR_PINS] = { ASPEED_PINCTRL_PIN(AF12), ASPEED_PINCTRL_PIN(AF14), ASPEED_PINCTRL_PIN(AF15), + ASPEED_PINCTRL_PIN(AF24), + ASPEED_PINCTRL_PIN(AF25), ASPEED_PINCTRL_PIN(AF7), ASPEED_PINCTRL_PIN(AF8), ASPEED_PINCTRL_PIN(AF9), @@ -1792,17 +1803,6 @@ static struct pinctrl_pin_desc aspeed_g6_pins[ASPEED_G6_NR_PINS] = { ASPEED_PINCTRL_PIN(Y3), ASPEED_PINCTRL_PIN(Y4), ASPEED_PINCTRL_PIN(Y5), - ASPEED_PINCTRL_PIN(AB16), - ASPEED_PINCTRL_PIN(AA17), - ASPEED_PINCTRL_PIN(AB17), - ASPEED_PINCTRL_PIN(AE16), - ASPEED_PINCTRL_PIN(AC16), - ASPEED_PINCTRL_PIN(AA16), - ASPEED_PINCTRL_PIN(AD16), - ASPEED_PINCTRL_PIN(AF25), - ASPEED_PINCTRL_PIN(AE26), - ASPEED_PINCTRL_PIN(AE25), - ASPEED_PINCTRL_PIN(AF24), }; static const struct aspeed_pin_group aspeed_g6_groups[] = { From b178f91f449caec552e66d9842dcc2090a2e0987 Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Tue, 8 Oct 2019 15:11:49 +1030 Subject: [PATCH 357/572] pinctrl: aspeed-g6: Fix I2C14 SDA description The I2C function the pin participated in was incorrectly named SDA14 which lead to a failure to mux: [ 6.884344] No function I2C14 found on pin 7 (7). Found signal(s) MACLINK4, SDA14, GPIOA7 for function(s) MACLINK4, SDA14, GPIOA7 Fixes: 58dc52ad00a0 ("pinctrl: aspeed: Add AST2600 pinmux support") Signed-off-by: Andrew Jeffery Link: https://lore.kernel.org/r/20191008044153.12734-4-andrew@aj.id.au Reviewed-by: Joel Stanley Signed-off-by: Linus Walleij --- drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c index ff208b7c75a8..9079655cc818 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c @@ -87,7 +87,7 @@ FUNC_GROUP_DECL(MACLINK3, L23); #define K25 7 SIG_EXPR_LIST_DECL_SESG(K25, MACLINK4, MACLINK4, SIG_DESC_SET(SCU410, 7)); -SIG_EXPR_LIST_DECL_SESG(K25, SDA14, SDA14, SIG_DESC_SET(SCU4B0, 7)); +SIG_EXPR_LIST_DECL_SESG(K25, SDA14, I2C14, SIG_DESC_SET(SCU4B0, 7)); PIN_DECL_2(K25, GPIOA7, MACLINK4, SDA14); FUNC_GROUP_DECL(MACLINK4, K25); From 9979346f5560956daf2a73da854e88d60a5309cf Mon Sep 17 00:00:00 2001 From: Johnny Huang Date: Tue, 8 Oct 2019 15:11:50 +1030 Subject: [PATCH 358/572] pinctrl: aspeed-g6: Fix I3C3/I3C4 pinmux configuration The documentation to configure I3C3/FSI1 and I3C4/FSI2 was initially unclear. Fixes: 58dc52ad00a0 ("pinctrl: aspeed: Add AST2600 pinmux support") Signed-off-by: Johnny Huang [AJ: Tweak commit message, resolve rebase conflicts] Signed-off-by: Andrew Jeffery Link: https://lore.kernel.org/r/20191008044153.12734-5-andrew@aj.id.au Reviewed-by: Joel Stanley Signed-off-by: Linus Walleij --- drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c | 24 ++++++++-------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c index 9079655cc818..68b066594461 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c @@ -1513,18 +1513,14 @@ FUNC_GROUP_DECL(VB, Y1, Y2, Y3, Y4); * following 4 pins */ #define AF25 244 -SIG_EXPR_LIST_DECL_SEMG(AF25, I3C3SCL, I3C3, I3C3, SIG_DESC_SET(SCU438, 20), - SIG_DESC_SET(SCU4D8, 20)); -SIG_EXPR_LIST_DECL_SESG(AF25, FSI1CLK, FSI1, SIG_DESC_CLEAR(SCU438, 20), - SIG_DESC_SET(SCU4D8, 20)); +SIG_EXPR_LIST_DECL_SEMG(AF25, I3C3SCL, I3C3, I3C3, SIG_DESC_SET(SCU438, 20)); +SIG_EXPR_LIST_DECL_SESG(AF25, FSI1CLK, FSI1, SIG_DESC_SET(SCU4D8, 20)); PIN_DECL_(AF25, SIG_EXPR_LIST_PTR(AF25, I3C3SCL), SIG_EXPR_LIST_PTR(AF25, FSI1CLK)); #define AE26 245 -SIG_EXPR_LIST_DECL_SEMG(AE26, I3C3SDA, I3C3, I3C3, SIG_DESC_SET(SCU438, 21), - SIG_DESC_SET(SCU4D8, 21)); -SIG_EXPR_LIST_DECL_SESG(AE26, FSI1DATA, FSI1, SIG_DESC_CLEAR(SCU438, 21), - SIG_DESC_SET(SCU4D8, 21)); +SIG_EXPR_LIST_DECL_SEMG(AE26, I3C3SDA, I3C3, I3C3, SIG_DESC_SET(SCU438, 21)); +SIG_EXPR_LIST_DECL_SESG(AE26, FSI1DATA, FSI1, SIG_DESC_SET(SCU4D8, 21)); PIN_DECL_(AE26, SIG_EXPR_LIST_PTR(AE26, I3C3SDA), SIG_EXPR_LIST_PTR(AE26, FSI1DATA)); @@ -1533,18 +1529,14 @@ FUNC_DECL_2(I3C3, HVI3C3, I3C3); FUNC_GROUP_DECL(FSI1, AF25, AE26); #define AE25 246 -SIG_EXPR_LIST_DECL_SEMG(AE25, I3C4SCL, I3C4, I3C4, SIG_DESC_SET(SCU438, 22), - SIG_DESC_SET(SCU4D8, 22)); -SIG_EXPR_LIST_DECL_SESG(AE25, FSI2CLK, FSI2, SIG_DESC_CLEAR(SCU438, 22), - SIG_DESC_SET(SCU4D8, 22)); +SIG_EXPR_LIST_DECL_SEMG(AE25, I3C4SCL, I3C4, I3C4, SIG_DESC_SET(SCU438, 22)); +SIG_EXPR_LIST_DECL_SESG(AE25, FSI2CLK, FSI2, SIG_DESC_SET(SCU4D8, 22)); PIN_DECL_(AE25, SIG_EXPR_LIST_PTR(AE25, I3C4SCL), SIG_EXPR_LIST_PTR(AE25, FSI2CLK)); #define AF24 247 -SIG_EXPR_LIST_DECL_SEMG(AF24, I3C4SDA, I3C4, I3C4, SIG_DESC_SET(SCU438, 23), - SIG_DESC_SET(SCU4D8, 23)); -SIG_EXPR_LIST_DECL_SESG(AF24, FSI2DATA, FSI2, SIG_DESC_CLEAR(SCU438, 23), - SIG_DESC_SET(SCU4D8, 23)); +SIG_EXPR_LIST_DECL_SEMG(AF24, I3C4SDA, I3C4, I3C4, SIG_DESC_SET(SCU438, 23)); +SIG_EXPR_LIST_DECL_SESG(AF24, FSI2DATA, FSI2, SIG_DESC_SET(SCU4D8, 23)); PIN_DECL_(AF24, SIG_EXPR_LIST_PTR(AF24, I3C4SDA), SIG_EXPR_LIST_PTR(AF24, FSI2DATA)); From c136d4c71f755a189fe13a0cd4f3e8f538dda567 Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Tue, 8 Oct 2019 15:11:51 +1030 Subject: [PATCH 359/572] pinctrl: aspeed-g6: Make SIG_DESC_CLEAR() behave intuitively Signal descriptors can represent multi-bit bitfields and so have explicit "enable" and "disable" states. However many descriptor instances only describe a single bit, and so the SIG_DESC_SET() macro is provides an abstraction for the single-bit cases: Its expansion configures the "enable" state to set the bit and "disable" to clear. SIG_DESC_CLEAR() was introduced to provide a similar single-bit abstraction for for descriptors to clear the bit of interest. However its behaviour was defined as the literal inverse of SIG_DESC_SET() - the impact is the bit of interest is set in the disable path. This behaviour isn't intuitive and doesn't align with how we want to use the macro in practice, so make it clear the bit for both the enable and disable paths. Signed-off-by: Andrew Jeffery Link: https://lore.kernel.org/r/20191008044153.12734-6-andrew@aj.id.au Reviewed-by: Joel Stanley Signed-off-by: Linus Walleij --- drivers/pinctrl/aspeed/pinmux-aspeed.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/aspeed/pinmux-aspeed.h b/drivers/pinctrl/aspeed/pinmux-aspeed.h index a2c0d52e4f7b..d5202241f411 100644 --- a/drivers/pinctrl/aspeed/pinmux-aspeed.h +++ b/drivers/pinctrl/aspeed/pinmux-aspeed.h @@ -508,7 +508,7 @@ struct aspeed_pin_desc { * @idx: The bit index in the register */ #define SIG_DESC_SET(reg, idx) SIG_DESC_IP_BIT(ASPEED_IP_SCU, reg, idx, 1) -#define SIG_DESC_CLEAR(reg, idx) SIG_DESC_IP_BIT(ASPEED_IP_SCU, reg, idx, 0) +#define SIG_DESC_CLEAR(reg, idx) { ASPEED_IP_SCU, reg, BIT_MASK(idx), 0, 0 } #define SIG_DESC_LIST_SYM(sig, group) sig_descs_ ## sig ## _ ## group #define SIG_DESC_LIST_DECL(sig, group, ...) \ From 1550583432535dccaf1956c1d58e2866eacd173e Mon Sep 17 00:00:00 2001 From: Johnny Huang Date: Tue, 8 Oct 2019 15:11:52 +1030 Subject: [PATCH 360/572] pinctrl: aspeed-g6: Fix UART13 group pinmux When UART13G1 is set the pinmux configuration in SCU4B8 for UART13G0 should be cleared. Fixes: 58dc52ad00a0 ("pinctrl: aspeed: Add AST2600 pinmux support") Signed-off-by: Johnny Huang [AJ: Tweak commit message] Signed-off-by: Andrew Jeffery Link: https://lore.kernel.org/r/20191008044153.12734-7-andrew@aj.id.au Reviewed-by: Joel Stanley Signed-off-by: Linus Walleij --- drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c index 68b066594461..dc17cf3d3549 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c @@ -1262,13 +1262,13 @@ GROUP_DECL(SPI1, AB11, AC11, AA11); #define AD11 206 SIG_EXPR_LIST_DECL_SEMG(AD11, SPI1DQ2, QSPI1, SPI1, SIG_DESC_SET(SCU438, 14)); SIG_EXPR_LIST_DECL_SEMG(AD11, TXD13, UART13G1, UART13, - SIG_DESC_SET(SCU438, 14)); + SIG_DESC_CLEAR(SCU4B8, 2), SIG_DESC_SET(SCU4D8, 14)); PIN_DECL_2(AD11, GPIOZ6, SPI1DQ2, TXD13); #define AF10 207 SIG_EXPR_LIST_DECL_SEMG(AF10, SPI1DQ3, QSPI1, SPI1, SIG_DESC_SET(SCU438, 15)); SIG_EXPR_LIST_DECL_SEMG(AF10, RXD13, UART13G1, UART13, - SIG_DESC_SET(SCU438, 15)); + SIG_DESC_CLEAR(SCU4B8, 3), SIG_DESC_SET(SCU4D8, 15)); PIN_DECL_2(AF10, GPIOZ7, SPI1DQ3, RXD13); GROUP_DECL(QSPI1, AB11, AC11, AA11, AD11, AF10); From d6e7a1a5119c4e719b0d63651f09762d7384bfed Mon Sep 17 00:00:00 2001 From: Johnny Huang Date: Tue, 8 Oct 2019 15:11:53 +1030 Subject: [PATCH 361/572] pinctrl: aspeed-g6: Rename SD3 to EMMC and rework pin groups AST2600 EMMC support 3 types DAT bus sizes (1, 4 and 8-bit), corresponding to 3 groups: EMMCG1, EMMCG4 and EMMCG8 Fixes: 58dc52ad00a0 ("pinctrl: aspeed: Add AST2600 pinmux support") Signed-off-by: Johnny Huang Signed-off-by: Andrew Jeffery Link: https://lore.kernel.org/r/20191008044153.12734-8-andrew@aj.id.au Reviewed-by: Joel Stanley Signed-off-by: Linus Walleij --- drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c | 72 ++++++++++------------ drivers/pinctrl/aspeed/pinmux-aspeed.h | 1 + 2 files changed, 33 insertions(+), 40 deletions(-) diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c index dc17cf3d3549..c6800d220920 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c @@ -1440,74 +1440,72 @@ FUNC_GROUP_DECL(RGMII2, D4, C2, C1, D3, E4, F5, D2, E3, D1, F4, E2, E1); FUNC_GROUP_DECL(RMII2, D4, C2, C1, D3, D2, D1, F4, E2, E1); #define AB4 232 -SIG_EXPR_LIST_DECL_SESG(AB4, SD3CLK, SD3, SIG_DESC_SET(SCU400, 24)); -PIN_DECL_1(AB4, GPIO18D0, SD3CLK); +SIG_EXPR_LIST_DECL_SEMG(AB4, EMMCCLK, EMMCG1, EMMC, SIG_DESC_SET(SCU400, 24)); +PIN_DECL_1(AB4, GPIO18D0, EMMCCLK); #define AA4 233 -SIG_EXPR_LIST_DECL_SESG(AA4, SD3CMD, SD3, SIG_DESC_SET(SCU400, 25)); -PIN_DECL_1(AA4, GPIO18D1, SD3CMD); +SIG_EXPR_LIST_DECL_SEMG(AA4, EMMCCMD, EMMCG1, EMMC, SIG_DESC_SET(SCU400, 25)); +PIN_DECL_1(AA4, GPIO18D1, EMMCCMD); #define AC4 234 -SIG_EXPR_LIST_DECL_SESG(AC4, SD3DAT0, SD3, SIG_DESC_SET(SCU400, 26)); -PIN_DECL_1(AC4, GPIO18D2, SD3DAT0); +SIG_EXPR_LIST_DECL_SEMG(AC4, EMMCDAT0, EMMCG1, EMMC, SIG_DESC_SET(SCU400, 26)); +PIN_DECL_1(AC4, GPIO18D2, EMMCDAT0); #define AA5 235 -SIG_EXPR_LIST_DECL_SESG(AA5, SD3DAT1, SD3, SIG_DESC_SET(SCU400, 27)); -PIN_DECL_1(AA5, GPIO18D3, SD3DAT1); +SIG_EXPR_LIST_DECL_SEMG(AA5, EMMCDAT1, EMMCG4, EMMC, SIG_DESC_SET(SCU400, 27)); +PIN_DECL_1(AA5, GPIO18D3, EMMCDAT1); #define Y5 236 -SIG_EXPR_LIST_DECL_SESG(Y5, SD3DAT2, SD3, SIG_DESC_SET(SCU400, 28)); -PIN_DECL_1(Y5, GPIO18D4, SD3DAT2); +SIG_EXPR_LIST_DECL_SEMG(Y5, EMMCDAT2, EMMCG4, EMMC, SIG_DESC_SET(SCU400, 28)); +PIN_DECL_1(Y5, GPIO18D4, EMMCDAT2); #define AB5 237 -SIG_EXPR_LIST_DECL_SESG(AB5, SD3DAT3, SD3, SIG_DESC_SET(SCU400, 29)); -PIN_DECL_1(AB5, GPIO18D5, SD3DAT3); +SIG_EXPR_LIST_DECL_SEMG(AB5, EMMCDAT3, EMMCG4, EMMC, SIG_DESC_SET(SCU400, 29)); +PIN_DECL_1(AB5, GPIO18D5, EMMCDAT3); #define AB6 238 -SIG_EXPR_LIST_DECL_SESG(AB6, SD3CD, SD3, SIG_DESC_SET(SCU400, 30)); -PIN_DECL_1(AB6, GPIO18D6, SD3CD); +SIG_EXPR_LIST_DECL_SEMG(AB6, EMMCCD, EMMCG1, EMMC, SIG_DESC_SET(SCU400, 30)); +PIN_DECL_1(AB6, GPIO18D6, EMMCCD); #define AC5 239 -SIG_EXPR_LIST_DECL_SESG(AC5, SD3WP, SD3, SIG_DESC_SET(SCU400, 31)); -PIN_DECL_1(AC5, GPIO18D7, SD3WP); +SIG_EXPR_LIST_DECL_SEMG(AC5, EMMCWP, EMMCG1, EMMC, SIG_DESC_SET(SCU400, 31)); +PIN_DECL_1(AC5, GPIO18D7, EMMCWP); -FUNC_GROUP_DECL(SD3, AB4, AA4, AC4, AA5, Y5, AB5, AB6, AC5); +GROUP_DECL(EMMCG1, AB4, AA4, AC4, AB6, AC5); +GROUP_DECL(EMMCG4, AB4, AA4, AC4, AA5, Y5, AB5, AB6, AC5); #define Y1 240 SIG_EXPR_LIST_DECL_SEMG(Y1, FWSPIDCS, FWSPID, FWSPID, SIG_DESC_SET(SCU500, 3)); SIG_EXPR_LIST_DECL_SESG(Y1, VBCS, VB, SIG_DESC_SET(SCU500, 5)); -SIG_EXPR_LIST_DECL_SESG(Y1, SD3DAT4, SD3DAT4, SIG_DESC_SET(SCU404, 0)); -PIN_DECL_3(Y1, GPIO18E0, FWSPIDCS, VBCS, SD3DAT4); -FUNC_GROUP_DECL(SD3DAT4, Y1); +SIG_EXPR_LIST_DECL_SEMG(Y1, EMMCDAT4, EMMCG8, EMMC, SIG_DESC_SET(SCU404, 0)); +PIN_DECL_3(Y1, GPIO18E0, FWSPIDCS, VBCS, EMMCDAT4); #define Y2 241 SIG_EXPR_LIST_DECL_SEMG(Y2, FWSPIDCK, FWSPID, FWSPID, SIG_DESC_SET(SCU500, 3)); SIG_EXPR_LIST_DECL_SESG(Y2, VBCK, VB, SIG_DESC_SET(SCU500, 5)); -SIG_EXPR_LIST_DECL_SESG(Y2, SD3DAT5, SD3DAT5, SIG_DESC_SET(SCU404, 1)); -PIN_DECL_3(Y2, GPIO18E1, FWSPIDCK, VBCK, SD3DAT5); -FUNC_GROUP_DECL(SD3DAT5, Y2); +SIG_EXPR_LIST_DECL_SEMG(Y2, EMMCDAT5, EMMCG8, EMMC, SIG_DESC_SET(SCU404, 1)); +PIN_DECL_3(Y2, GPIO18E1, FWSPIDCK, VBCK, EMMCDAT5); #define Y3 242 SIG_EXPR_LIST_DECL_SEMG(Y3, FWSPIDMOSI, FWSPID, FWSPID, SIG_DESC_SET(SCU500, 3)); SIG_EXPR_LIST_DECL_SESG(Y3, VBMOSI, VB, SIG_DESC_SET(SCU500, 5)); -SIG_EXPR_LIST_DECL_SESG(Y3, SD3DAT6, SD3DAT6, SIG_DESC_SET(SCU404, 2)); -PIN_DECL_3(Y3, GPIO18E2, FWSPIDMOSI, VBMOSI, SD3DAT6); -FUNC_GROUP_DECL(SD3DAT6, Y3); +SIG_EXPR_LIST_DECL_SEMG(Y3, EMMCDAT6, EMMCG8, EMMC, SIG_DESC_SET(SCU404, 2)); +PIN_DECL_3(Y3, GPIO18E2, FWSPIDMOSI, VBMOSI, EMMCDAT6); #define Y4 243 SIG_EXPR_LIST_DECL_SEMG(Y4, FWSPIDMISO, FWSPID, FWSPID, SIG_DESC_SET(SCU500, 3)); SIG_EXPR_LIST_DECL_SESG(Y4, VBMISO, VB, SIG_DESC_SET(SCU500, 5)); -SIG_EXPR_LIST_DECL_SESG(Y4, SD3DAT7, SD3DAT7, SIG_DESC_SET(SCU404, 3)); -PIN_DECL_3(Y4, GPIO18E3, FWSPIDMISO, VBMISO, SD3DAT7); -FUNC_GROUP_DECL(SD3DAT7, Y4); +SIG_EXPR_LIST_DECL_SEMG(Y4, EMMCDAT7, EMMCG8, EMMC, SIG_DESC_SET(SCU404, 3)); +PIN_DECL_3(Y4, GPIO18E3, FWSPIDMISO, VBMISO, EMMCDAT7); GROUP_DECL(FWSPID, Y1, Y2, Y3, Y4); GROUP_DECL(FWQSPID, Y1, Y2, Y3, Y4, AE12, AF12); +GROUP_DECL(EMMCG8, AB4, AA4, AC4, AA5, Y5, AB5, AB6, AC5, Y1, Y2, Y3, Y4); FUNC_DECL_2(FWSPID, FWSPID, FWQSPID); FUNC_GROUP_DECL(VB, Y1, Y2, Y3, Y4); - +FUNC_DECL_3(EMMC, EMMCG1, EMMCG4, EMMCG8); /* * FIXME: Confirm bits and priorities are the right way around for the * following 4 pins @@ -1968,11 +1966,9 @@ static const struct aspeed_pin_group aspeed_g6_groups[] = { ASPEED_PINCTRL_GROUP(SALT9G1), ASPEED_PINCTRL_GROUP(SD1), ASPEED_PINCTRL_GROUP(SD2), - ASPEED_PINCTRL_GROUP(SD3), - ASPEED_PINCTRL_GROUP(SD3DAT4), - ASPEED_PINCTRL_GROUP(SD3DAT5), - ASPEED_PINCTRL_GROUP(SD3DAT6), - ASPEED_PINCTRL_GROUP(SD3DAT7), + ASPEED_PINCTRL_GROUP(EMMCG1), + ASPEED_PINCTRL_GROUP(EMMCG4), + ASPEED_PINCTRL_GROUP(EMMCG8), ASPEED_PINCTRL_GROUP(SGPM1), ASPEED_PINCTRL_GROUP(SGPS1), ASPEED_PINCTRL_GROUP(SIOONCTRL), @@ -2051,6 +2047,7 @@ static const struct aspeed_pin_function aspeed_g6_functions[] = { ASPEED_PINCTRL_FUNC(ADC8), ASPEED_PINCTRL_FUNC(ADC9), ASPEED_PINCTRL_FUNC(BMCINT), + ASPEED_PINCTRL_FUNC(EMMC), ASPEED_PINCTRL_FUNC(ESPI), ASPEED_PINCTRL_FUNC(ESPIALT), ASPEED_PINCTRL_FUNC(FSI1), @@ -2183,11 +2180,6 @@ static const struct aspeed_pin_function aspeed_g6_functions[] = { ASPEED_PINCTRL_FUNC(SALT9), ASPEED_PINCTRL_FUNC(SD1), ASPEED_PINCTRL_FUNC(SD2), - ASPEED_PINCTRL_FUNC(SD3), - ASPEED_PINCTRL_FUNC(SD3DAT4), - ASPEED_PINCTRL_FUNC(SD3DAT5), - ASPEED_PINCTRL_FUNC(SD3DAT6), - ASPEED_PINCTRL_FUNC(SD3DAT7), ASPEED_PINCTRL_FUNC(SGPM1), ASPEED_PINCTRL_FUNC(SGPS1), ASPEED_PINCTRL_FUNC(SIOONCTRL), diff --git a/drivers/pinctrl/aspeed/pinmux-aspeed.h b/drivers/pinctrl/aspeed/pinmux-aspeed.h index d5202241f411..140c5ce9fbc1 100644 --- a/drivers/pinctrl/aspeed/pinmux-aspeed.h +++ b/drivers/pinctrl/aspeed/pinmux-aspeed.h @@ -738,6 +738,7 @@ struct aspeed_pin_desc { static const char *FUNC_SYM(func)[] = { __VA_ARGS__ } #define FUNC_DECL_2(func, one, two) FUNC_DECL_(func, #one, #two) +#define FUNC_DECL_3(func, one, two, three) FUNC_DECL_(func, #one, #two, #three) #define FUNC_GROUP_DECL(func, ...) \ GROUP_DECL(func, __VA_ARGS__); \ From bc88f85c6c09306bd21917e1ae28205e9cd775a7 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Wed, 16 Oct 2019 12:24:58 +0100 Subject: [PATCH 362/572] kthread: make __kthread_queue_delayed_work static The __kthread_queue_delayed_work is not exported so make it static, to avoid the following sparse warning: kernel/kthread.c:869:6: warning: symbol '__kthread_queue_delayed_work' was not declared. Should it be static? Signed-off-by: Ben Dooks Signed-off-by: Linus Torvalds --- kernel/kthread.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/kthread.c b/kernel/kthread.c index 621467c33fef..b262f47046ca 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -866,9 +866,9 @@ void kthread_delayed_work_timer_fn(struct timer_list *t) } EXPORT_SYMBOL(kthread_delayed_work_timer_fn); -void __kthread_queue_delayed_work(struct kthread_worker *worker, - struct kthread_delayed_work *dwork, - unsigned long delay) +static void __kthread_queue_delayed_work(struct kthread_worker *worker, + struct kthread_delayed_work *dwork, + unsigned long delay) { struct timer_list *timer = &dwork->timer; struct kthread_work *work = &dwork->work; From 3874d73e06c9b9dc15de0b7382fc223986d75571 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 14 Oct 2019 16:58:35 -0700 Subject: [PATCH 363/572] md/raid0: fix warning message for parameter default_layout The message should match the parameter, i.e. raid0.default_layout. Fixes: c84a1372df92 ("md/raid0: avoid RAID0 data corruption due to layout confusion.") Cc: NeilBrown Reported-by: Ivan Topolsky Signed-off-by: Song Liu --- drivers/md/raid0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index f61693e59684..1e772287b1c8 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -154,7 +154,7 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) } else { pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n", mdname(mddev)); - pr_err("md/raid0: please set raid.default_layout to 1 or 2\n"); + pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n"); err = -ENOTSUPP; goto abort; } From 19c95f261c6558d4c2cbbfacd2d8bb6501384601 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Tue, 15 Oct 2019 18:25:44 +0100 Subject: [PATCH 364/572] arm64: entry.S: Do not preempt from IRQ before all cpufeatures are enabled Preempting from IRQ-return means that the task has its PSTATE saved on the stack, which will get restored when the task is resumed and does the actual IRQ return. However, enabling some CPU features requires modifying the PSTATE. This means that, if a task was scheduled out during an IRQ-return before all CPU features are enabled, the task might restore a PSTATE that does not include the feature enablement changes once scheduled back in. * Task 1: PAN == 0 ---| |--------------- | |<- return from IRQ, PSTATE.PAN = 0 | <- IRQ | +--------+ <- preempt() +-- ^ | reschedule Task 1, PSTATE.PAN == 1 * Init: --------------------+------------------------ ^ | enable_cpu_features set PSTATE.PAN on all CPUs Worse than this, since PSTATE is untouched when task switching is done, a task missing the new bits in PSTATE might affect another task, if both do direct calls to schedule() (outside of IRQ/exception contexts). Fix this by preventing preemption on IRQ-return until features are enabled on all CPUs. This way the only PSTATE values that are saved on the stack are from synchronous exceptions. These are expected to be fatal this early, the exception is BRK for WARN_ON(), but as this uses do_debug_exception() which keeps IRQs masked, it shouldn't call schedule(). Signed-off-by: Julien Thierry [james: Replaced a really cool hack, with an even simpler static key in C. expanded commit message with Julien's cover-letter ascii art] Signed-off-by: James Morse Signed-off-by: Will Deacon --- arch/arm64/kernel/entry.S | 2 +- arch/arm64/kernel/process.c | 18 ++++++++++++++++++ include/linux/sched.h | 1 + 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index e304fe04b098..e1859e010c5f 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -680,7 +680,7 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING orr x24, x24, x0 alternative_else_nop_endif cbnz x24, 1f // preempt count != 0 || NMI return path - bl preempt_schedule_irq // irq en/disable is done inside + bl arm64_preempt_schedule_irq // irq en/disable is done inside 1: #endif diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 1fb2819fc048..71f788cd2b18 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -44,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -631,3 +633,19 @@ static int __init tagged_addr_init(void) core_initcall(tagged_addr_init); #endif /* CONFIG_ARM64_TAGGED_ADDR_ABI */ + +asmlinkage void __sched arm64_preempt_schedule_irq(void) +{ + lockdep_assert_irqs_disabled(); + + /* + * Preempting a task from an IRQ means we leave copies of PSTATE + * on the stack. cpufeature's enable calls may modify PSTATE, but + * resuming one of these preempted tasks would undo those changes. + * + * Only allow a task to be preempted once cpufeatures have been + * enabled. + */ + if (static_branch_likely(&arm64_const_caps_ready)) + preempt_schedule_irq(); +} diff --git a/include/linux/sched.h b/include/linux/sched.h index 2c2e56bd8913..67a1d86981a9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -223,6 +223,7 @@ extern long schedule_timeout_uninterruptible(long timeout); extern long schedule_timeout_idle(long timeout); asmlinkage void schedule(void); extern void schedule_preempt_disabled(void); +asmlinkage void preempt_schedule_irq(void); extern int __must_check io_schedule_prepare(void); extern void io_schedule_finish(int token); From 29a0f5ad87e6f45c984ffffa57b7142d178ff422 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 16 Oct 2019 11:42:57 +0800 Subject: [PATCH 365/572] arm64: sysreg: fix incorrect definition of SYS_PAR_EL1_F The 'F' field of the PAR_EL1 register lives in bit 0, not bit 1. Fix the broken definition in 'sysreg.h'. Fixes: e8620cff9994 ("arm64: sysreg: Add some field definitions for PAR_EL1") Reviewed-by: Mark Rutland Signed-off-by: Yang Yingliang Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 972d196c7714..6e919fafb43d 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -212,7 +212,7 @@ #define SYS_FAR_EL1 sys_reg(3, 0, 6, 0, 0) #define SYS_PAR_EL1 sys_reg(3, 0, 7, 4, 0) -#define SYS_PAR_EL1_F BIT(1) +#define SYS_PAR_EL1_F BIT(0) #define SYS_PAR_EL1_FST GENMASK(6, 1) /*** Statistical Profiling Extension ***/ From 3813733595c0c7c0674d106309b04e871d54dc1c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 16 Oct 2019 12:03:04 +0100 Subject: [PATCH 366/572] arm64: mm: fix inverted PAR_EL1.F check When detecting a spurious EL1 translation fault, we have the CPU retry the translation using an AT S1E1R instruction, and inspect PAR_EL1 to determine if the fault was spurious. When PAR_EL1.F == 0, the AT instruction successfully translated the address without a fault, which implies the original fault was spurious. However, in this case we return false and treat the original fault as if it was not spurious. Invert the return value so that we treat such a case as spurious. Cc: Catalin Marinas Fixes: 42f91093b043 ("arm64: mm: Ignore spurious translation faults taken from the kernel") Tested-by: James Morse Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/mm/fault.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 855f2a7954e6..9fc6db0bcbad 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -268,8 +268,12 @@ static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr, par = read_sysreg(par_el1); local_irq_restore(flags); + /* + * If we now have a valid translation, treat the translation fault as + * spurious. + */ if (!(par & SYS_PAR_EL1_F)) - return false; + return true; /* * If we got a different type of fault from the AT instruction, From 597399d0cb91d049fcb78fb45c7694771b583bb7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 15 Oct 2019 21:04:18 -0700 Subject: [PATCH 367/572] arm64: tags: Preserve tags for addresses translated via TTBR1 Sign-extending TTBR1 addresses when converting to an untagged address breaks the documented POSIX semantics for mlock() in some obscure error cases where we end up returning -EINVAL instead of -ENOMEM as a direct result of rewriting the upper address bits. Rework the untagged_addr() macro to preserve the upper address bits for TTBR1 addresses and only clear the tag bits for user addresses. This matches the behaviour of the 'clear_address_tag' assembly macro, so rename that and align the implementations at the same time so that they use the same instruction sequences for the tag manipulation. Link: https://lore.kernel.org/stable/20191014162651.GF19200@arrakis.emea.arm.com/ Reported-by: Jan Stancek Tested-by: Jan Stancek Reviewed-by: Catalin Marinas Tested-by: Catalin Marinas Reviewed-by: Vincenzo Frascino Tested-by: Vincenzo Frascino Reviewed-by: Andrey Konovalov Signed-off-by: Will Deacon --- arch/arm64/include/asm/asm-uaccess.h | 7 +++---- arch/arm64/include/asm/memory.h | 10 ++++++++-- arch/arm64/kernel/entry.S | 4 ++-- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h index f74909ba29bd..5bf963830b17 100644 --- a/arch/arm64/include/asm/asm-uaccess.h +++ b/arch/arm64/include/asm/asm-uaccess.h @@ -78,10 +78,9 @@ alternative_else_nop_endif /* * Remove the address tag from a virtual address, if present. */ - .macro clear_address_tag, dst, addr - tst \addr, #(1 << 55) - bic \dst, \addr, #(0xff << 56) - csel \dst, \dst, \addr, eq + .macro untagged_addr, dst, addr + sbfx \dst, \addr, #0, #56 + and \dst, \dst, \addr .endm #endif diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index b61b50bf68b1..c23c47360664 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -215,12 +215,18 @@ static inline unsigned long kaslr_offset(void) * up with a tagged userland pointer. Clear the tag to get a sane pointer to * pass on to access_ok(), for instance. */ -#define untagged_addr(addr) \ +#define __untagged_addr(addr) \ ((__force __typeof__(addr))sign_extend64((__force u64)(addr), 55)) +#define untagged_addr(addr) ({ \ + u64 __addr = (__force u64)addr; \ + __addr &= __untagged_addr(__addr); \ + (__force __typeof__(addr))__addr; \ +}) + #ifdef CONFIG_KASAN_SW_TAGS #define __tag_shifted(tag) ((u64)(tag) << 56) -#define __tag_reset(addr) untagged_addr(addr) +#define __tag_reset(addr) __untagged_addr(addr) #define __tag_get(addr) (__u8)((u64)(addr) >> 56) #else #define __tag_shifted(tag) 0UL diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index e1859e010c5f..a3a63092eba9 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -604,7 +604,7 @@ el1_da: */ mrs x3, far_el1 inherit_daif pstate=x23, tmp=x2 - clear_address_tag x0, x3 + untagged_addr x0, x3 mov x2, sp // struct pt_regs bl do_mem_abort @@ -808,7 +808,7 @@ el0_da: mrs x26, far_el1 ct_user_exit_irqoff enable_daif - clear_address_tag x0, x26 + untagged_addr x0, x26 mov x1, x25 mov x2, sp bl do_mem_abort From bd74708cd979f4934f0744055ce3b47da68733ce Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Wed, 16 Oct 2019 00:04:38 -0700 Subject: [PATCH 368/572] Revert "blackhole_netdev: fix syzkaller reported issue" This reverts commit b0818f80c8c1bc215bba276bd61c216014fab23b. Started seeing weird behavior after this patch especially in the IPv6 code path. Haven't root caused it, but since this was applied to net branch, taking a precautionary measure to revert it and look / analyze those failures Revert this now and I'll send a better fix after analysing / fixing the weirdness observed. CC: Eric Dumazet CC: Wei Wang CC: David S. Miller Signed-off-by: Mahesh Bandewar Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 7 +------ net/ipv6/route.c | 15 +++++++++------ 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4c87594d1389..34ccef18b40e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -6996,7 +6996,7 @@ static struct rtnl_af_ops inet6_ops __read_mostly = { int __init addrconf_init(void) { - struct inet6_dev *idev, *bdev; + struct inet6_dev *idev; int i, err; err = ipv6_addr_label_init(); @@ -7036,14 +7036,10 @@ int __init addrconf_init(void) */ rtnl_lock(); idev = ipv6_add_dev(init_net.loopback_dev); - bdev = ipv6_add_dev(blackhole_netdev); rtnl_unlock(); if (IS_ERR(idev)) { err = PTR_ERR(idev); goto errlo; - } else if (IS_ERR(bdev)) { - err = PTR_ERR(bdev); - goto errlo; } ip6_route_init_special_entries(); @@ -7128,7 +7124,6 @@ void addrconf_cleanup(void) addrconf_ifdown(dev, 1); } addrconf_ifdown(init_net.loopback_dev, 2); - addrconf_ifdown(blackhole_netdev, 2); /* * Check hash table. diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 742120728869..a63ff85fe141 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -155,9 +155,10 @@ void rt6_uncached_list_del(struct rt6_info *rt) static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev) { + struct net_device *loopback_dev = net->loopback_dev; int cpu; - if (dev == net->loopback_dev) + if (dev == loopback_dev) return; for_each_possible_cpu(cpu) { @@ -170,7 +171,7 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev) struct net_device *rt_dev = rt->dst.dev; if (rt_idev->dev == dev) { - rt->rt6i_idev = in6_dev_get(blackhole_netdev); + rt->rt6i_idev = in6_dev_get(loopback_dev); in6_dev_put(rt_idev); } @@ -385,11 +386,13 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, { struct rt6_info *rt = (struct rt6_info *)dst; struct inet6_dev *idev = rt->rt6i_idev; + struct net_device *loopback_dev = + dev_net(dev)->loopback_dev; - if (idev && idev->dev != dev_net(dev)->loopback_dev) { - struct inet6_dev *ibdev = in6_dev_get(blackhole_netdev); - if (ibdev) { - rt->rt6i_idev = ibdev; + if (idev && idev->dev != loopback_dev) { + struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev); + if (loopback_idev) { + rt->rt6i_idev = loopback_idev; in6_dev_put(idev); } } From 128260a41eeba4dd8d4433eab96aeeb7192392a4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Sep 2019 12:00:56 +0100 Subject: [PATCH 369/572] drm/i915/execlists: Refactor -EIO markup of hung requests Pull setting -EIO on the hung requests into its own utility function. Having allowed ourselves to short-circuit submission of completed requests, we can now do the mark_eio() prior to submission and avoid some redundant operations. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190923110056.15176-4-chris@chris-wilson.co.uk (cherry picked from commit 0d7cf7bc15e75bf79f2f65d61d19f896609f816a) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_lrc.c | 31 ++++++++++++++++------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index bdfcc7bdadbf..46005986e95f 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -234,6 +234,13 @@ static void execlists_init_reg_state(u32 *reg_state, struct intel_engine_cs *engine, struct intel_ring *ring); +static void mark_eio(struct i915_request *rq) +{ + if (!i915_request_signaled(rq)) + dma_fence_set_error(&rq->fence, -EIO); + i915_request_mark_complete(rq); +} + static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine) { return (i915_ggtt_offset(engine->status_page.vma) + @@ -2574,12 +2581,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) __execlists_reset(engine, true); /* Mark all executing requests as skipped. */ - list_for_each_entry(rq, &engine->active.requests, sched.link) { - if (!i915_request_signaled(rq)) - dma_fence_set_error(&rq->fence, -EIO); - - i915_request_mark_complete(rq); - } + list_for_each_entry(rq, &engine->active.requests, sched.link) + mark_eio(rq); /* Flush the queued requests to the timeline list (for retiring). */ while ((rb = rb_first_cached(&execlists->queue))) { @@ -2587,9 +2590,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) int i; priolist_for_each_request_consume(rq, rn, p, i) { + mark_eio(rq); __i915_request_submit(rq); - dma_fence_set_error(&rq->fence, -EIO); - i915_request_mark_complete(rq); } rb_erase_cached(&p->node, &execlists->queue); @@ -2605,13 +2607,14 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) RB_CLEAR_NODE(rb); spin_lock(&ve->base.active.lock); - if (ve->request) { - ve->request->engine = engine; - __i915_request_submit(ve->request); - dma_fence_set_error(&ve->request->fence, -EIO); - i915_request_mark_complete(ve->request); + rq = fetch_and_zero(&ve->request); + if (rq) { + mark_eio(rq); + + rq->engine = engine; + __i915_request_submit(rq); + ve->base.execlists.queue_priority_hint = INT_MIN; - ve->request = NULL; } spin_unlock(&ve->base.active.lock); } From 0336ab580878f4c5663dfa2b66095821fdc3e588 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 11 Oct 2019 23:20:30 +0300 Subject: [PATCH 370/572] drm/i915: Favor last VBT child device with conflicting AUX ch/DDC pin MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The first come first served apporoach to handling the VBT child device AUX ch conflicts has backfired. We have machines in the wild where the VBT specifies both port A eDP and port E DP (in that order) with port E being the real one. So let's try to flip the preference around and let the last child device win once again. Cc: stable@vger.kernel.org Cc: Jani Nikula Tested-by: Masami Ichikawa Tested-by: Torsten Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111966 Fixes: 36a0f92020dc ("drm/i915/bios: make child device order the priority order") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191011202030.8829-1-ville.syrjala@linux.intel.com Acked-by: Jani Nikula (cherry picked from commit 41e35ffb380bde1379e4030bb5b2ac824d5139cf) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_bios.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index efb39f350b19..3250c1b8dcca 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -1270,7 +1270,7 @@ static void sanitize_ddc_pin(struct drm_i915_private *dev_priv, DRM_DEBUG_KMS("port %c trying to use the same DDC pin (0x%x) as port %c, " "disabling port %c DVI/HDMI support\n", port_name(port), info->alternate_ddc_pin, - port_name(p), port_name(port)); + port_name(p), port_name(p)); /* * If we have multiple ports supposedly sharing the @@ -1278,9 +1278,14 @@ static void sanitize_ddc_pin(struct drm_i915_private *dev_priv, * port. Otherwise they share the same ddc bin and * system couldn't communicate with them separately. * - * Give child device order the priority, first come first - * served. + * Give inverse child device order the priority, + * last one wins. Yes, there are real machines + * (eg. Asrock B250M-HDV) where VBT has both + * port A and port E with the same AUX ch and + * we must pick port E :( */ + info = &dev_priv->vbt.ddi_port_info[p]; + info->supports_dvi = false; info->supports_hdmi = false; info->alternate_ddc_pin = 0; @@ -1316,7 +1321,7 @@ static void sanitize_aux_ch(struct drm_i915_private *dev_priv, DRM_DEBUG_KMS("port %c trying to use the same AUX CH (0x%x) as port %c, " "disabling port %c DP support\n", port_name(port), info->alternate_aux_channel, - port_name(p), port_name(port)); + port_name(p), port_name(p)); /* * If we have multiple ports supposedlt sharing the @@ -1324,9 +1329,14 @@ static void sanitize_aux_ch(struct drm_i915_private *dev_priv, * port. Otherwise they share the same aux channel * and system couldn't communicate with them separately. * - * Give child device order the priority, first come first - * served. + * Give inverse child device order the priority, + * last one wins. Yes, there are real machines + * (eg. Asrock B250M-HDV) where VBT has both + * port A and port E with the same AUX ch and + * we must pick port E :( */ + info = &dev_priv->vbt.ddi_port_info[p]; + info->supports_dp = false; info->alternate_aux_channel = 0; } From 4f2a572eda67aecb1e7e4fc26cc985fb8158f6e8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 28 Sep 2019 09:25:46 +0100 Subject: [PATCH 371/572] drm/i915/userptr: Never allow userptr into the mappable GGTT Daniel Vetter uncovered a nasty cycle in using the mmu-notifiers to invalidate userptr objects which also happen to be pulled into GGTT mmaps. That is when we unbind the userptr object (on mmu invalidation), we revoke all CPU mmaps, which may then recurse into mmu invalidation. We looked for ways of breaking the cycle, but the revocation on invalidation is required and cannot be avoided. The only solution we could see was to not allow such GGTT bindings of userptr objects in the first place. In practice, no one really wants to use a GGTT mmapping of a CPU pointer... Just before Daniel's explosive lockdep patches land in v5.4-rc1, we got a genuine blip from CI: <4>[ 246.793958] ====================================================== <4>[ 246.793972] WARNING: possible circular locking dependency detected <4>[ 246.793989] 5.3.0-gbd6c56f50d15-drmtip_372+ #1 Tainted: G U <4>[ 246.794003] ------------------------------------------------------ <4>[ 246.794017] kswapd0/145 is trying to acquire lock: <4>[ 246.794030] 000000003f565be6 (&dev->struct_mutex/1){+.+.}, at: userptr_mn_invalidate_range_start+0x18f/0x220 [i915] <4>[ 246.794250] but task is already holding lock: <4>[ 246.794263] 000000001799cef9 (&anon_vma->rwsem){++++}, at: page_lock_anon_vma_read+0xe6/0x2a0 <4>[ 246.794291] which lock already depends on the new lock. <4>[ 246.794307] the existing dependency chain (in reverse order) is: <4>[ 246.794322] -> #3 (&anon_vma->rwsem){++++}: <4>[ 246.794344] down_write+0x33/0x70 <4>[ 246.794357] __vma_adjust+0x3d9/0x7b0 <4>[ 246.794370] __split_vma+0x16a/0x180 <4>[ 246.794385] mprotect_fixup+0x2a5/0x320 <4>[ 246.794399] do_mprotect_pkey+0x208/0x2e0 <4>[ 246.794413] __x64_sys_mprotect+0x16/0x20 <4>[ 246.794429] do_syscall_64+0x55/0x1c0 <4>[ 246.794443] entry_SYSCALL_64_after_hwframe+0x49/0xbe <4>[ 246.794456] -> #2 (&mapping->i_mmap_rwsem){++++}: <4>[ 246.794478] down_write+0x33/0x70 <4>[ 246.794493] unmap_mapping_pages+0x48/0x130 <4>[ 246.794519] i915_vma_revoke_mmap+0x81/0x1b0 [i915] <4>[ 246.794519] i915_vma_unbind+0x11d/0x4a0 [i915] <4>[ 246.794519] i915_vma_destroy+0x31/0x300 [i915] <4>[ 246.794519] __i915_gem_free_objects+0xb8/0x4b0 [i915] <4>[ 246.794519] drm_file_free.part.0+0x1e6/0x290 <4>[ 246.794519] drm_release+0xa6/0xe0 <4>[ 246.794519] __fput+0xc2/0x250 <4>[ 246.794519] task_work_run+0x82/0xb0 <4>[ 246.794519] do_exit+0x35b/0xdb0 <4>[ 246.794519] do_group_exit+0x34/0xb0 <4>[ 246.794519] __x64_sys_exit_group+0xf/0x10 <4>[ 246.794519] do_syscall_64+0x55/0x1c0 <4>[ 246.794519] entry_SYSCALL_64_after_hwframe+0x49/0xbe <4>[ 246.794519] -> #1 (&vm->mutex){+.+.}: <4>[ 246.794519] i915_gem_shrinker_taints_mutex+0x6d/0xe0 [i915] <4>[ 246.794519] i915_address_space_init+0x9f/0x160 [i915] <4>[ 246.794519] i915_ggtt_init_hw+0x55/0x170 [i915] <4>[ 246.794519] i915_driver_probe+0xc9f/0x1620 [i915] <4>[ 246.794519] i915_pci_probe+0x43/0x1b0 [i915] <4>[ 246.794519] pci_device_probe+0x9e/0x120 <4>[ 246.794519] really_probe+0xea/0x3d0 <4>[ 246.794519] driver_probe_device+0x10b/0x120 <4>[ 246.794519] device_driver_attach+0x4a/0x50 <4>[ 246.794519] __driver_attach+0x97/0x130 <4>[ 246.794519] bus_for_each_dev+0x74/0xc0 <4>[ 246.794519] bus_add_driver+0x13f/0x210 <4>[ 246.794519] driver_register+0x56/0xe0 <4>[ 246.794519] do_one_initcall+0x58/0x300 <4>[ 246.794519] do_init_module+0x56/0x1f6 <4>[ 246.794519] load_module+0x25bd/0x2a40 <4>[ 246.794519] __se_sys_finit_module+0xd3/0xf0 <4>[ 246.794519] do_syscall_64+0x55/0x1c0 <4>[ 246.794519] entry_SYSCALL_64_after_hwframe+0x49/0xbe <4>[ 246.794519] -> #0 (&dev->struct_mutex/1){+.+.}: <4>[ 246.794519] __lock_acquire+0x15d8/0x1e90 <4>[ 246.794519] lock_acquire+0xa6/0x1c0 <4>[ 246.794519] __mutex_lock+0x9d/0x9b0 <4>[ 246.794519] userptr_mn_invalidate_range_start+0x18f/0x220 [i915] <4>[ 246.794519] __mmu_notifier_invalidate_range_start+0x85/0x110 <4>[ 246.794519] try_to_unmap_one+0x76b/0x860 <4>[ 246.794519] rmap_walk_anon+0x104/0x280 <4>[ 246.794519] try_to_unmap+0xc0/0xf0 <4>[ 246.794519] shrink_page_list+0x561/0xc10 <4>[ 246.794519] shrink_inactive_list+0x220/0x440 <4>[ 246.794519] shrink_node_memcg+0x36e/0x740 <4>[ 246.794519] shrink_node+0xcb/0x490 <4>[ 246.794519] balance_pgdat+0x241/0x580 <4>[ 246.794519] kswapd+0x16c/0x530 <4>[ 246.794519] kthread+0x119/0x130 <4>[ 246.794519] ret_from_fork+0x24/0x50 <4>[ 246.794519] other info that might help us debug this: <4>[ 246.794519] Chain exists of: &dev->struct_mutex/1 --> &mapping->i_mmap_rwsem --> &anon_vma->rwsem <4>[ 246.794519] Possible unsafe locking scenario: <4>[ 246.794519] CPU0 CPU1 <4>[ 246.794519] ---- ---- <4>[ 246.794519] lock(&anon_vma->rwsem); <4>[ 246.794519] lock(&mapping->i_mmap_rwsem); <4>[ 246.794519] lock(&anon_vma->rwsem); <4>[ 246.794519] lock(&dev->struct_mutex/1); <4>[ 246.794519] *** DEADLOCK *** v2: Say no to mmap_ioctl Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111744 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111870 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Daniel Vetter Cc: stable@vger.kernel.org Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190928082546.3473-1-chris@chris-wilson.co.uk (cherry picked from commit a4311745bba9763e3c965643d4531bd5765b0513) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 7 +++++++ drivers/gpu/drm/i915/gem/i915_gem_object.h | 6 ++++++ drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 3 ++- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 1 + drivers/gpu/drm/i915/i915_gem.c | 3 +++ 5 files changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 91051e178021..05289edbafe3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -364,6 +364,7 @@ err: return VM_FAULT_OOM; case -ENOSPC: case -EFAULT: + case -ENODEV: /* bad object, how did you get here! */ return VM_FAULT_SIGBUS; default: WARN_ONCE(ret, "unhandled error in %s: %i\n", __func__, ret); @@ -475,10 +476,16 @@ i915_gem_mmap_gtt(struct drm_file *file, if (!obj) return -ENOENT; + if (i915_gem_object_never_bind_ggtt(obj)) { + ret = -ENODEV; + goto out; + } + ret = create_mmap_offset(obj); if (ret == 0) *offset = drm_vma_node_offset_addr(&obj->base.vma_node); +out: i915_gem_object_put(obj); return ret; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 5efb9936e05b..ddf3605bea8e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -152,6 +152,12 @@ i915_gem_object_is_proxy(const struct drm_i915_gem_object *obj) return obj->ops->flags & I915_GEM_OBJECT_IS_PROXY; } +static inline bool +i915_gem_object_never_bind_ggtt(const struct drm_i915_gem_object *obj) +{ + return obj->ops->flags & I915_GEM_OBJECT_NO_GGTT; +} + static inline bool i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index ede0eb4218a8..646859fea224 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -32,7 +32,8 @@ struct drm_i915_gem_object_ops { #define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0) #define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1) #define I915_GEM_OBJECT_IS_PROXY BIT(2) -#define I915_GEM_OBJECT_ASYNC_CANCEL BIT(3) +#define I915_GEM_OBJECT_NO_GGTT BIT(3) +#define I915_GEM_OBJECT_ASYNC_CANCEL BIT(4) /* Interface between the GEM object and its backing storage. * get_pages() is called once prior to the use of the associated set diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 11b231c187c5..6b3b50f0f6d9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -702,6 +702,7 @@ i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj) static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = { .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | I915_GEM_OBJECT_IS_SHRINKABLE | + I915_GEM_OBJECT_NO_GGTT | I915_GEM_OBJECT_ASYNC_CANCEL, .get_pages = i915_gem_userptr_get_pages, .put_pages = i915_gem_userptr_put_pages, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 95e7c52cf8ed..d0f94f239919 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -969,6 +969,9 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, lockdep_assert_held(&obj->base.dev->struct_mutex); + if (i915_gem_object_never_bind_ggtt(obj)) + return ERR_PTR(-ENODEV); + if (flags & PIN_MAPPABLE && (!view || view->type == I915_GGTT_VIEW_NORMAL)) { /* If the required space is larger than the available From 0a544a2a728e2e33bb7fc38dd542ecb90ee393eb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Sep 2019 16:28:42 +0100 Subject: [PATCH 372/572] drm/i915: Fixup preempt-to-busy vs resubmission of a virtual request As preempt-to-busy leaves the request on the HW as the resubmission is processed, that request may complete in the background and even cause a second virtual request to enter queue. This second virtual request breaks our "single request in the virtual pipeline" assumptions. Furthermore, as the virtual request may be completed and retired, we lose the reference the virtual engine assumes is held. Normally, just removing the request from the scheduler queue removes it from the engine, but the virtual engine keeps track of its singleton request via its ve->request. This pointer needs protecting with a reference. v2: Drop unnecessary motion of rq->engine = owner Fixes: 22b7a426bbe1 ("drm/i915/execlists: Preempt-to-busy") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190923152844.8914-1-chris@chris-wilson.co.uk (cherry picked from commit b647c7df01b75761b4c0b1cb6f4841088c0b1121) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_lrc.c | 32 +++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 46005986e95f..06a506c29463 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1243,6 +1243,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) submit = true; last = rq; } + i915_request_put(rq); /* * Hmm, we have a bunch of virtual engine requests, @@ -2613,6 +2614,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) rq->engine = engine; __i915_request_submit(rq); + i915_request_put(rq); ve->base.execlists.queue_priority_hint = INT_MIN; } @@ -3618,6 +3620,8 @@ submit_engine: static void virtual_submit_request(struct i915_request *rq) { struct virtual_engine *ve = to_virtual_engine(rq->engine); + struct i915_request *old; + unsigned long flags; GEM_TRACE("%s: rq=%llx:%lld\n", ve->base.name, @@ -3626,15 +3630,31 @@ static void virtual_submit_request(struct i915_request *rq) GEM_BUG_ON(ve->base.submit_request != virtual_submit_request); - GEM_BUG_ON(ve->request); - GEM_BUG_ON(!list_empty(virtual_queue(ve))); + spin_lock_irqsave(&ve->base.active.lock, flags); - ve->base.execlists.queue_priority_hint = rq_prio(rq); - WRITE_ONCE(ve->request, rq); + old = ve->request; + if (old) { /* background completion event from preempt-to-busy */ + GEM_BUG_ON(!i915_request_completed(old)); + __i915_request_submit(old); + i915_request_put(old); + } - list_move_tail(&rq->sched.link, virtual_queue(ve)); + if (i915_request_completed(rq)) { + __i915_request_submit(rq); - tasklet_schedule(&ve->base.execlists.tasklet); + ve->base.execlists.queue_priority_hint = INT_MIN; + ve->request = NULL; + } else { + ve->base.execlists.queue_priority_hint = rq_prio(rq); + ve->request = i915_request_get(rq); + + GEM_BUG_ON(!list_empty(virtual_queue(ve))); + list_move_tail(&rq->sched.link, virtual_queue(ve)); + + tasklet_schedule(&ve->base.execlists.tasklet); + } + + spin_unlock_irqrestore(&ve->base.active.lock, flags); } static struct ve_bond * From 2ca4f6ca4562594ef161e4140c2a5e0e5282967b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 14 Oct 2019 06:04:38 -0700 Subject: [PATCH 373/572] rxrpc: use rcu protection while reading sk->sk_user_data We need to extend the rcu_read_lock() section in rxrpc_error_report() and use rcu_dereference_sk_user_data() instead of plain access to sk->sk_user_data to make sure all rules are respected. The compiler wont reload sk->sk_user_data at will, and RCU rules prevent memory beeing freed too soon. Fixes: f0308fb07080 ("rxrpc: Fix possible NULL pointer access in ICMP handling") Fixes: 17926a79320a ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both") Signed-off-by: Eric Dumazet Cc: David Howells Signed-off-by: David S. Miller --- net/rxrpc/peer_event.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 61451281d74a..48f67a9b1037 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -147,13 +147,16 @@ void rxrpc_error_report(struct sock *sk) { struct sock_exterr_skb *serr; struct sockaddr_rxrpc srx; - struct rxrpc_local *local = sk->sk_user_data; + struct rxrpc_local *local; struct rxrpc_peer *peer; struct sk_buff *skb; - if (unlikely(!local)) + rcu_read_lock(); + local = rcu_dereference_sk_user_data(sk); + if (unlikely(!local)) { + rcu_read_unlock(); return; - + } _enter("%p{%d}", sk, local->debug_id); /* Clear the outstanding error value on the socket so that it doesn't @@ -163,6 +166,7 @@ void rxrpc_error_report(struct sock *sk) skb = sock_dequeue_err_skb(sk); if (!skb) { + rcu_read_unlock(); _leave("UDP socket errqueue empty"); return; } @@ -170,11 +174,11 @@ void rxrpc_error_report(struct sock *sk) serr = SKB_EXT_ERR(skb); if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) { _leave("UDP empty message"); + rcu_read_unlock(); rxrpc_free_skb(skb, rxrpc_skb_freed); return; } - rcu_read_lock(); peer = rxrpc_lookup_peer_icmp_rcu(local, skb, &srx); if (peer && !rxrpc_get_peer_maybe(peer)) peer = NULL; From 3de5ae54712c75cf3c517a288e0a704784ec6cf5 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Wed, 16 Oct 2019 10:30:39 +0800 Subject: [PATCH 374/572] net: phy: Fix "link partner" information disappear issue Some drivers just call phy_ethtool_ksettings_set() to set the links, for those phy drivers that use genphy_read_status(), if autoneg is on, and the link is up, than execute "ethtool -s ethx autoneg on" will cause "link partner" information disappear. The call trace is phy_ethtool_ksettings_set()->phy_start_aneg() ->linkmode_zero(phydev->lp_advertising)->genphy_read_status(), the link didn't change, so genphy_read_status() just return, and phydev->lp_advertising is zero now. This patch moves the clear operation of lp_advertising from phy_start_aneg() to genphy_read_lpa()/genphy_c45_read_lpa(), and if autoneg on and autoneg not complete, just clear what the generic functions care about. Fixes: 88d6272acaaa ("net: phy: avoid unneeded MDIO reads in genphy_read_status") Signed-off-by: Yonglong Liu Reviewed-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/phy/phy-c45.c | 2 ++ drivers/net/phy/phy.c | 3 --- drivers/net/phy/phy_device.c | 11 ++++++++++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index 7935593debb1..a1caeee12236 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -323,6 +323,8 @@ int genphy_c45_read_pma(struct phy_device *phydev) { int val; + linkmode_zero(phydev->lp_advertising); + val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_CTRL1); if (val < 0) return val; diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 119e6f466056..105d389b58e7 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -572,9 +572,6 @@ int phy_start_aneg(struct phy_device *phydev) if (AUTONEG_DISABLE == phydev->autoneg) phy_sanitize_settings(phydev); - /* Invalidate LP advertising flags */ - linkmode_zero(phydev->lp_advertising); - err = phy_config_aneg(phydev); if (err < 0) goto out_unlock; diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 9d2bbb13293e..adb66a2fae18 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1787,7 +1787,14 @@ int genphy_read_lpa(struct phy_device *phydev) { int lpa, lpagb; - if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete) { + if (phydev->autoneg == AUTONEG_ENABLE) { + if (!phydev->autoneg_complete) { + mii_stat1000_mod_linkmode_lpa_t(phydev->lp_advertising, + 0); + mii_lpa_mod_linkmode_lpa_t(phydev->lp_advertising, 0); + return 0; + } + if (phydev->is_gigabit_capable) { lpagb = phy_read(phydev, MII_STAT1000); if (lpagb < 0) @@ -1815,6 +1822,8 @@ int genphy_read_lpa(struct phy_device *phydev) return lpa; mii_lpa_mod_linkmode_lpa_t(phydev->lp_advertising, lpa); + } else { + linkmode_zero(phydev->lp_advertising); } return 0; From e497c20e203680aba9ccf7bb475959595908ca7e Mon Sep 17 00:00:00 2001 From: Biao Huang Date: Tue, 15 Oct 2019 11:24:44 +0800 Subject: [PATCH 375/572] net: stmmac: disable/enable ptp_ref_clk in suspend/resume flow disable ptp_ref_clk in suspend flow, and enable it in resume flow. Fixes: f573c0b9c4e0 ("stmmac: move stmmac_clk, pclk, clk_ptp_ref and stmmac_rst to platform structure") Signed-off-by: Biao Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index c76a1336a451..8fa13d43b5bc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4742,8 +4742,10 @@ int stmmac_suspend(struct device *dev) stmmac_mac_set(priv, priv->ioaddr, false); pinctrl_pm_select_sleep_state(priv->device); /* Disable clock in case of PWM is off */ - clk_disable(priv->plat->pclk); - clk_disable(priv->plat->stmmac_clk); + if (priv->plat->clk_ptp_ref) + clk_disable_unprepare(priv->plat->clk_ptp_ref); + clk_disable_unprepare(priv->plat->pclk); + clk_disable_unprepare(priv->plat->stmmac_clk); } mutex_unlock(&priv->lock); @@ -4806,8 +4808,10 @@ int stmmac_resume(struct device *dev) } else { pinctrl_pm_select_default_state(priv->device); /* enable the clk previously disabled */ - clk_enable(priv->plat->stmmac_clk); - clk_enable(priv->plat->pclk); + clk_prepare_enable(priv->plat->stmmac_clk); + clk_prepare_enable(priv->plat->pclk); + if (priv->plat->clk_ptp_ref) + clk_prepare_enable(priv->plat->clk_ptp_ref); /* reset the phy so that it's ready */ if (priv->mii) stmmac_mdio_reset(priv->mii); From 700dea5a0bea9f64eba89fae7cb2540326fdfdc1 Mon Sep 17 00:00:00 2001 From: Dmitry Goldin Date: Wed, 9 Oct 2019 13:42:14 +0000 Subject: [PATCH 376/572] kheaders: substituting --sort in archive creation The option --sort=ORDER was only introduced in tar 1.28 (2014), which is rather new and might not be available in some setups. This patch tries to replicate the previous behaviour as closely as possible to fix the kheaders build for older environments. It does not produce identical archives compared to the previous version due to minor sorting differences but produces reproducible results itself in my tests. Reported-by: Andreas Schwab Signed-off-by: Dmitry Goldin Tested-by: Andreas Schwab Tested-by: Quentin Perret Signed-off-by: Masahiro Yamada --- kernel/gen_kheaders.sh | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh index aff79e461fc9..5a0fc0b0403a 100755 --- a/kernel/gen_kheaders.sh +++ b/kernel/gen_kheaders.sh @@ -71,10 +71,13 @@ done | cpio --quiet -pd $cpio_dir >/dev/null 2>&1 find $cpio_dir -type f -print0 | xargs -0 -P8 -n1 perl -pi -e 'BEGIN {undef $/;}; s/\/\*((?!SPDX).)*?\*\///smg;' -# Create archive and try to normalize metadata for reproducibility -tar "${KBUILD_BUILD_TIMESTAMP:+--mtime=$KBUILD_BUILD_TIMESTAMP}" \ - --owner=0 --group=0 --sort=name --numeric-owner \ - -Jcf $tarfile -C $cpio_dir/ . > /dev/null +# Create archive and try to normalize metadata for reproducibility. +# For compatibility with older versions of tar, files are fed to tar +# pre-sorted, as --sort=name might not be available. +find $cpio_dir -printf "./%P\n" | LC_ALL=C sort | \ + tar "${KBUILD_BUILD_TIMESTAMP:+--mtime=$KBUILD_BUILD_TIMESTAMP}" \ + --owner=0 --group=0 --numeric-owner --no-recursion \ + -Jcf $tarfile -C $cpio_dir/ -T - > /dev/null echo "$src_files_md5" > kernel/kheaders.md5 echo "$obj_files_md5" >> kernel/kheaders.md5 From 7571b6a17fcc5e4f6903f065a82d0e38011346ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Szabolcs=20Sz=C5=91ke?= Date: Fri, 11 Oct 2019 19:19:36 +0200 Subject: [PATCH 377/572] ALSA: usb-audio: Disable quirks for BOSS Katana amplifiers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BOSS Katana amplifiers cannot be used for recording or playback if quirks are applied BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=195223 Signed-off-by: Szabolcs Szőke Cc: Link: https://lore.kernel.org/r/20191011171937.8013-1-szszoke.code@gmail.com Signed-off-by: Takashi Iwai --- sound/usb/pcm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index 33cd26763c0e..ff5ab24f3bd1 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -348,6 +348,9 @@ static int set_sync_ep_implicit_fb_quirk(struct snd_usb_substream *subs, ep = 0x84; ifnum = 0; goto add_sync_ep_from_ifnum; + case USB_ID(0x0582, 0x01d8): /* BOSS Katana */ + /* BOSS Katana amplifiers do not need quirks */ + return 0; } if (attr == USB_ENDPOINT_SYNC_ASYNC && From 8c8967a7dc01a25f57a0757fdca10987773cd1f2 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Thu, 17 Oct 2019 16:15:01 +0800 Subject: [PATCH 378/572] ALSA: hda/realtek - Enable headset mic on Asus MJ401TA On Asus MJ401TA (with Realtek ALC256), the headset mic is connected to pin 0x19, with default configuration value 0x411111f0 (indicating no physical connection). Enable this by quirking the pin. Mic jack detection was also tested and found to be working. This enables use of the headset mic on this product. Signed-off-by: Daniel Drake Cc: Link: https://lore.kernel.org/r/20191017081501.17135-1-drake@endlessm.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index b5c225a56b98..ce4f11659765 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5881,6 +5881,7 @@ enum { ALC225_FIXUP_WYSE_AUTO_MUTE, ALC225_FIXUP_WYSE_DISABLE_MIC_VREF, ALC286_FIXUP_ACER_AIO_HEADSET_MIC, + ALC256_FIXUP_ASUS_HEADSET_MIC, ALC256_FIXUP_ASUS_MIC_NO_PRESENCE, ALC299_FIXUP_PREDATOR_SPK, ALC294_FIXUP_ASUS_INTSPK_HEADSET_MIC, @@ -6930,6 +6931,15 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE }, + [ALC256_FIXUP_ASUS_HEADSET_MIC] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x03a11020 }, /* headset mic with jack detect */ + { } + }, + .chained = true, + .chain_id = ALC256_FIXUP_ASUS_HEADSET_MODE + }, [ALC256_FIXUP_ASUS_MIC_NO_PRESENCE] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -7126,6 +7136,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1517, "Asus Zenbook UX31A", ALC269VB_FIXUP_ASUS_ZENBOOK_UX31A), SND_PCI_QUIRK(0x1043, 0x16e3, "ASUS UX50", ALC269_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x1043, 0x17d1, "ASUS UX431FL", ALC294_FIXUP_ASUS_INTSPK_HEADSET_MIC), + SND_PCI_QUIRK(0x1043, 0x18b1, "Asus MJ401TA", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW), SND_PCI_QUIRK(0x1043, 0x1a30, "ASUS X705UD", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x1b13, "Asus U41SV", ALC269_FIXUP_INV_DMIC), From 1e72e673b9d102ff2e8333e74b3308d012ddf75b Mon Sep 17 00:00:00 2001 From: James Morse Date: Mon, 14 Oct 2019 18:19:18 +0100 Subject: [PATCH 379/572] EDAC/ghes: Fix Use after free in ghes_edac remove path ghes_edac models a single logical memory controller, and uses a global ghes_init variable to ensure only the first ghes_edac_register() will do anything. ghes_edac is registered the first time a GHES entry in the HEST is probed. There may be multiple entries, so subsequent attempts to register ghes_edac are silently ignored as the work has already been done. When a GHES entry is unregistered, it calls ghes_edac_unregister(), which free()s the memory behind the global variables in ghes_edac. But there may be multiple GHES entries, the next call to ghes_edac_unregister() will dereference the free()d memory, and attempt to free it a second time. This may also be triggered on a platform with one GHES entry, if the driver is unbound/re-bound and unbound. The re-bind step will do nothing because of ghes_init, the second unbind will then do the same work as the first. Doing the unregister work on the first call is unsafe, as another CPU may be processing a notification in ghes_edac_report_mem_error(), using the memory we are about to free. ghes_init is already half of the reference counting. We only need to do the register work for the first call, and the unregister work for the last. Add the unregister check. This means we no longer free ghes_edac's memory while there are GHES entries that may receive a notification. This was detected by KASAN and DEBUG_TEST_DRIVER_REMOVE. [ bp: merge into a single patch. ] Fixes: 0fe5f281f749 ("EDAC, ghes: Model a single, logical memory controller") Reported-by: John Garry Signed-off-by: James Morse Signed-off-by: Borislav Petkov Cc: linux-edac Cc: Mauro Carvalho Chehab Cc: Robert Richter Cc: Tony Luck Cc: Link: https://lkml.kernel.org/r/20191014171919.85044-2-james.morse@arm.com Link: https://lkml.kernel.org/r/304df85b-8b56-b77e-1a11-aa23769f2e7c@huawei.com --- drivers/edac/ghes_edac.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c index d413a0bdc9ad..0bb62857ffb2 100644 --- a/drivers/edac/ghes_edac.c +++ b/drivers/edac/ghes_edac.c @@ -553,7 +553,11 @@ void ghes_edac_unregister(struct ghes *ghes) if (!ghes_pvt) return; + if (atomic_dec_return(&ghes_init)) + return; + mci = ghes_pvt->mci; + ghes_pvt = NULL; edac_mc_del_mc(mci->pdev); edac_mc_free(mci); } From b1fc5833357524d5d342737913dbe32ff3557bc5 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 7 Oct 2019 11:45:36 +0100 Subject: [PATCH 380/572] stop_machine: Avoid potential race behaviour Both multi_cpu_stop() and set_state() access multi_stop_data::state racily using plain accesses. These are subject to compiler transformations which could break the intended behaviour of the code, and this situation is detected by KCSAN on both arm64 and x86 (splats below). Improve matters by using READ_ONCE() and WRITE_ONCE() to ensure that the compiler cannot elide, replay, or tear loads and stores. In multi_cpu_stop() the two loads of multi_stop_data::state are expected to be a consistent value, so snapshot the value into a temporary variable to ensure this. The state transitions are serialized by atomic manipulation of multi_stop_data::num_threads, and other fields in multi_stop_data are not modified while subject to concurrent reads. KCSAN splat on arm64: | BUG: KCSAN: data-race in multi_cpu_stop+0xa8/0x198 and set_state+0x80/0xb0 | | write to 0xffff00001003bd00 of 4 bytes by task 24 on cpu 3: | set_state+0x80/0xb0 | multi_cpu_stop+0x16c/0x198 | cpu_stopper_thread+0x170/0x298 | smpboot_thread_fn+0x40c/0x560 | kthread+0x1a8/0x1b0 | ret_from_fork+0x10/0x18 | | read to 0xffff00001003bd00 of 4 bytes by task 14 on cpu 1: | multi_cpu_stop+0xa8/0x198 | cpu_stopper_thread+0x170/0x298 | smpboot_thread_fn+0x40c/0x560 | kthread+0x1a8/0x1b0 | ret_from_fork+0x10/0x18 | | Reported by Kernel Concurrency Sanitizer on: | CPU: 1 PID: 14 Comm: migration/1 Not tainted 5.3.0-00007-g67ab35a199f4-dirty #3 | Hardware name: linux,dummy-virt (DT) KCSAN splat on x86: | write to 0xffffb0bac0013e18 of 4 bytes by task 19 on cpu 2: | set_state kernel/stop_machine.c:170 [inline] | ack_state kernel/stop_machine.c:177 [inline] | multi_cpu_stop+0x1a4/0x220 kernel/stop_machine.c:227 | cpu_stopper_thread+0x19e/0x280 kernel/stop_machine.c:516 | smpboot_thread_fn+0x1a8/0x300 kernel/smpboot.c:165 | kthread+0x1b5/0x200 kernel/kthread.c:255 | ret_from_fork+0x35/0x40 arch/x86/entry/entry_64.S:352 | | read to 0xffffb0bac0013e18 of 4 bytes by task 44 on cpu 7: | multi_cpu_stop+0xb4/0x220 kernel/stop_machine.c:213 | cpu_stopper_thread+0x19e/0x280 kernel/stop_machine.c:516 | smpboot_thread_fn+0x1a8/0x300 kernel/smpboot.c:165 | kthread+0x1b5/0x200 kernel/kthread.c:255 | ret_from_fork+0x35/0x40 arch/x86/entry/entry_64.S:352 | | Reported by Kernel Concurrency Sanitizer on: | CPU: 7 PID: 44 Comm: migration/7 Not tainted 5.3.0+ #1 | Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 Signed-off-by: Mark Rutland Signed-off-by: Thomas Gleixner Acked-by: Marco Elver Link: https://lkml.kernel.org/r/20191007104536.27276-1-mark.rutland@arm.com --- kernel/stop_machine.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index c7031a22aa7b..998d50ee2d9b 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -7,6 +7,7 @@ * Copyright (C) 2010 SUSE Linux Products GmbH * Copyright (C) 2010 Tejun Heo */ +#include #include #include #include @@ -167,7 +168,7 @@ static void set_state(struct multi_stop_data *msdata, /* Reset ack counter. */ atomic_set(&msdata->thread_ack, msdata->num_threads); smp_wmb(); - msdata->state = newstate; + WRITE_ONCE(msdata->state, newstate); } /* Last one to ack a state moves to the next state. */ @@ -186,7 +187,7 @@ void __weak stop_machine_yield(const struct cpumask *cpumask) static int multi_cpu_stop(void *data) { struct multi_stop_data *msdata = data; - enum multi_stop_state curstate = MULTI_STOP_NONE; + enum multi_stop_state newstate, curstate = MULTI_STOP_NONE; int cpu = smp_processor_id(), err = 0; const struct cpumask *cpumask; unsigned long flags; @@ -210,8 +211,9 @@ static int multi_cpu_stop(void *data) do { /* Chill out and ensure we re-read multi_stop_state. */ stop_machine_yield(cpumask); - if (msdata->state != curstate) { - curstate = msdata->state; + newstate = READ_ONCE(msdata->state); + if (newstate != curstate) { + curstate = newstate; switch (curstate) { case MULTI_STOP_DISABLE_IRQ: local_irq_disable(); From fd2b007eaec898564e269d1f478a2da0380ecf51 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 17 Oct 2019 10:38:36 +0800 Subject: [PATCH 381/572] btrfs: tracepoints: Fix wrong parameter order for qgroup events [BUG] For btrfs:qgroup_meta_reserve event, the trace event can output garbage: qgroup_meta_reserve: 9c7f6acc-b342-4037-bc47-7f6e4d2232d7: refroot=5(FS_TREE) type=DATA diff=2 The diff should always be alinged to sector size (4k), so there is definitely something wrong. [CAUSE] For the wrong @diff, it's caused by wrong parameter order. The correct parameters are: struct btrfs_root, s64 diff, int type. However the parameters used are: struct btrfs_root, int type, s64 diff. Fixes: 4ee0d8832c2e ("btrfs: qgroup: Update trace events for metadata reservation") CC: stable@vger.kernel.org # 4.19+ Reviewed-by: Nikolay Borisov Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/qgroup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index c4bb69941c77..3ad151655eb8 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -3629,7 +3629,7 @@ int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, return 0; BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize)); - trace_qgroup_meta_reserve(root, type, (s64)num_bytes); + trace_qgroup_meta_reserve(root, (s64)num_bytes, type); ret = qgroup_reserve(root, num_bytes, enforce, type); if (ret < 0) return ret; @@ -3676,7 +3676,7 @@ void __btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes, */ num_bytes = sub_root_meta_rsv(root, num_bytes, type); BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize)); - trace_qgroup_meta_reserve(root, type, -(s64)num_bytes); + trace_qgroup_meta_reserve(root, -(s64)num_bytes, type); btrfs_qgroup_free_refroot(fs_info, root->root_key.objectid, num_bytes, type); } From 1b2442b4ae0f234daeadd90e153b466332c466d8 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 17 Oct 2019 10:38:37 +0800 Subject: [PATCH 382/572] btrfs: tracepoints: Fix bad entry members of qgroup events [BUG] For btrfs:qgroup_meta_reserve event, the trace event can output garbage: qgroup_meta_reserve: 9c7f6acc-b342-4037-bc47-7f6e4d2232d7: refroot=5(FS_TREE) type=DATA diff=2 qgroup_meta_reserve: 9c7f6acc-b342-4037-bc47-7f6e4d2232d7: refroot=5(FS_TREE) type=0x258792 diff=2 The @type can be completely garbage, as DATA type is not possible for trace_qgroup_meta_reserve() trace event. [CAUSE] Ther are several problems related to qgroup trace events: - Unassigned entry member Member entry::type of trace_qgroup_update_reserve() and trace_qgourp_meta_reserve() is not assigned - Redundant entry member Member entry::type is completely useless in trace_qgroup_meta_convert() Fixes: 4ee0d8832c2e ("btrfs: qgroup: Update trace events for metadata reservation") CC: stable@vger.kernel.org # 4.10+ Reviewed-by: Nikolay Borisov Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 5df604de4f11..75ae1899452b 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -1688,6 +1688,7 @@ TRACE_EVENT(qgroup_update_reserve, __entry->qgid = qgroup->qgroupid; __entry->cur_reserved = qgroup->rsv.values[type]; __entry->diff = diff; + __entry->type = type; ), TP_printk_btrfs("qgid=%llu type=%s cur_reserved=%llu diff=%lld", @@ -1710,6 +1711,7 @@ TRACE_EVENT(qgroup_meta_reserve, TP_fast_assign_btrfs(root->fs_info, __entry->refroot = root->root_key.objectid; __entry->diff = diff; + __entry->type = type; ), TP_printk_btrfs("refroot=%llu(%s) type=%s diff=%lld", @@ -1726,7 +1728,6 @@ TRACE_EVENT(qgroup_meta_convert, TP_STRUCT__entry_btrfs( __field( u64, refroot ) __field( s64, diff ) - __field( int, type ) ), TP_fast_assign_btrfs(root->fs_info, From 45d02f79b539073b76077836871de6b674e36eb4 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 16 Oct 2019 17:01:18 +0200 Subject: [PATCH 383/572] binder: Don't modify VMA bounds in ->mmap handler binder_mmap() tries to prevent the creation of overly big binder mappings by silently truncating the size of the VMA to 4MiB. However, this violates the API contract of mmap(). If userspace attempts to create a large binder VMA, and later attempts to unmap that VMA, it will call munmap() on a range beyond the end of the VMA, which may have been allocated to another VMA in the meantime. This can lead to userspace memory corruption. The following sequence of calls leads to a segfault without this commit: int main(void) { int binder_fd = open("/dev/binder", O_RDWR); if (binder_fd == -1) err(1, "open binder"); void *binder_mapping = mmap(NULL, 0x800000UL, PROT_READ, MAP_SHARED, binder_fd, 0); if (binder_mapping == MAP_FAILED) err(1, "mmap binder"); void *data_mapping = mmap(NULL, 0x400000UL, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); if (data_mapping == MAP_FAILED) err(1, "mmap data"); munmap(binder_mapping, 0x800000UL); *(char*)data_mapping = 1; return 0; } Cc: stable@vger.kernel.org Signed-off-by: Jann Horn Acked-by: Todd Kjos Acked-by: Christian Brauner Link: https://lore.kernel.org/r/20191016150119.154756-1-jannh@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 7 ------- drivers/android/binder_alloc.c | 6 ++++-- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 5b9ac2122e89..265d9dd46a5e 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -97,10 +97,6 @@ DEFINE_SHOW_ATTRIBUTE(proc); #define SZ_1K 0x400 #endif -#ifndef SZ_4M -#define SZ_4M 0x400000 -#endif - #define FORBIDDEN_MMAP_FLAGS (VM_WRITE) enum { @@ -5177,9 +5173,6 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma) if (proc->tsk != current->group_leader) return -EINVAL; - if ((vma->vm_end - vma->vm_start) > SZ_4M) - vma->vm_end = vma->vm_start + SZ_4M; - binder_debug(BINDER_DEBUG_OPEN_CLOSE, "%s: %d %lx-%lx (%ld K) vma %lx pagep %lx\n", __func__, proc->pid, vma->vm_start, vma->vm_end, diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index d42a8b2f636a..eb76a823fbb2 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "binder_alloc.h" #include "binder_trace.h" @@ -689,7 +690,9 @@ int binder_alloc_mmap_handler(struct binder_alloc *alloc, alloc->buffer = (void __user *)vma->vm_start; mutex_unlock(&binder_alloc_mmap_lock); - alloc->pages = kcalloc((vma->vm_end - vma->vm_start) / PAGE_SIZE, + alloc->buffer_size = min_t(unsigned long, vma->vm_end - vma->vm_start, + SZ_4M); + alloc->pages = kcalloc(alloc->buffer_size / PAGE_SIZE, sizeof(alloc->pages[0]), GFP_KERNEL); if (alloc->pages == NULL) { @@ -697,7 +700,6 @@ int binder_alloc_mmap_handler(struct binder_alloc *alloc, failure_string = "alloc page array"; goto err_alloc_pages_failed; } - alloc->buffer_size = vma->vm_end - vma->vm_start; buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); if (!buffer) { From 564b6bb9d42d31fc80c006658cf38940a9b99616 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 30 Aug 2019 13:22:02 +0300 Subject: [PATCH 384/572] ARM: davinci: dm365: Fix McBSP dma_slave_map entry dm365 have only single McBSP, so the device name is without .0 Fixes: 0c750e1fe481d ("ARM: davinci: dm365: Add dma_slave_map to edma") Signed-off-by: Peter Ujfalusi Signed-off-by: Sekhar Nori --- arch/arm/mach-davinci/dm365.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-davinci/dm365.c b/arch/arm/mach-davinci/dm365.c index 8062412be70f..9fc5c73cc0be 100644 --- a/arch/arm/mach-davinci/dm365.c +++ b/arch/arm/mach-davinci/dm365.c @@ -462,8 +462,8 @@ static s8 dm365_queue_priority_mapping[][2] = { }; static const struct dma_slave_map dm365_edma_map[] = { - { "davinci-mcbsp.0", "tx", EDMA_FILTER_PARAM(0, 2) }, - { "davinci-mcbsp.0", "rx", EDMA_FILTER_PARAM(0, 3) }, + { "davinci-mcbsp", "tx", EDMA_FILTER_PARAM(0, 2) }, + { "davinci-mcbsp", "rx", EDMA_FILTER_PARAM(0, 3) }, { "davinci_voicecodec", "tx", EDMA_FILTER_PARAM(0, 2) }, { "davinci_voicecodec", "rx", EDMA_FILTER_PARAM(0, 3) }, { "spi_davinci.2", "tx", EDMA_FILTER_PARAM(0, 10) }, From 7b21483ccbef1b274a238c0653a03d778b21fbd7 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 22 Jul 2019 15:44:16 +0200 Subject: [PATCH 385/572] ARM: davinci_all_defconfig: enable GPIO backlight DA850 EVM has been converted to use GPIO backlight device for display backlight GPIO control. Enable the GPIO backlight module in davinci_all_defconfig to keep backlight support working. Signed-off-by: Bartosz Golaszewski [nsekhar@ti.com: edits to commit message for context] Signed-off-by: Sekhar Nori --- arch/arm/configs/davinci_all_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/configs/davinci_all_defconfig b/arch/arm/configs/davinci_all_defconfig index b34970ce6b31..2fbc11116d15 100644 --- a/arch/arm/configs/davinci_all_defconfig +++ b/arch/arm/configs/davinci_all_defconfig @@ -167,6 +167,7 @@ CONFIG_FB=y CONFIG_FIRMWARE_EDID=y CONFIG_FB_DA8XX=y CONFIG_BACKLIGHT_PWM=m +CONFIG_BACKLIGHT_GPIO=m CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_SOUND=m From 13bd677a472d534bf100bab2713efc3f9e3f5978 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 16 Oct 2019 09:21:50 -0400 Subject: [PATCH 386/572] dm cache: fix bugs when a GFP_NOWAIT allocation fails GFP_NOWAIT allocation can fail anytime - it doesn't wait for memory being available and it fails if the mempool is exhausted and there is not enough memory. If we go down this path: map_bio -> mg_start -> alloc_migration -> mempool_alloc(GFP_NOWAIT) we can see that map_bio() doesn't check the return value of mg_start(), and the bio is leaked. If we go down this path: map_bio -> mg_start -> mg_lock_writes -> alloc_prison_cell -> dm_bio_prison_alloc_cell_v2 -> mempool_alloc(GFP_NOWAIT) -> mg_lock_writes -> mg_complete the bio is ended with an error - it is unacceptable because it could cause filesystem corruption if the machine ran out of memory temporarily. Change GFP_NOWAIT to GFP_NOIO, so that the mempool code will properly wait until memory becomes available. mempool_alloc with GFP_NOIO can't fail, so remove the code paths that deal with allocation failure. Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-target.c | 28 ++-------------------------- 1 file changed, 2 insertions(+), 26 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index d249cf8ac277..8346e6d1816c 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -542,7 +542,7 @@ static void wake_migration_worker(struct cache *cache) static struct dm_bio_prison_cell_v2 *alloc_prison_cell(struct cache *cache) { - return dm_bio_prison_alloc_cell_v2(cache->prison, GFP_NOWAIT); + return dm_bio_prison_alloc_cell_v2(cache->prison, GFP_NOIO); } static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell_v2 *cell) @@ -554,9 +554,7 @@ static struct dm_cache_migration *alloc_migration(struct cache *cache) { struct dm_cache_migration *mg; - mg = mempool_alloc(&cache->migration_pool, GFP_NOWAIT); - if (!mg) - return NULL; + mg = mempool_alloc(&cache->migration_pool, GFP_NOIO); memset(mg, 0, sizeof(*mg)); @@ -664,10 +662,6 @@ static bool bio_detain_shared(struct cache *cache, dm_oblock_t oblock, struct bi struct dm_bio_prison_cell_v2 *cell_prealloc, *cell; cell_prealloc = alloc_prison_cell(cache); /* FIXME: allow wait if calling from worker */ - if (!cell_prealloc) { - defer_bio(cache, bio); - return false; - } build_key(oblock, end, &key); r = dm_cell_get_v2(cache->prison, &key, lock_level(bio), bio, cell_prealloc, &cell); @@ -1493,11 +1487,6 @@ static int mg_lock_writes(struct dm_cache_migration *mg) struct dm_bio_prison_cell_v2 *prealloc; prealloc = alloc_prison_cell(cache); - if (!prealloc) { - DMERR_LIMIT("%s: alloc_prison_cell failed", cache_device_name(cache)); - mg_complete(mg, false); - return -ENOMEM; - } /* * Prevent writes to the block, but allow reads to continue. @@ -1535,11 +1524,6 @@ static int mg_start(struct cache *cache, struct policy_work *op, struct bio *bio } mg = alloc_migration(cache); - if (!mg) { - policy_complete_background_work(cache->policy, op, false); - background_work_end(cache); - return -ENOMEM; - } mg->op = op; mg->overwrite_bio = bio; @@ -1628,10 +1612,6 @@ static int invalidate_lock(struct dm_cache_migration *mg) struct dm_bio_prison_cell_v2 *prealloc; prealloc = alloc_prison_cell(cache); - if (!prealloc) { - invalidate_complete(mg, false); - return -ENOMEM; - } build_key(mg->invalidate_oblock, oblock_succ(mg->invalidate_oblock), &key); r = dm_cell_lock_v2(cache->prison, &key, @@ -1669,10 +1649,6 @@ static int invalidate_start(struct cache *cache, dm_cblock_t cblock, return -EPERM; mg = alloc_migration(cache); - if (!mg) { - background_work_end(cache); - return -ENOMEM; - } mg->overwrite_bio = bio; mg->invalidate_cblock = cblock; From 94989e318b2f11e217e86bee058088064fa9a2e9 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 17 Oct 2019 17:04:11 +0200 Subject: [PATCH 387/572] ALSA: hda - Force runtime PM on Nvidia HDMI codecs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Przemysław Kopa reports that since commit b516ea586d71 ("PCI: Enable NVIDIA HDA controllers"), the discrete GPU Nvidia GeForce GT 540M on his 2011 Samsung laptop refuses to runtime suspend, resulting in a power regression and excessive heat. Rivera Valdez witnesses the same issue with a GeForce GT 525M (GF108M) of the same era, as does another Arch Linux user named "R0AR" with a more recent GeForce GTX 1050 Ti (GP107M). The commit exposes the discrete GPU's HDA controller and all four codecs on the controller do not set the CLKSTOP and EPSS bits in the Supported Power States Response. They also do not set the PS-ClkStopOk bit in the Get Power State Response. hda_codec_runtime_suspend() therefore does not call snd_hdac_codec_link_down(), which prevents each codec and the PCI device from runtime suspending. The same issue is present on some AMD discrete GPUs and we addressed it by forcing runtime PM despite the bits not being set, see commit 57cb54e53bdd ("ALSA: hda - Force to link down at runtime suspend on ATI/AMD HDMI"). Do the same for Nvidia HDMI codecs. Fixes: b516ea586d71 ("PCI: Enable NVIDIA HDA controllers") Link: https://bbs.archlinux.org/viewtopic.php?pid=1865512 Link: https://bugs.freedesktop.org/show_bug.cgi?id=75985#c81 Reported-by: Przemysław Kopa Reported-by: Rivera Valdez Signed-off-by: Lukas Wunner Cc: Daniel Drake Cc: stable@vger.kernel.org # v5.3+ Link: https://lore.kernel.org/r/3086bc75135c1e3567c5bc4f3cc4ff5cbf7a56c2.1571324194.git.lukas@wunner.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_hdmi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index bca5de78e9ad..795cbda32cbb 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -3474,6 +3474,8 @@ static int patch_nvhdmi(struct hda_codec *codec) nvhdmi_chmap_cea_alloc_validate_get_type; spec->chmap.ops.chmap_validate = nvhdmi_chmap_validate; + codec->link_down_at_suspend = 1; + generic_acomp_init(codec, &nvhdmi_audio_ops, nvhdmi_port2pin); return 0; From 283ea345934d277e30c841c577e0e2142b4bfcae Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 17 Oct 2019 16:22:37 +0200 Subject: [PATCH 388/572] coccinelle: api/devm_platform_ioremap_resource: remove useless script While it is useful for new drivers to use devm_platform_ioremap_resource, this script is currently used to spam maintainers, often updating very old drivers. The net benefit is the removal of 2 lines of code in the driver but the review load for the maintainers is huge. As of now, more that 560 patches have been sent, some of them obviously broken, as in: https://lore.kernel.org/lkml/9bbcce19c777583815c92ce3c2ff2586@www.loen.fr/ Remove the script to reduce the spam. Signed-off-by: Alexandre Belloni Acked-by: Julia Lawall Signed-off-by: Linus Torvalds --- .../api/devm_platform_ioremap_resource.cocci | 60 ------------------- 1 file changed, 60 deletions(-) delete mode 100644 scripts/coccinelle/api/devm_platform_ioremap_resource.cocci diff --git a/scripts/coccinelle/api/devm_platform_ioremap_resource.cocci b/scripts/coccinelle/api/devm_platform_ioremap_resource.cocci deleted file mode 100644 index 56a2e261d61d..000000000000 --- a/scripts/coccinelle/api/devm_platform_ioremap_resource.cocci +++ /dev/null @@ -1,60 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/// Use devm_platform_ioremap_resource helper which wraps -/// platform_get_resource() and devm_ioremap_resource() together. -/// -// Confidence: High -// Copyright: (C) 2019 Himanshu Jha GPLv2. -// Copyright: (C) 2019 Julia Lawall, Inria/LIP6. GPLv2. -// Keywords: platform_get_resource, devm_ioremap_resource, -// Keywords: devm_platform_ioremap_resource - -virtual patch -virtual report - -@r depends on patch && !report@ -expression e1, e2, arg1, arg2, arg3; -identifier id; -@@ - -( -- id = platform_get_resource(arg1, IORESOURCE_MEM, arg2); -| -- struct resource *id = platform_get_resource(arg1, IORESOURCE_MEM, arg2); -) - ... when != id -- e1 = devm_ioremap_resource(arg3, id); -+ e1 = devm_platform_ioremap_resource(arg1, arg2); - ... when != id -? id = e2 - -@r1 depends on patch && !report@ -identifier r.id; -type T; -@@ - -- T *id; - ...when != id - -@r2 depends on report && !patch@ -identifier id; -expression e1, e2, arg1, arg2, arg3; -position j0; -@@ - -( - id = platform_get_resource(arg1, IORESOURCE_MEM, arg2); -| - struct resource *id = platform_get_resource(arg1, IORESOURCE_MEM, arg2); -) - ... when != id - e1@j0 = devm_ioremap_resource(arg3, id); - ... when != id -? id = e2 - -@script:python depends on report && !patch@ -e1 << r2.e1; -j0 << r2.j0; -@@ - -msg = "WARNING: Use devm_platform_ioremap_resource for %s" % (e1) -coccilib.report.print_report(j0[0], msg) From c7967fc1499beb9b70bb9d33525fb0b384af8883 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 15 Oct 2019 10:54:39 +0100 Subject: [PATCH 389/572] Btrfs: fix qgroup double free after failure to reserve metadata for delalloc If we fail to reserve metadata for delalloc operations we end up releasing the previously reserved qgroup amount twice, once explicitly under the 'out_qgroup' label by calling btrfs_qgroup_free_meta_prealloc() and once again, under label 'out_fail', by calling btrfs_inode_rsv_release() with a value of 'true' for its 'qgroup_free' argument, which results in btrfs_qgroup_free_meta_prealloc() being called again, so we end up having a double free. Also if we fail to reserve the necessary qgroup amount, we jump to the label 'out_fail', which calls btrfs_inode_rsv_release() and that in turns calls btrfs_qgroup_free_meta_prealloc(), even though we weren't able to reserve any qgroup amount. So we freed some amount we never reserved. So fix this by removing the call to btrfs_inode_rsv_release() in the failure path, since it's not necessary at all as we haven't changed the inode's block reserve in any way at this point. Fixes: c8eaeac7b73434 ("btrfs: reserve delalloc metadata differently") CC: stable@vger.kernel.org # 5.2+ Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/delalloc-space.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c index 571e7b31ea2f..db9f2c58eb4a 100644 --- a/fs/btrfs/delalloc-space.c +++ b/fs/btrfs/delalloc-space.c @@ -381,7 +381,6 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes) out_qgroup: btrfs_qgroup_free_meta_prealloc(root, qgroup_reserve); out_fail: - btrfs_inode_rsv_release(inode, true); if (delalloc_lock) mutex_unlock(&inode->delalloc_mutex); return ret; From ba0b084ac309283db6e329785c1dc4f45fdbd379 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 16 Oct 2019 16:28:52 +0100 Subject: [PATCH 390/572] Btrfs: check for the full sync flag while holding the inode lock during fsync We were checking for the full fsync flag in the inode before locking the inode, which is racy, since at that that time it might not be set but after we acquire the inode lock some other task set it. One case where this can happen is on a system low on memory and some concurrent task failed to allocate an extent map and therefore set the full sync flag on the inode, to force the next fsync to work in full mode. A consequence of missing the full fsync flag set is hitting the problems fixed by commit 0c713cbab620 ("Btrfs: fix race between ranged fsync and writeback of adjacent ranges"), BUG_ON() when dropping extents from a log tree, hitting assertion failures at tree-log.c:copy_items() or all sorts of weird inconsistencies after replaying a log due to file extents items representing ranges that overlap. So just move the check such that it's done after locking the inode and before starting writeback again. Fixes: 0c713cbab620 ("Btrfs: fix race between ranged fsync and writeback of adjacent ranges") CC: stable@vger.kernel.org # 5.2+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/file.c | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e955e7fa9201..435a502a3226 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2067,25 +2067,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) struct btrfs_trans_handle *trans; struct btrfs_log_ctx ctx; int ret = 0, err; - u64 len; - /* - * If the inode needs a full sync, make sure we use a full range to - * avoid log tree corruption, due to hole detection racing with ordered - * extent completion for adjacent ranges, and assertion failures during - * hole detection. - */ - if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, - &BTRFS_I(inode)->runtime_flags)) { - start = 0; - end = LLONG_MAX; - } - - /* - * The range length can be represented by u64, we have to do the typecasts - * to avoid signed overflow if it's [0, LLONG_MAX] eg. from fsync() - */ - len = (u64)end - (u64)start + 1; trace_btrfs_sync_file(file, datasync); btrfs_init_log_ctx(&ctx, inode); @@ -2111,6 +2093,19 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) atomic_inc(&root->log_batch); + /* + * If the inode needs a full sync, make sure we use a full range to + * avoid log tree corruption, due to hole detection racing with ordered + * extent completion for adjacent ranges, and assertion failures during + * hole detection. Do this while holding the inode lock, to avoid races + * with other tasks. + */ + if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, + &BTRFS_I(inode)->runtime_flags)) { + start = 0; + end = LLONG_MAX; + } + /* * Before we acquired the inode's lock, someone may have dirtied more * pages in the target range. We need to make sure that writeback for @@ -2138,8 +2133,11 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) /* * We have to do this here to avoid the priority inversion of waiting on * IO of a lower priority task while holding a transaction open. + * + * Also, the range length can be represented by u64, we have to do the + * typecasts to avoid signed overflow if it's [0, LLONG_MAX]. */ - ret = btrfs_wait_ordered_range(inode, start, len); + ret = btrfs_wait_ordered_range(inode, start, (u64)end - (u64)start + 1); if (ret) { up_write(&BTRFS_I(inode)->dio_sem); inode_unlock(inode); From af0de1303c4e8f44fadd7b4c593f09f22324b04f Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 17 Oct 2019 15:25:47 +0200 Subject: [PATCH 391/572] usb: hso: obey DMA rules in tiocmget The serial state information must not be embedded into another data structure, as this interferes with cache handling for DMA on architectures without cache coherence.. That would result in data corruption on some architectures Allocating it separately. v2: fix syntax error Signed-off-by: Oliver Neukum Signed-off-by: David S. Miller --- drivers/net/usb/hso.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index a505b2ab88b8..74849da031fa 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -186,7 +186,7 @@ struct hso_tiocmget { int intr_completed; struct usb_endpoint_descriptor *endp; struct urb *urb; - struct hso_serial_state_notification serial_state_notification; + struct hso_serial_state_notification *serial_state_notification; u16 prev_UART_state_bitmap; struct uart_icount icount; }; @@ -1432,7 +1432,7 @@ static int tiocmget_submit_urb(struct hso_serial *serial, usb_rcvintpipe(usb, tiocmget->endp-> bEndpointAddress & 0x7F), - &tiocmget->serial_state_notification, + tiocmget->serial_state_notification, sizeof(struct hso_serial_state_notification), tiocmget_intr_callback, serial, tiocmget->endp->bInterval); @@ -1479,7 +1479,7 @@ static void tiocmget_intr_callback(struct urb *urb) /* wIndex should be the USB interface number of the port to which the * notification applies, which should always be the Modem port. */ - serial_state_notification = &tiocmget->serial_state_notification; + serial_state_notification = tiocmget->serial_state_notification; if (serial_state_notification->bmRequestType != BM_REQUEST_TYPE || serial_state_notification->bNotification != B_NOTIFICATION || le16_to_cpu(serial_state_notification->wValue) != W_VALUE || @@ -2565,6 +2565,8 @@ static void hso_free_tiomget(struct hso_serial *serial) usb_free_urb(tiocmget->urb); tiocmget->urb = NULL; serial->tiocmget = NULL; + kfree(tiocmget->serial_state_notification); + tiocmget->serial_state_notification = NULL; kfree(tiocmget); } } @@ -2615,10 +2617,13 @@ static struct hso_device *hso_create_bulk_serial_device( num_urbs = 2; serial->tiocmget = kzalloc(sizeof(struct hso_tiocmget), GFP_KERNEL); + serial->tiocmget->serial_state_notification + = kzalloc(sizeof(struct hso_serial_state_notification), + GFP_KERNEL); /* it isn't going to break our heart if serial->tiocmget * allocation fails don't bother checking this. */ - if (serial->tiocmget) { + if (serial->tiocmget && serial->tiocmget->serial_state_notification) { tiocmget = serial->tiocmget; tiocmget->endp = hso_get_ep(interface, USB_ENDPOINT_XFER_INT, From 8398b375a9e3f5e4bba9bcdfed152a8a247dee01 Mon Sep 17 00:00:00 2001 From: Florin Chiculita Date: Wed, 16 Oct 2019 10:36:22 +0300 Subject: [PATCH 392/572] dpaa2-eth: add irq for the dpmac connect/disconnect event Add IRQ for the DPNI endpoint change event, resolving the issue when a dynamically created DPNI gets a randomly generated hw address when the endpoint is a DPMAC object. Signed-off-by: Florin Chiculita Signed-off-by: Ioana Ciornei Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 6 +++++- drivers/net/ethernet/freescale/dpaa2/dpni.h | 5 ++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 162d7d8fb295..5acd734a216b 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -3306,6 +3306,9 @@ static irqreturn_t dpni_irq0_handler_thread(int irq_num, void *arg) if (status & DPNI_IRQ_EVENT_LINK_CHANGED) link_state_update(netdev_priv(net_dev)); + if (status & DPNI_IRQ_EVENT_ENDPOINT_CHANGED) + set_mac_addr(netdev_priv(net_dev)); + return IRQ_HANDLED; } @@ -3331,7 +3334,8 @@ static int setup_irqs(struct fsl_mc_device *ls_dev) } err = dpni_set_irq_mask(ls_dev->mc_io, 0, ls_dev->mc_handle, - DPNI_IRQ_INDEX, DPNI_IRQ_EVENT_LINK_CHANGED); + DPNI_IRQ_INDEX, DPNI_IRQ_EVENT_LINK_CHANGED | + DPNI_IRQ_EVENT_ENDPOINT_CHANGED); if (err < 0) { dev_err(&ls_dev->dev, "dpni_set_irq_mask(): %d\n", err); goto free_irq; diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni.h b/drivers/net/ethernet/freescale/dpaa2/dpni.h index fd583911b6c0..ee0711d06b3a 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpni.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpni.h @@ -133,9 +133,12 @@ int dpni_reset(struct fsl_mc_io *mc_io, */ #define DPNI_IRQ_INDEX 0 /** - * IRQ event - indicates a change in link state + * IRQ events: + * indicates a change in link state + * indicates a change in endpoint */ #define DPNI_IRQ_EVENT_LINK_CHANGED 0x00000001 +#define DPNI_IRQ_EVENT_ENDPOINT_CHANGED 0x00000002 int dpni_set_irq_enable(struct fsl_mc_io *mc_io, u32 cmd_flags, From a690af4f16f9bd1f976a4257e1529b6323a8d0e1 Mon Sep 17 00:00:00 2001 From: Ioana Radulescu Date: Wed, 16 Oct 2019 10:36:23 +0300 Subject: [PATCH 393/572] dpaa2-eth: Fix TX FQID values Depending on when MC connects the DPNI to a MAC, Tx FQIDs may not be available during probe time. Read the FQIDs each time the link goes up to avoid using invalid values. In case an error occurs or an invalid value is retrieved, fall back to QDID-based enqueueing. Fixes: 1fa0f68c9255 ("dpaa2-eth: Use FQ-based DPIO enqueue API") Signed-off-by: Ioana Radulescu Signed-off-by: Ioana Ciornei Signed-off-by: David S. Miller --- .../net/ethernet/freescale/dpaa2/dpaa2-eth.c | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 5acd734a216b..19379bae0144 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -1235,6 +1235,8 @@ static void dpaa2_eth_set_rx_taildrop(struct dpaa2_eth_priv *priv, bool enable) priv->rx_td_enabled = enable; } +static void update_tx_fqids(struct dpaa2_eth_priv *priv); + static int link_state_update(struct dpaa2_eth_priv *priv) { struct dpni_link_state state = {0}; @@ -1261,6 +1263,7 @@ static int link_state_update(struct dpaa2_eth_priv *priv) goto out; if (state.up) { + update_tx_fqids(priv); netif_carrier_on(priv->net_dev); netif_tx_start_all_queues(priv->net_dev); } else { @@ -2533,6 +2536,47 @@ static int set_pause(struct dpaa2_eth_priv *priv) return 0; } +static void update_tx_fqids(struct dpaa2_eth_priv *priv) +{ + struct dpni_queue_id qid = {0}; + struct dpaa2_eth_fq *fq; + struct dpni_queue queue; + int i, j, err; + + /* We only use Tx FQIDs for FQID-based enqueue, so check + * if DPNI version supports it before updating FQIDs + */ + if (dpaa2_eth_cmp_dpni_ver(priv, DPNI_ENQUEUE_FQID_VER_MAJOR, + DPNI_ENQUEUE_FQID_VER_MINOR) < 0) + return; + + for (i = 0; i < priv->num_fqs; i++) { + fq = &priv->fq[i]; + if (fq->type != DPAA2_TX_CONF_FQ) + continue; + for (j = 0; j < dpaa2_eth_tc_count(priv); j++) { + err = dpni_get_queue(priv->mc_io, 0, priv->mc_token, + DPNI_QUEUE_TX, j, fq->flowid, + &queue, &qid); + if (err) + goto out_err; + + fq->tx_fqid[j] = qid.fqid; + if (fq->tx_fqid[j] == 0) + goto out_err; + } + } + + priv->enqueue = dpaa2_eth_enqueue_fq; + + return; + +out_err: + netdev_info(priv->net_dev, + "Error reading Tx FQID, fallback to QDID-based enqueue\n"); + priv->enqueue = dpaa2_eth_enqueue_qd; +} + /* Configure the DPNI object this interface is associated with */ static int setup_dpni(struct fsl_mc_device *ls_dev) { From c9ad4c1049f7e0e8d59e975963dda002af47d93e Mon Sep 17 00:00:00 2001 From: "Ben Dooks (Codethink)" Date: Wed, 16 Oct 2019 09:22:05 +0100 Subject: [PATCH 394/572] net: stmmac: fix argument to stmmac_pcs_ctrl_ane() The stmmac_pcs_ctrl_ane() expects a register address as argument 1, but for some reason the mac_device_info is being passed. Fix the warning (and possible bug) from sparse: drivers/net/ethernet/stmicro/stmmac/stmmac_main.c:2613:17: warning: incorrect type in argument 1 (different address spaces) drivers/net/ethernet/stmicro/stmmac/stmmac_main.c:2613:17: expected void [noderef] *ioaddr drivers/net/ethernet/stmicro/stmmac/stmmac_main.c:2613:17: got struct mac_device_info *hw Signed-off-by: Ben Dooks Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 8fa13d43b5bc..3dfd04e0506a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2610,7 +2610,7 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) } if (priv->hw->pcs) - stmmac_pcs_ctrl_ane(priv, priv->hw, 1, priv->hw->ps, 0); + stmmac_pcs_ctrl_ane(priv, priv->ioaddr, 1, priv->hw->ps, 0); /* set TX and RX rings length */ stmmac_set_rings_length(priv); From 7f238ca984b9c6a89e2339c661440c775d617386 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Wed, 16 Oct 2019 15:33:23 +0200 Subject: [PATCH 395/572] net: dsa: microchip: Do not reinit mutexes on KSZ87xx The KSZ87xx driver calls mutex_init() on mutexes already inited in ksz_common.c ksz_switch_register(). Do not do it twice, drop the reinitialization. Signed-off-by: Marek Vasut Cc: Andrew Lunn Cc: David S. Miller Cc: Florian Fainelli Cc: George McCollister Cc: Tristram Ha Cc: Woojung Huh Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/microchip/ksz8795.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c index a23d3ffdf0c4..24a5e99f7fd5 100644 --- a/drivers/net/dsa/microchip/ksz8795.c +++ b/drivers/net/dsa/microchip/ksz8795.c @@ -1224,10 +1224,6 @@ static int ksz8795_switch_init(struct ksz_device *dev) { int i; - mutex_init(&dev->stats_mutex); - mutex_init(&dev->alu_mutex); - mutex_init(&dev->vlan_mutex); - dev->ds->ops = &ksz8795_switch_ops; for (i = 0; i < ARRAY_SIZE(ksz8795_switch_chips); i++) { From 013572a236ef53cbd1e315e0acf2ee632cc816d4 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Wed, 16 Oct 2019 15:33:24 +0200 Subject: [PATCH 396/572] net: dsa: microchip: Add shared regmap mutex The KSZ driver uses one regmap per register width (8/16/32), each with it's own lock, but accessing the same set of registers. In theory, it is possible to create a race condition between these regmaps, although the underlying bus (SPI or I2C) locking should assure nothing bad will really happen and the accesses would be correct. To make the driver do the right thing, add one single shared mutex for all the regmaps used by the driver instead. This assures that even if some future hardware is on a bus which does not serialize the accesses the same way SPI or I2C does, nothing bad will happen. Note that the status_mutex was unused and only initied, hence it was renamed and repurposed as the regmap mutex. Signed-off-by: Marek Vasut Cc: Andrew Lunn Cc: David S. Miller Cc: Florian Fainelli Cc: George McCollister Cc: Tristram Ha Cc: Woojung Huh Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/microchip/ksz8795_spi.c | 7 ++++--- drivers/net/dsa/microchip/ksz9477_i2c.c | 6 ++++-- drivers/net/dsa/microchip/ksz9477_spi.c | 6 ++++-- drivers/net/dsa/microchip/ksz_common.c | 2 +- drivers/net/dsa/microchip/ksz_common.h | 16 +++++++++++++++- 5 files changed, 28 insertions(+), 9 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz8795_spi.c b/drivers/net/dsa/microchip/ksz8795_spi.c index d0f8153e86b7..8b00f8e6c02f 100644 --- a/drivers/net/dsa/microchip/ksz8795_spi.c +++ b/drivers/net/dsa/microchip/ksz8795_spi.c @@ -25,6 +25,7 @@ KSZ_REGMAP_TABLE(ksz8795, 16, SPI_ADDR_SHIFT, static int ksz8795_spi_probe(struct spi_device *spi) { + struct regmap_config rc; struct ksz_device *dev; int i, ret; @@ -33,9 +34,9 @@ static int ksz8795_spi_probe(struct spi_device *spi) return -ENOMEM; for (i = 0; i < ARRAY_SIZE(ksz8795_regmap_config); i++) { - dev->regmap[i] = devm_regmap_init_spi(spi, - &ksz8795_regmap_config - [i]); + rc = ksz8795_regmap_config[i]; + rc.lock_arg = &dev->regmap_mutex; + dev->regmap[i] = devm_regmap_init_spi(spi, &rc); if (IS_ERR(dev->regmap[i])) { ret = PTR_ERR(dev->regmap[i]); dev_err(&spi->dev, diff --git a/drivers/net/dsa/microchip/ksz9477_i2c.c b/drivers/net/dsa/microchip/ksz9477_i2c.c index 0b1e01f0873d..fdffd9e0c518 100644 --- a/drivers/net/dsa/microchip/ksz9477_i2c.c +++ b/drivers/net/dsa/microchip/ksz9477_i2c.c @@ -17,6 +17,7 @@ KSZ_REGMAP_TABLE(ksz9477, not_used, 16, 0, 0); static int ksz9477_i2c_probe(struct i2c_client *i2c, const struct i2c_device_id *i2c_id) { + struct regmap_config rc; struct ksz_device *dev; int i, ret; @@ -25,8 +26,9 @@ static int ksz9477_i2c_probe(struct i2c_client *i2c, return -ENOMEM; for (i = 0; i < ARRAY_SIZE(ksz9477_regmap_config); i++) { - dev->regmap[i] = devm_regmap_init_i2c(i2c, - &ksz9477_regmap_config[i]); + rc = ksz9477_regmap_config[i]; + rc.lock_arg = &dev->regmap_mutex; + dev->regmap[i] = devm_regmap_init_i2c(i2c, &rc); if (IS_ERR(dev->regmap[i])) { ret = PTR_ERR(dev->regmap[i]); dev_err(&i2c->dev, diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c b/drivers/net/dsa/microchip/ksz9477_spi.c index f4198d6f72be..c5f64959a184 100644 --- a/drivers/net/dsa/microchip/ksz9477_spi.c +++ b/drivers/net/dsa/microchip/ksz9477_spi.c @@ -24,6 +24,7 @@ KSZ_REGMAP_TABLE(ksz9477, 32, SPI_ADDR_SHIFT, static int ksz9477_spi_probe(struct spi_device *spi) { + struct regmap_config rc; struct ksz_device *dev; int i, ret; @@ -32,8 +33,9 @@ static int ksz9477_spi_probe(struct spi_device *spi) return -ENOMEM; for (i = 0; i < ARRAY_SIZE(ksz9477_regmap_config); i++) { - dev->regmap[i] = devm_regmap_init_spi(spi, - &ksz9477_regmap_config[i]); + rc = ksz9477_regmap_config[i]; + rc.lock_arg = &dev->regmap_mutex; + dev->regmap[i] = devm_regmap_init_spi(spi, &rc); if (IS_ERR(dev->regmap[i])) { ret = PTR_ERR(dev->regmap[i]); dev_err(&spi->dev, diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index b0b870f0c252..fe47180c908b 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -436,7 +436,7 @@ int ksz_switch_register(struct ksz_device *dev, } mutex_init(&dev->dev_mutex); - mutex_init(&dev->stats_mutex); + mutex_init(&dev->regmap_mutex); mutex_init(&dev->alu_mutex); mutex_init(&dev->vlan_mutex); diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h index 64c2677693d2..a20ebb749377 100644 --- a/drivers/net/dsa/microchip/ksz_common.h +++ b/drivers/net/dsa/microchip/ksz_common.h @@ -47,7 +47,7 @@ struct ksz_device { const char *name; struct mutex dev_mutex; /* device access */ - struct mutex stats_mutex; /* status access */ + struct mutex regmap_mutex; /* regmap access */ struct mutex alu_mutex; /* ALU access */ struct mutex vlan_mutex; /* vlan access */ const struct ksz_dev_ops *dev_ops; @@ -290,6 +290,18 @@ static inline void ksz_pwrite32(struct ksz_device *dev, int port, int offset, ksz_write32(dev, dev->dev_ops->get_port_addr(port, offset), data); } +static inline void ksz_regmap_lock(void *__mtx) +{ + struct mutex *mtx = __mtx; + mutex_lock(mtx); +} + +static inline void ksz_regmap_unlock(void *__mtx) +{ + struct mutex *mtx = __mtx; + mutex_unlock(mtx); +} + /* Regmap tables generation */ #define KSZ_SPI_OP_RD 3 #define KSZ_SPI_OP_WR 2 @@ -314,6 +326,8 @@ static inline void ksz_pwrite32(struct ksz_device *dev, int port, int offset, .write_flag_mask = \ KSZ_SPI_OP_FLAG_MASK(KSZ_SPI_OP_WR, swp, \ regbits, regpad), \ + .lock = ksz_regmap_lock, \ + .unlock = ksz_regmap_unlock, \ .reg_format_endian = REGMAP_ENDIAN_BIG, \ .val_format_endian = REGMAP_ENDIAN_BIG \ } From ce584a8e2885c7b59dfacba42db39761243cacb2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 15 Oct 2019 18:07:19 -0400 Subject: [PATCH 397/572] drm/amdgpu/uvd6: fix allocation size in enc ring test (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to allocate a large enough buffer for the session info, otherwise the IB test can overwrite other memory. v2: - session info is 128K according to mesa - use the same session info for create and destroy Bug: https://bugzilla.kernel.org/show_bug.cgi?id=204241 Acked-by: Christian König Reviewed-by: James Zhu Tested-by: James Zhu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 31 ++++++++++++++++++--------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 670784a78512..217084d56ab8 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -206,13 +206,14 @@ static int uvd_v6_0_enc_ring_test_ring(struct amdgpu_ring *ring) * Open up a stream for HW test */ static int uvd_v6_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, + struct amdgpu_bo *bo, struct dma_fence **fence) { const unsigned ib_size_dw = 16; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; - uint64_t dummy; + uint64_t addr; int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); @@ -220,15 +221,15 @@ static int uvd_v6_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle return r; ib = &job->ibs[0]; - dummy = ib->gpu_addr + 1024; + addr = amdgpu_bo_gpu_offset(bo); ib->length_dw = 0; ib->ptr[ib->length_dw++] = 0x00000018; ib->ptr[ib->length_dw++] = 0x00000001; /* session info */ ib->ptr[ib->length_dw++] = handle; ib->ptr[ib->length_dw++] = 0x00010000; - ib->ptr[ib->length_dw++] = upper_32_bits(dummy); - ib->ptr[ib->length_dw++] = dummy; + ib->ptr[ib->length_dw++] = upper_32_bits(addr); + ib->ptr[ib->length_dw++] = addr; ib->ptr[ib->length_dw++] = 0x00000014; ib->ptr[ib->length_dw++] = 0x00000002; /* task info */ @@ -268,13 +269,14 @@ err: */ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, + struct amdgpu_bo *bo, struct dma_fence **fence) { const unsigned ib_size_dw = 16; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; - uint64_t dummy; + uint64_t addr; int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); @@ -282,15 +284,15 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring, return r; ib = &job->ibs[0]; - dummy = ib->gpu_addr + 1024; + addr = amdgpu_bo_gpu_offset(bo); ib->length_dw = 0; ib->ptr[ib->length_dw++] = 0x00000018; ib->ptr[ib->length_dw++] = 0x00000001; /* session info */ ib->ptr[ib->length_dw++] = handle; ib->ptr[ib->length_dw++] = 0x00010000; - ib->ptr[ib->length_dw++] = upper_32_bits(dummy); - ib->ptr[ib->length_dw++] = dummy; + ib->ptr[ib->length_dw++] = upper_32_bits(addr); + ib->ptr[ib->length_dw++] = addr; ib->ptr[ib->length_dw++] = 0x00000014; ib->ptr[ib->length_dw++] = 0x00000002; /* task info */ @@ -327,13 +329,20 @@ err: static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) { struct dma_fence *fence = NULL; + struct amdgpu_bo *bo = NULL; long r; - r = uvd_v6_0_enc_get_create_msg(ring, 1, NULL); + r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &bo, NULL, NULL); + if (r) + return r; + + r = uvd_v6_0_enc_get_create_msg(ring, 1, bo, NULL); if (r) goto error; - r = uvd_v6_0_enc_get_destroy_msg(ring, 1, &fence); + r = uvd_v6_0_enc_get_destroy_msg(ring, 1, bo, &fence); if (r) goto error; @@ -345,6 +354,8 @@ static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) error: dma_fence_put(fence); + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); return r; } From 5d230bc91f6c15e5d281f2851502918d98b9e770 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 15 Oct 2019 18:08:59 -0400 Subject: [PATCH 398/572] drm/amdgpu/uvd7: fix allocation size in enc ring test (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to allocate a large enough buffer for the session info, otherwise the IB test can overwrite other memory. v2: - session info is 128K according to mesa - use the same session info for create and destroy Bug: https://bugzilla.kernel.org/show_bug.cgi?id=204241 Acked-by: Christian König Reviewed-by: James Zhu Tested-by: James Zhu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 33 ++++++++++++++++++--------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 01f658fa72c6..0995378d8263 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -214,13 +214,14 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring) * Open up a stream for HW test */ static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, + struct amdgpu_bo *bo, struct dma_fence **fence) { const unsigned ib_size_dw = 16; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; - uint64_t dummy; + uint64_t addr; int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); @@ -228,15 +229,15 @@ static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle return r; ib = &job->ibs[0]; - dummy = ib->gpu_addr + 1024; + addr = amdgpu_bo_gpu_offset(bo); ib->length_dw = 0; ib->ptr[ib->length_dw++] = 0x00000018; ib->ptr[ib->length_dw++] = 0x00000001; /* session info */ ib->ptr[ib->length_dw++] = handle; ib->ptr[ib->length_dw++] = 0x00000000; - ib->ptr[ib->length_dw++] = upper_32_bits(dummy); - ib->ptr[ib->length_dw++] = dummy; + ib->ptr[ib->length_dw++] = upper_32_bits(addr); + ib->ptr[ib->length_dw++] = addr; ib->ptr[ib->length_dw++] = 0x00000014; ib->ptr[ib->length_dw++] = 0x00000002; /* task info */ @@ -275,13 +276,14 @@ err: * Close up a stream for HW test or if userspace failed to do so */ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, - struct dma_fence **fence) + struct amdgpu_bo *bo, + struct dma_fence **fence) { const unsigned ib_size_dw = 16; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; - uint64_t dummy; + uint64_t addr; int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); @@ -289,15 +291,15 @@ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handl return r; ib = &job->ibs[0]; - dummy = ib->gpu_addr + 1024; + addr = amdgpu_bo_gpu_offset(bo); ib->length_dw = 0; ib->ptr[ib->length_dw++] = 0x00000018; ib->ptr[ib->length_dw++] = 0x00000001; ib->ptr[ib->length_dw++] = handle; ib->ptr[ib->length_dw++] = 0x00000000; - ib->ptr[ib->length_dw++] = upper_32_bits(dummy); - ib->ptr[ib->length_dw++] = dummy; + ib->ptr[ib->length_dw++] = upper_32_bits(addr); + ib->ptr[ib->length_dw++] = addr; ib->ptr[ib->length_dw++] = 0x00000014; ib->ptr[ib->length_dw++] = 0x00000002; @@ -334,13 +336,20 @@ err: static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) { struct dma_fence *fence = NULL; + struct amdgpu_bo *bo = NULL; long r; - r = uvd_v7_0_enc_get_create_msg(ring, 1, NULL); + r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &bo, NULL, NULL); + if (r) + return r; + + r = uvd_v7_0_enc_get_create_msg(ring, 1, bo, NULL); if (r) goto error; - r = uvd_v7_0_enc_get_destroy_msg(ring, 1, &fence); + r = uvd_v7_0_enc_get_destroy_msg(ring, 1, bo, &fence); if (r) goto error; @@ -352,6 +361,8 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) error: dma_fence_put(fence); + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); return r; } From c81fffc2c9450750dd7a54a36a788a860ab0425d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 15 Oct 2019 18:09:41 -0400 Subject: [PATCH 399/572] drm/amdgpu/vcn: fix allocation size in enc ring test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to allocate a large enough buffer for the session info, otherwise the IB test can overwrite other memory. - Session info is 128K according to mesa - Use the same session info for create and destroy Bug: https://bugzilla.kernel.org/show_bug.cgi?id=204241 Acked-by: Christian König Reviewed-by: James Zhu Tested-by: James Zhu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 35 ++++++++++++++++--------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 7a6beb2e7c4e..3199e4a5ff12 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -569,13 +569,14 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring) } static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, - struct dma_fence **fence) + struct amdgpu_bo *bo, + struct dma_fence **fence) { const unsigned ib_size_dw = 16; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; - uint64_t dummy; + uint64_t addr; int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); @@ -583,14 +584,14 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand return r; ib = &job->ibs[0]; - dummy = ib->gpu_addr + 1024; + addr = amdgpu_bo_gpu_offset(bo); ib->length_dw = 0; ib->ptr[ib->length_dw++] = 0x00000018; ib->ptr[ib->length_dw++] = 0x00000001; /* session info */ ib->ptr[ib->length_dw++] = handle; - ib->ptr[ib->length_dw++] = upper_32_bits(dummy); - ib->ptr[ib->length_dw++] = dummy; + ib->ptr[ib->length_dw++] = upper_32_bits(addr); + ib->ptr[ib->length_dw++] = addr; ib->ptr[ib->length_dw++] = 0x0000000b; ib->ptr[ib->length_dw++] = 0x00000014; @@ -621,13 +622,14 @@ err: } static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, - struct dma_fence **fence) + struct amdgpu_bo *bo, + struct dma_fence **fence) { const unsigned ib_size_dw = 16; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; - uint64_t dummy; + uint64_t addr; int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); @@ -635,14 +637,14 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han return r; ib = &job->ibs[0]; - dummy = ib->gpu_addr + 1024; + addr = amdgpu_bo_gpu_offset(bo); ib->length_dw = 0; ib->ptr[ib->length_dw++] = 0x00000018; ib->ptr[ib->length_dw++] = 0x00000001; ib->ptr[ib->length_dw++] = handle; - ib->ptr[ib->length_dw++] = upper_32_bits(dummy); - ib->ptr[ib->length_dw++] = dummy; + ib->ptr[ib->length_dw++] = upper_32_bits(addr); + ib->ptr[ib->length_dw++] = addr; ib->ptr[ib->length_dw++] = 0x0000000b; ib->ptr[ib->length_dw++] = 0x00000014; @@ -675,13 +677,20 @@ err: int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) { struct dma_fence *fence = NULL; + struct amdgpu_bo *bo = NULL; long r; - r = amdgpu_vcn_enc_get_create_msg(ring, 1, NULL); + r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &bo, NULL, NULL); + if (r) + return r; + + r = amdgpu_vcn_enc_get_create_msg(ring, 1, bo, NULL); if (r) goto error; - r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &fence); + r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, bo, &fence); if (r) goto error; @@ -693,6 +702,8 @@ int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) error: dma_fence_put(fence); + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); return r; } From 209620b422945ee03cebb03f726e706d537b692d Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 3 Oct 2019 14:18:25 -0400 Subject: [PATCH 400/572] drm/amdgpu: user pages array memory leak fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit user_pages array should always be freed after validation regardless if user pages are changed after bo is created because with HMM change parse bo always allocate user pages array to get user pages for userptr bo. v2: remove unused local variable and amend commit v3: add back get user pages in gem_userptr_ioctl, to detect application bug where an userptr VMA is not ananymous memory and reject it. Bugzilla: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1844962 Signed-off-by: Philip Yang Tested-by: Joe Barnett Reviewed-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 5.3 --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 2e53feed40e2..82823d9a8ba8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -536,7 +536,6 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, list_for_each_entry(lobj, validated, tv.head) { struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo); - bool binding_userptr = false; struct mm_struct *usermm; usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm); @@ -553,7 +552,6 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, lobj->user_pages); - binding_userptr = true; } if (p->evictable == lobj) @@ -563,10 +561,8 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, if (r) return r; - if (binding_userptr) { - kvfree(lobj->user_pages); - lobj->user_pages = NULL; - } + kvfree(lobj->user_pages); + lobj->user_pages = NULL; } return 0; } From 3122051edc7c27cc08534be730f4c7c180919b8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 19 Sep 2019 10:38:57 +0200 Subject: [PATCH 401/572] drm/amdgpu: fix potential VM faults MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we allocate new page tables under memory pressure we should not evict old ones. Signed-off-by: Christian König Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 1fead0e8b890..7289e1b4fb60 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -453,7 +453,8 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, .interruptible = (bp->type != ttm_bo_type_kernel), .no_wait_gpu = false, .resv = bp->resv, - .flags = TTM_OPT_FLAG_ALLOW_RES_EVICT + .flags = bp->type != ttm_bo_type_kernel ? + TTM_OPT_FLAG_ALLOW_RES_EVICT : 0 }; struct amdgpu_bo *bo; unsigned long page_align, size = bp->size; From de51a5019ff32960218da8fd899fa3f361b031e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 18 Sep 2019 19:42:14 +0200 Subject: [PATCH 402/572] drm/amdgpu: fix error handling in amdgpu_bo_list_create MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to drop normal and userptr BOs separately. Signed-off-by: Christian König Acked-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 61e38e43ad1d..85b0515c0fdc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -140,7 +140,12 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, return 0; error_free: - while (i--) { + for (i = 0; i < last_entry; ++i) { + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(array[i].tv.bo); + + amdgpu_bo_unref(&bo); + } + for (i = first_userptr; i < num_entries; ++i) { struct amdgpu_bo *bo = ttm_to_amdgpu_bo(array[i].tv.bo); amdgpu_bo_unref(&bo); From ee027828c40faa92a7ef4c2b0641bbb3f4be95d3 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 17 Oct 2019 11:36:47 -0400 Subject: [PATCH 403/572] drm/amdgpu/vce: fix allocation size in enc ring test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to allocate a large enough buffer for the feedback buffer, otherwise the IB test can overwrite other memory. Reviewed-by: James Zhu Acked-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 20 +++++++++++++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h | 1 + 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index b70b3c45bb29..65044b1b3d4c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -429,13 +429,14 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp) * Open up a stream for HW test */ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, + struct amdgpu_bo *bo, struct dma_fence **fence) { const unsigned ib_size_dw = 1024; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; - uint64_t dummy; + uint64_t addr; int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); @@ -444,7 +445,7 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, ib = &job->ibs[0]; - dummy = ib->gpu_addr + 1024; + addr = amdgpu_bo_gpu_offset(bo); /* stitch together an VCE create msg */ ib->length_dw = 0; @@ -476,8 +477,8 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, ib->ptr[ib->length_dw++] = 0x00000014; /* len */ ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */ - ib->ptr[ib->length_dw++] = upper_32_bits(dummy); - ib->ptr[ib->length_dw++] = dummy; + ib->ptr[ib->length_dw++] = upper_32_bits(addr); + ib->ptr[ib->length_dw++] = addr; ib->ptr[ib->length_dw++] = 0x00000001; for (i = ib->length_dw; i < ib_size_dw; ++i) @@ -1110,13 +1111,20 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring) int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout) { struct dma_fence *fence = NULL; + struct amdgpu_bo *bo = NULL; long r; /* skip vce ring1/2 ib test for now, since it's not reliable */ if (ring != &ring->adev->vce.ring[0]) return 0; - r = amdgpu_vce_get_create_msg(ring, 1, NULL); + r = amdgpu_bo_create_reserved(ring->adev, 512, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &bo, NULL, NULL); + if (r) + return r; + + r = amdgpu_vce_get_create_msg(ring, 1, bo, NULL); if (r) goto error; @@ -1132,5 +1140,7 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout) error: dma_fence_put(fence); + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index 30ea54dd9117..e802f7d9db0a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h @@ -59,6 +59,7 @@ int amdgpu_vce_entity_init(struct amdgpu_device *adev); int amdgpu_vce_suspend(struct amdgpu_device *adev); int amdgpu_vce_resume(struct amdgpu_device *adev); int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, + struct amdgpu_bo *bo, struct dma_fence **fence); int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, bool direct, struct dma_fence **fence); From 491381ce07ca57f68c49c79a8a43da5b60749e32 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 17 Oct 2019 09:20:46 -0600 Subject: [PATCH 404/572] io_uring: fix up O_NONBLOCK handling for sockets We've got two issues with the non-regular file handling for non-blocking IO: 1) We don't want to re-do a short read in full for a non-regular file, as we can't just read the data again. 2) For non-regular files that don't support non-blocking IO attempts, we need to punt to async context even if the file is opened as non-blocking. Otherwise the caller always gets -EAGAIN. Add two new request flags to handle these cases. One is just a cache of the inode S_ISREG() status, the other tells io_uring that we always need to punt this request to async context, even if REQ_F_NOWAIT is set. Cc: stable@vger.kernel.org Reported-by: Hrvoje Zeba Tested-by: Hrvoje Zeba Signed-off-by: Jens Axboe --- fs/io_uring.c | 57 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index d2cb277da2f4..b7d4085d6ffd 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -322,6 +322,8 @@ struct io_kiocb { #define REQ_F_FAIL_LINK 256 /* fail rest of links */ #define REQ_F_SHADOW_DRAIN 512 /* link-drain shadow req */ #define REQ_F_TIMEOUT 1024 /* timeout request */ +#define REQ_F_ISREG 2048 /* regular file */ +#define REQ_F_MUST_PUNT 4096 /* must be punted even for NONBLOCK */ u64 user_data; u32 result; u32 sequence; @@ -914,26 +916,26 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events, return ret; } -static void kiocb_end_write(struct kiocb *kiocb) +static void kiocb_end_write(struct io_kiocb *req) { - if (kiocb->ki_flags & IOCB_WRITE) { - struct inode *inode = file_inode(kiocb->ki_filp); + /* + * Tell lockdep we inherited freeze protection from submission + * thread. + */ + if (req->flags & REQ_F_ISREG) { + struct inode *inode = file_inode(req->file); - /* - * Tell lockdep we inherited freeze protection from submission - * thread. - */ - if (S_ISREG(inode->i_mode)) - __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE); - file_end_write(kiocb->ki_filp); + __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE); } + file_end_write(req->file); } static void io_complete_rw(struct kiocb *kiocb, long res, long res2) { struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw); - kiocb_end_write(kiocb); + if (kiocb->ki_flags & IOCB_WRITE) + kiocb_end_write(req); if ((req->flags & REQ_F_LINK) && res != req->result) req->flags |= REQ_F_FAIL_LINK; @@ -945,7 +947,8 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2) { struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw); - kiocb_end_write(kiocb); + if (kiocb->ki_flags & IOCB_WRITE) + kiocb_end_write(req); if ((req->flags & REQ_F_LINK) && res != req->result) req->flags |= REQ_F_FAIL_LINK; @@ -1059,8 +1062,17 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s, if (!req->file) return -EBADF; - if (force_nonblock && !io_file_supports_async(req->file)) - force_nonblock = false; + if (S_ISREG(file_inode(req->file)->i_mode)) + req->flags |= REQ_F_ISREG; + + /* + * If the file doesn't support async, mark it as REQ_F_MUST_PUNT so + * we know to async punt it even if it was opened O_NONBLOCK + */ + if (force_nonblock && !io_file_supports_async(req->file)) { + req->flags |= REQ_F_MUST_PUNT; + return -EAGAIN; + } kiocb->ki_pos = READ_ONCE(sqe->off); kiocb->ki_flags = iocb_flags(kiocb->ki_filp); @@ -1081,7 +1093,8 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s, return ret; /* don't allow async punt if RWF_NOWAIT was requested */ - if (kiocb->ki_flags & IOCB_NOWAIT) + if ((kiocb->ki_flags & IOCB_NOWAIT) || + (req->file->f_flags & O_NONBLOCK)) req->flags |= REQ_F_NOWAIT; if (force_nonblock) @@ -1382,7 +1395,9 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s, * need async punt anyway, so it's more efficient to do it * here. */ - if (force_nonblock && ret2 > 0 && ret2 < read_size) + if (force_nonblock && !(req->flags & REQ_F_NOWAIT) && + (req->flags & REQ_F_ISREG) && + ret2 > 0 && ret2 < read_size) ret2 = -EAGAIN; /* Catch -EAGAIN return for forced non-blocking submission */ if (!force_nonblock || ret2 != -EAGAIN) { @@ -1447,7 +1462,7 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s, * released so that it doesn't complain about the held lock when * we return to userspace. */ - if (S_ISREG(file_inode(file)->i_mode)) { + if (req->flags & REQ_F_ISREG) { __sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, true); __sb_writers_release(file_inode(file)->i_sb, @@ -2282,7 +2297,13 @@ static int __io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, int ret; ret = __io_submit_sqe(ctx, req, s, force_nonblock); - if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) { + + /* + * We async punt it if the file wasn't marked NOWAIT, or if the file + * doesn't support non-blocking read/write attempts + */ + if (ret == -EAGAIN && (!(req->flags & REQ_F_NOWAIT) || + (req->flags & REQ_F_MUST_PUNT))) { struct io_uring_sqe *sqe_copy; sqe_copy = kmemdup(s->sqe, sizeof(*sqe_copy), GFP_KERNEL); From 8b07a65ad30e5612d9590fb50468ff4fa314cfc7 Mon Sep 17 00:00:00 2001 From: yangerkun Date: Thu, 17 Oct 2019 12:12:35 +0800 Subject: [PATCH 405/572] io_uring: fix logic error in io_timeout If ctx->cached_sq_head < nxt_sq_head, we should add UINT_MAX to tmp, not tmp_nxt. Fixes: 5da0fb1ab34c ("io_uring: consider the overflow of sequence for timeout req") Signed-off-by: yangerkun Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index b7d4085d6ffd..1d03afd74368 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1949,7 +1949,7 @@ static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe) * once there is some timeout req still be valid. */ if (ctx->cached_sq_head < nxt_sq_head) - tmp_nxt += UINT_MAX; + tmp += UINT_MAX; if (tmp >= tmp_nxt) break; From 8b95599c55ed24b36cf44a4720067cfe67edbcb4 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Wed, 16 Oct 2019 15:35:06 +0200 Subject: [PATCH 406/572] net: phy: micrel: Discern KSZ8051 and KSZ8795 PHYs The KSZ8051 PHY and the KSZ8794/KSZ8795/KSZ8765 switch share exactly the same PHY ID. Since KSZ8051 is higher in the ksphy_driver[] list of PHYs in the micrel PHY driver, it is used even with the KSZ87xx switch. This is wrong, since the KSZ8051 configures registers of the PHY which are not present on the simplified KSZ87xx switch PHYs and misconfigures other registers of the KSZ87xx switch PHYs. Fortunatelly, it is possible to tell apart the KSZ8051 PHY from the KSZ87xx switch by checking the Basic Status register Bit 0, which is read-only and indicates presence of the Extended Capability Registers. The KSZ8051 PHY has those registers while the KSZ87xx switch does not. This patch implements simple check for the presence of this bit for both the KSZ8051 PHY and KSZ87xx switch, to let both use the correct PHY driver instance. Fixes: 9d162ed69f51 ("net: phy: micrel: add support for KSZ8795") Signed-off-by: Marek Vasut Cc: Andrew Lunn Cc: David S. Miller Cc: Florian Fainelli Cc: George McCollister Cc: Heiner Kallweit Cc: Sean Nyekjaer Cc: Tristram Ha Cc: Woojung Huh Signed-off-by: David S. Miller --- drivers/net/phy/micrel.c | 40 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 2fea5541c35a..a0444e28c6e7 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -341,6 +341,35 @@ static int ksz8041_config_aneg(struct phy_device *phydev) return genphy_config_aneg(phydev); } +static int ksz8051_ksz8795_match_phy_device(struct phy_device *phydev, + const u32 ksz_phy_id) +{ + int ret; + + if ((phydev->phy_id & MICREL_PHY_ID_MASK) != ksz_phy_id) + return 0; + + ret = phy_read(phydev, MII_BMSR); + if (ret < 0) + return ret; + + /* KSZ8051 PHY and KSZ8794/KSZ8795/KSZ8765 switch share the same + * exact PHY ID. However, they can be told apart by the extended + * capability registers presence. The KSZ8051 PHY has them while + * the switch does not. + */ + ret &= BMSR_ERCAP; + if (ksz_phy_id == PHY_ID_KSZ8051) + return ret; + else + return !ret; +} + +static int ksz8051_match_phy_device(struct phy_device *phydev) +{ + return ksz8051_ksz8795_match_phy_device(phydev, PHY_ID_KSZ8051); +} + static int ksz8081_config_init(struct phy_device *phydev) { /* KSZPHY_OMSO_FACTORY_TEST is set at de-assertion of the reset line @@ -364,6 +393,11 @@ static int ksz8061_config_init(struct phy_device *phydev) return kszphy_config_init(phydev); } +static int ksz8795_match_phy_device(struct phy_device *phydev) +{ + return ksz8051_ksz8795_match_phy_device(phydev, PHY_ID_KSZ8795); +} + static int ksz9021_load_values_from_of(struct phy_device *phydev, const struct device_node *of_node, u16 reg, @@ -1017,8 +1051,6 @@ static struct phy_driver ksphy_driver[] = { .suspend = genphy_suspend, .resume = genphy_resume, }, { - .phy_id = PHY_ID_KSZ8051, - .phy_id_mask = MICREL_PHY_ID_MASK, .name = "Micrel KSZ8051", /* PHY_BASIC_FEATURES */ .driver_data = &ksz8051_type, @@ -1029,6 +1061,7 @@ static struct phy_driver ksphy_driver[] = { .get_sset_count = kszphy_get_sset_count, .get_strings = kszphy_get_strings, .get_stats = kszphy_get_stats, + .match_phy_device = ksz8051_match_phy_device, .suspend = genphy_suspend, .resume = genphy_resume, }, { @@ -1141,13 +1174,12 @@ static struct phy_driver ksphy_driver[] = { .suspend = genphy_suspend, .resume = genphy_resume, }, { - .phy_id = PHY_ID_KSZ8795, - .phy_id_mask = MICREL_PHY_ID_MASK, .name = "Micrel KSZ8795", /* PHY_BASIC_FEATURES */ .config_init = kszphy_config_init, .config_aneg = ksz8873mll_config_aneg, .read_status = ksz8873mll_read_status, + .match_phy_device = ksz8795_match_phy_device, .suspend = genphy_suspend, .resume = genphy_resume, }, { From 1d951ba3da67bbc7a9b0e05987e09552c2060e18 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Wed, 16 Oct 2019 15:35:07 +0200 Subject: [PATCH 407/572] net: phy: micrel: Update KSZ87xx PHY name The KSZ8795 PHY ID is in fact used by KSZ8794/KSZ8795/KSZ8765 switches. Update the PHY ID and name to reflect that, as this family of switches is commonly refered to as KSZ87xx Signed-off-by: Marek Vasut Cc: Andrew Lunn Cc: David S. Miller Cc: Florian Fainelli Cc: George McCollister Cc: Heiner Kallweit Cc: Sean Nyekjaer Cc: Tristram Ha Cc: Woojung Huh Signed-off-by: David S. Miller --- drivers/net/phy/micrel.c | 4 ++-- include/linux/micrel_phy.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index a0444e28c6e7..63dedec0433d 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -395,7 +395,7 @@ static int ksz8061_config_init(struct phy_device *phydev) static int ksz8795_match_phy_device(struct phy_device *phydev) { - return ksz8051_ksz8795_match_phy_device(phydev, PHY_ID_KSZ8795); + return ksz8051_ksz8795_match_phy_device(phydev, PHY_ID_KSZ87XX); } static int ksz9021_load_values_from_of(struct phy_device *phydev, @@ -1174,7 +1174,7 @@ static struct phy_driver ksphy_driver[] = { .suspend = genphy_suspend, .resume = genphy_resume, }, { - .name = "Micrel KSZ8795", + .name = "Micrel KSZ87XX Switch", /* PHY_BASIC_FEATURES */ .config_init = kszphy_config_init, .config_aneg = ksz8873mll_config_aneg, diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index ad24554f11f9..75f880c25bb8 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -31,7 +31,7 @@ #define PHY_ID_KSZ886X 0x00221430 #define PHY_ID_KSZ8863 0x00221435 -#define PHY_ID_KSZ8795 0x00221550 +#define PHY_ID_KSZ87XX 0x00221550 #define PHY_ID_KSZ9477 0x00221631 From 595e0651d0296bad2491a4a29a7a43eae6328b02 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Wed, 16 Oct 2019 20:52:09 +0200 Subject: [PATCH 408/572] ipv4: Return -ENETUNREACH if we can't create route but saddr is valid ...instead of -EINVAL. An issue was found with older kernel versions while unplugging a NFS client with pending RPCs, and the wrong error code here prevented it from recovering once link is back up with a configured address. Incidentally, this is not an issue anymore since commit 4f8943f80883 ("SUNRPC: Replace direct task wakeups from softirq context"), included in 5.2-rc7, had the effect of decoupling the forwarding of this error by using SO_ERROR in xs_wake_error(), as pointed out by Benjamin Coddington. To the best of my knowledge, this isn't currently causing any further issue, but the error code doesn't look appropriate anyway, and we might hit this in other paths as well. In detail, as analysed by Gonzalo Siero, once the route is deleted because the interface is down, and can't be resolved and we return -EINVAL here, this ends up, courtesy of inet_sk_rebuild_header(), as the socket error seen by tcp_write_err(), called by tcp_retransmit_timer(). In turn, tcp_write_err() indirectly calls xs_error_report(), which wakes up the RPC pending tasks with a status of -EINVAL. This is then seen by call_status() in the SUN RPC implementation, which aborts the RPC call calling rpc_exit(), instead of handling this as a potentially temporary condition, i.e. as a timeout. Return -EINVAL only if the input parameters passed to ip_route_output_key_hash_rcu() are actually invalid (this is the case if the specified source address is multicast, limited broadcast or all zeroes), but return -ENETUNREACH in all cases where, at the given moment, the given source address doesn't allow resolving the route. While at it, drop the initialisation of err to -ENETUNREACH, which was added to __ip_route_output_key() back then by commit 0315e3827048 ("net: Fix behaviour of unreachable, blackhole and prohibit routes"), but actually had no effect, as it was, and is, overwritten by the fib_lookup() return code assignment, and anyway ignored in all other branches, including the if (fl4->saddr) one: I find this rather confusing, as it would look like -ENETUNREACH is the "default" error, while that statement has no effect. Also note that after commit fc75fc8339e7 ("ipv4: dont create routes on down devices"), we would get -ENETUNREACH if the device is down, but -EINVAL if the source address is specified and we can't resolve the route, and this appears to be rather inconsistent. Reported-by: Stefan Walter Analysed-by: Benjamin Coddington Analysed-by: Gonzalo Siero Signed-off-by: Stefano Brivio Signed-off-by: David S. Miller --- net/ipv4/route.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 14654876127e..5bc172abd143 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2470,14 +2470,17 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4, int orig_oif = fl4->flowi4_oif; unsigned int flags = 0; struct rtable *rth; - int err = -ENETUNREACH; + int err; if (fl4->saddr) { - rth = ERR_PTR(-EINVAL); if (ipv4_is_multicast(fl4->saddr) || ipv4_is_lbcast(fl4->saddr) || - ipv4_is_zeronet(fl4->saddr)) + ipv4_is_zeronet(fl4->saddr)) { + rth = ERR_PTR(-EINVAL); goto out; + } + + rth = ERR_PTR(-ENETUNREACH); /* I removed check for oif == dev_out->oif here. It was wrong for two reasons: From 5018c59607a511cdee743b629c76206d9c9e6d7b Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Wed, 16 Oct 2019 12:03:15 -0700 Subject: [PATCH 409/572] ipv4: fix race condition between route lookup and invalidation Jesse and Ido reported the following race condition: - Received packet A is forwarded and cached dst entry is taken from the nexthop ('nhc->nhc_rth_input'). Calls skb_dst_set() - Given Jesse has busy routers ("ingesting full BGP routing tables from multiple ISPs"), route is added / deleted and rt_cache_flush() is called - Received packet B tries to use the same cached dst entry from t0, but rt_cache_valid() is no longer true and it is replaced in rt_cache_route() by the newer one. This calls dst_dev_put() on the original dst entry which assigns the blackhole netdev to 'dst->dev' - dst_input(skb) is called on packet A and it is dropped due to 'dst->dev' being the blackhole netdev There are 2 issues in the v4 routing code: 1. A per-netns counter is used to do the validation of the route. That means whenever a route is changed in the netns, users of all routes in the netns needs to redo lookup. v6 has an implementation of only updating fn_sernum for routes that are affected. 2. When rt_cache_valid() returns false, rt_cache_route() is called to throw away the current cache, and create a new one. This seems unnecessary because as long as this route does not change, the route cache does not need to be recreated. To fully solve the above 2 issues, it probably needs quite some code changes and requires careful testing, and does not suite for net branch. So this patch only tries to add the deleted cached rt into the uncached list, so user could still be able to use it to receive packets until it's done. Fixes: 95c47f9cf5e0 ("ipv4: call dst_dev_put() properly") Signed-off-by: Wei Wang Reported-by: Ido Schimmel Reported-by: Jesse Hathaway Tested-by: Jesse Hathaway Acked-by: Martin KaFai Lau Cc: David Ahern Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- net/ipv4/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 5bc172abd143..621f83434b24 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1482,7 +1482,7 @@ static bool rt_cache_route(struct fib_nh_common *nhc, struct rtable *rt) prev = cmpxchg(p, orig, rt); if (prev == orig) { if (orig) { - dst_dev_put(&orig->dst); + rt_add_uncached_list(orig); dst_release(&orig->dst); } } else { From 0cc76d2b14ffcd00a99d6ec3b9ebad39c659701a Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 17 Oct 2019 05:14:08 +0800 Subject: [PATCH 410/572] net: Update address for MediaTek ethernet driver in MAINTAINERS Update maintainers for MediaTek ethernet driver with Mark Lee. He is familiar with MediaTek mt762x series ethernet devices and will keep following maintenance from the vendor side. Signed-off-by: Sean Wang Signed-off-by: Mark Lee Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 49e4b004917b..60c8b337eb88 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10258,7 +10258,7 @@ MEDIATEK ETHERNET DRIVER M: Felix Fietkau M: John Crispin M: Sean Wang -M: Nelson Chang +M: Mark Lee L: netdev@vger.kernel.org S: Maintained F: drivers/net/ethernet/mediatek/ From 27e84243cb63601a10e366afe3e2d05bb03c1cb5 Mon Sep 17 00:00:00 2001 From: Bodo Stroesser Date: Mon, 14 Oct 2019 20:29:04 +0200 Subject: [PATCH 411/572] scsi: target: core: Do not overwrite CDB byte 1 passthrough_parse_cdb() - used by TCMU and PSCSI - attepts to reset the LUN field of SCSI-2 CDBs (bits 5,6,7 of byte 1). The current code is wrong as for newer commands not having the LUN field it overwrites relevant command bits (e.g. for SECURITY PROTOCOL IN / OUT). We think this code was unnecessary from the beginning or at least it is no longer useful. So we remove it entirely. Link: https://lore.kernel.org/r/12498eab-76fd-eaad-1316-c2827badb76a@ts.fujitsu.com Signed-off-by: Bodo Stroesser Reviewed-by: Bart Van Assche Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/target/target_core_device.c | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 04bf2acd3800..2d19f0e332b0 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -1074,27 +1074,6 @@ passthrough_parse_cdb(struct se_cmd *cmd, struct se_device *dev = cmd->se_dev; unsigned int size; - /* - * Clear a lun set in the cdb if the initiator talking to use spoke - * and old standards version, as we can't assume the underlying device - * won't choke up on it. - */ - switch (cdb[0]) { - case READ_10: /* SBC - RDProtect */ - case READ_12: /* SBC - RDProtect */ - case READ_16: /* SBC - RDProtect */ - case SEND_DIAGNOSTIC: /* SPC - SELF-TEST Code */ - case VERIFY: /* SBC - VRProtect */ - case VERIFY_16: /* SBC - VRProtect */ - case WRITE_VERIFY: /* SBC - VRProtect */ - case WRITE_VERIFY_12: /* SBC - VRProtect */ - case MAINTENANCE_IN: /* SPC - Parameter Data Format for SA RTPG */ - break; - default: - cdb[1] &= 0x1f; /* clear logical unit number */ - break; - } - /* * For REPORT LUNS we always need to emulate the response, for everything * else, pass it up. From 134993456c28c2ae14bd953236eb0742fe23d577 Mon Sep 17 00:00:00 2001 From: Don Brace Date: Mon, 14 Oct 2019 13:03:58 -0500 Subject: [PATCH 412/572] scsi: hpsa: add missing hunks in reset-patch Correct returning from reset before outstanding commands are completed for the device. Link: https://lore.kernel.org/r/157107623870.17997.11208813089704833029.stgit@brunhilda Reviewed-by: Scott Benesh Reviewed-by: Kevin Barnett Signed-off-by: Don Brace Signed-off-by: Martin K. Petersen --- drivers/scsi/hpsa.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 43a6b5350775..d93a5b20b9d1 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -5474,6 +5474,8 @@ static int hpsa_ciss_submit(struct ctlr_info *h, return SCSI_MLQUEUE_HOST_BUSY; } + c->device = dev; + enqueue_cmd_and_start_io(h, c); /* the cmd'll come back via intr handler in complete_scsi_command() */ return 0; @@ -5545,6 +5547,7 @@ static int hpsa_ioaccel_submit(struct ctlr_info *h, hpsa_cmd_init(h, c->cmdindex, c); c->cmd_type = CMD_SCSI; c->scsi_cmd = cmd; + c->device = dev; rc = hpsa_scsi_ioaccel_raid_map(h, c); if (rc < 0) /* scsi_dma_map failed. */ rc = SCSI_MLQUEUE_HOST_BUSY; @@ -5552,6 +5555,7 @@ static int hpsa_ioaccel_submit(struct ctlr_info *h, hpsa_cmd_init(h, c->cmdindex, c); c->cmd_type = CMD_SCSI; c->scsi_cmd = cmd; + c->device = dev; rc = hpsa_scsi_ioaccel_direct_map(h, c); if (rc < 0) /* scsi_dma_map failed. */ rc = SCSI_MLQUEUE_HOST_BUSY; From 77c301287ebae86cc71d03eb3806f271cb14da79 Mon Sep 17 00:00:00 2001 From: Yufen Yu Date: Tue, 15 Oct 2019 21:05:56 +0800 Subject: [PATCH 413/572] scsi: core: try to get module before removing device We have a test case like block/001 in blktests, which will create a scsi device by loading scsi_debug module and then try to delete the device by sysfs interface. At the same time, it may remove the scsi_debug module. And getting a invalid paging request BUG_ON as following: [ 34.625854] BUG: unable to handle page fault for address: ffffffffa0016bb8 [ 34.629189] Oops: 0000 [#1] SMP PTI [ 34.629618] CPU: 1 PID: 450 Comm: bash Tainted: G W 5.4.0-rc3+ #473 [ 34.632524] RIP: 0010:scsi_proc_hostdir_rm+0x5/0xa0 [ 34.643555] CR2: ffffffffa0016bb8 CR3: 000000012cd88000 CR4: 00000000000006e0 [ 34.644545] Call Trace: [ 34.644907] scsi_host_dev_release+0x6b/0x1f0 [ 34.645511] device_release+0x74/0x110 [ 34.646046] kobject_put+0x116/0x390 [ 34.646559] put_device+0x17/0x30 [ 34.647041] scsi_target_dev_release+0x2b/0x40 [ 34.647652] device_release+0x74/0x110 [ 34.648186] kobject_put+0x116/0x390 [ 34.648691] put_device+0x17/0x30 [ 34.649157] scsi_device_dev_release_usercontext+0x2e8/0x360 [ 34.649953] execute_in_process_context+0x29/0x80 [ 34.650603] scsi_device_dev_release+0x20/0x30 [ 34.651221] device_release+0x74/0x110 [ 34.651732] kobject_put+0x116/0x390 [ 34.652230] sysfs_unbreak_active_protection+0x3f/0x50 [ 34.652935] sdev_store_delete.cold.4+0x71/0x8f [ 34.653579] dev_attr_store+0x1b/0x40 [ 34.654103] sysfs_kf_write+0x3d/0x60 [ 34.654603] kernfs_fop_write+0x174/0x250 [ 34.655165] __vfs_write+0x1f/0x60 [ 34.655639] vfs_write+0xc7/0x280 [ 34.656117] ksys_write+0x6d/0x140 [ 34.656591] __x64_sys_write+0x1e/0x30 [ 34.657114] do_syscall_64+0xb1/0x400 [ 34.657627] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 34.658335] RIP: 0033:0x7f156f337130 During deleting scsi target, the scsi_debug module have been removed. Then, sdebug_driver_template belonged to the module cannot be accessd, resulting in scsi_proc_hostdir_rm() BUG_ON. To fix the bug, we add scsi_device_get() in sdev_store_delete() to try to increase refcount of module, avoiding the module been removed. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20191015130556.18061-1-yuyufen@huawei.com Signed-off-by: Yufen Yu Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_sysfs.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 64c96c7828ee..6d7362e7367e 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -730,6 +730,14 @@ sdev_store_delete(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct kernfs_node *kn; + struct scsi_device *sdev = to_scsi_device(dev); + + /* + * We need to try to get module, avoiding the module been removed + * during delete. + */ + if (scsi_device_get(sdev)) + return -ENODEV; kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); WARN_ON_ONCE(!kn); @@ -744,9 +752,10 @@ sdev_store_delete(struct device *dev, struct device_attribute *attr, * state into SDEV_DEL. */ device_remove_file(dev, attr); - scsi_remove_device(to_scsi_device(dev)); + scsi_remove_device(sdev); if (kn) sysfs_unbreak_active_protection(kn); + scsi_device_put(sdev); return count; }; static DEVICE_ATTR(delete, S_IWUSR, NULL, sdev_store_delete); From 1052b41b25cbadcb85ff04c3b46663e21168dd3e Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 17 Oct 2019 17:00:19 +0200 Subject: [PATCH 414/572] scsi: lpfc: remove left-over BUILD_NVME defines The BUILD_NVME define never got defined anywhere, causing NVMe commands to be treated as SCSI commands when freeing the buffers. This was causing a stuck discovery and a horrible crash in lpfc_set_rrq_active() later on. Link: https://lore.kernel.org/r/20191017150019.75769-1-hare@suse.de Fixes: c00f62e6c546 ("scsi: lpfc: Merge per-protocol WQ/CQ pairs into single per-cpu pair") Signed-off-by: Hannes Reinecke Reviewed-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_init.c | 2 -- drivers/scsi/lpfc/lpfc_scsi.c | 2 -- 2 files changed, 4 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index de64880c6c60..2c1e085bd3ce 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -9053,7 +9053,6 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) } } -#if defined(BUILD_NVME) /* Clear NVME stats */ if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { for (idx = 0; idx < phba->cfg_hdw_queue; idx++) { @@ -9061,7 +9060,6 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) sizeof(phba->sli4_hba.hdwq[idx].nvme_cstat)); } } -#endif /* Clear SCSI stats */ if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) { diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index fe1097666de4..6822cd9ff8f1 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -528,7 +528,6 @@ lpfc_sli4_io_xri_aborted(struct lpfc_hba *phba, list_del_init(&psb->list); psb->exch_busy = 0; psb->status = IOSTAT_SUCCESS; -#ifdef BUILD_NVME if (psb->cur_iocbq.iocb_flag == LPFC_IO_NVME) { qp->abts_nvme_io_bufs--; spin_unlock(&qp->abts_io_buf_list_lock); @@ -536,7 +535,6 @@ lpfc_sli4_io_xri_aborted(struct lpfc_hba *phba, lpfc_sli4_nvme_xri_aborted(phba, axri, psb); return; } -#endif qp->abts_scsi_io_bufs--; spin_unlock(&qp->abts_io_buf_list_lock); From 56a0b978d42f58c7e3ba715cf65af487d427524d Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 15 Oct 2019 22:07:31 +0800 Subject: [PATCH 415/572] ACPI: CPPC: Set pcc_data[pcc_ss_id] to NULL in acpi_cppc_processor_exit() When enabling KASAN and DEBUG_TEST_DRIVER_REMOVE, I find this KASAN warning: [ 20.872057] BUG: KASAN: use-after-free in pcc_data_alloc+0x40/0xb8 [ 20.878226] Read of size 4 at addr ffff00236cdeb684 by task swapper/0/1 [ 20.884826] [ 20.886309] CPU: 19 PID: 1 Comm: swapper/0 Not tainted 5.4.0-rc1-00009-ge7f7df3db5bf-dirty #289 [ 20.894994] Hardware name: Huawei D06 /D06, BIOS Hisilicon D06 UEFI RC0 - V1.16.01 03/15/2019 [ 20.903505] Call trace: [ 20.905942] dump_backtrace+0x0/0x200 [ 20.909593] show_stack+0x14/0x20 [ 20.912899] dump_stack+0xd4/0x130 [ 20.916291] print_address_description.isra.9+0x6c/0x3b8 [ 20.921592] __kasan_report+0x12c/0x23c [ 20.925417] kasan_report+0xc/0x18 [ 20.928808] __asan_load4+0x94/0xb8 [ 20.932286] pcc_data_alloc+0x40/0xb8 [ 20.935938] acpi_cppc_processor_probe+0x4e8/0xb08 [ 20.940717] __acpi_processor_start+0x48/0xb0 [ 20.945062] acpi_processor_start+0x40/0x60 [ 20.949235] really_probe+0x118/0x548 [ 20.952887] driver_probe_device+0x7c/0x148 [ 20.957059] device_driver_attach+0x94/0xa0 [ 20.961231] __driver_attach+0xa4/0x110 [ 20.965055] bus_for_each_dev+0xe8/0x158 [ 20.968966] driver_attach+0x30/0x40 [ 20.972531] bus_add_driver+0x234/0x2f0 [ 20.976356] driver_register+0xbc/0x1d0 [ 20.980182] acpi_processor_driver_init+0x40/0xe4 [ 20.984875] do_one_initcall+0xb4/0x254 [ 20.988700] kernel_init_freeable+0x24c/0x2f8 [ 20.993047] kernel_init+0x10/0x118 [ 20.996524] ret_from_fork+0x10/0x18 [ 21.000087] [ 21.001567] Allocated by task 1: [ 21.004785] save_stack+0x28/0xc8 [ 21.008089] __kasan_kmalloc.isra.9+0xbc/0xd8 [ 21.012435] kasan_kmalloc+0xc/0x18 [ 21.015913] pcc_data_alloc+0x94/0xb8 [ 21.019564] acpi_cppc_processor_probe+0x4e8/0xb08 [ 21.024343] __acpi_processor_start+0x48/0xb0 [ 21.028689] acpi_processor_start+0x40/0x60 [ 21.032860] really_probe+0x118/0x548 [ 21.036512] driver_probe_device+0x7c/0x148 [ 21.040684] device_driver_attach+0x94/0xa0 [ 21.044855] __driver_attach+0xa4/0x110 [ 21.048680] bus_for_each_dev+0xe8/0x158 [ 21.052591] driver_attach+0x30/0x40 [ 21.056155] bus_add_driver+0x234/0x2f0 [ 21.059980] driver_register+0xbc/0x1d0 [ 21.063805] acpi_processor_driver_init+0x40/0xe4 [ 21.068497] do_one_initcall+0xb4/0x254 [ 21.072322] kernel_init_freeable+0x24c/0x2f8 [ 21.076667] kernel_init+0x10/0x118 [ 21.080144] ret_from_fork+0x10/0x18 [ 21.083707] [ 21.085186] Freed by task 1: [ 21.088056] save_stack+0x28/0xc8 [ 21.091360] __kasan_slab_free+0x118/0x180 [ 21.095445] kasan_slab_free+0x10/0x18 [ 21.099183] kfree+0x80/0x268 [ 21.102139] acpi_cppc_processor_exit+0x1a8/0x1b8 [ 21.106832] acpi_processor_stop+0x70/0x80 [ 21.110917] really_probe+0x174/0x548 [ 21.114568] driver_probe_device+0x7c/0x148 [ 21.118740] device_driver_attach+0x94/0xa0 [ 21.122912] __driver_attach+0xa4/0x110 [ 21.126736] bus_for_each_dev+0xe8/0x158 [ 21.130648] driver_attach+0x30/0x40 [ 21.134212] bus_add_driver+0x234/0x2f0 [ 21.0x10/0x18 [ 21.161764] [ 21.163244] The buggy address belongs to the object at ffff00236cdeb600 [ 21.163244] which belongs to the cache kmalloc-256 of size 256 [ 21.175750] The buggy address is located 132 bytes inside of [ 21.175750] 256-byte region [ffff00236cdeb600, ffff00236cdeb700) [ 21.187473] The buggy address belongs to the page: [ 21.192254] page:fffffe008d937a00 refcount:1 mapcount:0 mapping:ffff002370c0fa00 index:0x0 compound_mapcount: 0 [ 21.202331] flags: 0x1ffff00000010200(slab|head) [ 21.206940] raw: 1ffff00000010200 dead000000000100 dead000000000122 ffff002370c0fa00 [ 21.214671] raw: 0000000000000000 00000000802a002a 00000001ffffffff 0000000000000000 [ 21.222400] page dumped because: kasan: bad access detected [ 21.227959] [ 21.229438] Memory state around the buggy address: [ 21.234218] ffff00236cdeb580: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 21.241427] ffff00236cdeb600: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 21.248637] >ffff00236cdeb680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 21.255845] ^ [ 21.259062] ffff00236cdeb700: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 21.266272] ffff00236cdeb780: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 21.273480] ================================================================== It seems that global pcc_data[pcc_ss_id] can be freed in acpi_cppc_processor_exit(), but we may later reference this value, so NULLify it when freed. Also remove the useless setting of data "pcc_channel_acquired", which we're about to free. Fixes: 85b1407bf6d2 ("ACPI / CPPC: Make CPPC ACPI driver aware of PCC subspace IDs") Signed-off-by: John Garry Cc: 4.15+ # 4.15+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/cppc_acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 3b2525908dd8..a1a858ad4d18 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -905,8 +905,8 @@ void acpi_cppc_processor_exit(struct acpi_processor *pr) pcc_data[pcc_ss_id]->refcount--; if (!pcc_data[pcc_ss_id]->refcount) { pcc_mbox_free_channel(pcc_data[pcc_ss_id]->pcc_channel); - pcc_data[pcc_ss_id]->pcc_channel_acquired = 0; kfree(pcc_data[pcc_ss_id]); + pcc_data[pcc_ss_id] = NULL; } } } From f7c0f50f1857c1cf013466fcea4dc98d116bf456 Mon Sep 17 00:00:00 2001 From: Andrea Parri Date: Tue, 15 Oct 2019 12:35:02 +0200 Subject: [PATCH 416/572] x86/hyperv: Set pv_info.name to "Hyper-V" Michael reported that the x86/hyperv initialization code prints the following dmesg when running in a VM on Hyper-V: [ 0.000738] Booting paravirtualized kernel on bare hardware Let the x86/hyperv initialization code set pv_info.name to "Hyper-V" so dmesg reports correctly: [ 0.000172] Booting paravirtualized kernel on Hyper-V [ tglx: Folded build fix provided by Yue ] Reported-by: Michael Kelley Signed-off-by: Andrea Parri Signed-off-by: Thomas Gleixner Reviewed-by: Wei Liu Reviewed-by: Michael Kelley Cc: YueHaibing Link: https://lkml.kernel.org/r/20191015103502.13156-1-parri.andrea@gmail.com --- arch/x86/kernel/cpu/mshyperv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 267daad8c036..c656d92cd708 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -216,6 +216,10 @@ static void __init ms_hyperv_init_platform(void) int hv_host_info_ecx; int hv_host_info_edx; +#ifdef CONFIG_PARAVIRT + pv_info.name = "Hyper-V"; +#endif + /* * Extract the features and hints */ From 228d120051a2234356690924c1f42e07e54e1eaf Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Sun, 29 Sep 2019 09:13:52 +0800 Subject: [PATCH 417/572] x86/boot/acpi: Move get_cmdline_acpi_rsdp() under #ifdef guard When building with "EXTRA_CFLAGS=-Wall" gcc warns: arch/x86/boot/compressed/acpi.c:29:30: warning: get_cmdline_acpi_rsdp defined but not used [-Wunused-function] get_cmdline_acpi_rsdp() is only used when CONFIG_RANDOMIZE_BASE and CONFIG_MEMORY_HOTREMOVE are both enabled, so any build where one of these config options is disabled has this issue. Move the function under the same ifdef guard as the call site. [ tglx: Add context to the changelog so it becomes useful ] Fixes: 41fa1ee9c6d6 ("acpi: Ignore acpi_rsdp kernel param when the kernel has been locked down") Signed-off-by: Zhenzhong Duan Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/1569719633-32164-1-git-send-email-zhenzhong.duan@oracle.com --- arch/x86/boot/compressed/acpi.c | 48 ++++++++++++++++----------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c index 149795c369f2..25019d42ae93 100644 --- a/arch/x86/boot/compressed/acpi.c +++ b/arch/x86/boot/compressed/acpi.c @@ -20,30 +20,6 @@ */ struct mem_vector immovable_mem[MAX_NUMNODES*2]; -/* - * Max length of 64-bit hex address string is 19, prefix "0x" + 16 hex - * digits, and '\0' for termination. - */ -#define MAX_ADDR_LEN 19 - -static acpi_physical_address get_cmdline_acpi_rsdp(void) -{ - acpi_physical_address addr = 0; - -#ifdef CONFIG_KEXEC - char val[MAX_ADDR_LEN] = { }; - int ret; - - ret = cmdline_find_option("acpi_rsdp", val, MAX_ADDR_LEN); - if (ret < 0) - return 0; - - if (kstrtoull(val, 16, &addr)) - return 0; -#endif - return addr; -} - /* * Search EFI system tables for RSDP. If both ACPI_20_TABLE_GUID and * ACPI_TABLE_GUID are found, take the former, which has more features. @@ -298,6 +274,30 @@ acpi_physical_address get_rsdp_addr(void) } #if defined(CONFIG_RANDOMIZE_BASE) && defined(CONFIG_MEMORY_HOTREMOVE) +/* + * Max length of 64-bit hex address string is 19, prefix "0x" + 16 hex + * digits, and '\0' for termination. + */ +#define MAX_ADDR_LEN 19 + +static acpi_physical_address get_cmdline_acpi_rsdp(void) +{ + acpi_physical_address addr = 0; + +#ifdef CONFIG_KEXEC + char val[MAX_ADDR_LEN] = { }; + int ret; + + ret = cmdline_find_option("acpi_rsdp", val, MAX_ADDR_LEN); + if (ret < 0) + return 0; + + if (kstrtoull(val, 16, &addr)) + return 0; +#endif + return addr; +} + /* Compute SRAT address from RSDP. */ static unsigned long get_acpi_srat_table(void) { From a2b11184389489dcca4fa899b290fc2751ea55ac Mon Sep 17 00:00:00 2001 From: Matthias Maennich Date: Fri, 18 Oct 2019 10:31:40 +0100 Subject: [PATCH 418/572] modpost: delegate updating namespaces to separate function Let the function 'sym_update_namespace' take care of updating the namespace for a symbol. While this currently only replaces one single location where namespaces are updated, in a following patch, this function will get more call sites. The function signature is intentionally close to sym_update_crc and taking the name by char* seems like unnecessary work as the symbol has to be looked up again. In a later patch of this series, this concern will be addressed. This function ensures that symbol::namespace is either NULL or has a valid non-empty value. Previously, the empty string was considered 'no namespace' as well and this lead to confusion. Acked-by: Will Deacon Reviewed-by: Greg Kroah-Hartman Reviewed-by: Masahiro Yamada Signed-off-by: Matthias Maennich Signed-off-by: Jessica Yu --- scripts/mod/modpost.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 936d3ad23c83..4041a265d2b6 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -362,6 +362,25 @@ static char *sym_extract_namespace(const char **symname) return namespace; } +static void sym_update_namespace(const char *symname, const char *namespace) +{ + struct symbol *s = find_symbol(symname); + + /* + * That symbol should have been created earlier and thus this is + * actually an assertion. + */ + if (!s) { + merror("Could not update namespace(%s) for symbol %s\n", + namespace, symname); + return; + } + + free(s->namespace); + s->namespace = + namespace && namespace[0] ? NOFAIL(strdup(namespace)) : NULL; +} + /** * Add an exported symbol - it may have already been added without a * CRC, in this case just update the CRC @@ -383,8 +402,7 @@ static struct symbol *sym_add_exported(const char *name, const char *namespace, s->module = mod; } } - free(s->namespace); - s->namespace = namespace ? strdup(namespace) : NULL; + sym_update_namespace(name, namespace); s->preloaded = 0; s->vmlinux = is_vmlinux(mod->name); s->kernel = 0; @@ -2196,7 +2214,7 @@ static int check_exports(struct module *mod) else basename = mod->name; - if (exp->namespace && exp->namespace[0]) { + if (exp->namespace) { add_namespace(&mod->required_namespaces, exp->namespace); From 9ae5bd1847566e079ffb7607394389ac54815f2b Mon Sep 17 00:00:00 2001 From: Matthias Maennich Date: Fri, 18 Oct 2019 10:31:41 +0100 Subject: [PATCH 419/572] modpost: make updating the symbol namespace explicit Setting the symbol namespace of a symbol within sym_add_exported feels displaced and lead to issues in the current implementation of symbol namespaces. This patch makes updating the namespace an explicit call to decouple it from adding a symbol to the export list. Acked-by: Will Deacon Reviewed-by: Greg Kroah-Hartman Reviewed-by: Masahiro Yamada Signed-off-by: Matthias Maennich Signed-off-by: Jessica Yu --- scripts/mod/modpost.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 4041a265d2b6..c43f1b1532f7 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -385,8 +385,8 @@ static void sym_update_namespace(const char *symname, const char *namespace) * Add an exported symbol - it may have already been added without a * CRC, in this case just update the CRC **/ -static struct symbol *sym_add_exported(const char *name, const char *namespace, - struct module *mod, enum export export) +static struct symbol *sym_add_exported(const char *name, struct module *mod, + enum export export) { struct symbol *s = find_symbol(name); @@ -402,7 +402,6 @@ static struct symbol *sym_add_exported(const char *name, const char *namespace, s->module = mod; } } - sym_update_namespace(name, namespace); s->preloaded = 0; s->vmlinux = is_vmlinux(mod->name); s->kernel = 0; @@ -764,7 +763,8 @@ static void handle_modversions(struct module *mod, struct elf_info *info, if (strstarts(symname, "__ksymtab_")) { name = symname + strlen("__ksymtab_"); namespace = sym_extract_namespace(&name); - sym_add_exported(name, namespace, mod, export); + sym_add_exported(name, mod, export); + sym_update_namespace(name, namespace); free(namespace); } if (strcmp(symname, "init_module") == 0) @@ -2472,12 +2472,12 @@ static void read_dump(const char *fname, unsigned int kernel) mod = new_module(modname); mod->skip = 1; } - s = sym_add_exported(symname, namespace, mod, - export_no(export)); + s = sym_add_exported(symname, mod, export_no(export)); s->kernel = kernel; s->preloaded = 1; s->is_static = 0; sym_update_crc(symname, mod, crc, export_no(export)); + sym_update_namespace(symname, namespace); } release_file(file, size); return; From 69923208431e097ce3830647aee98e5bd3e889c8 Mon Sep 17 00:00:00 2001 From: Matthias Maennich Date: Fri, 18 Oct 2019 10:31:42 +0100 Subject: [PATCH 420/572] symbol namespaces: revert to previous __ksymtab name scheme The introduction of Symbol Namespaces changed the naming schema of the __ksymtab entries from __kysmtab__symbol to __ksymtab_NAMESPACE.symbol. That caused some breakages in tools that depend on the name layout in either the binaries(vmlinux,*.ko) or in System.map. E.g. kmod's depmod would not be able to read System.map without a patch to support symbol namespaces. A warning reported by depmod for namespaced symbols would look like depmod: WARNING: [...]/uas.ko needs unknown symbol usb_stor_adjust_quirks In order to address this issue, revert to the original naming scheme and rather read the __kstrtabns_ entries and their corresponding values from __ksymtab_strings to update the namespace values for symbols. After having read all symbols and handled them in handle_modversions(), the symbols are created. In a second pass, read the __kstrtabns_ entries and update the namespaces accordingly. Fixes: 8651ec01daed ("module: add support for symbol namespaces.") Reported-by: Stefan Wahren Suggested-by: Masahiro Yamada Acked-by: Will Deacon Reviewed-by: Greg Kroah-Hartman Reviewed-by: Masahiro Yamada Signed-off-by: Matthias Maennich Signed-off-by: Jessica Yu --- include/linux/export.h | 14 +++++--------- scripts/mod/modpost.c | 33 ++++++++++++++++++--------------- scripts/mod/modpost.h | 1 + 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/include/linux/export.h b/include/linux/export.h index 621158ecd2e2..941d075f03d6 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -18,8 +18,6 @@ extern struct module __this_module; #define THIS_MODULE ((struct module *)0) #endif -#define NS_SEPARATOR "." - #ifdef CONFIG_MODVERSIONS /* Mark the CRC weak since genksyms apparently decides not to * generate a checksums for some symbols */ @@ -48,11 +46,11 @@ extern struct module __this_module; * absolute relocations that require runtime processing on relocatable * kernels. */ -#define __KSYMTAB_ENTRY_NS(sym, sec, ns) \ +#define __KSYMTAB_ENTRY_NS(sym, sec) \ __ADDRESSABLE(sym) \ asm(" .section \"___ksymtab" sec "+" #sym "\", \"a\" \n" \ " .balign 4 \n" \ - "__ksymtab_" #ns NS_SEPARATOR #sym ": \n" \ + "__ksymtab_" #sym ": \n" \ " .long " #sym "- . \n" \ " .long __kstrtab_" #sym "- . \n" \ " .long __kstrtabns_" #sym "- . \n" \ @@ -74,16 +72,14 @@ struct kernel_symbol { int namespace_offset; }; #else -#define __KSYMTAB_ENTRY_NS(sym, sec, ns) \ - static const struct kernel_symbol __ksymtab_##sym##__##ns \ - asm("__ksymtab_" #ns NS_SEPARATOR #sym) \ +#define __KSYMTAB_ENTRY_NS(sym, sec) \ + static const struct kernel_symbol __ksymtab_##sym \ __attribute__((section("___ksymtab" sec "+" #sym), used)) \ __aligned(sizeof(void *)) \ = { (unsigned long)&sym, __kstrtab_##sym, __kstrtabns_##sym } #define __KSYMTAB_ENTRY(sym, sec) \ static const struct kernel_symbol __ksymtab_##sym \ - asm("__ksymtab_" #sym) \ __attribute__((section("___ksymtab" sec "+" #sym), used)) \ __aligned(sizeof(void *)) \ = { (unsigned long)&sym, __kstrtab_##sym, NULL } @@ -115,7 +111,7 @@ struct kernel_symbol { static const char __kstrtabns_##sym[] \ __attribute__((section("__ksymtab_strings"), used, aligned(1))) \ = #ns; \ - __KSYMTAB_ENTRY_NS(sym, sec, ns) + __KSYMTAB_ENTRY_NS(sym, sec) #define ___EXPORT_SYMBOL(sym, sec) \ ___export_symbol_common(sym, sec); \ diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index c43f1b1532f7..d2a30a7b3f07 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -348,18 +348,11 @@ static enum export export_from_sec(struct elf_info *elf, unsigned int sec) return export_unknown; } -static char *sym_extract_namespace(const char **symname) +static const char *namespace_from_kstrtabns(struct elf_info *info, + Elf_Sym *kstrtabns) { - char *namespace = NULL; - char *ns_separator; - - ns_separator = strchr(*symname, '.'); - if (ns_separator) { - namespace = NOFAIL(strndup(*symname, ns_separator - *symname)); - *symname = ns_separator + 1; - } - - return namespace; + char *value = info->ksymtab_strings + kstrtabns->st_value; + return value[0] ? value : NULL; } static void sym_update_namespace(const char *symname, const char *namespace) @@ -600,6 +593,10 @@ static int parse_elf(struct elf_info *info, const char *filename) info->export_unused_gpl_sec = i; else if (strcmp(secname, "__ksymtab_gpl_future") == 0) info->export_gpl_future_sec = i; + else if (strcmp(secname, "__ksymtab_strings") == 0) + info->ksymtab_strings = (void *)hdr + + sechdrs[i].sh_offset - + sechdrs[i].sh_addr; if (sechdrs[i].sh_type == SHT_SYMTAB) { unsigned int sh_link_idx; @@ -689,7 +686,6 @@ static void handle_modversions(struct module *mod, struct elf_info *info, enum export export; bool is_crc = false; const char *name; - char *namespace; if ((!is_vmlinux(mod->name) || mod->is_dot_o) && strstarts(symname, "__ksymtab")) @@ -762,10 +758,7 @@ static void handle_modversions(struct module *mod, struct elf_info *info, /* All exported symbols */ if (strstarts(symname, "__ksymtab_")) { name = symname + strlen("__ksymtab_"); - namespace = sym_extract_namespace(&name); sym_add_exported(name, mod, export); - sym_update_namespace(name, namespace); - free(namespace); } if (strcmp(symname, "init_module") == 0) mod->has_init = 1; @@ -2061,6 +2054,16 @@ static void read_symbols(const char *modname) handle_moddevtable(mod, &info, sym, symname); } + /* Apply symbol namespaces from __kstrtabns_ entries. */ + for (sym = info.symtab_start; sym < info.symtab_stop; sym++) { + symname = remove_dot(info.strtab + sym->st_name); + + if (strstarts(symname, "__kstrtabns_")) + sym_update_namespace(symname + strlen("__kstrtabns_"), + namespace_from_kstrtabns(&info, + sym)); + } + // check for static EXPORT_SYMBOL_* functions && global vars for (sym = info.symtab_start; sym < info.symtab_stop; sym++) { unsigned char bind = ELF_ST_BIND(sym->st_info); diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h index 92a926d375d2..ad271bc6c313 100644 --- a/scripts/mod/modpost.h +++ b/scripts/mod/modpost.h @@ -143,6 +143,7 @@ struct elf_info { Elf_Section export_gpl_sec; Elf_Section export_unused_gpl_sec; Elf_Section export_gpl_future_sec; + char *ksymtab_strings; char *strtab; char *modinfo; unsigned int modinfo_len; From a4f40484e7f1dff56bb9f286cc59ffa36e0259eb Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Fri, 18 Oct 2019 10:53:14 +0800 Subject: [PATCH 421/572] nvme-pci: Set the prp2 correctly when using more than 4k page In the current code, the nvme is using a fixed 4k PRP entry size, but if the kernel use a page size which is more than 4k, we should consider the situation that the bv_offset may be larger than the dev->ctrl.page_size. Otherwise we may miss setting the prp2 and then cause the command can't be executed correctly. Fixes: dff824b2aadb ("nvme-pci: optimize mapping of small single segment requests") Cc: stable@vger.kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Kevin Hao Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 4b181969c432..869f462e6b6e 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -773,7 +773,8 @@ static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev, struct bio_vec *bv) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - unsigned int first_prp_len = dev->ctrl.page_size - bv->bv_offset; + unsigned int offset = bv->bv_offset & (dev->ctrl.page_size - 1); + unsigned int first_prp_len = dev->ctrl.page_size - offset; iod->first_dma = dma_map_bvec(dev->dev, bv, rq_dma_dir(req), 0); if (dma_mapping_error(dev->dev, iod->first_dma)) From 46ac18c347b00be29b265c28209b0f3c38a1f142 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 18 Oct 2019 11:34:22 +0200 Subject: [PATCH 422/572] iommu/amd: Check PM_LEVEL_SIZE() condition in locked section The increase_address_space() function has to check the PM_LEVEL_SIZE() condition again under the domain->lock to avoid a false trigger of the WARN_ON_ONCE() and to avoid that the address space is increase more often than necessary. Reported-by: Qian Cai Fixes: 754265bcab78 ("iommu/amd: Fix race in increase_address_space()") Reviewed-by: Jerry Snitselaar Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index bcd89ea50a8b..dd555078258c 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1464,6 +1464,7 @@ static void free_pagetable(struct protection_domain *domain) * to 64 bits. */ static bool increase_address_space(struct protection_domain *domain, + unsigned long address, gfp_t gfp) { unsigned long flags; @@ -1472,8 +1473,8 @@ static bool increase_address_space(struct protection_domain *domain, spin_lock_irqsave(&domain->lock, flags); - if (WARN_ON_ONCE(domain->mode == PAGE_MODE_6_LEVEL)) - /* address space already 64 bit large */ + if (address <= PM_LEVEL_SIZE(domain->mode) || + WARN_ON_ONCE(domain->mode == PAGE_MODE_6_LEVEL)) goto out; pte = (void *)get_zeroed_page(gfp); @@ -1506,7 +1507,7 @@ static u64 *alloc_pte(struct protection_domain *domain, BUG_ON(!is_power_of_2(page_size)); while (address > PM_LEVEL_SIZE(domain->mode)) - *updated = increase_address_space(domain, gfp) || *updated; + *updated = increase_address_space(domain, address, gfp) || *updated; level = domain->mode - 1; pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; From 9c24eaf81cc44d4bb38081c99eafd72ed85cf7f3 Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Tue, 8 Oct 2019 10:33:57 -0400 Subject: [PATCH 423/572] iommu/vt-d: Return the correct dma mask when we are bypassing the IOMMU We must return a mask covering the full physical RAM when bypassing the IOMMU mapping. Also, in iommu_need_mapping, we need to check using dma_direct_get_required_mask to ensure that the device's dma_mask can cover physical RAM before deciding to bypass IOMMU mapping. Based on an earlier patch from Christoph Hellwig. Fixes: 249baa547901 ("dma-mapping: provide a better default ->get_required_mask") Signed-off-by: Arvind Sankar Reviewed-by: Lu Baolu Acked-by: Joerg Roedel Signed-off-by: Christoph Hellwig --- drivers/iommu/intel-iommu.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 3f974919d3bd..79e35b3180ac 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3471,7 +3471,7 @@ static bool iommu_need_mapping(struct device *dev) if (dev->coherent_dma_mask && dev->coherent_dma_mask < dma_mask) dma_mask = dev->coherent_dma_mask; - if (dma_mask >= dma_get_required_mask(dev)) + if (dma_mask >= dma_direct_get_required_mask(dev)) return false; /* @@ -3775,6 +3775,13 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele return nelems; } +static u64 intel_get_required_mask(struct device *dev) +{ + if (!iommu_need_mapping(dev)) + return dma_direct_get_required_mask(dev); + return DMA_BIT_MASK(32); +} + static const struct dma_map_ops intel_dma_ops = { .alloc = intel_alloc_coherent, .free = intel_free_coherent, @@ -3787,6 +3794,7 @@ static const struct dma_map_ops intel_dma_ops = { .dma_supported = dma_direct_supported, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, + .get_required_mask = intel_get_required_mask, }; static void From 19feeee5c5af114f3cb8ad2772680b7a1d16f043 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Mon, 26 Aug 2019 19:14:51 -0500 Subject: [PATCH 424/572] ARM: OMAP2+: Plug in device_enable/idle ops for IOMMUs The OMAP IOMMU driver requires the device_enable/idle platform data ops on all the IOMMU devices to be able to enable and disable the clocks after commit db8918f61d51 ("iommu/omap: streamline enable/disable through runtime pm callbacks"). Plug in these pdata ops for all the existing IOMMUs through pdata quirks to maintain functionality. Signed-off-by: Suman Anna Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/pdata-quirks.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c index d942a3357090..ac5da6ad2017 100644 --- a/arch/arm/mach-omap2/pdata-quirks.c +++ b/arch/arm/mach-omap2/pdata-quirks.c @@ -89,6 +89,8 @@ static struct iommu_platform_data omap3_iommu_pdata = { .reset_name = "mmu", .assert_reset = omap_device_assert_hardreset, .deassert_reset = omap_device_deassert_hardreset, + .device_enable = omap_device_enable, + .device_idle = omap_device_idle, }; static int omap3_sbc_t3730_twl_callback(struct device *dev, @@ -424,6 +426,8 @@ static struct iommu_platform_data omap4_iommu_pdata = { .reset_name = "mmu_cache", .assert_reset = omap_device_assert_hardreset, .deassert_reset = omap_device_deassert_hardreset, + .device_enable = omap_device_enable, + .device_idle = omap_device_idle, }; #endif From 0af3e1a491dde06dfcb0b063d5b543dfebded0f9 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Mon, 26 Aug 2019 19:14:52 -0500 Subject: [PATCH 425/572] ARM: OMAP2+: Add pdata for OMAP3 ISP IOMMU The OMAP3 ISP IOMMU does not have any reset lines, so it didn't need any pdata previously. The OMAP IOMMU driver now requires the platform data ops for device_enable/idle on all the IOMMU devices after commit db8918f61d51 ("iommu/omap: streamline enable/disable through runtime pm callbacks") to enable/disable the clocks properly and maintain the reference count and the omap_hwmod state machine. So, add these callbacks through iommu pdata quirks for the OMAP3 ISP IOMMU. Signed-off-by: Suman Anna Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/pdata-quirks.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c index ac5da6ad2017..2efd18e8824c 100644 --- a/arch/arm/mach-omap2/pdata-quirks.c +++ b/arch/arm/mach-omap2/pdata-quirks.c @@ -93,6 +93,11 @@ static struct iommu_platform_data omap3_iommu_pdata = { .device_idle = omap_device_idle, }; +static struct iommu_platform_data omap3_iommu_isp_pdata = { + .device_enable = omap_device_enable, + .device_idle = omap_device_idle, +}; + static int omap3_sbc_t3730_twl_callback(struct device *dev, unsigned gpio, unsigned ngpio) @@ -621,6 +626,8 @@ static struct of_dev_auxdata omap_auxdata_lookup[] = { #ifdef CONFIG_ARCH_OMAP3 OF_DEV_AUXDATA("ti,omap2-iommu", 0x5d000000, "5d000000.mmu", &omap3_iommu_pdata), + OF_DEV_AUXDATA("ti,omap2-iommu", 0x480bd400, "480bd400.mmu", + &omap3_iommu_isp_pdata), OF_DEV_AUXDATA("ti,omap3-smartreflex-core", 0x480cb000, "480cb000.smartreflex", &omap_sr_pdata[OMAP_SR_CORE]), OF_DEV_AUXDATA("ti,omap3-smartreflex-mpu-iva", 0x480c9000, From c7d8669f46ba97f6a8e14d6e9b8d6c39e2c07727 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 17 Oct 2019 11:21:44 -0700 Subject: [PATCH 426/572] bus: ti-sysc: Fix watchdog quirk handling I noticed that when probed with ti-sysc, watchdog can trigger on am3, am4 and dra7 causing a device reset. Turns out I made several mistakes implementing the watchdog quirk handling: 1. We must do both writes to spr register 2. We must also call the reset quirk on disable 3. On am3 and am4 we need to also set swsup quirk flag I probably only tested this earlier with watchdog service running when the watchdog never gets disabled. Fixes: 4e23be473e30 ("bus: ti-sysc: Add support for module specific reset quirks") Signed-off-by: Tony Lindgren --- drivers/bus/ti-sysc.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index ad50efb470aa..2b6670daf7fc 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -74,6 +74,7 @@ static const char * const clock_names[SYSC_MAX_CLOCKS] = { * @clk_disable_quirk: module specific clock disable quirk * @reset_done_quirk: module specific reset done quirk * @module_enable_quirk: module specific enable quirk + * @module_disable_quirk: module specific disable quirk */ struct sysc { struct device *dev; @@ -100,6 +101,7 @@ struct sysc { void (*clk_disable_quirk)(struct sysc *sysc); void (*reset_done_quirk)(struct sysc *sysc); void (*module_enable_quirk)(struct sysc *sysc); + void (*module_disable_quirk)(struct sysc *sysc); }; static void sysc_parse_dts_quirks(struct sysc *ddata, struct device_node *np, @@ -959,6 +961,9 @@ static int sysc_disable_module(struct device *dev) if (ddata->offsets[SYSC_SYSCONFIG] == -ENODEV) return 0; + if (ddata->module_disable_quirk) + ddata->module_disable_quirk(ddata); + regbits = ddata->cap->regbits; reg = sysc_read(ddata, ddata->offsets[SYSC_SYSCONFIG]); @@ -1248,6 +1253,9 @@ static const struct sysc_revision_quirk sysc_revision_quirks[] = { SYSC_MODULE_QUIRK_SGX), SYSC_QUIRK("wdt", 0, 0, 0x10, 0x14, 0x502a0500, 0xfffff0f0, SYSC_MODULE_QUIRK_WDT), + /* Watchdog on am3 and am4 */ + SYSC_QUIRK("wdt", 0x44e35000, 0, 0x10, 0x14, 0x502a0500, 0xfffff0f0, + SYSC_MODULE_QUIRK_WDT | SYSC_QUIRK_SWSUP_SIDLE), #ifdef DEBUG SYSC_QUIRK("adc", 0, 0, 0x10, -1, 0x47300001, 0xffffffff, 0), @@ -1440,14 +1448,14 @@ static void sysc_reset_done_quirk_wdt(struct sysc *ddata) !(val & 0x10), 100, MAX_MODULE_SOFTRESET_WAIT); if (error) - dev_warn(ddata->dev, "wdt disable spr failed\n"); + dev_warn(ddata->dev, "wdt disable step1 failed\n"); - sysc_write(ddata, wps, 0x5555); + sysc_write(ddata, spr, 0x5555); error = readl_poll_timeout(ddata->module_va + wps, val, !(val & 0x10), 100, MAX_MODULE_SOFTRESET_WAIT); if (error) - dev_warn(ddata->dev, "wdt disable wps failed\n"); + dev_warn(ddata->dev, "wdt disable step2 failed\n"); } static void sysc_init_module_quirks(struct sysc *ddata) @@ -1471,8 +1479,10 @@ static void sysc_init_module_quirks(struct sysc *ddata) if (ddata->cfg.quirks & SYSC_MODULE_QUIRK_SGX) ddata->module_enable_quirk = sysc_module_enable_quirk_sgx; - if (ddata->cfg.quirks & SYSC_MODULE_QUIRK_WDT) + if (ddata->cfg.quirks & SYSC_MODULE_QUIRK_WDT) { ddata->reset_done_quirk = sysc_reset_done_quirk_wdt; + ddata->module_disable_quirk = sysc_reset_done_quirk_wdt; + } } static int sysc_clockdomain_init(struct sysc *ddata) From 7de48402faa32298c3551ea32c76ccb4f9d3025d Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Wed, 16 Oct 2019 16:06:29 -0700 Subject: [PATCH 427/572] net: bcmgenet: don't set phydev->link from MAC When commit 28b2e0d2cd13 ("net: phy: remove parameter new_link from phy_mac_interrupt()") removed the new_link parameter it set the phydev->link state from the MAC before invoking phy_mac_interrupt(). However, once commit 88d6272acaaa ("net: phy: avoid unneeded MDIO reads in genphy_read_status") was added this initialization prevents the proper determination of the connection parameters by the function genphy_read_status(). This commit removes that initialization to restore the proper functionality. Fixes: 88d6272acaaa ("net: phy: avoid unneeded MDIO reads in genphy_read_status") Signed-off-by: Doug Berger Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 12cb77ef1081..10d68017ff6c 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2612,10 +2612,8 @@ static void bcmgenet_irq_task(struct work_struct *work) spin_unlock_irq(&priv->lock); /* Link UP/DOWN event */ - if (status & UMAC_IRQ_LINK_EVENT) { - priv->dev->phydev->link = !!(status & UMAC_IRQ_LINK_UP); + if (status & UMAC_IRQ_LINK_EVENT) phy_mac_interrupt(priv->dev->phydev); - } } /* bcmgenet_isr1: handle Rx and Tx priority queues */ From fe586b823372a9f43f90e2c6aa0573992ce7ccb7 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Wed, 16 Oct 2019 16:06:30 -0700 Subject: [PATCH 428/572] net: phy: bcm7xxx: define soft_reset for 40nm EPHY The internal 40nm EPHYs use a "Workaround for putting the PHY in IDDQ mode." These PHYs require a soft reset to restore functionality after they are powered back up. This commit defines the soft_reset function to use genphy_soft_reset during phy_init_hw to accommodate this. Fixes: 6e2d85ec0559 ("net: phy: Stop with excessive soft reset") Signed-off-by: Doug Berger Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/bcm7xxx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c index 8fc33867e524..af8eabe7a6d4 100644 --- a/drivers/net/phy/bcm7xxx.c +++ b/drivers/net/phy/bcm7xxx.c @@ -572,6 +572,7 @@ static int bcm7xxx_28nm_probe(struct phy_device *phydev) .name = _name, \ /* PHY_BASIC_FEATURES */ \ .flags = PHY_IS_INTERNAL, \ + .soft_reset = genphy_soft_reset, \ .config_init = bcm7xxx_config_init, \ .suspend = bcm7xxx_suspend, \ .resume = bcm7xxx_config_init, \ From 1f515486275a08a17a2c806b844cca18f7de5b34 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Wed, 16 Oct 2019 16:06:31 -0700 Subject: [PATCH 429/572] net: bcmgenet: soft reset 40nm EPHYs before MAC init It turns out that the "Workaround for putting the PHY in IDDQ mode" used by the internal EPHYs on 40nm Set-Top Box chips when powering down puts the interface to the GENET MAC in a state that can cause subsequent MAC resets to be incomplete. Rather than restore the forced soft reset when powering up internal PHYs, this commit moves the invocation of phy_init_hw earlier in the MAC initialization sequence to just before the MAC reset in the open and resume functions. This allows the interface to be stable and allows the MAC resets to be successful. The bcmgenet_mii_probe() function is split in two to accommodate this. The new function bcmgenet_mii_connect() handles the first half of the functionality before the MAC initialization, and the bcmgenet_mii_config() function is extended to provide the remaining PHY configuration following the MAC initialization. Fixes: 484bfa1507bf ("Revert "net: bcmgenet: Software reset EPHY after power on"") Signed-off-by: Doug Berger Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/genet/bcmgenet.c | 28 +++-- .../net/ethernet/broadcom/genet/bcmgenet.h | 2 +- drivers/net/ethernet/broadcom/genet/bcmmii.c | 114 ++++++++---------- 3 files changed, 70 insertions(+), 74 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 10d68017ff6c..f0937c650e3c 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2872,6 +2872,12 @@ static int bcmgenet_open(struct net_device *dev) if (priv->internal_phy) bcmgenet_power_up(priv, GENET_POWER_PASSIVE); + ret = bcmgenet_mii_connect(dev); + if (ret) { + netdev_err(dev, "failed to connect to PHY\n"); + goto err_clk_disable; + } + /* take MAC out of reset */ bcmgenet_umac_reset(priv); @@ -2881,6 +2887,12 @@ static int bcmgenet_open(struct net_device *dev) reg = bcmgenet_umac_readl(priv, UMAC_CMD); priv->crc_fwd_en = !!(reg & CMD_CRC_FWD); + ret = bcmgenet_mii_config(dev, true); + if (ret) { + netdev_err(dev, "unsupported PHY\n"); + goto err_disconnect_phy; + } + bcmgenet_set_hw_addr(priv, dev->dev_addr); if (priv->internal_phy) { @@ -2896,7 +2908,7 @@ static int bcmgenet_open(struct net_device *dev) ret = bcmgenet_init_dma(priv); if (ret) { netdev_err(dev, "failed to initialize DMA\n"); - goto err_clk_disable; + goto err_disconnect_phy; } /* Always enable ring 16 - descriptor ring */ @@ -2919,25 +2931,19 @@ static int bcmgenet_open(struct net_device *dev) goto err_irq0; } - ret = bcmgenet_mii_probe(dev); - if (ret) { - netdev_err(dev, "failed to connect to PHY\n"); - goto err_irq1; - } - bcmgenet_netif_start(dev); netif_tx_start_all_queues(dev); return 0; -err_irq1: - free_irq(priv->irq1, priv); err_irq0: free_irq(priv->irq0, priv); err_fini_dma: bcmgenet_dma_teardown(priv); bcmgenet_fini_dma(priv); +err_disconnect_phy: + phy_disconnect(dev->phydev); err_clk_disable: if (priv->internal_phy) bcmgenet_power_down(priv, GENET_POWER_PASSIVE); @@ -3618,6 +3624,8 @@ static int bcmgenet_resume(struct device *d) if (priv->internal_phy) bcmgenet_power_up(priv, GENET_POWER_PASSIVE); + phy_init_hw(dev->phydev); + bcmgenet_umac_reset(priv); init_umac(priv); @@ -3626,8 +3634,6 @@ static int bcmgenet_resume(struct device *d) if (priv->wolopts) clk_disable_unprepare(priv->clk_wol); - phy_init_hw(dev->phydev); - /* Speed settings must be restored */ bcmgenet_mii_config(priv->dev, false); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index dbc69d8fa05f..7fbf573d8d52 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -720,8 +720,8 @@ GENET_IO_MACRO(rbuf, GENET_RBUF_OFF); /* MDIO routines */ int bcmgenet_mii_init(struct net_device *dev); +int bcmgenet_mii_connect(struct net_device *dev); int bcmgenet_mii_config(struct net_device *dev, bool init); -int bcmgenet_mii_probe(struct net_device *dev); void bcmgenet_mii_exit(struct net_device *dev); void bcmgenet_phy_power_set(struct net_device *dev, bool enable); void bcmgenet_mii_setup(struct net_device *dev); diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index e7c291bf4ed1..17bb8d60a157 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -173,6 +173,46 @@ static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv) bcmgenet_fixed_phy_link_update); } +int bcmgenet_mii_connect(struct net_device *dev) +{ + struct bcmgenet_priv *priv = netdev_priv(dev); + struct device_node *dn = priv->pdev->dev.of_node; + struct phy_device *phydev; + u32 phy_flags = 0; + int ret; + + /* Communicate the integrated PHY revision */ + if (priv->internal_phy) + phy_flags = priv->gphy_rev; + + /* Initialize link state variables that bcmgenet_mii_setup() uses */ + priv->old_link = -1; + priv->old_speed = -1; + priv->old_duplex = -1; + priv->old_pause = -1; + + if (dn) { + phydev = of_phy_connect(dev, priv->phy_dn, bcmgenet_mii_setup, + phy_flags, priv->phy_interface); + if (!phydev) { + pr_err("could not attach to PHY\n"); + return -ENODEV; + } + } else { + phydev = dev->phydev; + phydev->dev_flags = phy_flags; + + ret = phy_connect_direct(dev, phydev, bcmgenet_mii_setup, + priv->phy_interface); + if (ret) { + pr_err("could not attach to PHY\n"); + return -ENODEV; + } + } + + return 0; +} + int bcmgenet_mii_config(struct net_device *dev, bool init) { struct bcmgenet_priv *priv = netdev_priv(dev); @@ -266,71 +306,21 @@ int bcmgenet_mii_config(struct net_device *dev, bool init) bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL); } - if (init) + if (init) { + linkmode_copy(phydev->advertising, phydev->supported); + + /* The internal PHY has its link interrupts routed to the + * Ethernet MAC ISRs. On GENETv5 there is a hardware issue + * that prevents the signaling of link UP interrupts when + * the link operates at 10Mbps, so fallback to polling for + * those versions of GENET. + */ + if (priv->internal_phy && !GENET_IS_V5(priv)) + phydev->irq = PHY_IGNORE_INTERRUPT; + dev_info(kdev, "configuring instance for %s\n", phy_name); - - return 0; -} - -int bcmgenet_mii_probe(struct net_device *dev) -{ - struct bcmgenet_priv *priv = netdev_priv(dev); - struct device_node *dn = priv->pdev->dev.of_node; - struct phy_device *phydev; - u32 phy_flags = 0; - int ret; - - /* Communicate the integrated PHY revision */ - if (priv->internal_phy) - phy_flags = priv->gphy_rev; - - /* Initialize link state variables that bcmgenet_mii_setup() uses */ - priv->old_link = -1; - priv->old_speed = -1; - priv->old_duplex = -1; - priv->old_pause = -1; - - if (dn) { - phydev = of_phy_connect(dev, priv->phy_dn, bcmgenet_mii_setup, - phy_flags, priv->phy_interface); - if (!phydev) { - pr_err("could not attach to PHY\n"); - return -ENODEV; - } - } else { - phydev = dev->phydev; - phydev->dev_flags = phy_flags; - - ret = phy_connect_direct(dev, phydev, bcmgenet_mii_setup, - priv->phy_interface); - if (ret) { - pr_err("could not attach to PHY\n"); - return -ENODEV; - } } - /* Configure port multiplexer based on what the probed PHY device since - * reading the 'max-speed' property determines the maximum supported - * PHY speed which is needed for bcmgenet_mii_config() to configure - * things appropriately. - */ - ret = bcmgenet_mii_config(dev, true); - if (ret) { - phy_disconnect(dev->phydev); - return ret; - } - - linkmode_copy(phydev->advertising, phydev->supported); - - /* The internal PHY has its link interrupts routed to the - * Ethernet MAC ISRs. On GENETv5 there is a hardware issue - * that prevents the signaling of link UP interrupts when - * the link operates at 10Mbps, so fallback to polling for - * those versions of GENET. - */ - if (priv->internal_phy && !GENET_IS_V5(priv)) - dev->phydev->irq = PHY_IGNORE_INTERRUPT; - return 0; } From 25382b991d252aed961cd434176240f9de6bb15f Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Wed, 16 Oct 2019 16:06:32 -0700 Subject: [PATCH 430/572] net: bcmgenet: reset 40nm EPHY on energy detect The EPHY integrated into the 40nm Set-Top Box devices can falsely detect energy when connected to a disabled peer interface. When the peer interface is enabled the EPHY will detect and report the link as active, but on occasion may get into a state where it is not able to exchange data with the connected GENET MAC. This issue has not been observed when the link parameters are auto-negotiated; however, it has been observed with a manually configured link. It has been empirically determined that issuing a soft reset to the EPHY when energy is detected prevents it from getting into this bad state. Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Signed-off-by: Doug Berger Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index f0937c650e3c..0f138280315a 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2018,6 +2018,8 @@ static void bcmgenet_link_intr_enable(struct bcmgenet_priv *priv) */ if (priv->internal_phy) { int0_enable |= UMAC_IRQ_LINK_EVENT; + if (GENET_IS_V1(priv) || GENET_IS_V2(priv) || GENET_IS_V3(priv)) + int0_enable |= UMAC_IRQ_PHY_DET_R; } else if (priv->ext_phy) { int0_enable |= UMAC_IRQ_LINK_EVENT; } else if (priv->phy_interface == PHY_INTERFACE_MODE_MOCA) { @@ -2611,9 +2613,14 @@ static void bcmgenet_irq_task(struct work_struct *work) priv->irq0_stat = 0; spin_unlock_irq(&priv->lock); + if (status & UMAC_IRQ_PHY_DET_R && + priv->dev->phydev->autoneg != AUTONEG_ENABLE) + phy_init_hw(priv->dev->phydev); + /* Link UP/DOWN event */ if (status & UMAC_IRQ_LINK_EVENT) phy_mac_interrupt(priv->dev->phydev); + } /* bcmgenet_isr1: handle Rx and Tx priority queues */ @@ -2708,7 +2715,7 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id) } /* all other interested interrupts handled in bottom half */ - status &= UMAC_IRQ_LINK_EVENT; + status &= (UMAC_IRQ_LINK_EVENT | UMAC_IRQ_PHY_DET_R); if (status) { /* Save irq status for bottom-half processing. */ spin_lock_irqsave(&priv->lock, flags); From 9669fffc1415bb0c30e5d2ec98a8e1c3a418cb9c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 16 Oct 2019 18:00:56 -0700 Subject: [PATCH 431/572] net: ensure correct skb->tstamp in various fragmenters Thomas found that some forwarded packets would be stuck in FQ packet scheduler because their skb->tstamp contained timestamps far in the future. We thought we addressed this point in commit 8203e2d844d3 ("net: clear skb->tstamp in forwarding paths") but there is still an issue when/if a packet needs to be fragmented. In order to meet EDT requirements, we have to make sure all fragments get the original skb->tstamp. Note that this original skb->tstamp should be zero in forwarding path, but might have a non zero value in output path if user decided so. Fixes: fb420d5d91c1 ("tcp/fq: move back to CLOCK_MONOTONIC") Signed-off-by: Eric Dumazet Reported-by: Thomas Bartschies Signed-off-by: David S. Miller --- net/bridge/netfilter/nf_conntrack_bridge.c | 3 +++ net/ipv4/ip_output.c | 3 +++ net/ipv6/ip6_output.c | 3 +++ net/ipv6/netfilter.c | 3 +++ 4 files changed, 12 insertions(+) diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c index 8842798c29e6..506d6141e44e 100644 --- a/net/bridge/netfilter/nf_conntrack_bridge.c +++ b/net/bridge/netfilter/nf_conntrack_bridge.c @@ -33,6 +33,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk, { int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; unsigned int hlen, ll_rs, mtu; + ktime_t tstamp = skb->tstamp; struct ip_frag_state state; struct iphdr *iph; int err; @@ -80,6 +81,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk, if (iter.frag) ip_fraglist_prepare(skb, &iter); + skb->tstamp = tstamp; err = output(net, sk, data, skb); if (err || !iter.frag) break; @@ -104,6 +106,7 @@ slow_path: goto blackhole; } + skb2->tstamp = tstamp; err = output(net, sk, data, skb2); if (err) goto blackhole; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 28fca408812c..814b9b8882a0 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -771,6 +771,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, struct rtable *rt = skb_rtable(skb); unsigned int mtu, hlen, ll_rs; struct ip_fraglist_iter iter; + ktime_t tstamp = skb->tstamp; struct ip_frag_state state; int err = 0; @@ -846,6 +847,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, ip_fraglist_prepare(skb, &iter); } + skb->tstamp = tstamp; err = output(net, sk, skb); if (!err) @@ -900,6 +902,7 @@ slow_path: /* * Put this fragment into the sending queue. */ + skb2->tstamp = tstamp; err = output(net, sk, skb2); if (err) goto fail; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index edadee4a7e76..71827b56c006 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -768,6 +768,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, inet6_sk(skb->sk) : NULL; struct ip6_frag_state state; unsigned int mtu, hlen, nexthdr_offset; + ktime_t tstamp = skb->tstamp; int hroom, err = 0; __be32 frag_id; u8 *prevhdr, nexthdr = 0; @@ -855,6 +856,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, if (iter.frag) ip6_fraglist_prepare(skb, &iter); + skb->tstamp = tstamp; err = output(net, sk, skb); if (!err) IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), @@ -913,6 +915,7 @@ slow_path: /* * Put this fragment into the sending queue. */ + frag->tstamp = tstamp; err = output(net, sk, frag); if (err) goto fail; diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index a9bff556d3b2..409e79b84a83 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -119,6 +119,7 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, struct sk_buff *)) { int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; + ktime_t tstamp = skb->tstamp; struct ip6_frag_state state; u8 *prevhdr, nexthdr = 0; unsigned int mtu, hlen; @@ -183,6 +184,7 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, if (iter.frag) ip6_fraglist_prepare(skb, &iter); + skb->tstamp = tstamp; err = output(net, sk, data, skb); if (err || !iter.frag) break; @@ -215,6 +217,7 @@ slow_path: goto blackhole; } + skb2->tstamp = tstamp; err = output(net, sk, data, skb2); if (err) goto blackhole; From 22e58665a01006d05f0239621f7d41cacca96cc4 Mon Sep 17 00:00:00 2001 From: Junya Monden Date: Wed, 16 Oct 2019 14:42:55 +0200 Subject: [PATCH 432/572] ASoC: rsnd: Reinitialize bit clock inversion flag for every format setting Unlike other format-related DAI parameters, rdai->bit_clk_inv flag is not properly re-initialized when setting format for new stream processing. The inversion, if requested, is then applied not to default, but to a previous value, which leads to SCKP bit in SSICR register being set incorrectly. Fix this by re-setting the flag to its initial value, determined by format. Fixes: 1a7889ca8aba3 ("ASoC: rsnd: fixup SND_SOC_DAIFMT_xB_xF behavior") Cc: Andrew Gabbasov Cc: Jiada Wang Cc: Timo Wischer Cc: stable@vger.kernel.org # v3.17+ Signed-off-by: Junya Monden Signed-off-by: Eugeniu Rosca Acked-by: Kuninori Morimoto Link: https://lore.kernel.org/r/20191016124255.7442-1-erosca@de.adit-jv.com Signed-off-by: Mark Brown --- sound/soc/sh/rcar/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c index bda5b958d0dc..e9596c2096cd 100644 --- a/sound/soc/sh/rcar/core.c +++ b/sound/soc/sh/rcar/core.c @@ -761,6 +761,7 @@ static int rsnd_soc_dai_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) } /* set format */ + rdai->bit_clk_inv = 0; switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { case SND_SOC_DAIFMT_I2S: rdai->sys_delay = 0; From e5f0d490fb718254a884453e47fcd48493cd67ea Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Thu, 17 Oct 2019 10:50:44 +0800 Subject: [PATCH 433/572] ASoC: Intel: sof-rt5682: add a check for devm_clk_get sof_audio_probe misses a check for devm_clk_get and may cause problems. Add a check for it to fix the bug. Signed-off-by: Chuhong Yuan Acked-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20191017025044.31474-1-hslester96@gmail.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/sof_rt5682.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sound/soc/intel/boards/sof_rt5682.c b/sound/soc/intel/boards/sof_rt5682.c index 5ce643d62faf..4f6e58c3954a 100644 --- a/sound/soc/intel/boards/sof_rt5682.c +++ b/sound/soc/intel/boards/sof_rt5682.c @@ -603,6 +603,15 @@ static int sof_audio_probe(struct platform_device *pdev) /* need to get main clock from pmc */ if (sof_rt5682_quirk & SOF_RT5682_MCLK_BYTCHT_EN) { ctx->mclk = devm_clk_get(&pdev->dev, "pmc_plt_clk_3"); + if (IS_ERR(ctx->mclk)) { + ret = PTR_ERR(ctx->mclk); + + dev_err(&pdev->dev, + "Failed to get MCLK from pmc_plt_clk_3: %d\n", + ret); + return ret; + } + ret = clk_prepare_enable(ctx->mclk); if (ret < 0) { dev_err(&pdev->dev, From 9b7a7f921689d6c254e5acd670be631ebd82d54d Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Fri, 18 Oct 2019 10:20:40 +0200 Subject: [PATCH 434/572] ASoC: stm32: sai: fix sysclk management on shutdown The commit below, adds a call to sysclk callback on shutdown. This introduces a regression in stm32 SAI driver, as some clock services are called twice, leading to unbalanced calls. Move processing related to mclk from shutdown to sysclk callback. When requested frequency is 0, assume shutdown and release mclk. Fixes: 2458adb8f92a ("SoC: simple-card-utils: set 0Hz to sysclk when shutdown") Signed-off-by: Olivier Moysan Link: https://lore.kernel.org/r/20191018082040.31022-1-olivier.moysan@st.com Signed-off-by: Mark Brown --- sound/soc/stm/stm32_sai_sub.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c index d7501f88aaa6..a4060813bc74 100644 --- a/sound/soc/stm/stm32_sai_sub.c +++ b/sound/soc/stm/stm32_sai_sub.c @@ -505,10 +505,20 @@ static int stm32_sai_set_sysclk(struct snd_soc_dai *cpu_dai, if (dir == SND_SOC_CLOCK_OUT && sai->sai_mclk) { ret = regmap_update_bits(sai->regmap, STM_SAI_CR1_REGX, SAI_XCR1_NODIV, - (unsigned int)~SAI_XCR1_NODIV); + freq ? 0 : SAI_XCR1_NODIV); if (ret < 0) return ret; + /* Assume shutdown if requested frequency is 0Hz */ + if (!freq) { + /* Release mclk rate only if rate was actually set */ + if (sai->mclk_rate) { + clk_rate_exclusive_put(sai->sai_mclk); + sai->mclk_rate = 0; + } + return 0; + } + /* If master clock is used, set parent clock now */ ret = stm32_sai_set_parent_clock(sai, freq); if (ret) @@ -1093,15 +1103,6 @@ static void stm32_sai_shutdown(struct snd_pcm_substream *substream, regmap_update_bits(sai->regmap, STM_SAI_IMR_REGX, SAI_XIMR_MASK, 0); - regmap_update_bits(sai->regmap, STM_SAI_CR1_REGX, SAI_XCR1_NODIV, - SAI_XCR1_NODIV); - - /* Release mclk rate only if rate was actually set */ - if (sai->mclk_rate) { - clk_rate_exclusive_put(sai->sai_mclk); - sai->mclk_rate = 0; - } - clk_disable_unprepare(sai->sai_ck); spin_lock_irqsave(&sai->irq_lock, flags); From 95a32c98055f664f9b3f34c41e153d4dcedd0eff Mon Sep 17 00:00:00 2001 From: Dragos Tarcatu Date: Fri, 18 Oct 2019 07:38:06 -0500 Subject: [PATCH 435/572] ASoC: SOF: control: return true when kcontrol values change All the kcontrol put() functions are currently returning 0 when successful. This does not go well with alsamixer as it does not seem to get notified on SND_CTL_EVENT_MASK_VALUE callbacks when values change for (some of) the sof kcontrols. This patch fixes that by returning true for volume, switch and enum type kcontrols when values do change in put(). Signed-off-by: Dragos Tarcatu Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20191018123806.18063-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/control.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/sound/soc/sof/control.c b/sound/soc/sof/control.c index a4983f90ff5b..2b8711eda362 100644 --- a/sound/soc/sof/control.c +++ b/sound/soc/sof/control.c @@ -60,13 +60,16 @@ int snd_sof_volume_put(struct snd_kcontrol *kcontrol, struct snd_sof_dev *sdev = scontrol->sdev; struct sof_ipc_ctrl_data *cdata = scontrol->control_data; unsigned int i, channels = scontrol->num_channels; + bool change = false; + u32 value; /* update each channel */ for (i = 0; i < channels; i++) { - cdata->chanv[i].value = - mixer_to_ipc(ucontrol->value.integer.value[i], + value = mixer_to_ipc(ucontrol->value.integer.value[i], scontrol->volume_table, sm->max + 1); + change = change || (value != cdata->chanv[i].value); cdata->chanv[i].channel = i; + cdata->chanv[i].value = value; } /* notify DSP of mixer updates */ @@ -76,8 +79,7 @@ int snd_sof_volume_put(struct snd_kcontrol *kcontrol, SOF_CTRL_TYPE_VALUE_CHAN_GET, SOF_CTRL_CMD_VOLUME, true); - - return 0; + return change; } int snd_sof_switch_get(struct snd_kcontrol *kcontrol, @@ -105,11 +107,15 @@ int snd_sof_switch_put(struct snd_kcontrol *kcontrol, struct snd_sof_dev *sdev = scontrol->sdev; struct sof_ipc_ctrl_data *cdata = scontrol->control_data; unsigned int i, channels = scontrol->num_channels; + bool change = false; + u32 value; /* update each channel */ for (i = 0; i < channels; i++) { - cdata->chanv[i].value = ucontrol->value.integer.value[i]; + value = ucontrol->value.integer.value[i]; + change = change || (value != cdata->chanv[i].value); cdata->chanv[i].channel = i; + cdata->chanv[i].value = value; } /* notify DSP of mixer updates */ @@ -120,7 +126,7 @@ int snd_sof_switch_put(struct snd_kcontrol *kcontrol, SOF_CTRL_CMD_SWITCH, true); - return 0; + return change; } int snd_sof_enum_get(struct snd_kcontrol *kcontrol, @@ -148,11 +154,15 @@ int snd_sof_enum_put(struct snd_kcontrol *kcontrol, struct snd_sof_dev *sdev = scontrol->sdev; struct sof_ipc_ctrl_data *cdata = scontrol->control_data; unsigned int i, channels = scontrol->num_channels; + bool change = false; + u32 value; /* update each channel */ for (i = 0; i < channels; i++) { - cdata->chanv[i].value = ucontrol->value.enumerated.item[i]; + value = ucontrol->value.enumerated.item[i]; + change = change || (value != cdata->chanv[i].value); cdata->chanv[i].channel = i; + cdata->chanv[i].value = value; } /* notify DSP of enum updates */ @@ -163,7 +173,7 @@ int snd_sof_enum_put(struct snd_kcontrol *kcontrol, SOF_CTRL_CMD_ENUM, true); - return 0; + return change; } int snd_sof_bytes_get(struct snd_kcontrol *kcontrol, From 2e978795bb4c14293bf6ecf32621d32529706aef Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 17 Oct 2019 10:11:03 +0300 Subject: [PATCH 436/572] mlxsw: spectrum_trap: Push Ethernet header before reporting trap devlink maintains packets and bytes statistics for each trap. Since eth_type_trans() was called to set the skb's protocol, the data pointer no longer points to the start of the packet and the bytes accounting is off by 14 bytes. Fix this by pushing the skb's data pointer to the start of the packet. Fixes: b5ce611fd96e ("mlxsw: spectrum: Add devlink-trap support") Reported-by: Alex Kushnarov Tested-by: Alex Kushnarov Acked-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c index 899450b28621..7c03b661ae7e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c @@ -99,6 +99,7 @@ static void mlxsw_sp_rx_drop_listener(struct sk_buff *skb, u8 local_port, devlink = priv_to_devlink(mlxsw_sp->core); in_devlink_port = mlxsw_core_port_devlink_port_get(mlxsw_sp->core, local_port); + skb_push(skb, ETH_HLEN); devlink_trap_report(devlink, skb, trap_ctx, in_devlink_port); consume_skb(skb); } From ec3359b685db834c249953db6ac5717bf5cde425 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 17 Oct 2019 14:44:02 +0200 Subject: [PATCH 437/572] vsock/virtio: send a credit update when buffer size is changed When the user application set a new buffer size value, we should update the remote peer about this change, since it uses this information to calculate the credit available. Signed-off-by: Stefano Garzarella Signed-off-by: David S. Miller --- net/vmw_vsock/virtio_transport_common.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index a666ef8fc54e..db127a69f5c3 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -458,6 +458,9 @@ void virtio_transport_set_buffer_size(struct vsock_sock *vsk, u64 val) vvs->buf_size_max = val; vvs->buf_size = val; vvs->buf_alloc = val; + + virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_STREAM, + NULL); } EXPORT_SYMBOL_GPL(virtio_transport_set_buffer_size); From ae6fcfbf5f03de3407b809aaee319330d3dc7f8b Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 17 Oct 2019 14:44:03 +0200 Subject: [PATCH 438/572] vsock/virtio: discard packets if credit is not respected If the remote peer doesn't respect the credit information (buf_alloc, fwd_cnt), sending more data than it can send, we should drop the packets to prevent a malicious peer from using all of our memory. This is patch follows the VIRTIO spec: "VIRTIO_VSOCK_OP_RW data packets MUST only be transmitted when the peer has sufficient free buffer space for the payload" Signed-off-by: Stefano Garzarella Signed-off-by: David S. Miller --- net/vmw_vsock/virtio_transport_common.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index db127a69f5c3..481f7f8a1655 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -204,10 +204,14 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, return virtio_transport_get_ops()->send_pkt(pkt); } -static void virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs, +static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt) { + if (vvs->rx_bytes + pkt->len > vvs->buf_alloc) + return false; + vvs->rx_bytes += pkt->len; + return true; } static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs, @@ -879,14 +883,18 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk, struct virtio_vsock_pkt *pkt) { struct virtio_vsock_sock *vvs = vsk->trans; - bool free_pkt = false; + bool can_enqueue, free_pkt = false; pkt->len = le32_to_cpu(pkt->hdr.len); pkt->off = 0; spin_lock_bh(&vvs->rx_lock); - virtio_transport_inc_rx_pkt(vvs, pkt); + can_enqueue = virtio_transport_inc_rx_pkt(vvs, pkt); + if (!can_enqueue) { + free_pkt = true; + goto out; + } /* Try to copy small packets into the buffer of last packet queued, * to avoid wasting memory queueing the entire buffer with a small From 38b4fe320119859c11b1dc06f6b4987a16344fa1 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 17 Oct 2019 21:29:26 +0200 Subject: [PATCH 439/572] net: usb: lan78xx: Connect PHY before registering MAC As soon as the netdev is registers, the kernel can start using the interface. If the driver connects the MAC to the PHY after the netdev is registered, there is a race condition where the interface can be opened without having the PHY connected. Change the order to close this race condition. Fixes: 92571a1aae40 ("lan78xx: Connect phy early") Reported-by: Daniel Wagner Signed-off-by: Andrew Lunn Tested-by: Daniel Wagner Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 58f5a219fb65..62948098191f 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -3782,10 +3782,14 @@ static int lan78xx_probe(struct usb_interface *intf, /* driver requires remote-wakeup capability during autosuspend. */ intf->needs_remote_wakeup = 1; + ret = lan78xx_phy_init(dev); + if (ret < 0) + goto out4; + ret = register_netdev(netdev); if (ret != 0) { netif_err(dev, probe, netdev, "couldn't register the device\n"); - goto out4; + goto out5; } usb_set_intfdata(intf, dev); @@ -3798,14 +3802,10 @@ static int lan78xx_probe(struct usb_interface *intf, pm_runtime_set_autosuspend_delay(&udev->dev, DEFAULT_AUTOSUSPEND_DELAY); - ret = lan78xx_phy_init(dev); - if (ret < 0) - goto out5; - return 0; out5: - unregister_netdev(netdev); + phy_disconnect(netdev->phydev); out4: usb_free_urb(dev->urb_intr); out3: From 9fa8c9c647be624e91b09ecffa7cd97ee0600b40 Mon Sep 17 00:00:00 2001 From: Zhengjun Xing Date: Fri, 18 Oct 2019 09:20:34 +0800 Subject: [PATCH 440/572] tracing: Fix "gfp_t" format for synthetic events In the format of synthetic events, the "gfp_t" is shown as "signed:1", but in fact the "gfp_t" is "unsigned", should be shown as "signed:0". The issue can be reproduced by the following commands: echo 'memlatency u64 lat; unsigned int order; gfp_t gfp_flags; int migratetype' > /sys/kernel/debug/tracing/synthetic_events cat /sys/kernel/debug/tracing/events/synthetic/memlatency/format name: memlatency ID: 2233 format: field:unsigned short common_type; offset:0; size:2; signed:0; field:unsigned char common_flags; offset:2; size:1; signed:0; field:unsigned char common_preempt_count; offset:3; size:1; signed:0; field:int common_pid; offset:4; size:4; signed:1; field:u64 lat; offset:8; size:8; signed:0; field:unsigned int order; offset:16; size:4; signed:0; field:gfp_t gfp_flags; offset:24; size:4; signed:1; field:int migratetype; offset:32; size:4; signed:1; print fmt: "lat=%llu, order=%u, gfp_flags=%x, migratetype=%d", REC->lat, REC->order, REC->gfp_flags, REC->migratetype Link: http://lkml.kernel.org/r/20191018012034.6404-1-zhengjun.xing@linux.intel.com Reviewed-by: Tom Zanussi Signed-off-by: Zhengjun Xing Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 57648c5aa679..7482a1466ebf 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -679,6 +679,8 @@ static bool synth_field_signed(char *type) { if (str_has_prefix(type, "u")) return false; + if (strcmp(type, "gfp_t") == 0) + return false; return true; } From 7a6f22d7479b7a0b68eadd308a997dd64dda7dae Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 18 Oct 2019 17:19:54 +0200 Subject: [PATCH 441/572] USB: ldusb: fix read info leaks Fix broken read implementation, which could be used to trigger slab info leaks. The driver failed to check if the custom ring buffer was still empty when waking up after having waited for more data. This would happen on every interrupt-in completion, even if no data had been added to the ring buffer (e.g. on disconnect events). Due to missing sanity checks and uninitialised (kmalloced) ring-buffer entries, this meant that huge slab info leaks could easily be triggered. Note that the empty-buffer check after wakeup is enough to fix the info leak on disconnect, but let's clear the buffer on allocation and add a sanity check to read() to prevent further leaks. Fixes: 2824bd250f0b ("[PATCH] USB: add ldusb driver") Cc: stable # 2.6.13 Reported-by: syzbot+6fe95b826644f7f12b0b@syzkaller.appspotmail.com Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20191018151955.25135-2-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/ldusb.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/usb/misc/ldusb.c b/drivers/usb/misc/ldusb.c index 147c90c2a4e5..15b5f06fb0b3 100644 --- a/drivers/usb/misc/ldusb.c +++ b/drivers/usb/misc/ldusb.c @@ -464,7 +464,7 @@ static ssize_t ld_usb_read(struct file *file, char __user *buffer, size_t count, /* wait for data */ spin_lock_irq(&dev->rbsl); - if (dev->ring_head == dev->ring_tail) { + while (dev->ring_head == dev->ring_tail) { dev->interrupt_in_done = 0; spin_unlock_irq(&dev->rbsl); if (file->f_flags & O_NONBLOCK) { @@ -474,12 +474,17 @@ static ssize_t ld_usb_read(struct file *file, char __user *buffer, size_t count, retval = wait_event_interruptible(dev->read_wait, dev->interrupt_in_done); if (retval < 0) goto unlock_exit; - } else { - spin_unlock_irq(&dev->rbsl); + + spin_lock_irq(&dev->rbsl); } + spin_unlock_irq(&dev->rbsl); /* actual_buffer contains actual_length + interrupt_in_buffer */ actual_buffer = (size_t *)(dev->ring_buffer + dev->ring_tail * (sizeof(size_t)+dev->interrupt_in_endpoint_size)); + if (*actual_buffer > dev->interrupt_in_endpoint_size) { + retval = -EIO; + goto unlock_exit; + } bytes_to_read = min(count, *actual_buffer); if (bytes_to_read < *actual_buffer) dev_warn(&dev->intf->dev, "Read buffer overflow, %zd bytes dropped\n", @@ -690,10 +695,9 @@ static int ld_usb_probe(struct usb_interface *intf, const struct usb_device_id * dev_warn(&intf->dev, "Interrupt out endpoint not found (using control endpoint instead)\n"); dev->interrupt_in_endpoint_size = usb_endpoint_maxp(dev->interrupt_in_endpoint); - dev->ring_buffer = - kmalloc_array(ring_buffer_size, - sizeof(size_t) + dev->interrupt_in_endpoint_size, - GFP_KERNEL); + dev->ring_buffer = kcalloc(ring_buffer_size, + sizeof(size_t) + dev->interrupt_in_endpoint_size, + GFP_KERNEL); if (!dev->ring_buffer) goto error; dev->interrupt_in_buffer = kmalloc(dev->interrupt_in_endpoint_size, GFP_KERNEL); From 626c45d223e22090511acbfb481e0ece1de1356d Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sun, 13 Oct 2019 12:53:23 +0200 Subject: [PATCH 442/572] ARM: dts: bcm2837-rpi-cm3: Avoid leds-gpio probing issue bcm2835-rpi.dtsi defines the behavior of the ACT LED, which is available on all Raspberry Pi boards. But there is no driver for this particual GPIO on CM3 in mainline yet, so this node was left incomplete without the actual GPIO definition. Since commit 025bf37725f1 ("gpio: Fix return value mismatch of function gpiod_get_from_of_node()") this causing probe issues of the leds-gpio driver for users of the CM3 dtsi file. leds-gpio: probe of leds failed with error -2 Until we have the necessary GPIO driver hide the ACT node for CM3 to avoid this. Reported-by: Fredrik Yhlen Signed-off-by: Stefan Wahren Fixes: a54fe8a6cf66 ("ARM: dts: add Raspberry Pi Compute Module 3 and IO board") Cc: Linus Walleij Cc: Krzysztof Kozlowski Signed-off-by: Florian Fainelli --- arch/arm/boot/dts/bcm2837-rpi-cm3.dtsi | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm/boot/dts/bcm2837-rpi-cm3.dtsi b/arch/arm/boot/dts/bcm2837-rpi-cm3.dtsi index 7c3cb7ece6cb..925cb37c22f0 100644 --- a/arch/arm/boot/dts/bcm2837-rpi-cm3.dtsi +++ b/arch/arm/boot/dts/bcm2837-rpi-cm3.dtsi @@ -9,6 +9,14 @@ reg = <0 0x40000000>; }; + leds { + /* + * Since there is no upstream GPIO driver yet, + * remove the incomplete node. + */ + /delete-node/ act; + }; + reg_3v3: fixed-regulator { compatible = "regulator-fixed"; regulator-name = "3V3"; From 9794476942d8704cfbdef8d5f13427673ab70dcd Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Thu, 17 Oct 2019 10:58:01 +0300 Subject: [PATCH 443/572] usb: cdns3: Error out if USB_DR_MODE_UNKNOWN in cdns3_core_init_role() USB_DR_MODE_UNKNOWN should be treated as error as it is done in cdns3_drd_update_mode(). Fixes: 02ffc26df96b ("usb: cdns3: fix cdns3_core_init_role()") Signed-off-by: Roger Quadros Link: https://lore.kernel.org/r/20191017075801.8734-1-rogerq@ti.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/cdns3/core.c b/drivers/usb/cdns3/core.c index 1109dc5a4c39..c2123ef8d8a3 100644 --- a/drivers/usb/cdns3/core.c +++ b/drivers/usb/cdns3/core.c @@ -166,7 +166,6 @@ static int cdns3_core_init_role(struct cdns3 *cdns) goto err; switch (cdns->dr_mode) { - case USB_DR_MODE_UNKNOWN: case USB_DR_MODE_OTG: ret = cdns3_hw_role_switch(cdns); if (ret) @@ -182,6 +181,9 @@ static int cdns3_core_init_role(struct cdns3 *cdns) if (ret) goto err; break; + default: + ret = -EINVAL; + goto err; } return ret; From 0ad8f7aa9f7ea839409f7f15f155a73f4f2d08b3 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Wed, 16 Oct 2019 11:23:16 -0700 Subject: [PATCH 444/572] MAINTAINERS: Use @kernel.org address for Paul Burton Switch to using my paulburton@kernel.org email address in order to avoid subject mangling that's being imposed on my previous address. Signed-off-by: Paul Burton Signed-off-by: Paul Burton Cc: linux-kernel@vger.kernel.org --- .mailmap | 3 ++- MAINTAINERS | 10 +++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.mailmap b/.mailmap index edcac87e76c8..10b27ecb61c0 100644 --- a/.mailmap +++ b/.mailmap @@ -196,7 +196,8 @@ Oleksij Rempel Oleksij Rempel Paolo 'Blaisorblade' Giarrusso Patrick Mochel -Paul Burton +Paul Burton +Paul Burton Peter A Jonsson Peter Oruba Peter Oruba diff --git a/MAINTAINERS b/MAINTAINERS index a69e6db80c79..6c4dc607074a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3096,7 +3096,7 @@ S: Supported F: arch/arm64/net/ BPF JIT for MIPS (32-BIT AND 64-BIT) -M: Paul Burton +M: Paul Burton L: netdev@vger.kernel.org L: bpf@vger.kernel.org S: Maintained @@ -8001,7 +8001,7 @@ S: Maintained F: drivers/usb/atm/ueagle-atm.c IMGTEC ASCII LCD DRIVER -M: Paul Burton +M: Paul Burton S: Maintained F: Documentation/devicetree/bindings/auxdisplay/img-ascii-lcd.txt F: drivers/auxdisplay/img-ascii-lcd.c @@ -10828,7 +10828,7 @@ F: drivers/usb/image/microtek.* MIPS M: Ralf Baechle -M: Paul Burton +M: Paul Burton M: James Hogan L: linux-mips@vger.kernel.org W: http://www.linux-mips.org/ @@ -10842,7 +10842,7 @@ F: arch/mips/ F: drivers/platform/mips/ MIPS BOSTON DEVELOPMENT BOARD -M: Paul Burton +M: Paul Burton L: linux-mips@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/clock/img,boston-clock.txt @@ -10852,7 +10852,7 @@ F: drivers/clk/imgtec/clk-boston.c F: include/dt-bindings/clock/boston-clock.h MIPS GENERIC PLATFORM -M: Paul Burton +M: Paul Burton L: linux-mips@vger.kernel.org S: Supported F: Documentation/devicetree/bindings/power/mti,mips-cpc.txt From 8a1bef4193e81c8afae4d2f107f1c09c8ce89470 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Wed, 16 Oct 2019 14:40:24 +0100 Subject: [PATCH 445/572] mips: vdso: Fix __arch_get_hw_counter() On some MIPS variants (e.g. MIPS r1), vDSO clock_mode is set to VDSO_CLOCK_NONE. When VDSO_CLOCK_NONE is set the expected kernel behavior is to fallback on syscalls. To do that the generic vDSO library expects UULONG_MAX as return value of __arch_get_hw_counter(). Fix __arch_get_hw_counter() on MIPS defining a __VDSO_USE_SYSCALL case that addressed the described scenario. Reported-by: Maxime Bizon Signed-off-by: Vincenzo Frascino Tested-by: Maxime Bizon Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org --- arch/mips/include/asm/vdso/gettimeofday.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/mips/include/asm/vdso/gettimeofday.h b/arch/mips/include/asm/vdso/gettimeofday.h index e78462e8ca2e..b08825531e9f 100644 --- a/arch/mips/include/asm/vdso/gettimeofday.h +++ b/arch/mips/include/asm/vdso/gettimeofday.h @@ -24,6 +24,8 @@ #define VDSO_HAS_CLOCK_GETRES 1 +#define __VDSO_USE_SYSCALL ULLONG_MAX + #ifdef CONFIG_MIPS_CLOCK_VSYSCALL static __always_inline long gettimeofday_fallback( @@ -205,7 +207,7 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode) break; #endif default: - cycle_now = 0; + cycle_now = __VDSO_USE_SYSCALL; break; } From b9959c7a347d6adbb558fba7e36e9fef3cba3b07 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 18 Oct 2019 18:41:16 -0400 Subject: [PATCH 446/572] filldir[64]: remove WARN_ON_ONCE() for bad directory entries This was always meant to be a temporary thing, just for testing and to see if it actually ever triggered. The only thing that reported it was syzbot doing disk image fuzzing, and then that warning is expected. So let's just remove it before -rc4, because the extra sanity testing should probably go to -stable, but we don't want the warning to do so. Reported-by: syzbot+3031f712c7ad5dd4d926@syzkaller.appspotmail.com Fixes: 8a23eb804ca4 ("Make filldir[64]() verify the directory entry filename is valid") Signed-off-by: Linus Torvalds --- fs/readdir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/readdir.c b/fs/readdir.c index 6e2623e57b2e..d26d5ea4de7b 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -105,9 +105,9 @@ EXPORT_SYMBOL(iterate_dir); */ static int verify_dirent_name(const char *name, int len) { - if (WARN_ON_ONCE(!len)) + if (!len) return -EIO; - if (WARN_ON_ONCE(memchr(name, '/', len))) + if (memchr(name, '/', len)) return -EIO; return 0; } From 641fe2e9387a36f9ee01d7c69382d1fe147a5e98 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 18 Oct 2019 20:19:16 -0700 Subject: [PATCH 447/572] drivers/base/memory.c: don't access uninitialized memmaps in soft_offline_page_store() Uninitialized memmaps contain garbage and in the worst case trigger kernel BUGs, especially with CONFIG_PAGE_POISONING. They should not get touched. Right now, when trying to soft-offline a PFN that resides on a memory block that was never onlined, one gets a misleading error with CONFIG_PAGE_POISONING: :/# echo 5637144576 > /sys/devices/system/memory/soft_offline_page [ 23.097167] soft offline: 0x150000 page already poisoned But the actual result depends on the garbage in the memmap. soft_offline_page() can only work with online pages, it returns -EIO in case of ZONE_DEVICE. Make sure to only forward pages that are online (iow, managed by the buddy) and, therefore, have an initialized memmap. Add a check against pfn_to_online_page() and similarly return -EIO. Link: http://lkml.kernel.org/r/20191010141200.8985-1-david@redhat.com Fixes: f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded memory to zones until online") [visible after d0dc12e86b319] Signed-off-by: David Hildenbrand Acked-by: Naoya Horiguchi Acked-by: Michal Hocko Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Cc: [4.13+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/memory.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 6bea4f3f8040..55907c27075b 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -540,6 +540,9 @@ static ssize_t soft_offline_page_store(struct device *dev, pfn >>= PAGE_SHIFT; if (!pfn_valid(pfn)) return -ENXIO; + /* Only online pages can be soft-offlined (esp., not ZONE_DEVICE). */ + if (!pfn_to_online_page(pfn)) + return -EIO; ret = soft_offline_page(pfn_to_page(pfn), 0); return ret == 0 ? count : ret; } From aad5f69bc161af489dbb5934868bd347282f0764 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 18 Oct 2019 20:19:20 -0700 Subject: [PATCH 448/572] fs/proc/page.c: don't access uninitialized memmaps in fs/proc/page.c There are three places where we access uninitialized memmaps, namely: - /proc/kpagecount - /proc/kpageflags - /proc/kpagecgroup We have initialized memmaps either when the section is online or when the page was initialized to the ZONE_DEVICE. Uninitialized memmaps contain garbage and in the worst case trigger kernel BUGs, especially with CONFIG_PAGE_POISONING. For example, not onlining a DIMM during boot and calling /proc/kpagecount with CONFIG_PAGE_POISONING: :/# cat /proc/kpagecount > tmp.test BUG: unable to handle page fault for address: fffffffffffffffe #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 114616067 P4D 114616067 PUD 114618067 PMD 0 Oops: 0000 [#1] SMP NOPTI CPU: 0 PID: 469 Comm: cat Not tainted 5.4.0-rc1-next-20191004+ #11 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.4 RIP: 0010:kpagecount_read+0xce/0x1e0 Code: e8 09 83 e0 3f 48 0f a3 02 73 2d 4c 89 e7 48 c1 e7 06 48 03 3d ab 51 01 01 74 1d 48 8b 57 08 480 RSP: 0018:ffffa14e409b7e78 EFLAGS: 00010202 RAX: fffffffffffffffe RBX: 0000000000020000 RCX: 0000000000000000 RDX: 0000000000000001 RSI: 00007f76b5595000 RDI: fffff35645000000 RBP: 00007f76b5595000 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000140000 R13: 0000000000020000 R14: 00007f76b5595000 R15: ffffa14e409b7f08 FS: 00007f76b577d580(0000) GS:ffff8f41bd400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: fffffffffffffffe CR3: 0000000078960000 CR4: 00000000000006f0 Call Trace: proc_reg_read+0x3c/0x60 vfs_read+0xc5/0x180 ksys_read+0x68/0xe0 do_syscall_64+0x5c/0xa0 entry_SYSCALL_64_after_hwframe+0x49/0xbe For now, let's drop support for ZONE_DEVICE from the three pseudo files in order to fix this. To distinguish offline memory (with garbage memmap) from ZONE_DEVICE memory with properly initialized memmaps, we would have to check get_dev_pagemap() and pfn_zone_device_reserved() right now. The usage of both (especially, special casing devmem) is frowned upon and needs to be reworked. The fundamental issue we have is: if (pfn_to_online_page(pfn)) { /* memmap initialized */ } else if (pfn_valid(pfn)) { /* * ??? * a) offline memory. memmap garbage. * b) devmem: memmap initialized to ZONE_DEVICE. * c) devmem: reserved for driver. memmap garbage. * (d) devmem: memmap currently initializing - garbage) */ } We'll leave the pfn_zone_device_reserved() check in stable_page_flags() in place as that function is also used from memory failure. We now no longer dump information about pages that are not in use anymore - offline. Link: http://lkml.kernel.org/r/20191009142435.3975-2-david@redhat.com Fixes: f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded memory to zones until online") [visible after d0dc12e86b319] Signed-off-by: David Hildenbrand Reported-by: Qian Cai Acked-by: Michal Hocko Cc: Dan Williams Cc: Alexey Dobriyan Cc: Stephen Rothwell Cc: Toshiki Fukasawa Cc: Pankaj gupta Cc: Mike Rapoport Cc: Anthony Yznaga Cc: "Aneesh Kumar K.V" Cc: [4.13+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/page.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/fs/proc/page.c b/fs/proc/page.c index 544d1ee15aee..7c952ee732e6 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -42,10 +42,12 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf, return -EINVAL; while (count > 0) { - if (pfn_valid(pfn)) - ppage = pfn_to_page(pfn); - else - ppage = NULL; + /* + * TODO: ZONE_DEVICE support requires to identify + * memmaps that were actually initialized. + */ + ppage = pfn_to_online_page(pfn); + if (!ppage || PageSlab(ppage) || page_has_type(ppage)) pcount = 0; else @@ -216,10 +218,11 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf, return -EINVAL; while (count > 0) { - if (pfn_valid(pfn)) - ppage = pfn_to_page(pfn); - else - ppage = NULL; + /* + * TODO: ZONE_DEVICE support requires to identify + * memmaps that were actually initialized. + */ + ppage = pfn_to_online_page(pfn); if (put_user(stable_page_flags(ppage), out)) { ret = -EFAULT; @@ -261,10 +264,11 @@ static ssize_t kpagecgroup_read(struct file *file, char __user *buf, return -EINVAL; while (count > 0) { - if (pfn_valid(pfn)) - ppage = pfn_to_page(pfn); - else - ppage = NULL; + /* + * TODO: ZONE_DEVICE support requires to identify + * memmaps that were actually initialized. + */ + ppage = pfn_to_online_page(pfn); if (ppage) ino = page_cgroup_ino(ppage); From 96c804a6ae8c59a9092b3d5dd581198472063184 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 18 Oct 2019 20:19:23 -0700 Subject: [PATCH 449/572] mm/memory-failure.c: don't access uninitialized memmaps in memory_failure() We should check for pfn_to_online_page() to not access uninitialized memmaps. Reshuffle the code so we don't have to duplicate the error message. Link: http://lkml.kernel.org/r/20191009142435.3975-3-david@redhat.com Signed-off-by: David Hildenbrand Fixes: f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded memory to zones until online") [visible after d0dc12e86b319] Acked-by: Naoya Horiguchi Cc: Michal Hocko Cc: [4.13+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 0ae72b6acee7..3151c87dff73 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1257,17 +1257,19 @@ int memory_failure(unsigned long pfn, int flags) if (!sysctl_memory_failure_recovery) panic("Memory failure on page %lx", pfn); - if (!pfn_valid(pfn)) { + p = pfn_to_online_page(pfn); + if (!p) { + if (pfn_valid(pfn)) { + pgmap = get_dev_pagemap(pfn, NULL); + if (pgmap) + return memory_failure_dev_pagemap(pfn, flags, + pgmap); + } pr_err("Memory failure: %#lx: memory outside kernel control\n", pfn); return -ENXIO; } - pgmap = get_dev_pagemap(pfn, NULL); - if (pgmap) - return memory_failure_dev_pagemap(pfn, flags, pgmap); - - p = pfn_to_page(pfn); if (PageHuge(p)) return memory_failure_hugetlb(pfn, flags); if (TestSetPageHWPoison(p)) { From ca210ba32ef7537b02731bfe255ed8eb1e4e2b59 Mon Sep 17 00:00:00 2001 From: Joel Colledge Date: Fri, 18 Oct 2019 20:19:26 -0700 Subject: [PATCH 450/572] scripts/gdb: fix lx-dmesg when CONFIG_PRINTK_CALLER is set When CONFIG_PRINTK_CALLER is set, struct printk_log contains an additional member caller_id. This affects the offset of the log text. Account for this by using the type information from gdb to determine all the offsets instead of using hardcoded values. This fixes following error: (gdb) lx-dmesg Python Exception embedded null character: Error occurred in Python command: embedded null character The read_u* utility functions now take an offset argument to make them easier to use. Link: http://lkml.kernel.org/r/20191011142500.2339-1-joel.colledge@linbit.com Signed-off-by: Joel Colledge Reviewed-by: Jan Kiszka Cc: Kieran Bingham Cc: Leonard Crestez Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/gdb/linux/dmesg.py | 16 ++++++++++++---- scripts/gdb/linux/utils.py | 25 +++++++++++++------------ 2 files changed, 25 insertions(+), 16 deletions(-) diff --git a/scripts/gdb/linux/dmesg.py b/scripts/gdb/linux/dmesg.py index 6d2e09a2ad2f..2fa7bb83885f 100644 --- a/scripts/gdb/linux/dmesg.py +++ b/scripts/gdb/linux/dmesg.py @@ -16,6 +16,8 @@ import sys from linux import utils +printk_log_type = utils.CachedType("struct printk_log") + class LxDmesg(gdb.Command): """Print Linux kernel log buffer.""" @@ -42,9 +44,14 @@ class LxDmesg(gdb.Command): b = utils.read_memoryview(inf, log_buf_addr, log_next_idx) log_buf = a.tobytes() + b.tobytes() + length_offset = printk_log_type.get_type()['len'].bitpos // 8 + text_len_offset = printk_log_type.get_type()['text_len'].bitpos // 8 + time_stamp_offset = printk_log_type.get_type()['ts_nsec'].bitpos // 8 + text_offset = printk_log_type.get_type().sizeof + pos = 0 while pos < log_buf.__len__(): - length = utils.read_u16(log_buf[pos + 8:pos + 10]) + length = utils.read_u16(log_buf, pos + length_offset) if length == 0: if log_buf_2nd_half == -1: gdb.write("Corrupted log buffer!\n") @@ -52,10 +59,11 @@ class LxDmesg(gdb.Command): pos = log_buf_2nd_half continue - text_len = utils.read_u16(log_buf[pos + 10:pos + 12]) - text = log_buf[pos + 16:pos + 16 + text_len].decode( + text_len = utils.read_u16(log_buf, pos + text_len_offset) + text_start = pos + text_offset + text = log_buf[text_start:text_start + text_len].decode( encoding='utf8', errors='replace') - time_stamp = utils.read_u64(log_buf[pos:pos + 8]) + time_stamp = utils.read_u64(log_buf, pos + time_stamp_offset) for line in text.splitlines(): msg = u"[{time:12.6f}] {line}\n".format( diff --git a/scripts/gdb/linux/utils.py b/scripts/gdb/linux/utils.py index bc67126118c4..ea94221dbd39 100644 --- a/scripts/gdb/linux/utils.py +++ b/scripts/gdb/linux/utils.py @@ -92,15 +92,16 @@ def read_memoryview(inf, start, length): return memoryview(inf.read_memory(start, length)) -def read_u16(buffer): +def read_u16(buffer, offset): + buffer_val = buffer[offset:offset + 2] value = [0, 0] - if type(buffer[0]) is str: - value[0] = ord(buffer[0]) - value[1] = ord(buffer[1]) + if type(buffer_val[0]) is str: + value[0] = ord(buffer_val[0]) + value[1] = ord(buffer_val[1]) else: - value[0] = buffer[0] - value[1] = buffer[1] + value[0] = buffer_val[0] + value[1] = buffer_val[1] if get_target_endianness() == LITTLE_ENDIAN: return value[0] + (value[1] << 8) @@ -108,18 +109,18 @@ def read_u16(buffer): return value[1] + (value[0] << 8) -def read_u32(buffer): +def read_u32(buffer, offset): if get_target_endianness() == LITTLE_ENDIAN: - return read_u16(buffer[0:2]) + (read_u16(buffer[2:4]) << 16) + return read_u16(buffer, offset) + (read_u16(buffer, offset + 2) << 16) else: - return read_u16(buffer[2:4]) + (read_u16(buffer[0:2]) << 16) + return read_u16(buffer, offset + 2) + (read_u16(buffer, offset) << 16) -def read_u64(buffer): +def read_u64(buffer, offset): if get_target_endianness() == LITTLE_ENDIAN: - return read_u32(buffer[0:4]) + (read_u32(buffer[4:8]) << 32) + return read_u32(buffer, offset) + (read_u32(buffer, offset + 4) << 32) else: - return read_u32(buffer[4:8]) + (read_u32(buffer[0:4]) << 32) + return read_u32(buffer, offset + 4) + (read_u32(buffer, offset) << 32) target_arch = None From a26ee565b6cd8dc2bf15ff6aa70bbb28f928b773 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 18 Oct 2019 20:19:29 -0700 Subject: [PATCH 451/572] mm/page_owner: don't access uninitialized memmaps when reading /proc/pagetypeinfo Uninitialized memmaps contain garbage and in the worst case trigger kernel BUGs, especially with CONFIG_PAGE_POISONING. They should not get touched. For example, when not onlining a memory block that is spanned by a zone and reading /proc/pagetypeinfo with CONFIG_DEBUG_VM_PGFLAGS and CONFIG_PAGE_POISONING, we can trigger a kernel BUG: :/# echo 1 > /sys/devices/system/memory/memory40/online :/# echo 1 > /sys/devices/system/memory/memory42/online :/# cat /proc/pagetypeinfo > test.file page:fffff2c585200000 is uninitialized and poisoned raw: ffffffffffffffff ffffffffffffffff ffffffffffffffff ffffffffffffffff raw: ffffffffffffffff ffffffffffffffff ffffffffffffffff ffffffffffffffff page dumped because: VM_BUG_ON_PAGE(PagePoisoned(p)) There is not page extension available. ------------[ cut here ]------------ kernel BUG at include/linux/mm.h:1107! invalid opcode: 0000 [#1] SMP NOPTI Please note that this change does not affect ZONE_DEVICE, because pagetypeinfo_showmixedcount_print() is called from mm/vmstat.c:pagetypeinfo_showmixedcount() only for populated zones, and ZONE_DEVICE is never populated (zone->present_pages always 0). [david@redhat.com: move check to outer loop, add comment, rephrase description] Link: http://lkml.kernel.org/r/20191011140638.8160-1-david@redhat.com Fixes: f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded memory to zones until online") # visible after d0dc12e86b319 Signed-off-by: Qian Cai Signed-off-by: David Hildenbrand Acked-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Thomas Gleixner Cc: "Peter Zijlstra (Intel)" Cc: Miles Chen Cc: Mike Rapoport Cc: Qian Cai Cc: Greg Kroah-Hartman Cc: [4.13+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_owner.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mm/page_owner.c b/mm/page_owner.c index e327bcd0380e..18ecde9f45b2 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -271,7 +271,8 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m, * not matter as the mixed block count will still be correct */ for (; pfn < end_pfn; ) { - if (!pfn_valid(pfn)) { + page = pfn_to_online_page(pfn); + if (!page) { pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES); continue; } @@ -279,13 +280,13 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m, block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages); block_end_pfn = min(block_end_pfn, end_pfn); - page = pfn_to_page(pfn); pageblock_mt = get_pageblock_migratetype(page); for (; pfn < block_end_pfn; pfn++) { if (!pfn_valid_within(pfn)) continue; + /* The pageblock is online, no need to recheck. */ page = pfn_to_page(pfn); if (page_zone(page) != zone) From 00d6c019b5bc175cee3770e0e659f2b5f4804ea5 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 18 Oct 2019 20:19:33 -0700 Subject: [PATCH 452/572] mm/memory_hotplug: don't access uninitialized memmaps in shrink_pgdat_span() We might use the nid of memmaps that were never initialized. For example, if the memmap was poisoned, we will crash the kernel in pfn_to_nid() right now. Let's use the calculated boundaries of the separate zones instead. This now also avoids having to iterate over a whole bunch of subsections again, after shrinking one zone. Before commit d0dc12e86b31 ("mm/memory_hotplug: optimize memory hotplug"), the memmap was initialized to 0 and the node was set to the right value. After that commit, the node might be garbage. We'll have to fix shrink_zone_span() next. Link: http://lkml.kernel.org/r/20191006085646.5768-4-david@redhat.com Fixes: f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded memory to zones until online") [d0dc12e86b319] Signed-off-by: David Hildenbrand Reported-by: Aneesh Kumar K.V Cc: Oscar Salvador Cc: David Hildenbrand Cc: Michal Hocko Cc: Pavel Tatashin Cc: Dan Williams Cc: Wei Yang Cc: Alexander Duyck Cc: Alexander Potapenko Cc: Andy Lutomirski Cc: Anshuman Khandual Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Damian Tometzki Cc: Dave Hansen Cc: Fenghua Yu Cc: Gerald Schaefer Cc: Greg Kroah-Hartman Cc: Halil Pasic Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Ira Weiny Cc: Jason Gunthorpe Cc: Jun Yao Cc: Logan Gunthorpe Cc: Mark Rutland Cc: Masahiro Yamada Cc: "Matthew Wilcox (Oracle)" Cc: Mel Gorman Cc: Michael Ellerman Cc: Mike Rapoport Cc: Pankaj Gupta Cc: Paul Mackerras Cc: Pavel Tatashin Cc: Peter Zijlstra Cc: Qian Cai Cc: Rich Felker Cc: Robin Murphy Cc: Steve Capper Cc: Thomas Gleixner Cc: Tom Lendacky Cc: Tony Luck Cc: Vasily Gorbik Cc: Vlastimil Babka Cc: Wei Yang Cc: Will Deacon Cc: Yoshinori Sato Cc: Yu Zhao Cc: [4.13+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 74 ++++++++++----------------------------------- 1 file changed, 16 insertions(+), 58 deletions(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index b1be791f772d..df570e5c71cc 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -436,67 +436,25 @@ static void shrink_zone_span(struct zone *zone, unsigned long start_pfn, zone_span_writeunlock(zone); } -static void shrink_pgdat_span(struct pglist_data *pgdat, - unsigned long start_pfn, unsigned long end_pfn) +static void update_pgdat_span(struct pglist_data *pgdat) { - unsigned long pgdat_start_pfn = pgdat->node_start_pfn; - unsigned long p = pgdat_end_pfn(pgdat); /* pgdat_end_pfn namespace clash */ - unsigned long pgdat_end_pfn = p; - unsigned long pfn; - int nid = pgdat->node_id; + unsigned long node_start_pfn = 0, node_end_pfn = 0; + struct zone *zone; - if (pgdat_start_pfn == start_pfn) { - /* - * If the section is smallest section in the pgdat, it need - * shrink pgdat->node_start_pfn and pgdat->node_spanned_pages. - * In this case, we find second smallest valid mem_section - * for shrinking zone. - */ - pfn = find_smallest_section_pfn(nid, NULL, end_pfn, - pgdat_end_pfn); - if (pfn) { - pgdat->node_start_pfn = pfn; - pgdat->node_spanned_pages = pgdat_end_pfn - pfn; - } - } else if (pgdat_end_pfn == end_pfn) { - /* - * If the section is biggest section in the pgdat, it need - * shrink pgdat->node_spanned_pages. - * In this case, we find second biggest valid mem_section for - * shrinking zone. - */ - pfn = find_biggest_section_pfn(nid, NULL, pgdat_start_pfn, - start_pfn); - if (pfn) - pgdat->node_spanned_pages = pfn - pgdat_start_pfn + 1; + for (zone = pgdat->node_zones; + zone < pgdat->node_zones + MAX_NR_ZONES; zone++) { + unsigned long zone_end_pfn = zone->zone_start_pfn + + zone->spanned_pages; + + /* No need to lock the zones, they can't change. */ + if (zone_end_pfn > node_end_pfn) + node_end_pfn = zone_end_pfn; + if (zone->zone_start_pfn < node_start_pfn) + node_start_pfn = zone->zone_start_pfn; } - /* - * If the section is not biggest or smallest mem_section in the pgdat, - * it only creates a hole in the pgdat. So in this case, we need not - * change the pgdat. - * But perhaps, the pgdat has only hole data. Thus it check the pgdat - * has only hole or not. - */ - pfn = pgdat_start_pfn; - for (; pfn < pgdat_end_pfn; pfn += PAGES_PER_SUBSECTION) { - if (unlikely(!pfn_valid(pfn))) - continue; - - if (pfn_to_nid(pfn) != nid) - continue; - - /* Skip range to be removed */ - if (pfn >= start_pfn && pfn < end_pfn) - continue; - - /* If we find valid section, we have nothing to do */ - return; - } - - /* The pgdat has no valid section */ - pgdat->node_start_pfn = 0; - pgdat->node_spanned_pages = 0; + pgdat->node_start_pfn = node_start_pfn; + pgdat->node_spanned_pages = node_end_pfn - node_start_pfn; } static void __remove_zone(struct zone *zone, unsigned long start_pfn, @@ -507,7 +465,7 @@ static void __remove_zone(struct zone *zone, unsigned long start_pfn, pgdat_resize_lock(zone->zone_pgdat, &flags); shrink_zone_span(zone, start_pfn, start_pfn + nr_pages); - shrink_pgdat_span(pgdat, start_pfn, start_pfn + nr_pages); + update_pgdat_span(pgdat); pgdat_resize_unlock(zone->zone_pgdat, &flags); } From 77e080e7680e1e615587352f70c87b9e98126d03 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 18 Oct 2019 20:19:39 -0700 Subject: [PATCH 453/572] mm/memunmap: don't access uninitialized memmap in memunmap_pages() Patch series "mm/memory_hotplug: Shrink zones before removing memory", v6. This series fixes the access of uninitialized memmaps when shrinking zones/nodes and when removing memory. Also, it contains all fixes for crashes that can be triggered when removing certain namespace using memunmap_pages() - ZONE_DEVICE, reported by Aneesh. We stop trying to shrink ZONE_DEVICE, as it's buggy, fixing it would be more involved (we don't have SECTION_IS_ONLINE as an indicator), and shrinking is only of limited use (set_zone_contiguous() cannot detect the ZONE_DEVICE as contiguous). We continue shrinking !ZONE_DEVICE zones, however, I reduced the amount of code to a minimum. Shrinking is especially necessary to keep zone->contiguous set where possible, especially, on memory unplug of DIMMs at zone boundaries. -------------------------------------------------------------------------- Zones are now properly shrunk when offlining memory blocks or when onlining failed. This allows to properly shrink zones on memory unplug even if the separate memory blocks of a DIMM were onlined to different zones or re-onlined to a different zone after offlining. Example: :/# cat /proc/zoneinfo Node 1, zone Movable spanned 0 present 0 managed 0 :/# echo "online_movable" > /sys/devices/system/memory/memory41/state :/# echo "online_movable" > /sys/devices/system/memory/memory43/state :/# cat /proc/zoneinfo Node 1, zone Movable spanned 98304 present 65536 managed 65536 :/# echo 0 > /sys/devices/system/memory/memory43/online :/# cat /proc/zoneinfo Node 1, zone Movable spanned 32768 present 32768 managed 32768 :/# echo 0 > /sys/devices/system/memory/memory41/online :/# cat /proc/zoneinfo Node 1, zone Movable spanned 0 present 0 managed 0 This patch (of 10): With an altmap, the memmap falling into the reserved altmap space are not initialized and, therefore, contain a garbage NID and a garbage zone. Make sure to read the NID/zone from a memmap that was initialized. This fixes a kernel crash that is observed when destroying a namespace: kernel BUG at include/linux/mm.h:1107! cpu 0x1: Vector: 700 (Program Check) at [c000000274087890] pc: c0000000004b9728: memunmap_pages+0x238/0x340 lr: c0000000004b9724: memunmap_pages+0x234/0x340 ... pid = 3669, comm = ndctl kernel BUG at include/linux/mm.h:1107! devm_action_release+0x30/0x50 release_nodes+0x268/0x2d0 device_release_driver_internal+0x174/0x240 unbind_store+0x13c/0x190 drv_attr_store+0x44/0x60 sysfs_kf_write+0x70/0xa0 kernfs_fop_write+0x1ac/0x290 __vfs_write+0x3c/0x70 vfs_write+0xe4/0x200 ksys_write+0x7c/0x140 system_call+0x5c/0x68 The "page_zone(pfn_to_page(pfn)" was introduced by 69324b8f4833 ("mm, devm_memremap_pages: add MEMORY_DEVICE_PRIVATE support"), however, I think we will never have driver reserved memory with MEMORY_DEVICE_PRIVATE (no altmap AFAIKS). [david@redhat.com: minimze code changes, rephrase description] Link: http://lkml.kernel.org/r/20191006085646.5768-2-david@redhat.com Fixes: 2c2a5af6fed2 ("mm, memory_hotplug: add nid parameter to arch_remove_memory") Signed-off-by: Aneesh Kumar K.V Signed-off-by: David Hildenbrand Cc: Dan Williams Cc: Jason Gunthorpe Cc: Logan Gunthorpe Cc: Ira Weiny Cc: Damian Tometzki Cc: Alexander Duyck Cc: Alexander Potapenko Cc: Andy Lutomirski Cc: Anshuman Khandual Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Dave Hansen Cc: Fenghua Yu Cc: Gerald Schaefer Cc: Greg Kroah-Hartman Cc: Halil Pasic Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jun Yao Cc: Mark Rutland Cc: Masahiro Yamada Cc: "Matthew Wilcox (Oracle)" Cc: Mel Gorman Cc: Michael Ellerman Cc: Michal Hocko Cc: Mike Rapoport Cc: Oscar Salvador Cc: Pankaj Gupta Cc: Paul Mackerras Cc: Pavel Tatashin Cc: Pavel Tatashin Cc: Peter Zijlstra Cc: Qian Cai Cc: Rich Felker Cc: Robin Murphy Cc: Steve Capper Cc: Thomas Gleixner Cc: Tom Lendacky Cc: Tony Luck Cc: Vasily Gorbik Cc: Vlastimil Babka Cc: Wei Yang Cc: Wei Yang Cc: Will Deacon Cc: Yoshinori Sato Cc: Yu Zhao Cc: [5.0+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memremap.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/mm/memremap.c b/mm/memremap.c index 68204912cc0a..03ccbdfeb697 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -103,6 +103,7 @@ static void dev_pagemap_cleanup(struct dev_pagemap *pgmap) void memunmap_pages(struct dev_pagemap *pgmap) { struct resource *res = &pgmap->res; + struct page *first_page; unsigned long pfn; int nid; @@ -111,14 +112,16 @@ void memunmap_pages(struct dev_pagemap *pgmap) put_page(pfn_to_page(pfn)); dev_pagemap_cleanup(pgmap); + /* make sure to access a memmap that was actually initialized */ + first_page = pfn_to_page(pfn_first(pgmap)); + /* pages are dead and unused, undo the arch mapping */ - nid = page_to_nid(pfn_to_page(PHYS_PFN(res->start))); + nid = page_to_nid(first_page); mem_hotplug_begin(); if (pgmap->type == MEMORY_DEVICE_PRIVATE) { - pfn = PHYS_PFN(res->start); - __remove_pages(page_zone(pfn_to_page(pfn)), pfn, - PHYS_PFN(resource_size(res)), NULL); + __remove_pages(page_zone(first_page), PHYS_PFN(res->start), + PHYS_PFN(resource_size(res)), NULL); } else { arch_remove_memory(nid, res->start, resource_size(res), pgmap_altmap(pgmap)); From b749ecfaf6c53ce79d6ab66afd2fc34189a073b1 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 18 Oct 2019 20:19:44 -0700 Subject: [PATCH 454/572] mm: memcg/slab: fix panic in __free_slab() caused by premature memcg pointer release MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Karsten reported the following panic in __free_slab() happening on a s390x machine: Unable to handle kernel pointer dereference in virtual kernel address space Failing address: 0000000000000000 TEID: 0000000000000483 Fault in home space mode while using kernel ASCE. AS:00000000017d4007 R3:000000007fbd0007 S:000000007fbff000 P:000000000000003d Oops: 0004 ilc:3 Ý#1¨ PREEMPT SMP Modules linked in: tcp_diag inet_diag xt_tcpudp ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 ipt_REJECT nf_reject_ipv4 xt_conntrack ip6table_nat ip6table_mangle ip6table_raw ip6table_security iptable_at nf_nat CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.3.0-05872-g6133e3e4bada-dirty #14 Hardware name: IBM 2964 NC9 702 (z/VM 6.4.0) Krnl PSW : 0704d00180000000 00000000003cadb6 (__free_slab+0x686/0x6b0) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:1 PM:0 RI:0 EA:3 Krnl GPRS: 00000000f3a32928 0000000000000000 000000007fbf5d00 000000000117c4b8 0000000000000000 000000009e3291c1 0000000000000000 0000000000000000 0000000000000003 0000000000000008 000000002b478b00 000003d080a97600 0000000000000003 0000000000000008 000000002b478b00 000003d080a97600 000000000117ba00 000003e000057db0 00000000003cabcc 000003e000057c78 Krnl Code: 00000000003cada6: e310a1400004 lg %r1,320(%r10) 00000000003cadac: c0e50046c286 brasl %r14,ca32b8 #00000000003cadb2: a7f4fe36 brc 15,3caa1e >00000000003cadb6: e32060800024 stg %r2,128(%r6) 00000000003cadbc: a7f4fd9e brc 15,3ca8f8 00000000003cadc0: c0e50046790c brasl %r14,c99fd8 00000000003cadc6: a7f4fe2c brc 15,3caa 00000000003cadc6: a7f4fe2c brc 15,3caa1e 00000000003cadca: ecb1ffff00d9 aghik %r11,%r1,-1 Call Trace: (<00000000003cabcc> __free_slab+0x49c/0x6b0) <00000000001f5886> rcu_core+0x5a6/0x7e0 <0000000000ca2dea> __do_softirq+0xf2/0x5c0 <0000000000152644> irq_exit+0x104/0x130 <000000000010d222> do_IRQ+0x9a/0xf0 <0000000000ca2344> ext_int_handler+0x130/0x134 <0000000000103648> enabled_wait+0x58/0x128 (<0000000000103634> enabled_wait+0x44/0x128) <0000000000103b00> arch_cpu_idle+0x40/0x58 <0000000000ca0544> default_idle_call+0x3c/0x68 <000000000018eaa4> do_idle+0xec/0x1c0 <000000000018ee0e> cpu_startup_entry+0x36/0x40 <000000000122df34> arch_call_rest_init+0x5c/0x88 <0000000000000000> 0x0 INFO: lockdep is turned off. Last Breaking-Event-Address: <00000000003ca8f4> __free_slab+0x1c4/0x6b0 Kernel panic - not syncing: Fatal exception in interrupt The kernel panics on an attempt to dereference the NULL memcg pointer. When shutdown_cache() is called from the kmem_cache_destroy() context, a memcg kmem_cache might have empty slab pages in a partial list, which are still charged to the memory cgroup. These pages are released by free_partial() at the beginning of shutdown_cache(): either directly or by scheduling a RCU-delayed work (if the kmem_cache has the SLAB_TYPESAFE_BY_RCU flag). The latter case is when the reported panic can happen: memcg_unlink_cache() is called immediately after shrinking partial lists, without waiting for scheduled RCU works. It sets the kmem_cache->memcg_params.memcg pointer to NULL, and the following attempt to dereference it by __free_slab() from the RCU work context causes the panic. To fix the issue, let's postpone the release of the memcg pointer to destroy_memcg_params(). It's called from a separate work context by slab_caches_to_rcu_destroy_workfn(), which contains a full RCU barrier. This guarantees that all scheduled page release RCU works will complete before the memcg pointer will be zeroed. Big thanks for Karsten for the perfect report containing all necessary information, his help with the analysis of the problem and testing of the fix. Link: http://lkml.kernel.org/r/20191010160549.1584316-1-guro@fb.com Fixes: fb2f2b0adb98 ("mm: memcg/slab: reparent memcg kmem_caches on cgroup removal") Signed-off-by: Roman Gushchin Reported-by: Karsten Graul Tested-by: Karsten Graul Acked-by: Vlastimil Babka Reviewed-by: Shakeel Butt Cc: Karsten Graul Cc: Vladimir Davydov Cc: David Rientjes Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab_common.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/mm/slab_common.c b/mm/slab_common.c index c29f03adca91..f9fb27b4c843 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -178,10 +178,13 @@ static int init_memcg_params(struct kmem_cache *s, static void destroy_memcg_params(struct kmem_cache *s) { - if (is_root_cache(s)) + if (is_root_cache(s)) { kvfree(rcu_access_pointer(s->memcg_params.memcg_caches)); - else + } else { + mem_cgroup_put(s->memcg_params.memcg); + WRITE_ONCE(s->memcg_params.memcg, NULL); percpu_ref_exit(&s->memcg_params.refcnt); + } } static void free_memcg_params(struct rcu_head *rcu) @@ -253,8 +256,6 @@ static void memcg_unlink_cache(struct kmem_cache *s) } else { list_del(&s->memcg_params.children_node); list_del(&s->memcg_params.kmem_caches_node); - mem_cgroup_put(s->memcg_params.memcg); - WRITE_ONCE(s->memcg_params.memcg, NULL); } } #else From ce750f43f5790de74c1644c39d78f684071658d1 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Fri, 18 Oct 2019 20:19:47 -0700 Subject: [PATCH 455/572] ocfs2: fix error handling in ocfs2_setattr() Should set transfer_to[USRQUOTA/GRPQUOTA] to NULL on error case before jumping to do dqput(). Link: http://lkml.kernel.org/r/20191010082349.1134-1-cgxu519@mykernel.net Signed-off-by: Chengguang Xu Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/file.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 2e982db3e1ae..53939bf9d7d2 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1230,6 +1230,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) transfer_to[USRQUOTA] = dqget(sb, make_kqid_uid(attr->ia_uid)); if (IS_ERR(transfer_to[USRQUOTA])) { status = PTR_ERR(transfer_to[USRQUOTA]); + transfer_to[USRQUOTA] = NULL; goto bail_unlock; } } @@ -1239,6 +1240,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) transfer_to[GRPQUOTA] = dqget(sb, make_kqid_gid(attr->ia_gid)); if (IS_ERR(transfer_to[GRPQUOTA])) { status = PTR_ERR(transfer_to[GRPQUOTA]); + transfer_to[GRPQUOTA] = NULL; goto bail_unlock; } } From 6f24c8d30d08f270b54f4c2cb9b08dfccbe59c57 Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Fri, 18 Oct 2019 20:19:50 -0700 Subject: [PATCH 456/572] mm/gup_benchmark: add a missing "w" to getopt string Even though gup_benchmark.c has code to handle the -w command-line option, the "w" is not part of the getopt string. It looks as if it has been missing the whole time. On my machine, this leads naturally to the following predictable result: $ sudo ./gup_benchmark -w ./gup_benchmark: invalid option -- 'w' ...which is fixed with this commit. Link: http://lkml.kernel.org/r/20191014184639.1512873-2-jhubbard@nvidia.com Signed-off-by: John Hubbard Acked-by: Kirill A. Shutemov Cc: Keith Busch Cc: Shuah Khan Cc: Christoph Hellwig Cc: "Aneesh Kumar K . V" Cc: Ira Weiny Cc: Christoph Hellwig Cc: kbuild test robot Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/gup_benchmark.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c index c0534e298b51..cb3fc09645c4 100644 --- a/tools/testing/selftests/vm/gup_benchmark.c +++ b/tools/testing/selftests/vm/gup_benchmark.c @@ -37,7 +37,7 @@ int main(int argc, char **argv) char *file = "/dev/zero"; char *p; - while ((opt = getopt(argc, argv, "m:r:n:f:tTLUSH")) != -1) { + while ((opt = getopt(argc, argv, "m:r:n:f:tTLUwSH")) != -1) { switch (opt) { case 'm': size = atoi(optarg) * MB; From 0cd22afdcea21fa16bb5b0d3e0508ca42072d0bd Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Fri, 18 Oct 2019 20:19:53 -0700 Subject: [PATCH 457/572] mm/gup: fix a misnamed "write" argument, and a related bug In several routines, the "flags" argument is incorrectly named "write". Change it to "flags". Also, in one place, the misnaming led to an actual bug: "flags & FOLL_WRITE" is required, rather than just "flags". (That problem was flagged by krobot, in v1 of this patch.) Also, change the flags argument from int, to unsigned int. You can see that this was a simple oversight, because the calling code passes "flags" to the fifth argument: gup_pgd_range(): ... if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr, PGDIR_SHIFT, next, flags, pages, nr)) ...which, until this patch, the callees referred to as "write". Also, change two lines to avoid checkpatch line length complaints, and another line to fix another oversight that checkpatch called out: missing "int" on pdshift. Link: http://lkml.kernel.org/r/20191014184639.1512873-3-jhubbard@nvidia.com Fixes: b798bec4741b ("mm/gup: change write parameter to flags in fast walk") Signed-off-by: John Hubbard Reported-by: kbuild test robot Suggested-by: Kirill A. Shutemov Suggested-by: Ira Weiny Acked-by: Kirill A. Shutemov Reviewed-by: Ira Weiny Cc: Christoph Hellwig Cc: Aneesh Kumar K.V Cc: Keith Busch Cc: Shuah Khan Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/gup.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index 23a9f9c9d377..8f236a335ae9 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1973,7 +1973,8 @@ static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end, } static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) + unsigned long end, unsigned int flags, + struct page **pages, int *nr) { unsigned long pte_end; struct page *head, *page; @@ -1986,7 +1987,7 @@ static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, pte = READ_ONCE(*ptep); - if (!pte_access_permitted(pte, write)) + if (!pte_access_permitted(pte, flags & FOLL_WRITE)) return 0; /* hugepages are never "special" */ @@ -2023,7 +2024,7 @@ static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, } static int gup_huge_pd(hugepd_t hugepd, unsigned long addr, - unsigned int pdshift, unsigned long end, int write, + unsigned int pdshift, unsigned long end, unsigned int flags, struct page **pages, int *nr) { pte_t *ptep; @@ -2033,7 +2034,7 @@ static int gup_huge_pd(hugepd_t hugepd, unsigned long addr, ptep = hugepte_offset(hugepd, addr, pdshift); do { next = hugepte_addr_end(addr, end, sz); - if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr)) + if (!gup_hugepte(ptep, sz, addr, end, flags, pages, nr)) return 0; } while (ptep++, addr = next, addr != end); @@ -2041,7 +2042,7 @@ static int gup_huge_pd(hugepd_t hugepd, unsigned long addr, } #else static inline int gup_huge_pd(hugepd_t hugepd, unsigned long addr, - unsigned pdshift, unsigned long end, int write, + unsigned int pdshift, unsigned long end, unsigned int flags, struct page **pages, int *nr) { return 0; @@ -2049,7 +2050,8 @@ static inline int gup_huge_pd(hugepd_t hugepd, unsigned long addr, #endif /* CONFIG_ARCH_HAS_HUGEPD */ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr, - unsigned long end, unsigned int flags, struct page **pages, int *nr) + unsigned long end, unsigned int flags, + struct page **pages, int *nr) { struct page *head, *page; int refs; From b11edebbc967ebf5c55b8f9e1d5bb6d68ec3a7fd Mon Sep 17 00:00:00 2001 From: Honglei Wang Date: Fri, 18 Oct 2019 20:19:58 -0700 Subject: [PATCH 458/572] mm: memcg: get number of pages on the LRU list in memcgroup base on lru_zone_size Commit 1a61ab8038e72 ("mm: memcontrol: replace zone summing with lruvec_page_state()") has made lruvec_page_state to use per-cpu counters instead of calculating it directly from lru_zone_size with an idea that this would be more effective. Tim has reported that this is not really the case for their database benchmark which is showing an opposite results where lruvec_page_state is taking up a huge chunk of CPU cycles (about 25% of the system time which is roughly 7% of total cpu cycles) on 5.3 kernels. The workload is running on a larger machine (96cpus), it has many cgroups (500) and it is heavily direct reclaim bound. Tim Chen said: : The problem can also be reproduced by running simple multi-threaded : pmbench benchmark with a fast Optane SSD swap (see profile below). : : : 6.15% 3.08% pmbench [kernel.vmlinux] [k] lruvec_lru_size : | : |--3.07%--lruvec_lru_size : | | : | |--2.11%--cpumask_next : | | | : | | --1.66%--find_next_bit : | | : | --0.57%--call_function_interrupt : | | : | --0.55%--smp_call_function_interrupt : | : |--1.59%--0x441f0fc3d009 : | _ops_rdtsc_init_base_freq : | access_histogram : | page_fault : | __do_page_fault : | handle_mm_fault : | __handle_mm_fault : | | : | --1.54%--do_swap_page : | swapin_readahead : | swap_cluster_readahead : | | : | --1.53%--read_swap_cache_async : | __read_swap_cache_async : | alloc_pages_vma : | __alloc_pages_nodemask : | __alloc_pages_slowpath : | try_to_free_pages : | do_try_to_free_pages : | shrink_node : | shrink_node_memcg : | | : | |--0.77%--lruvec_lru_size : | | : | --0.76%--inactive_list_is_low : | | : | --0.76%--lruvec_lru_size : | : --1.50%--measure_read : page_fault : __do_page_fault : handle_mm_fault : __handle_mm_fault : do_swap_page : swapin_readahead : swap_cluster_readahead : | : --1.48%--read_swap_cache_async : __read_swap_cache_async : alloc_pages_vma : __alloc_pages_nodemask : __alloc_pages_slowpath : try_to_free_pages : do_try_to_free_pages : shrink_node : shrink_node_memcg : | : |--0.75%--inactive_list_is_low : | | : | --0.75%--lruvec_lru_size : | : --0.73%--lruvec_lru_size The likely culprit is the cache traffic the lruvec_page_state_local generates. Dave Hansen says: : I was thinking purely of the cache footprint. If it's reading : pn->lruvec_stat_local->count[idx] is three separate cachelines, so 192 : bytes of cache *96 CPUs = 18k of data, mostly read-only. 1 cgroup would : be 18k of data for the whole system and the caching would be pretty : efficient and all 18k would probably survive a tight page fault loop in : the L1. 500 cgroups would be ~90k of data per CPU thread which doesn't : fit in the L1 and probably wouldn't survive a tight page fault loop if : both logical threads were banging on different cgroups. : : It's just a theory, but it's why I noted the number of cgroups when I : initially saw this show up in profiles Fix the regression by partially reverting the said commit and calculate the lru size explicitly. Link: http://lkml.kernel.org/r/20190905071034.16822-1-honglei.wang@oracle.com Fixes: 1a61ab8038e72 ("mm: memcontrol: replace zone summing with lruvec_page_state()") Signed-off-by: Honglei Wang Reported-by: Tim Chen Acked-by: Tim Chen Tested-by: Tim Chen Acked-by: Michal Hocko Cc: Vladimir Davydov Cc: Johannes Weiner Cc: Roman Gushchin Cc: Tejun Heo Cc: Dave Hansen Cc: [5.2+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index c6659bb758a4..024b7e929752 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -351,12 +351,13 @@ unsigned long zone_reclaimable_pages(struct zone *zone) */ unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx) { - unsigned long lru_size; + unsigned long lru_size = 0; int zid; - if (!mem_cgroup_disabled()) - lru_size = lruvec_page_state_local(lruvec, NR_LRU_BASE + lru); - else + if (!mem_cgroup_disabled()) { + for (zid = 0; zid < MAX_NR_ZONES; zid++) + lru_size += mem_cgroup_get_zone_lru_size(lruvec, lru, zid); + } else lru_size = node_page_state(lruvec_pgdat(lruvec), NR_LRU_BASE + lru); for (zid = zone_idx + 1; zid < MAX_NR_ZONES; zid++) { From f3057ad767542be7bbac44e548cb44017178a163 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Fri, 18 Oct 2019 20:20:01 -0700 Subject: [PATCH 459/572] mm: memblock: do not enforce current limit for memblock_phys* family Until commit 92d12f9544b7 ("memblock: refactor internal allocation functions") the maximal address for memblock allocations was forced to memblock.current_limit only for the allocation functions returning virtual address. The changes introduced by that commit moved the limit enforcement into the allocation core and as a result the allocation functions returning physical address also started to limit allocations to memblock.current_limit. This caused breakage of etnaviv GPU driver: etnaviv etnaviv: bound 130000.gpu (ops gpu_ops) etnaviv etnaviv: bound 134000.gpu (ops gpu_ops) etnaviv etnaviv: bound 2204000.gpu (ops gpu_ops) etnaviv-gpu 130000.gpu: model: GC2000, revision: 5108 etnaviv-gpu 130000.gpu: command buffer outside valid memory window etnaviv-gpu 134000.gpu: model: GC320, revision: 5007 etnaviv-gpu 134000.gpu: command buffer outside valid memory window etnaviv-gpu 2204000.gpu: model: GC355, revision: 1215 etnaviv-gpu 2204000.gpu: Ignoring GPU with VG and FE2.0 Restore the behaviour of memblock_phys* family so that these functions will not enforce memblock.current_limit. Link: http://lkml.kernel.org/r/1570915861-17633-1-git-send-email-rppt@kernel.org Fixes: 92d12f9544b7 ("memblock: refactor internal allocation functions") Signed-off-by: Mike Rapoport Reported-by: Adam Ford Tested-by: Adam Ford [imx6q-logicpd] Cc: Catalin Marinas Cc: Christoph Hellwig Cc: Fabio Estevam Cc: Lucas Stach Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memblock.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/memblock.c b/mm/memblock.c index 7d4f61ae666a..c4b16cae2bc9 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1356,9 +1356,6 @@ static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size, align = SMP_CACHE_BYTES; } - if (end > memblock.current_limit) - end = memblock.current_limit; - again: found = memblock_find_in_range_node(size, align, start, end, nid, flags); @@ -1469,6 +1466,9 @@ static void * __init memblock_alloc_internal( if (WARN_ON_ONCE(slab_is_available())) return kzalloc_node(size, GFP_NOWAIT, nid); + if (max_addr > memblock.current_limit) + max_addr = memblock.current_limit; + alloc = memblock_alloc_range_nid(size, align, min_addr, max_addr, nid); /* retry allocation without lower limit */ From f231fe4235e22e18d847e05cbe705deaca56580a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 18 Oct 2019 20:20:05 -0700 Subject: [PATCH 460/572] hugetlbfs: don't access uninitialized memmaps in pfn_range_valid_gigantic() Uninitialized memmaps contain garbage and in the worst case trigger kernel BUGs, especially with CONFIG_PAGE_POISONING. They should not get touched. Let's make sure that we only consider online memory (managed by the buddy) that has initialized memmaps. ZONE_DEVICE is not applicable. page_zone() will call page_to_nid(), which will trigger VM_BUG_ON_PGFLAGS(PagePoisoned(page), page) with CONFIG_PAGE_POISONING and CONFIG_DEBUG_VM_PGFLAGS when called on uninitialized memmaps. This can be the case when an offline memory block (e.g., never onlined) is spanned by a zone. Note: As explained by Michal in [1], alloc_contig_range() will verify the range. So it boils down to the wrong access in this function. [1] http://lkml.kernel.org/r/20180423000943.GO17484@dhcp22.suse.cz Link: http://lkml.kernel.org/r/20191015120717.4858-1-david@redhat.com Fixes: f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded memory to zones until online") [visible after d0dc12e86b319] Signed-off-by: David Hildenbrand Reported-by: Michal Hocko Acked-by: Michal Hocko Reviewed-by: Mike Kravetz Cc: Anshuman Khandual Cc: [4.13+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index ef37c85423a5..b45a95363a84 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1084,11 +1084,10 @@ static bool pfn_range_valid_gigantic(struct zone *z, struct page *page; for (i = start_pfn; i < end_pfn; i++) { - if (!pfn_valid(i)) + page = pfn_to_online_page(i); + if (!page) return false; - page = pfn_to_page(i); - if (page_zone(page) != z) return false; From b918c43021baaa3648de09e19a4a3dd555a45f40 Mon Sep 17 00:00:00 2001 From: Yi Li Date: Fri, 18 Oct 2019 20:20:08 -0700 Subject: [PATCH 461/572] ocfs2: fix panic due to ocfs2_wq is null mount.ocfs2 failed when reading ocfs2 filesystem superblock encounters an error. ocfs2_initialize_super() returns before allocating ocfs2_wq. ocfs2_dismount_volume() triggers the following panic. Oct 15 16:09:27 cnwarekv-205120 kernel: On-disk corruption discovered.Please run fsck.ocfs2 once the filesystem is unmounted. Oct 15 16:09:27 cnwarekv-205120 kernel: (mount.ocfs2,22804,44): ocfs2_read_locked_inode:537 ERROR: status = -30 Oct 15 16:09:27 cnwarekv-205120 kernel: (mount.ocfs2,22804,44): ocfs2_init_global_system_inodes:458 ERROR: status = -30 Oct 15 16:09:27 cnwarekv-205120 kernel: (mount.ocfs2,22804,44): ocfs2_init_global_system_inodes:491 ERROR: status = -30 Oct 15 16:09:27 cnwarekv-205120 kernel: (mount.ocfs2,22804,44): ocfs2_initialize_super:2313 ERROR: status = -30 Oct 15 16:09:27 cnwarekv-205120 kernel: (mount.ocfs2,22804,44): ocfs2_fill_super:1033 ERROR: status = -30 ------------[ cut here ]------------ Oops: 0002 [#1] SMP NOPTI CPU: 1 PID: 11753 Comm: mount.ocfs2 Tainted: G E 4.14.148-200.ckv.x86_64 #1 Hardware name: Sugon H320-G30/35N16-US, BIOS 0SSDX017 12/21/2018 task: ffff967af0520000 task.stack: ffffa5f05484000 RIP: 0010:mutex_lock+0x19/0x20 Call Trace: flush_workqueue+0x81/0x460 ocfs2_shutdown_local_alloc+0x47/0x440 [ocfs2] ocfs2_dismount_volume+0x84/0x400 [ocfs2] ocfs2_fill_super+0xa4/0x1270 [ocfs2] ? ocfs2_initialize_super.isa.211+0xf20/0xf20 [ocfs2] mount_bdev+0x17f/0x1c0 mount_fs+0x3a/0x160 Link: http://lkml.kernel.org/r/1571139611-24107-1-git-send-email-yili@winhong.com Signed-off-by: Yi Li Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/journal.c | 3 ++- fs/ocfs2/localalloc.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 930e3d388579..699a560efbb0 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -217,7 +217,8 @@ void ocfs2_recovery_exit(struct ocfs2_super *osb) /* At this point, we know that no more recovery threads can be * launched, so wait for any recovery completion work to * complete. */ - flush_workqueue(osb->ocfs2_wq); + if (osb->ocfs2_wq) + flush_workqueue(osb->ocfs2_wq); /* * Now that recovery is shut down, and the osb is about to be diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 158e5af767fd..720e9f94957e 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -377,7 +377,8 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb) struct ocfs2_dinode *alloc = NULL; cancel_delayed_work(&osb->la_enable_wq); - flush_workqueue(osb->ocfs2_wq); + if (osb->ocfs2_wq) + flush_workqueue(osb->ocfs2_wq); if (osb->local_alloc_state == OCFS2_LA_UNUSED) goto out; From ae8af4388db002bbd1df78ecee7ca31cee78e964 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Fri, 18 Oct 2019 20:20:11 -0700 Subject: [PATCH 462/572] mm/memcontrol: update lruvec counters in mem_cgroup_move_account Mapped, dirty and writeback pages are also counted in per-lruvec stats. These counters needs update when page is moved between cgroups. Currently is nobody *consuming* the lruvec versions of these counters and that there is no user-visible effect. Link: http://lkml.kernel.org/r/157112699975.7360.1062614888388489788.stgit@buzz Fixes: 00f3ca2c2d66 ("mm: memcontrol: per-lruvec stats infrastructure") Signed-off-by: Konstantin Khlebnikov Acked-by: Johannes Weiner Acked-by: Michal Hocko Cc: Vladimir Davydov Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index bdac56009a38..363106578876 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5420,6 +5420,8 @@ static int mem_cgroup_move_account(struct page *page, struct mem_cgroup *from, struct mem_cgroup *to) { + struct lruvec *from_vec, *to_vec; + struct pglist_data *pgdat; unsigned long flags; unsigned int nr_pages = compound ? hpage_nr_pages(page) : 1; int ret; @@ -5443,11 +5445,15 @@ static int mem_cgroup_move_account(struct page *page, anon = PageAnon(page); + pgdat = page_pgdat(page); + from_vec = mem_cgroup_lruvec(pgdat, from); + to_vec = mem_cgroup_lruvec(pgdat, to); + spin_lock_irqsave(&from->move_lock, flags); if (!anon && page_mapped(page)) { - __mod_memcg_state(from, NR_FILE_MAPPED, -nr_pages); - __mod_memcg_state(to, NR_FILE_MAPPED, nr_pages); + __mod_lruvec_state(from_vec, NR_FILE_MAPPED, -nr_pages); + __mod_lruvec_state(to_vec, NR_FILE_MAPPED, nr_pages); } /* @@ -5459,14 +5465,14 @@ static int mem_cgroup_move_account(struct page *page, struct address_space *mapping = page_mapping(page); if (mapping_cap_account_dirty(mapping)) { - __mod_memcg_state(from, NR_FILE_DIRTY, -nr_pages); - __mod_memcg_state(to, NR_FILE_DIRTY, nr_pages); + __mod_lruvec_state(from_vec, NR_FILE_DIRTY, -nr_pages); + __mod_lruvec_state(to_vec, NR_FILE_DIRTY, nr_pages); } } if (PageWriteback(page)) { - __mod_memcg_state(from, NR_WRITEBACK, -nr_pages); - __mod_memcg_state(to, NR_WRITEBACK, nr_pages); + __mod_lruvec_state(from_vec, NR_WRITEBACK, -nr_pages); + __mod_lruvec_state(to_vec, NR_WRITEBACK, nr_pages); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE From f7daefe4231e57381d92c2e2ad905a899c28e402 Mon Sep 17 00:00:00 2001 From: Chenwandun Date: Fri, 18 Oct 2019 20:20:14 -0700 Subject: [PATCH 463/572] zram: fix race between backing_dev_show and backing_dev_store CPU0: CPU1: backing_dev_show backing_dev_store ...... ...... file = zram->backing_dev; down_read(&zram->init_lock); down_read(&zram->init_init_lock) file_path(file, ...); zram->backing_dev = backing_dev; up_read(&zram->init_lock); up_read(&zram->init_lock); gets the value of zram->backing_dev too early in backing_dev_show, which resultin the value being NULL at the beginning, and not NULL later. backtrace: d_path+0xcc/0x174 file_path+0x10/0x18 backing_dev_show+0x40/0xb4 dev_attr_show+0x20/0x54 sysfs_kf_seq_show+0x9c/0x10c kernfs_seq_show+0x28/0x30 seq_read+0x184/0x488 kernfs_fop_read+0x5c/0x1a4 __vfs_read+0x44/0x128 vfs_read+0xa0/0x138 SyS_read+0x54/0xb4 Link: http://lkml.kernel.org/r/1571046839-16814-1-git-send-email-chenwandun@huawei.com Signed-off-by: Chenwandun Acked-by: Minchan Kim Cc: Sergey Senozhatsky Cc: Jens Axboe Cc: [4.14+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zram_drv.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index d58a359a6622..4285e75e52c3 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -413,13 +413,14 @@ static void reset_bdev(struct zram *zram) static ssize_t backing_dev_show(struct device *dev, struct device_attribute *attr, char *buf) { + struct file *file; struct zram *zram = dev_to_zram(dev); - struct file *file = zram->backing_dev; char *p; ssize_t ret; down_read(&zram->init_lock); - if (!zram->backing_dev) { + file = zram->backing_dev; + if (!file) { memcpy(buf, "none\n", 5); up_read(&zram->init_lock); return 5; From 444f84fd2ac7bae36f3dd3ce1d39d11211c2c72a Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Fri, 18 Oct 2019 20:20:17 -0700 Subject: [PATCH 464/572] mm: include for is_vma_temporary_stack Include for the definition of is_vma_temporary_stack to fix the following sparse warning: mm/rmap.c:1673:6: warning: symbol 'is_vma_temporary_stack' was not declared. Should it be static? Link: http://lkml.kernel.org/r/20191009151155.27763-1-ben.dooks@codethink.co.uk Signed-off-by: Ben Dooks Reviewed-by: Qian Cai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/rmap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/rmap.c b/mm/rmap.c index d9a23bb773bf..0c7b2a9400d4 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -61,6 +61,7 @@ #include #include #include +#include #include #include #include From d0e6a5821cdf08eea91d8597dce1ad695ebeb8bc Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Fri, 18 Oct 2019 20:20:20 -0700 Subject: [PATCH 465/572] mm/filemap.c: include for generic_file_vm_ops definition The generic_file_vm_ops is defined in so include it to fix the following warning: mm/filemap.c:2717:35: warning: symbol 'generic_file_vm_ops' was not declared. Should it be static? Link: http://lkml.kernel.org/r/20191008102311.25432-1-ben.dooks@codethink.co.uk Signed-off-by: Ben Dooks Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/filemap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/filemap.c b/mm/filemap.c index 1146fcfa3215..85b7d087eb45 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -40,6 +40,7 @@ #include #include #include +#include #include "internal.h" #define CREATE_TRACE_POINTS From a2ae8c0551050cbf3232fbb3d963a9bbe7b57268 Mon Sep 17 00:00:00 2001 From: "Ben Dooks (Codethink)" Date: Fri, 18 Oct 2019 20:20:24 -0700 Subject: [PATCH 466/572] mm/init-mm.c: include for vm_committed_as_batch mm_init.c needs to include for the definition of vm_committed_as_batch. Fixes the following sparse warning: mm/mm_init.c:141:5: warning: symbol 'vm_committed_as_batch' was not declared. Should it be static? Link: http://lkml.kernel.org/r/20191016091509.26708-1-ben.dooks@codethink.co.uk Signed-off-by: Ben Dooks Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/init-mm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/init-mm.c b/mm/init-mm.c index fb1e15028ef0..19603302a77f 100644 --- a/mm/init-mm.c +++ b/mm/init-mm.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include From 2be5fbf9a91c417cd178a481538baddda3e2f38c Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 18 Oct 2019 20:20:27 -0700 Subject: [PATCH 467/572] proc/meminfo: fix output alignment Patch series "Fixes for THP in page cache", v2. This patch (of 5): Add extra space for FileHugePages and FilePmdMapped, so the output is aligned with other rows. Link: http://lkml.kernel.org/r/20191017164223.2762148-2-songliubraving@fb.com Fixes: 60fbf0ab5da1 ("mm,thp: stats for file backed THP") Signed-off-by: Kirill A. Shutemov Signed-off-by: Song Liu Tested-by: Song Liu Acked-by: Yang Shi Cc: Matthew Wilcox Cc: Oleg Nesterov Cc: Srikar Dronamraju Cc: William Kucharski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/meminfo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index ac9247371871..8c1f1bb1a5ce 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -132,9 +132,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v) global_node_page_state(NR_SHMEM_THPS) * HPAGE_PMD_NR); show_val_kb(m, "ShmemPmdMapped: ", global_node_page_state(NR_SHMEM_PMDMAPPED) * HPAGE_PMD_NR); - show_val_kb(m, "FileHugePages: ", + show_val_kb(m, "FileHugePages: ", global_node_page_state(NR_FILE_THPS) * HPAGE_PMD_NR); - show_val_kb(m, "FilePmdMapped: ", + show_val_kb(m, "FilePmdMapped: ", global_node_page_state(NR_FILE_PMDMAPPED) * HPAGE_PMD_NR); #endif From 06d3eff62d9dc6f21e7ebeb14399f2542a36cdf5 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 18 Oct 2019 20:20:30 -0700 Subject: [PATCH 468/572] mm/thp: fix node page state in split_huge_page_to_list() Make sure split_huge_page_to_list() handles the state of shmem THP and file THP properly. Link: http://lkml.kernel.org/r/20191017164223.2762148-3-songliubraving@fb.com Fixes: 60fbf0ab5da1 ("mm,thp: stats for file backed THP") Signed-off-by: Kirill A. Shutemov Signed-off-by: Song Liu Tested-by: Song Liu Acked-by: Yang Shi Cc: Matthew Wilcox (Oracle) Cc: Oleg Nesterov Cc: Srikar Dronamraju Cc: William Kucharski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index c5cb6dcd6c69..13cc93785006 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2789,8 +2789,13 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) ds_queue->split_queue_len--; list_del(page_deferred_list(head)); } - if (mapping) - __dec_node_page_state(page, NR_SHMEM_THPS); + if (mapping) { + if (PageSwapBacked(page)) + __dec_node_page_state(page, NR_SHMEM_THPS); + else + __dec_node_page_state(page, NR_FILE_THPS); + } + spin_unlock(&ds_queue->split_queue_lock); __split_huge_page(page, list, end, flags); if (PageSwapCache(head)) { From 906d278d75e364f2bb85dc1e1ff6265ea46e7e43 Mon Sep 17 00:00:00 2001 From: William Kucharski Date: Fri, 18 Oct 2019 20:20:33 -0700 Subject: [PATCH 469/572] mm/vmscan.c: support removing arbitrary sized pages from mapping __remove_mapping() assumes that pages can only be either base pages or HPAGE_PMD_SIZE. Ask the page what size it is. Link: http://lkml.kernel.org/r/20191017164223.2762148-4-songliubraving@fb.com Fixes: 99cb0dbd47a1 ("mm,thp: add read-only THP support for (non-shmem) FS") Signed-off-by: William Kucharski Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Song Liu Acked-by: Yang Shi Cc: "Kirill A. Shutemov" Cc: Oleg Nesterov Cc: Srikar Dronamraju Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 024b7e929752..ee4eecc7e1c2 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -933,10 +933,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, * Note that if SetPageDirty is always performed via set_page_dirty, * and thus under the i_pages lock, then this ordering is not required. */ - if (unlikely(PageTransHuge(page)) && PageSwapCache(page)) - refcount = 1 + HPAGE_PMD_NR; - else - refcount = 2; + refcount = 1 + compound_nr(page); if (!page_ref_freeze(page, refcount)) goto cannot_free; /* note: atomic_cmpxchg in page_ref_freeze provides the smp_rmb */ From ef18a1ca847b01cb7296f11a728cb2f5ef671760 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 18 Oct 2019 20:20:36 -0700 Subject: [PATCH 470/572] mm/thp: allow dropping THP from page cache Once a THP is added to the page cache, it cannot be dropped via /proc/sys/vm/drop_caches. Fix this issue with proper handling in invalidate_mapping_pages(). Link: http://lkml.kernel.org/r/20191017164223.2762148-5-songliubraving@fb.com Fixes: 99cb0dbd47a1 ("mm,thp: add read-only THP support for (non-shmem) FS") Signed-off-by: Kirill A. Shutemov Signed-off-by: Song Liu Tested-by: Song Liu Acked-by: Yang Shi Cc: Matthew Wilcox (Oracle) Cc: Oleg Nesterov Cc: Srikar Dronamraju Cc: William Kucharski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/truncate.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/mm/truncate.c b/mm/truncate.c index 8563339041f6..dd9ebc1da356 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -592,6 +592,16 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping, unlock_page(page); continue; } + + /* Take a pin outside pagevec */ + get_page(page); + + /* + * Drop extra pins before trying to invalidate + * the huge page. + */ + pagevec_remove_exceptionals(&pvec); + pagevec_release(&pvec); } ret = invalidate_inode_page(page); @@ -602,6 +612,8 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping, */ if (!ret) deactivate_file_page(page); + if (PageTransHuge(page)) + put_page(page); count += ret; } pagevec_remove_exceptionals(&pvec); From aa5de305c90c995321df452f274fe75b52bd8fcc Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 18 Oct 2019 20:20:40 -0700 Subject: [PATCH 471/572] kernel/events/uprobes.c: only do FOLL_SPLIT_PMD for uprobe register Attaching uprobe to text section in THP splits the PMD mapped page table into PTE mapped entries. On uprobe detach, we would like to regroup PMD mapped page table entry to regain performance benefit of THP. However, the regroup is broken For perf_event based trace_uprobe. This is because perf_event based trace_uprobe calls uprobe_unregister twice on close: first in TRACE_REG_PERF_CLOSE, then in TRACE_REG_PERF_UNREGISTER. The second call will split the PMD mapped page table entry, which is not the desired behavior. Fix this by only use FOLL_SPLIT_PMD for uprobe register case. Add a WARN() to confirm uprobe unregister never work on huge pages, and abort the operation when this WARN() triggers. Link: http://lkml.kernel.org/r/20191017164223.2762148-6-songliubraving@fb.com Fixes: 5a52c9df62b4 ("uprobe: use FOLL_SPLIT_PMD instead of FOLL_SPLIT") Signed-off-by: Song Liu Reviewed-by: Srikar Dronamraju Cc: Kirill A. Shutemov Cc: Oleg Nesterov Cc: Matthew Wilcox (Oracle) Cc: William Kucharski Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/events/uprobes.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 94d38a39d72e..c74761004ee5 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -474,14 +474,17 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, struct vm_area_struct *vma; int ret, is_register, ref_ctr_updated = 0; bool orig_page_huge = false; + unsigned int gup_flags = FOLL_FORCE; is_register = is_swbp_insn(&opcode); uprobe = container_of(auprobe, struct uprobe, arch); retry: + if (is_register) + gup_flags |= FOLL_SPLIT_PMD; /* Read the page with vaddr into memory */ - ret = get_user_pages_remote(NULL, mm, vaddr, 1, - FOLL_FORCE | FOLL_SPLIT_PMD, &old_page, &vma, NULL); + ret = get_user_pages_remote(NULL, mm, vaddr, 1, gup_flags, + &old_page, &vma, NULL); if (ret <= 0) return ret; @@ -489,6 +492,12 @@ retry: if (ret <= 0) goto put_old; + if (WARN(!is_register && PageCompound(old_page), + "uprobe unregister should never work on compound page\n")) { + ret = -EINVAL; + goto put_old; + } + /* We are going to replace instruction, update ref_ctr. */ if (!ref_ctr_updated && uprobe->ref_ctr_offset) { ret = update_ref_ctr(uprobe, mm, is_register ? 1 : -1); From 585d730d41120926e3f79a601edad3930fa28366 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Fri, 18 Oct 2019 20:20:43 -0700 Subject: [PATCH 472/572] scripts/gdb: fix debugging modules on s390 Currently lx-symbols assumes that module text is always located at module->core_layout->base, but s390 uses the following layout: +------+ <- module->core_layout->base | GOT | +------+ <- module->core_layout->base + module->arch->plt_offset | PLT | +------+ <- module->core_layout->base + module->arch->plt_offset + | TEXT | module->arch->plt_size +------+ Therefore, when trying to debug modules on s390, all the symbol addresses are skewed by plt_offset + plt_size. Fix by adding plt_offset + plt_size to module_addr in load_module_symbols(). Link: http://lkml.kernel.org/r/20191017085917.81791-1-iii@linux.ibm.com Signed-off-by: Ilya Leoshkevich Reviewed-by: Jan Kiszka Cc: Kieran Bingham Cc: Heiko Carstens Cc: Vasily Gorbik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/gdb/linux/symbols.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/scripts/gdb/linux/symbols.py b/scripts/gdb/linux/symbols.py index 34e40e96dee2..7b7c2fafbc68 100644 --- a/scripts/gdb/linux/symbols.py +++ b/scripts/gdb/linux/symbols.py @@ -15,7 +15,7 @@ import gdb import os import re -from linux import modules +from linux import modules, utils if hasattr(gdb, 'Breakpoint'): @@ -116,6 +116,12 @@ lx-symbols command.""" module_file = self._get_module_file(module_name) if module_file: + if utils.is_target_arch('s390'): + # Module text is preceded by PLT stubs on s390. + module_arch = module['arch'] + plt_offset = int(module_arch['plt_offset']) + plt_size = int(module_arch['plt_size']) + module_addr = hex(int(module_addr, 0) + plt_offset + plt_size) gdb.write("loading @{addr}: {filename}\n".format( addr=module_addr, filename=module_file)) cmdline = "add-symbol-file {filename} {addr}{sections}".format( From 580a05f9d4ada3bfb689140d0efec1efdb8a48da Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Fri, 18 Oct 2019 11:42:59 +0800 Subject: [PATCH 473/572] net: hns3: fix mis-counting IRQ vector numbers issue Currently, the num_msi_left means the vector numbers of NIC, but if the PF supported RoCE, it contains the vector numbers of NIC and RoCE(Not expected). This may cause interrupts lost in some case, because of the NIC module used the vector resources which belongs to RoCE. This patch adds a new variable num_nic_msi to store the vector numbers of NIC, and adjust the default TQP numbers and rss_size according to the value of num_nic_msi. Fixes: 46a3df9f9718 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support") Signed-off-by: Yonglong Liu Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 2 ++ .../hisilicon/hns3/hns3pf/hclge_main.c | 21 +++++++++++++- .../hisilicon/hns3/hns3pf/hclge_main.h | 1 + .../ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 11 ++++++-- .../hisilicon/hns3/hns3vf/hclgevf_main.c | 28 +++++++++++++++++-- .../hisilicon/hns3/hns3vf/hclgevf_main.h | 1 + 6 files changed, 58 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index c4b7bf851a28..75ccc1e7076b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -32,6 +32,8 @@ #define HNAE3_MOD_VERSION "1.0" +#define HNAE3_MIN_VECTOR_NUM 2 /* first one for misc, another for IO */ + /* Device IDs */ #define HNAE3_DEV_ID_GE 0xA220 #define HNAE3_DEV_ID_25GE 0xA221 diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index fd7f94372ff0..e02e01bd9eff 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -906,6 +906,9 @@ static int hclge_query_pf_resource(struct hclge_dev *hdev) hnae3_get_field(__le16_to_cpu(req->pf_intr_vector_number), HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S); + /* nic's msix numbers is always equals to the roce's. */ + hdev->num_nic_msi = hdev->num_roce_msi; + /* PF should have NIC vectors and Roce vectors, * NIC vectors are queued before Roce vectors. */ @@ -915,6 +918,15 @@ static int hclge_query_pf_resource(struct hclge_dev *hdev) hdev->num_msi = hnae3_get_field(__le16_to_cpu(req->pf_intr_vector_number), HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S); + + hdev->num_nic_msi = hdev->num_msi; + } + + if (hdev->num_nic_msi < HNAE3_MIN_VECTOR_NUM) { + dev_err(&hdev->pdev->dev, + "Just %u msi resources, not enough for pf(min:2).\n", + hdev->num_nic_msi); + return -EINVAL; } return 0; @@ -1507,6 +1519,10 @@ static int hclge_assign_tqp(struct hclge_vport *vport, u16 num_tqps) kinfo->rss_size = min_t(u16, hdev->rss_size_max, vport->alloc_tqps / hdev->tm_info.num_tc); + /* ensure one to one mapping between irq and queue at default */ + kinfo->rss_size = min_t(u16, kinfo->rss_size, + (hdev->num_nic_msi - 1) / hdev->tm_info.num_tc); + return 0; } @@ -2285,7 +2301,8 @@ static int hclge_init_msi(struct hclge_dev *hdev) int vectors; int i; - vectors = pci_alloc_irq_vectors(pdev, 1, hdev->num_msi, + vectors = pci_alloc_irq_vectors(pdev, HNAE3_MIN_VECTOR_NUM, + hdev->num_msi, PCI_IRQ_MSI | PCI_IRQ_MSIX); if (vectors < 0) { dev_err(&pdev->dev, @@ -2300,6 +2317,7 @@ static int hclge_init_msi(struct hclge_dev *hdev) hdev->num_msi = vectors; hdev->num_msi_left = vectors; + hdev->base_msi_vector = pdev->irq; hdev->roce_base_vector = hdev->base_msi_vector + hdev->roce_base_msix_offset; @@ -3903,6 +3921,7 @@ static int hclge_get_vector(struct hnae3_handle *handle, u16 vector_num, int alloc = 0; int i, j; + vector_num = min_t(u16, hdev->num_nic_msi - 1, vector_num); vector_num = min(hdev->num_msi_left, vector_num); for (j = 0; j < vector_num; j++) { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 3e9574a9e22d..c3d56b872ed7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -763,6 +763,7 @@ struct hclge_dev { u32 base_msi_vector; u16 *vector_status; int *vector_irq; + u16 num_nic_msi; /* Num of nic vectors for this PF */ u16 num_roce_msi; /* Num of roce vectors for this PF */ int roce_base_vector; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index 9f0e35f27789..62399cc1c5a6 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -537,9 +537,16 @@ static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport) kinfo->rss_size = kinfo->req_rss_size; } else if (kinfo->rss_size > max_rss_size || (!kinfo->req_rss_size && kinfo->rss_size < max_rss_size)) { + /* if user not set rss, the rss_size should compare with the + * valid msi numbers to ensure one to one map between tqp and + * irq as default. + */ + if (!kinfo->req_rss_size) + max_rss_size = min_t(u16, max_rss_size, + (hdev->num_nic_msi - 1) / + kinfo->num_tc); + /* Set to the maximum specification value (max_rss_size). */ - dev_info(&hdev->pdev->dev, "rss changes from %d to %d\n", - kinfo->rss_size, max_rss_size); kinfo->rss_size = max_rss_size; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index e3090b3dab1d..7d7e712691b9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -411,6 +411,13 @@ static int hclgevf_knic_setup(struct hclgevf_dev *hdev) kinfo->tqp[i] = &hdev->htqp[i].q; } + /* after init the max rss_size and tqps, adjust the default tqp numbers + * and rss size with the actual vector numbers + */ + kinfo->num_tqps = min_t(u16, hdev->num_nic_msix - 1, kinfo->num_tqps); + kinfo->rss_size = min_t(u16, kinfo->num_tqps / kinfo->num_tc, + kinfo->rss_size); + return 0; } @@ -502,6 +509,7 @@ static int hclgevf_get_vector(struct hnae3_handle *handle, u16 vector_num, int alloc = 0; int i, j; + vector_num = min_t(u16, hdev->num_nic_msix - 1, vector_num); vector_num = min(hdev->num_msi_left, vector_num); for (j = 0; j < vector_num; j++) { @@ -2246,13 +2254,14 @@ static int hclgevf_init_msi(struct hclgevf_dev *hdev) int vectors; int i; - if (hnae3_get_bit(hdev->ae_dev->flag, HNAE3_DEV_SUPPORT_ROCE_B)) + if (hnae3_dev_roce_supported(hdev)) vectors = pci_alloc_irq_vectors(pdev, hdev->roce_base_msix_offset + 1, hdev->num_msi, PCI_IRQ_MSIX); else - vectors = pci_alloc_irq_vectors(pdev, 1, hdev->num_msi, + vectors = pci_alloc_irq_vectors(pdev, HNAE3_MIN_VECTOR_NUM, + hdev->num_msi, PCI_IRQ_MSI | PCI_IRQ_MSIX); if (vectors < 0) { @@ -2268,6 +2277,7 @@ static int hclgevf_init_msi(struct hclgevf_dev *hdev) hdev->num_msi = vectors; hdev->num_msi_left = vectors; + hdev->base_msi_vector = pdev->irq; hdev->roce_base_vector = pdev->irq + hdev->roce_base_msix_offset; @@ -2533,7 +2543,7 @@ static int hclgevf_query_vf_resource(struct hclgevf_dev *hdev) req = (struct hclgevf_query_res_cmd *)desc.data; - if (hnae3_get_bit(hdev->ae_dev->flag, HNAE3_DEV_SUPPORT_ROCE_B)) { + if (hnae3_dev_roce_supported(hdev)) { hdev->roce_base_msix_offset = hnae3_get_field(__le16_to_cpu(req->msixcap_localid_ba_rocee), HCLGEVF_MSIX_OFT_ROCEE_M, @@ -2542,6 +2552,9 @@ static int hclgevf_query_vf_resource(struct hclgevf_dev *hdev) hnae3_get_field(__le16_to_cpu(req->vf_intr_vector_number), HCLGEVF_VEC_NUM_M, HCLGEVF_VEC_NUM_S); + /* nic's msix numbers is always equals to the roce's. */ + hdev->num_nic_msix = hdev->num_roce_msix; + /* VF should have NIC vectors and Roce vectors, NIC vectors * are queued before Roce vectors. The offset is fixed to 64. */ @@ -2551,6 +2564,15 @@ static int hclgevf_query_vf_resource(struct hclgevf_dev *hdev) hdev->num_msi = hnae3_get_field(__le16_to_cpu(req->vf_intr_vector_number), HCLGEVF_VEC_NUM_M, HCLGEVF_VEC_NUM_S); + + hdev->num_nic_msix = hdev->num_msi; + } + + if (hdev->num_nic_msix < HNAE3_MIN_VECTOR_NUM) { + dev_err(&hdev->pdev->dev, + "Just %u msi resources, not enough for vf(min:2).\n", + hdev->num_nic_msix); + return -EINVAL; } return 0; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index bdde3afc286b..2b8d6bc6d224 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -270,6 +270,7 @@ struct hclgevf_dev { u16 num_msi; u16 num_msi_left; u16 num_msi_used; + u16 num_nic_msix; /* Num of nic vectors for this VF */ u16 num_roce_msix; /* Num of roce vectors for this VF */ u16 roce_base_msix_offset; int roce_base_vector; From 3d5c1a037d37392a6859afbde49be5ba6a70a6b3 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 18 Oct 2019 09:45:49 +0200 Subject: [PATCH 474/572] xen/netback: fix error path of xenvif_connect_data() xenvif_connect_data() calls module_put() in case of error. This is wrong as there is no related module_get(). Remove the superfluous module_put(). Fixes: 279f438e36c0a7 ("xen-netback: Don't destroy the netdev until the vif is shut down") Cc: # 3.12 Signed-off-by: Juergen Gross Reviewed-by: Paul Durrant Reviewed-by: Wei Liu Signed-off-by: David S. Miller --- drivers/net/xen-netback/interface.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 240f762b3749..103ed00775eb 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -719,7 +719,6 @@ err_unmap: xenvif_unmap_frontend_data_rings(queue); netif_napi_del(&queue->napi); err: - module_put(THIS_MODULE); return err; } From bd310aca442fcc72731b7acb65d32d05c956d56b Mon Sep 17 00:00:00 2001 From: Michael Tretter Date: Fri, 18 Oct 2019 16:11:43 +0200 Subject: [PATCH 475/572] macb: propagate errors when getting optional clocks The tx_clk, rx_clk, and tsu_clk are optional. Currently the macb driver marks clock as not available if it receives an error when trying to get a clock. This is wrong, because a clock controller might return -EPROBE_DEFER if a clock is not available, but will eventually become available. In these cases, the driver would probe successfully but will never be able to adjust the clocks, because the clocks were not available during probe, but became available later. For example, the clock controller for the ZynqMP is implemented in the PMU firmware and the clocks are only available after the firmware driver has been probed. Use devm_clk_get_optional() in instead of devm_clk_get() to get the optional clock and propagate all errors to the calling function. Signed-off-by: Michael Tretter Acked-by: Nicolas Ferre Tested-by: Nicolas Ferre Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 8e8d557901a9..1e1b774e1953 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -3405,17 +3405,17 @@ static int macb_clk_init(struct platform_device *pdev, struct clk **pclk, return err; } - *tx_clk = devm_clk_get(&pdev->dev, "tx_clk"); + *tx_clk = devm_clk_get_optional(&pdev->dev, "tx_clk"); if (IS_ERR(*tx_clk)) - *tx_clk = NULL; + return PTR_ERR(*tx_clk); - *rx_clk = devm_clk_get(&pdev->dev, "rx_clk"); + *rx_clk = devm_clk_get_optional(&pdev->dev, "rx_clk"); if (IS_ERR(*rx_clk)) - *rx_clk = NULL; + return PTR_ERR(*rx_clk); - *tsu_clk = devm_clk_get(&pdev->dev, "tsu_clk"); + *tsu_clk = devm_clk_get_optional(&pdev->dev, "tsu_clk"); if (IS_ERR(*tsu_clk)) - *tsu_clk = NULL; + return PTR_ERR(*tsu_clk); err = clk_prepare_enable(*pclk); if (err) { From a7fa12d15855904aff1716e1fc723c03ba38c5cc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 18 Oct 2019 09:16:57 -0700 Subject: [PATCH 476/572] net: netem: fix error path for corrupted GSO frames To corrupt a GSO frame we first perform segmentation. We then proceed using the first segment instead of the full GSO skb and requeue the rest of the segments as separate packets. If there are any issues with processing the first segment we still want to process the rest, therefore we jump to the finish_segs label. Commit 177b8007463c ("net: netem: fix backlog accounting for corrupted GSO frames") started using the pointer to the first segment in the "rest of segments processing", but as mentioned above the first segment may had already been freed at this point. Backlog corrections for parent qdiscs have to be adjusted. Fixes: 177b8007463c ("net: netem: fix backlog accounting for corrupted GSO frames") Reported-by: kbuild test robot Reported-by: Dan Carpenter Reported-by: Ben Hutchings Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/sched/sch_netem.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 0e44039e729c..942eb17f413c 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -509,6 +509,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb)) { qdisc_drop(skb, sch, to_free); + skb = NULL; goto finish_segs; } @@ -593,9 +594,10 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, finish_segs: if (segs) { unsigned int len, last_len; - int nb = 0; + int nb; - len = skb->len; + len = skb ? skb->len : 0; + nb = skb ? 1 : 0; while (segs) { skb2 = segs->next; @@ -612,7 +614,8 @@ finish_segs: } segs = skb2; } - qdisc_tree_reduce_backlog(sch, -nb, prev_len - len); + /* Parent qdiscs accounted for 1 skb of size @prev_len */ + qdisc_tree_reduce_backlog(sch, -(nb - 1), -(len - prev_len)); } return NET_XMIT_SUCCESS; } From e0ad032e144731a5928f2d75e91c2064ba1a764c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 18 Oct 2019 09:16:58 -0700 Subject: [PATCH 477/572] net: netem: correct the parent's backlog when corrupted packet was dropped If packet corruption failed we jump to finish_segs and return NET_XMIT_SUCCESS. Seeing success will make the parent qdisc increment its backlog, that's incorrect - we need to return NET_XMIT_DROP. Fixes: 6071bd1aa13e ("netem: Segment GSO packets on enqueue") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/sched/sch_netem.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 942eb17f413c..42e557d48e4e 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -616,6 +616,8 @@ finish_segs: } /* Parent qdiscs accounted for 1 skb of size @prev_len */ qdisc_tree_reduce_backlog(sch, -(nb - 1), -(len - prev_len)); + } else if (!skb) { + return NET_XMIT_DROP; } return NET_XMIT_SUCCESS; } From 3d00cf2fbb61212f47a3cf838be51c921366c937 Mon Sep 17 00:00:00 2001 From: Chenwandun Date: Fri, 18 Oct 2019 18:20:37 +0800 Subject: [PATCH 478/572] net: aquantia: add an error handling in aq_nic_set_multicast_list add an error handling in aq_nic_set_multicast_list, it may not work when hw_multicast_list_set error; and at the same time it will remove gcc Wunused-but-set-variable warning. Signed-off-by: Chenwandun Reviewed-by: Igor Russkikh Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 2a18439b36fb..137c1de4c6ec 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -664,6 +664,8 @@ int aq_nic_set_multicast_list(struct aq_nic_s *self, struct net_device *ndev) err = hw_ops->hw_multicast_list_set(self->aq_hw, self->mc_list.ar, self->mc_list.count); + if (err < 0) + return err; } return aq_nic_set_packet_filter(self, packet_filter); } From 05908d72cc8fa39058cd9535db2833089cc3df6f Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Fri, 18 Oct 2019 17:56:58 +0100 Subject: [PATCH 479/572] net: ethernet: dwmac-sun8i: show message only when switching to promisc Printing the info message every time more than the max number of mac addresses are requested generates unnecessary log spam. Showing it only when the hw is not already in promiscous mode is equally informative without being annoying. Signed-off-by: Mans Rullgard Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index f97a4096f8fc..ddcc191febdb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -651,7 +651,8 @@ static void sun8i_dwmac_set_filter(struct mac_device_info *hw, } } } else { - netdev_info(dev, "Too many address, switching to promiscuous\n"); + if (!(readl(ioaddr + EMAC_RX_FRM_FLT) & EMAC_FRM_FLT_RXALL)) + netdev_info(dev, "Too many address, switching to promiscuous\n"); v = EMAC_FRM_FLT_RXALL; } From 50c7d2ba9de20f60a2d527ad6928209ef67e4cdd Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 18 Oct 2019 17:02:46 -0400 Subject: [PATCH 480/572] net: dsa: fix switch tree list If there are multiple switch trees on the device, only the last one will be listed, because the arguments of list_add_tail are swapped. Fixes: 83c0afaec7b7 ("net: dsa: Add new binding implementation") Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/dsa2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 73002022c9d8..716d265ba8ca 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -46,7 +46,7 @@ static struct dsa_switch_tree *dsa_tree_alloc(int index) dst->index = index; INIT_LIST_HEAD(&dst->list); - list_add_tail(&dsa_tree_list, &dst->list); + list_add_tail(&dst->list, &dsa_tree_list); kref_init(&dst->refcount); From 2a06b8982f8f2f40d03a3daf634676386bd84dbc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 18 Oct 2019 15:20:05 -0700 Subject: [PATCH 481/572] net: reorder 'struct net' fields to avoid false sharing Intel test robot reported a ~7% regression on TCP_CRR tests that they bisected to the cited commit. Indeed, every time a new TCP socket is created or deleted, the atomic counter net->count is touched (via get_net(net) and put_net(net) calls) So cpus might have to reload a contended cache line in net_hash_mix(net) calls. We need to reorder 'struct net' fields to move @hash_mix in a read mostly cache line. We move in the first cache line fields that can be dirtied often. We probably will have to address in a followup patch the __randomize_layout that was added in linux-4.13, since this might break our placement choices. Fixes: 355b98553789 ("netns: provide pure entropy for net_hash_mix()") Signed-off-by: Eric Dumazet Reported-by: kernel test robot Signed-off-by: David S. Miller --- include/net/net_namespace.h | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index f8712bbeb2e0..4c2cd9378699 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -52,6 +52,9 @@ struct bpf_prog; #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS) struct net { + /* First cache line can be often dirtied. + * Do not place here read-mostly fields. + */ refcount_t passive; /* To decide when the network * namespace should be freed. */ @@ -60,7 +63,13 @@ struct net { */ spinlock_t rules_mod_lock; - u32 hash_mix; + unsigned int dev_unreg_count; + + unsigned int dev_base_seq; /* protected by rtnl_mutex */ + int ifindex; + + spinlock_t nsid_lock; + atomic_t fnhe_genid; struct list_head list; /* list of network namespaces */ struct list_head exit_list; /* To linked to call pernet exit @@ -76,11 +85,11 @@ struct net { #endif struct user_namespace *user_ns; /* Owning user namespace */ struct ucounts *ucounts; - spinlock_t nsid_lock; struct idr netns_ids; struct ns_common ns; + struct list_head dev_base_head; struct proc_dir_entry *proc_net; struct proc_dir_entry *proc_net_stat; @@ -93,17 +102,18 @@ struct net { struct uevent_sock *uevent_sock; /* uevent socket */ - struct list_head dev_base_head; struct hlist_head *dev_name_head; struct hlist_head *dev_index_head; - unsigned int dev_base_seq; /* protected by rtnl_mutex */ - int ifindex; - unsigned int dev_unreg_count; + /* Note that @hash_mix can be read millions times per second, + * it is critical that it is on a read_mostly cache line. + */ + u32 hash_mix; + + struct net_device *loopback_dev; /* The loopback */ /* core fib_rules */ struct list_head rules_ops; - struct net_device *loopback_dev; /* The loopback */ struct netns_core core; struct netns_mib mib; struct netns_packet packet; @@ -171,7 +181,6 @@ struct net { struct sock *crypto_nlsk; #endif struct sock *diag_nlsk; - atomic_t fnhe_genid; } __randomize_layout; #include From f4e23cf9477452640d2522f3286db5df6fcfe45a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 3 Oct 2019 18:02:08 +0100 Subject: [PATCH 482/572] KVM: arm64: pmu: Fix cycle counter truncation When a counter is disabled, its value is sampled before the event is being disabled, and the value written back in the shadow register. In that process, the value gets truncated to 32bit, which is adequate for any counter but the cycle counter (defined as a 64bit counter). This obviously results in a corrupted counter, and things like "perf record -e cycles" not working at all when run in a guest... A similar, but less critical bug exists in kvm_pmu_get_counter_value. Make the truncation conditional on the counter not being the cycle counter, which results in a minor code reorganisation. Fixes: 80f393a23be6 ("KVM: arm/arm64: Support chained PMU counters") Reviewed-by: Andrew Murray Reported-by: Julien Thierry Signed-off-by: Marc Zyngier --- virt/kvm/arm/pmu.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index 362a01886bab..c30c3a74fc7f 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -146,8 +146,7 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) if (kvm_pmu_pmc_is_chained(pmc) && kvm_pmu_idx_is_high_counter(select_idx)) counter = upper_32_bits(counter); - - else if (!kvm_pmu_idx_is_64bit(vcpu, select_idx)) + else if (select_idx != ARMV8_PMU_CYCLE_IDX) counter = lower_32_bits(counter); return counter; @@ -193,7 +192,7 @@ static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc) */ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc) { - u64 counter, reg; + u64 counter, reg, val; pmc = kvm_pmu_get_canonical_pmc(pmc); if (!pmc->perf_event) @@ -201,16 +200,19 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc) counter = kvm_pmu_get_pair_counter_value(vcpu, pmc); - if (kvm_pmu_pmc_is_chained(pmc)) { - reg = PMEVCNTR0_EL0 + pmc->idx; - __vcpu_sys_reg(vcpu, reg) = lower_32_bits(counter); - __vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter); + if (pmc->idx == ARMV8_PMU_CYCLE_IDX) { + reg = PMCCNTR_EL0; + val = counter; } else { - reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX) - ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx; - __vcpu_sys_reg(vcpu, reg) = lower_32_bits(counter); + reg = PMEVCNTR0_EL0 + pmc->idx; + val = lower_32_bits(counter); } + __vcpu_sys_reg(vcpu, reg) = val; + + if (kvm_pmu_pmc_is_chained(pmc)) + __vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter); + kvm_pmu_release_perf_event(pmc); } From 6f16371453476fd094760ea3d6f00144e9ae3057 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 4 Oct 2019 11:03:09 +0100 Subject: [PATCH 483/572] arm64: KVM: Handle PMCR_EL0.LC as RES1 on pure AArch64 systems Of PMCR_EL0.LC, the ARMv8 ARM says: "In an AArch64 only implementation, this field is RES 1." So be it. Fixes: ab9468340d2bc ("arm64: KVM: Add access handler for PMCR register") Reviewed-by: Andrew Murray Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 2071260a275b..46822afc57e0 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -632,6 +632,8 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) */ val = ((pmcr & ~ARMV8_PMU_PMCR_MASK) | (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E); + if (!system_supports_32bit_el0()) + val |= ARMV8_PMU_PMCR_LC; __vcpu_sys_reg(vcpu, r->reg) = val; } @@ -682,6 +684,8 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, val = __vcpu_sys_reg(vcpu, PMCR_EL0); val &= ~ARMV8_PMU_PMCR_MASK; val |= p->regval & ARMV8_PMU_PMCR_MASK; + if (!system_supports_32bit_el0()) + val |= ARMV8_PMU_PMCR_LC; __vcpu_sys_reg(vcpu, PMCR_EL0) = val; kvm_pmu_handle_pmcr(vcpu, val); kvm_vcpu_pmu_restore_guest(vcpu); From 725ce66979fb6da5c1aec5b064d0871bedc23bf7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Oct 2019 15:09:55 +0100 Subject: [PATCH 484/572] KVM: arm64: pmu: Set the CHAINED attribute before creating the in-kernel event The current convention for KVM to request a chained event from the host PMU is to set bit[0] in attr.config1 (PERF_ATTR_CFG1_KVM_PMU_CHAINED). But as it turns out, this bit gets set *after* we create the kernel event that backs our virtual counter, meaning that we never get a 64bit counter. Moving the setting to an earlier point solves the problem. Fixes: 80f393a23be6 ("KVM: arm/arm64: Support chained PMU counters") Reviewed-by: Andrew Murray Signed-off-by: Marc Zyngier --- virt/kvm/arm/pmu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index c30c3a74fc7f..f291d4ac3519 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -569,12 +569,12 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) * high counter. */ attr.sample_period = (-counter) & GENMASK(63, 0); + if (kvm_pmu_counter_is_enabled(vcpu, pmc->idx + 1)) + attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED; + event = perf_event_create_kernel_counter(&attr, -1, current, kvm_pmu_perf_overflow, pmc + 1); - - if (kvm_pmu_counter_is_enabled(vcpu, pmc->idx + 1)) - attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED; } else { /* The initial sample period (overflow count) of an event. */ if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx)) From 8c3252c06516eac22c4f8e2506122171abedcc09 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 6 Oct 2019 10:28:50 +0100 Subject: [PATCH 485/572] KVM: arm64: pmu: Reset sample period on overflow handling The PMU emulation code uses the perf event sample period to trigger the overflow detection. This works fine for the *first* overflow handling, but results in a huge number of interrupts on the host, unrelated to the number of interrupts handled in the guest (a x20 factor is pretty common for the cycle counter). On a slow system (such as a SW model), this can result in the guest only making forward progress at a glacial pace. It turns out that the clue is in the name. The sample period is exactly that: a period. And once the an overflow has occured, the following period should be the full width of the associated counter, instead of whatever the guest had initially programed. Reset the sample period to the architected value in the overflow handler, which now results in a number of host interrupts that is much closer to the number of interrupts in the guest. Fixes: b02386eb7dac ("arm64: KVM: Add PMU overflow interrupt routing") Reviewed-by: Andrew Murray Signed-off-by: Marc Zyngier --- virt/kvm/arm/pmu.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index f291d4ac3519..8731dfeced8b 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -442,8 +443,25 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, struct pt_regs *regs) { struct kvm_pmc *pmc = perf_event->overflow_handler_context; + struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu); struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); int idx = pmc->idx; + u64 period; + + cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE); + + /* + * Reset the sample period to the architectural limit, + * i.e. the point where the counter overflows. + */ + period = -(local64_read(&perf_event->count)); + + if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx)) + period &= GENMASK(31, 0); + + local64_set(&perf_event->hw.period_left, 0); + perf_event->attr.sample_period = period; + perf_event->hw.sample_period = period; __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx); @@ -451,6 +469,8 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); kvm_vcpu_kick(vcpu); } + + cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD); } /** From 13301c6b16a6d809b331bb88e40ab9ce38238b8b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 20 Oct 2019 00:07:26 +0200 Subject: [PATCH 486/572] perf/x86/intel/pt: Fix base for single entry topa Jan reported failing ltp test for PT: https://github.com/linux-test-project/ltp/blob/master/testcases/kernel/tracing/pt_test/pt_test.c It looks like the reason is this new commit added in this v5.4 merge window: 38bb8d77d0b9 ("perf/x86/intel/pt: Split ToPA metadata and page layout") which did not keep the TOPA_SHIFT for entry base. Add it back. Reported-by: Jan Stancek Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: 38bb8d77d0b9 ("perf/x86/intel/pt: Split ToPA metadata and page layout") Link: https://lkml.kernel.org/r/20191019220726.12213-1-jolsa@kernel.org [ Minor changelog edits. ] Signed-off-by: Ingo Molnar --- arch/x86/events/intel/pt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 74e80ed9c6c4..05e43d0f430b 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -627,7 +627,7 @@ static struct topa *topa_alloc(int cpu, gfp_t gfp) * link as the 2nd entry in the table */ if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) { - TOPA_ENTRY(&tp->topa, 1)->base = page_to_phys(p); + TOPA_ENTRY(&tp->topa, 1)->base = page_to_phys(p) >> TOPA_SHIFT; TOPA_ENTRY(&tp->topa, 1)->end = 1; } From 7d194c2100ad2a6dded545887d02754948ca5241 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 20 Oct 2019 15:56:22 -0400 Subject: [PATCH 487/572] Linux 5.4-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 19efe04b398c..5475cdb6d57d 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 4 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Nesting Opossum # *DOCUMENTATION* From 77751a466ebd1a785456556061a2db6d60ea3898 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 16 Oct 2019 12:41:24 +0200 Subject: [PATCH 488/572] PM: QoS: Introduce frequency QoS Introduce frequency QoS, based on the "raw" low-level PM QoS, to represent min and max frequency requests and aggregate constraints. The min and max frequency requests are to be represented by struct freq_qos_request objects and the aggregate constraints are to be represented by struct freq_constraints objects. The latter are expected to be initialized with the help of freq_constraints_init(). The freq_qos_read_value() helper is defined to retrieve the aggregate constraints values from a given struct freq_constraints object and there are the freq_qos_add_request(), freq_qos_update_request() and freq_qos_remove_request() helpers to manipulate the min and max frequency requests. It is assumed that the the helpers will not run concurrently with each other for the same struct freq_qos_request object, so if that may be the case, their uses must ensure proper synchronization between them (e.g. through locking). In addition, freq_qos_add_notifier() and freq_qos_remove_notifier() are provided to add and remove notifiers that will trigger on aggregate constraint changes to and from a given struct freq_constraints object, respectively. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- include/linux/pm_qos.h | 44 ++++++++ kernel/power/qos.c | 240 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 284 insertions(+) diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 222c3e01397c..57b93e3d8f29 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -267,4 +267,48 @@ static inline s32 dev_pm_qos_raw_resume_latency(struct device *dev) } #endif +#define FREQ_QOS_MIN_DEFAULT_VALUE 0 +#define FREQ_QOS_MAX_DEFAULT_VALUE (-1) + +enum freq_qos_req_type { + FREQ_QOS_MIN = 1, + FREQ_QOS_MAX, +}; + +struct freq_constraints { + struct pm_qos_constraints min_freq; + struct blocking_notifier_head min_freq_notifiers; + struct pm_qos_constraints max_freq; + struct blocking_notifier_head max_freq_notifiers; +}; + +struct freq_qos_request { + enum freq_qos_req_type type; + struct plist_node pnode; + struct freq_constraints *qos; +}; + +static inline int freq_qos_request_active(struct freq_qos_request *req) +{ + return !IS_ERR_OR_NULL(req->qos); +} + +void freq_constraints_init(struct freq_constraints *qos); + +s32 freq_qos_read_value(struct freq_constraints *qos, + enum freq_qos_req_type type); + +int freq_qos_add_request(struct freq_constraints *qos, + struct freq_qos_request *req, + enum freq_qos_req_type type, s32 value); +int freq_qos_update_request(struct freq_qos_request *req, s32 new_value); +int freq_qos_remove_request(struct freq_qos_request *req); + +int freq_qos_add_notifier(struct freq_constraints *qos, + enum freq_qos_req_type type, + struct notifier_block *notifier); +int freq_qos_remove_notifier(struct freq_constraints *qos, + enum freq_qos_req_type type, + struct notifier_block *notifier); + #endif diff --git a/kernel/power/qos.c b/kernel/power/qos.c index 9568a2fe7c11..04e83fdfbe80 100644 --- a/kernel/power/qos.c +++ b/kernel/power/qos.c @@ -650,3 +650,243 @@ static int __init pm_qos_power_init(void) } late_initcall(pm_qos_power_init); + +/* Definitions related to the frequency QoS below. */ + +/** + * freq_constraints_init - Initialize frequency QoS constraints. + * @qos: Frequency QoS constraints to initialize. + */ +void freq_constraints_init(struct freq_constraints *qos) +{ + struct pm_qos_constraints *c; + + c = &qos->min_freq; + plist_head_init(&c->list); + c->target_value = FREQ_QOS_MIN_DEFAULT_VALUE; + c->default_value = FREQ_QOS_MIN_DEFAULT_VALUE; + c->no_constraint_value = FREQ_QOS_MIN_DEFAULT_VALUE; + c->type = PM_QOS_MAX; + c->notifiers = &qos->min_freq_notifiers; + BLOCKING_INIT_NOTIFIER_HEAD(c->notifiers); + + c = &qos->max_freq; + plist_head_init(&c->list); + c->target_value = FREQ_QOS_MAX_DEFAULT_VALUE; + c->default_value = FREQ_QOS_MAX_DEFAULT_VALUE; + c->no_constraint_value = FREQ_QOS_MAX_DEFAULT_VALUE; + c->type = PM_QOS_MIN; + c->notifiers = &qos->max_freq_notifiers; + BLOCKING_INIT_NOTIFIER_HEAD(c->notifiers); +} + +/** + * freq_qos_read_value - Get frequency QoS constraint for a given list. + * @qos: Constraints to evaluate. + * @type: QoS request type. + */ +s32 freq_qos_read_value(struct freq_constraints *qos, + enum freq_qos_req_type type) +{ + s32 ret; + + switch (type) { + case FREQ_QOS_MIN: + ret = IS_ERR_OR_NULL(qos) ? + FREQ_QOS_MIN_DEFAULT_VALUE : + pm_qos_read_value(&qos->min_freq); + break; + case FREQ_QOS_MAX: + ret = IS_ERR_OR_NULL(qos) ? + FREQ_QOS_MAX_DEFAULT_VALUE : + pm_qos_read_value(&qos->max_freq); + break; + default: + WARN_ON(1); + ret = 0; + } + + return ret; +} + +/** + * freq_qos_apply - Add/modify/remove frequency QoS request. + * @req: Constraint request to apply. + * @action: Action to perform (add/update/remove). + * @value: Value to assign to the QoS request. + */ +static int freq_qos_apply(struct freq_qos_request *req, + enum pm_qos_req_action action, s32 value) +{ + int ret; + + switch(req->type) { + case FREQ_QOS_MIN: + ret = pm_qos_update_target(&req->qos->min_freq, &req->pnode, + action, value); + break; + case FREQ_QOS_MAX: + ret = pm_qos_update_target(&req->qos->max_freq, &req->pnode, + action, value); + break; + default: + ret = -EINVAL; + } + + return ret; +} + +/** + * freq_qos_add_request - Insert new frequency QoS request into a given list. + * @qos: Constraints to update. + * @req: Preallocated request object. + * @type: Request type. + * @value: Request value. + * + * Insert a new entry into the @qos list of requests, recompute the effective + * QoS constraint value for that list and initialize the @req object. The + * caller needs to save that object for later use in updates and removal. + * + * Return 1 if the effective constraint value has changed, 0 if the effective + * constraint value has not changed, or a negative error code on failures. + */ +int freq_qos_add_request(struct freq_constraints *qos, + struct freq_qos_request *req, + enum freq_qos_req_type type, s32 value) +{ + int ret; + + if (IS_ERR_OR_NULL(qos) || !req) + return -EINVAL; + + if (WARN(freq_qos_request_active(req), + "%s() called for active request\n", __func__)) + return -EINVAL; + + req->qos = qos; + req->type = type; + ret = freq_qos_apply(req, PM_QOS_ADD_REQ, value); + if (ret < 0) { + req->qos = NULL; + req->type = 0; + } + + return ret; +} +EXPORT_SYMBOL_GPL(freq_qos_add_request); + +/** + * freq_qos_update_request - Modify existing frequency QoS request. + * @req: Request to modify. + * @new_value: New request value. + * + * Update an existing frequency QoS request along with the effective constraint + * value for the list of requests it belongs to. + * + * Return 1 if the effective constraint value has changed, 0 if the effective + * constraint value has not changed, or a negative error code on failures. + */ +int freq_qos_update_request(struct freq_qos_request *req, s32 new_value) +{ + if (!req) + return -EINVAL; + + if (WARN(!freq_qos_request_active(req), + "%s() called for unknown object\n", __func__)) + return -EINVAL; + + if (req->pnode.prio == new_value) + return 0; + + return freq_qos_apply(req, PM_QOS_UPDATE_REQ, new_value); +} +EXPORT_SYMBOL_GPL(freq_qos_update_request); + +/** + * freq_qos_remove_request - Remove frequency QoS request from its list. + * @req: Request to remove. + * + * Remove the given frequency QoS request from the list of constraints it + * belongs to and recompute the effective constraint value for that list. + * + * Return 1 if the effective constraint value has changed, 0 if the effective + * constraint value has not changed, or a negative error code on failures. + */ +int freq_qos_remove_request(struct freq_qos_request *req) +{ + if (!req) + return -EINVAL; + + if (WARN(!freq_qos_request_active(req), + "%s() called for unknown object\n", __func__)) + return -EINVAL; + + return freq_qos_apply(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE); +} +EXPORT_SYMBOL_GPL(freq_qos_remove_request); + +/** + * freq_qos_add_notifier - Add frequency QoS change notifier. + * @qos: List of requests to add the notifier to. + * @type: Request type. + * @notifier: Notifier block to add. + */ +int freq_qos_add_notifier(struct freq_constraints *qos, + enum freq_qos_req_type type, + struct notifier_block *notifier) +{ + int ret; + + if (IS_ERR_OR_NULL(qos) || !notifier) + return -EINVAL; + + switch (type) { + case FREQ_QOS_MIN: + ret = blocking_notifier_chain_register(qos->min_freq.notifiers, + notifier); + break; + case FREQ_QOS_MAX: + ret = blocking_notifier_chain_register(qos->max_freq.notifiers, + notifier); + break; + default: + WARN_ON(1); + ret = -EINVAL; + } + + return ret; +} +EXPORT_SYMBOL_GPL(freq_qos_add_notifier); + +/** + * freq_qos_remove_notifier - Remove frequency QoS change notifier. + * @qos: List of requests to remove the notifier from. + * @type: Request type. + * @notifier: Notifier block to remove. + */ +int freq_qos_remove_notifier(struct freq_constraints *qos, + enum freq_qos_req_type type, + struct notifier_block *notifier) +{ + int ret; + + if (IS_ERR_OR_NULL(qos) || !notifier) + return -EINVAL; + + switch (type) { + case FREQ_QOS_MIN: + ret = blocking_notifier_chain_unregister(qos->min_freq.notifiers, + notifier); + break; + case FREQ_QOS_MAX: + ret = blocking_notifier_chain_unregister(qos->max_freq.notifiers, + notifier); + break; + default: + WARN_ON(1); + ret = -EINVAL; + } + + return ret; +} +EXPORT_SYMBOL_GPL(freq_qos_remove_notifier); From 3000ce3c52f8b8db093e4dc649cd172390f71137 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 16 Oct 2019 12:47:06 +0200 Subject: [PATCH 489/572] cpufreq: Use per-policy frequency QoS Replace the CPU device PM QoS used for the management of min and max frequency constraints in cpufreq (and its users) with per-policy frequency QoS to avoid problems with cpufreq policies covering more then one CPU. Namely, a cpufreq driver is registered with the subsys interface which calls cpufreq_add_dev() for each CPU, starting from CPU0, so currently the PM QoS notifiers are added to the first CPU in the policy (i.e. CPU0 in the majority of cases). In turn, when the cpufreq driver is unregistered, the subsys interface doing that calls cpufreq_remove_dev() for each CPU, starting from CPU0, and the PM QoS notifiers are only removed when cpufreq_remove_dev() is called for the last CPU in the policy, say CPUx, which as a rule is not CPU0 if the policy covers more than one CPU. Then, the PM QoS notifiers cannot be removed, because CPUx does not have them, and they are still there in the device PM QoS notifiers list of CPU0, which prevents new PM QoS notifiers from being registered for CPU0 on the next attempt to register the cpufreq driver. The same issue occurs when the first CPU in the policy goes offline before unregistering the driver. After this change it does not matter which CPU is the policy CPU at the driver registration time and whether or not it is online all the time, because the frequency QoS is per policy and not per CPU. Fixes: 67d874c3b2c6 ("cpufreq: Register notifiers with the PM QoS framework") Reported-by: Dmitry Osipenko Tested-by: Dmitry Osipenko Reported-by: Sudeep Holla Tested-by: Sudeep Holla Diagnosed-by: Viresh Kumar Link: https://lore.kernel.org/linux-pm/5ad2624194baa2f53acc1f1e627eb7684c577a19.1562210705.git.viresh.kumar@linaro.org/T/#md2d89e95906b8c91c15f582146173dce2e86e99f Link: https://lore.kernel.org/linux-pm/20191017094612.6tbkwoq4harsjcqv@vireshk-i7/T/#m30d48cc23b9a80467fbaa16e30f90b3828a5a29b Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/acpi/processor_driver.c | 9 ++-- drivers/acpi/processor_perflib.c | 18 +++---- drivers/acpi/processor_thermal.c | 18 +++---- drivers/cpufreq/cpufreq.c | 59 ++++++++++------------ drivers/cpufreq/intel_pstate.c | 30 +++++------ drivers/cpufreq/ppc_cbe_cpufreq_pmi.c | 15 +++--- drivers/macintosh/windfarm_cpufreq_clamp.c | 38 ++++++++------ drivers/thermal/cpu_cooling.c | 14 ++--- include/acpi/processor.h | 20 ++++---- include/linux/cpufreq.h | 7 ++- 10 files changed, 114 insertions(+), 114 deletions(-) diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c index 08da9c29f1e9..62114a03a51a 100644 --- a/drivers/acpi/processor_driver.c +++ b/drivers/acpi/processor_driver.c @@ -290,14 +290,13 @@ static int acpi_processor_notifier(struct notifier_block *nb, unsigned long event, void *data) { struct cpufreq_policy *policy = data; - int cpu = policy->cpu; if (event == CPUFREQ_CREATE_POLICY) { - acpi_thermal_cpufreq_init(cpu); - acpi_processor_ppc_init(cpu); + acpi_thermal_cpufreq_init(policy); + acpi_processor_ppc_init(policy); } else if (event == CPUFREQ_REMOVE_POLICY) { - acpi_processor_ppc_exit(cpu); - acpi_thermal_cpufreq_exit(cpu); + acpi_processor_ppc_exit(policy); + acpi_thermal_cpufreq_exit(policy); } return 0; diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index 930a49fa4dfc..753e171de006 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -81,10 +81,10 @@ static int acpi_processor_get_platform_limit(struct acpi_processor *pr) pr->performance_platform_limit = (int)ppc; if (ppc >= pr->performance->state_count || - unlikely(!dev_pm_qos_request_active(&pr->perflib_req))) + unlikely(!freq_qos_request_active(&pr->perflib_req))) return 0; - ret = dev_pm_qos_update_request(&pr->perflib_req, + ret = freq_qos_update_request(&pr->perflib_req, pr->performance->states[ppc].core_frequency * 1000); if (ret < 0) { pr_warn("Failed to update perflib freq constraint: CPU%d (%d)\n", @@ -157,28 +157,28 @@ void acpi_processor_ignore_ppc_init(void) ignore_ppc = 0; } -void acpi_processor_ppc_init(int cpu) +void acpi_processor_ppc_init(struct cpufreq_policy *policy) { + int cpu = policy->cpu; struct acpi_processor *pr = per_cpu(processors, cpu); int ret; if (!pr) return; - ret = dev_pm_qos_add_request(get_cpu_device(cpu), - &pr->perflib_req, DEV_PM_QOS_MAX_FREQUENCY, - INT_MAX); + ret = freq_qos_add_request(&policy->constraints, &pr->perflib_req, + FREQ_QOS_MAX, INT_MAX); if (ret < 0) pr_err("Failed to add freq constraint for CPU%d (%d)\n", cpu, ret); } -void acpi_processor_ppc_exit(int cpu) +void acpi_processor_ppc_exit(struct cpufreq_policy *policy) { - struct acpi_processor *pr = per_cpu(processors, cpu); + struct acpi_processor *pr = per_cpu(processors, policy->cpu); if (pr) - dev_pm_qos_remove_request(&pr->perflib_req); + freq_qos_remove_request(&pr->perflib_req); } static int acpi_processor_get_performance_control(struct acpi_processor *pr) diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c index 8227c7dd75b1..c77a5b1fb107 100644 --- a/drivers/acpi/processor_thermal.c +++ b/drivers/acpi/processor_thermal.c @@ -105,7 +105,7 @@ static int cpufreq_set_cur_state(unsigned int cpu, int state) pr = per_cpu(processors, i); - if (unlikely(!dev_pm_qos_request_active(&pr->thermal_req))) + if (unlikely(!freq_qos_request_active(&pr->thermal_req))) continue; policy = cpufreq_cpu_get(i); @@ -116,7 +116,7 @@ static int cpufreq_set_cur_state(unsigned int cpu, int state) cpufreq_cpu_put(policy); - ret = dev_pm_qos_update_request(&pr->thermal_req, max_freq); + ret = freq_qos_update_request(&pr->thermal_req, max_freq); if (ret < 0) { pr_warn("Failed to update thermal freq constraint: CPU%d (%d)\n", pr->id, ret); @@ -125,28 +125,28 @@ static int cpufreq_set_cur_state(unsigned int cpu, int state) return 0; } -void acpi_thermal_cpufreq_init(int cpu) +void acpi_thermal_cpufreq_init(struct cpufreq_policy *policy) { + int cpu = policy->cpu; struct acpi_processor *pr = per_cpu(processors, cpu); int ret; if (!pr) return; - ret = dev_pm_qos_add_request(get_cpu_device(cpu), - &pr->thermal_req, DEV_PM_QOS_MAX_FREQUENCY, - INT_MAX); + ret = freq_qos_add_request(&policy->constraints, &pr->thermal_req, + FREQ_QOS_MAX, INT_MAX); if (ret < 0) pr_err("Failed to add freq constraint for CPU%d (%d)\n", cpu, ret); } -void acpi_thermal_cpufreq_exit(int cpu) +void acpi_thermal_cpufreq_exit(struct cpufreq_policy *policy) { - struct acpi_processor *pr = per_cpu(processors, cpu); + struct acpi_processor *pr = per_cpu(processors, policy->cpu); if (pr) - dev_pm_qos_remove_request(&pr->thermal_req); + freq_qos_remove_request(&pr->thermal_req); } #else /* ! CONFIG_CPU_FREQ */ static int cpufreq_get_max_state(unsigned int cpu) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index bffc11b87247..8478ff6f3045 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -720,7 +720,7 @@ static ssize_t store_##file_name \ if (ret != 1) \ return -EINVAL; \ \ - ret = dev_pm_qos_update_request(policy->object##_freq_req, val);\ + ret = freq_qos_update_request(policy->object##_freq_req, val);\ return ret >= 0 ? count : ret; \ } @@ -1202,19 +1202,21 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) goto err_free_real_cpus; } + freq_constraints_init(&policy->constraints); + policy->nb_min.notifier_call = cpufreq_notifier_min; policy->nb_max.notifier_call = cpufreq_notifier_max; - ret = dev_pm_qos_add_notifier(dev, &policy->nb_min, - DEV_PM_QOS_MIN_FREQUENCY); + ret = freq_qos_add_notifier(&policy->constraints, FREQ_QOS_MIN, + &policy->nb_min); if (ret) { dev_err(dev, "Failed to register MIN QoS notifier: %d (%*pbl)\n", ret, cpumask_pr_args(policy->cpus)); goto err_kobj_remove; } - ret = dev_pm_qos_add_notifier(dev, &policy->nb_max, - DEV_PM_QOS_MAX_FREQUENCY); + ret = freq_qos_add_notifier(&policy->constraints, FREQ_QOS_MAX, + &policy->nb_max); if (ret) { dev_err(dev, "Failed to register MAX QoS notifier: %d (%*pbl)\n", ret, cpumask_pr_args(policy->cpus)); @@ -1232,8 +1234,8 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) return policy; err_min_qos_notifier: - dev_pm_qos_remove_notifier(dev, &policy->nb_min, - DEV_PM_QOS_MIN_FREQUENCY); + freq_qos_remove_notifier(&policy->constraints, FREQ_QOS_MIN, + &policy->nb_min); err_kobj_remove: cpufreq_policy_put_kobj(policy); err_free_real_cpus: @@ -1250,7 +1252,6 @@ err_free_policy: static void cpufreq_policy_free(struct cpufreq_policy *policy) { - struct device *dev = get_cpu_device(policy->cpu); unsigned long flags; int cpu; @@ -1262,10 +1263,10 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy) per_cpu(cpufreq_cpu_data, cpu) = NULL; write_unlock_irqrestore(&cpufreq_driver_lock, flags); - dev_pm_qos_remove_notifier(dev, &policy->nb_max, - DEV_PM_QOS_MAX_FREQUENCY); - dev_pm_qos_remove_notifier(dev, &policy->nb_min, - DEV_PM_QOS_MIN_FREQUENCY); + freq_qos_remove_notifier(&policy->constraints, FREQ_QOS_MAX, + &policy->nb_max); + freq_qos_remove_notifier(&policy->constraints, FREQ_QOS_MIN, + &policy->nb_min); if (policy->max_freq_req) { /* @@ -1274,10 +1275,10 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy) */ blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_REMOVE_POLICY, policy); - dev_pm_qos_remove_request(policy->max_freq_req); + freq_qos_remove_request(policy->max_freq_req); } - dev_pm_qos_remove_request(policy->min_freq_req); + freq_qos_remove_request(policy->min_freq_req); kfree(policy->min_freq_req); cpufreq_policy_put_kobj(policy); @@ -1357,8 +1358,6 @@ static int cpufreq_online(unsigned int cpu) cpumask_and(policy->cpus, policy->cpus, cpu_online_mask); if (new_policy) { - struct device *dev = get_cpu_device(cpu); - for_each_cpu(j, policy->related_cpus) { per_cpu(cpufreq_cpu_data, j) = policy; add_cpu_dev_symlink(policy, j); @@ -1369,36 +1368,31 @@ static int cpufreq_online(unsigned int cpu) if (!policy->min_freq_req) goto out_destroy_policy; - ret = dev_pm_qos_add_request(dev, policy->min_freq_req, - DEV_PM_QOS_MIN_FREQUENCY, - policy->min); + ret = freq_qos_add_request(&policy->constraints, + policy->min_freq_req, FREQ_QOS_MIN, + policy->min); if (ret < 0) { /* - * So we don't call dev_pm_qos_remove_request() for an + * So we don't call freq_qos_remove_request() for an * uninitialized request. */ kfree(policy->min_freq_req); policy->min_freq_req = NULL; - - dev_err(dev, "Failed to add min-freq constraint (%d)\n", - ret); goto out_destroy_policy; } /* * This must be initialized right here to avoid calling - * dev_pm_qos_remove_request() on uninitialized request in case + * freq_qos_remove_request() on uninitialized request in case * of errors. */ policy->max_freq_req = policy->min_freq_req + 1; - ret = dev_pm_qos_add_request(dev, policy->max_freq_req, - DEV_PM_QOS_MAX_FREQUENCY, - policy->max); + ret = freq_qos_add_request(&policy->constraints, + policy->max_freq_req, FREQ_QOS_MAX, + policy->max); if (ret < 0) { policy->max_freq_req = NULL; - dev_err(dev, "Failed to add max-freq constraint (%d)\n", - ret); goto out_destroy_policy; } @@ -2374,7 +2368,6 @@ int cpufreq_set_policy(struct cpufreq_policy *policy, struct cpufreq_policy *new_policy) { struct cpufreq_governor *old_gov; - struct device *cpu_dev = get_cpu_device(policy->cpu); int ret; pr_debug("setting new policy for CPU %u: %u - %u kHz\n", @@ -2386,8 +2379,8 @@ int cpufreq_set_policy(struct cpufreq_policy *policy, * PM QoS framework collects all the requests from users and provide us * the final aggregated value here. */ - new_policy->min = dev_pm_qos_read_value(cpu_dev, DEV_PM_QOS_MIN_FREQUENCY); - new_policy->max = dev_pm_qos_read_value(cpu_dev, DEV_PM_QOS_MAX_FREQUENCY); + new_policy->min = freq_qos_read_value(&policy->constraints, FREQ_QOS_MIN); + new_policy->max = freq_qos_read_value(&policy->constraints, FREQ_QOS_MAX); /* verify the cpu speed can be set within this limit */ ret = cpufreq_driver->verify(new_policy); @@ -2518,7 +2511,7 @@ static int cpufreq_boost_set_sw(int state) break; } - ret = dev_pm_qos_update_request(policy->max_freq_req, policy->max); + ret = freq_qos_update_request(policy->max_freq_req, policy->max); if (ret < 0) break; } diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 9f02de9a1b47..53a51c169451 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1088,10 +1088,10 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b, static struct cpufreq_driver intel_pstate; -static void update_qos_request(enum dev_pm_qos_req_type type) +static void update_qos_request(enum freq_qos_req_type type) { int max_state, turbo_max, freq, i, perf_pct; - struct dev_pm_qos_request *req; + struct freq_qos_request *req; struct cpufreq_policy *policy; for_each_possible_cpu(i) { @@ -1112,7 +1112,7 @@ static void update_qos_request(enum dev_pm_qos_req_type type) else turbo_max = cpu->pstate.turbo_pstate; - if (type == DEV_PM_QOS_MIN_FREQUENCY) { + if (type == FREQ_QOS_MIN) { perf_pct = global.min_perf_pct; } else { req++; @@ -1122,7 +1122,7 @@ static void update_qos_request(enum dev_pm_qos_req_type type) freq = DIV_ROUND_UP(turbo_max * perf_pct, 100); freq *= cpu->pstate.scaling; - if (dev_pm_qos_update_request(req, freq) < 0) + if (freq_qos_update_request(req, freq) < 0) pr_warn("Failed to update freq constraint: CPU%d\n", i); } } @@ -1153,7 +1153,7 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct kobj_attribute *b, if (intel_pstate_driver == &intel_pstate) intel_pstate_update_policies(); else - update_qos_request(DEV_PM_QOS_MAX_FREQUENCY); + update_qos_request(FREQ_QOS_MAX); mutex_unlock(&intel_pstate_driver_lock); @@ -1187,7 +1187,7 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct kobj_attribute *b, if (intel_pstate_driver == &intel_pstate) intel_pstate_update_policies(); else - update_qos_request(DEV_PM_QOS_MIN_FREQUENCY); + update_qos_request(FREQ_QOS_MIN); mutex_unlock(&intel_pstate_driver_lock); @@ -2381,7 +2381,7 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy, static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy) { int max_state, turbo_max, min_freq, max_freq, ret; - struct dev_pm_qos_request *req; + struct freq_qos_request *req; struct cpudata *cpu; struct device *dev; @@ -2416,15 +2416,15 @@ static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy) max_freq = DIV_ROUND_UP(turbo_max * global.max_perf_pct, 100); max_freq *= cpu->pstate.scaling; - ret = dev_pm_qos_add_request(dev, req, DEV_PM_QOS_MIN_FREQUENCY, - min_freq); + ret = freq_qos_add_request(&policy->constraints, req, FREQ_QOS_MIN, + min_freq); if (ret < 0) { dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret); goto free_req; } - ret = dev_pm_qos_add_request(dev, req + 1, DEV_PM_QOS_MAX_FREQUENCY, - max_freq); + ret = freq_qos_add_request(&policy->constraints, req + 1, FREQ_QOS_MAX, + max_freq); if (ret < 0) { dev_err(dev, "Failed to add max-freq constraint (%d)\n", ret); goto remove_min_req; @@ -2435,7 +2435,7 @@ static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy) return 0; remove_min_req: - dev_pm_qos_remove_request(req); + freq_qos_remove_request(req); free_req: kfree(req); pstate_exit: @@ -2446,12 +2446,12 @@ pstate_exit: static int intel_cpufreq_cpu_exit(struct cpufreq_policy *policy) { - struct dev_pm_qos_request *req; + struct freq_qos_request *req; req = policy->driver_data; - dev_pm_qos_remove_request(req + 1); - dev_pm_qos_remove_request(req); + freq_qos_remove_request(req + 1); + freq_qos_remove_request(req); kfree(req); return intel_pstate_cpu_exit(policy); diff --git a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c index bc9dd30395c4..037fe23bc6ed 100644 --- a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c +++ b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c @@ -65,7 +65,7 @@ EXPORT_SYMBOL_GPL(cbe_cpufreq_set_pmode_pmi); static void cbe_cpufreq_handle_pmi(pmi_message_t pmi_msg) { struct cpufreq_policy *policy; - struct dev_pm_qos_request *req; + struct freq_qos_request *req; u8 node, slow_mode; int cpu, ret; @@ -86,7 +86,7 @@ static void cbe_cpufreq_handle_pmi(pmi_message_t pmi_msg) req = policy->driver_data; - ret = dev_pm_qos_update_request(req, + ret = freq_qos_update_request(req, policy->freq_table[slow_mode].frequency); if (ret < 0) pr_warn("Failed to update freq constraint: %d\n", ret); @@ -103,7 +103,7 @@ static struct pmi_handler cbe_pmi_handler = { void cbe_cpufreq_pmi_policy_init(struct cpufreq_policy *policy) { - struct dev_pm_qos_request *req; + struct freq_qos_request *req; int ret; if (!cbe_cpufreq_has_pmi) @@ -113,9 +113,8 @@ void cbe_cpufreq_pmi_policy_init(struct cpufreq_policy *policy) if (!req) return; - ret = dev_pm_qos_add_request(get_cpu_device(policy->cpu), req, - DEV_PM_QOS_MAX_FREQUENCY, - policy->freq_table[0].frequency); + ret = freq_qos_add_request(&policy->constraints, req, FREQ_QOS_MAX, + policy->freq_table[0].frequency); if (ret < 0) { pr_err("Failed to add freq constraint (%d)\n", ret); kfree(req); @@ -128,10 +127,10 @@ EXPORT_SYMBOL_GPL(cbe_cpufreq_pmi_policy_init); void cbe_cpufreq_pmi_policy_exit(struct cpufreq_policy *policy) { - struct dev_pm_qos_request *req = policy->driver_data; + struct freq_qos_request *req = policy->driver_data; if (cbe_cpufreq_has_pmi) { - dev_pm_qos_remove_request(req); + freq_qos_remove_request(req); kfree(req); } } diff --git a/drivers/macintosh/windfarm_cpufreq_clamp.c b/drivers/macintosh/windfarm_cpufreq_clamp.c index 705c6200814b..7b726f00f183 100644 --- a/drivers/macintosh/windfarm_cpufreq_clamp.c +++ b/drivers/macintosh/windfarm_cpufreq_clamp.c @@ -18,7 +18,7 @@ static int clamped; static struct wf_control *clamp_control; -static struct dev_pm_qos_request qos_req; +static struct freq_qos_request qos_req; static unsigned int min_freq, max_freq; static int clamp_set(struct wf_control *ct, s32 value) @@ -35,7 +35,7 @@ static int clamp_set(struct wf_control *ct, s32 value) } clamped = value; - return dev_pm_qos_update_request(&qos_req, freq); + return freq_qos_update_request(&qos_req, freq); } static int clamp_get(struct wf_control *ct, s32 *value) @@ -77,38 +77,44 @@ static int __init wf_cpufreq_clamp_init(void) min_freq = policy->cpuinfo.min_freq; max_freq = policy->cpuinfo.max_freq; + + ret = freq_qos_add_request(&policy->constraints, &qos_req, FREQ_QOS_MAX, + max_freq); + cpufreq_cpu_put(policy); + if (ret < 0) { + pr_err("%s: Failed to add freq constraint (%d)\n", __func__, + ret); + return ret; + } + dev = get_cpu_device(0); if (unlikely(!dev)) { pr_warn("%s: No cpu device for cpu0\n", __func__); - return -ENODEV; + ret = -ENODEV; + goto fail; } clamp = kmalloc(sizeof(struct wf_control), GFP_KERNEL); - if (clamp == NULL) - return -ENOMEM; - - ret = dev_pm_qos_add_request(dev, &qos_req, DEV_PM_QOS_MAX_FREQUENCY, - max_freq); - if (ret < 0) { - pr_err("%s: Failed to add freq constraint (%d)\n", __func__, - ret); - goto free; + if (clamp == NULL) { + ret = -ENOMEM; + goto fail; } clamp->ops = &clamp_ops; clamp->name = "cpufreq-clamp"; ret = wf_register_control(clamp); if (ret) - goto fail; + goto free; + clamp_control = clamp; return 0; - fail: - dev_pm_qos_remove_request(&qos_req); free: kfree(clamp); + fail: + freq_qos_remove_request(&qos_req); return ret; } @@ -116,7 +122,7 @@ static void __exit wf_cpufreq_clamp_exit(void) { if (clamp_control) { wf_unregister_control(clamp_control); - dev_pm_qos_remove_request(&qos_req); + freq_qos_remove_request(&qos_req); } } diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 391f39776c6a..6b9865c786ba 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -88,7 +88,7 @@ struct cpufreq_cooling_device { struct cpufreq_policy *policy; struct list_head node; struct time_in_idle *idle_time; - struct dev_pm_qos_request qos_req; + struct freq_qos_request qos_req; }; static DEFINE_IDA(cpufreq_ida); @@ -331,7 +331,7 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev, cpufreq_cdev->cpufreq_state = state; - return dev_pm_qos_update_request(&cpufreq_cdev->qos_req, + return freq_qos_update_request(&cpufreq_cdev->qos_req, cpufreq_cdev->freq_table[state].frequency); } @@ -615,9 +615,9 @@ __cpufreq_cooling_register(struct device_node *np, cooling_ops = &cpufreq_cooling_ops; } - ret = dev_pm_qos_add_request(dev, &cpufreq_cdev->qos_req, - DEV_PM_QOS_MAX_FREQUENCY, - cpufreq_cdev->freq_table[0].frequency); + ret = freq_qos_add_request(&policy->constraints, + &cpufreq_cdev->qos_req, FREQ_QOS_MAX, + cpufreq_cdev->freq_table[0].frequency); if (ret < 0) { pr_err("%s: Failed to add freq constraint (%d)\n", __func__, ret); @@ -637,7 +637,7 @@ __cpufreq_cooling_register(struct device_node *np, return cdev; remove_qos_req: - dev_pm_qos_remove_request(&cpufreq_cdev->qos_req); + freq_qos_remove_request(&cpufreq_cdev->qos_req); remove_ida: ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id); free_table: @@ -736,7 +736,7 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) mutex_unlock(&cooling_list_lock); thermal_cooling_device_unregister(cdev); - dev_pm_qos_remove_request(&cpufreq_cdev->qos_req); + freq_qos_remove_request(&cpufreq_cdev->qos_req); ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id); kfree(cpufreq_cdev->idle_time); kfree(cpufreq_cdev->freq_table); diff --git a/include/acpi/processor.h b/include/acpi/processor.h index f936033cb9e6..47805172e73d 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -232,8 +232,8 @@ struct acpi_processor { struct acpi_processor_limit limit; struct thermal_cooling_device *cdev; struct device *dev; /* Processor device. */ - struct dev_pm_qos_request perflib_req; - struct dev_pm_qos_request thermal_req; + struct freq_qos_request perflib_req; + struct freq_qos_request thermal_req; }; struct acpi_processor_errata { @@ -302,8 +302,8 @@ static inline void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx #ifdef CONFIG_CPU_FREQ extern bool acpi_processor_cpufreq_init; void acpi_processor_ignore_ppc_init(void); -void acpi_processor_ppc_init(int cpu); -void acpi_processor_ppc_exit(int cpu); +void acpi_processor_ppc_init(struct cpufreq_policy *policy); +void acpi_processor_ppc_exit(struct cpufreq_policy *policy); void acpi_processor_ppc_has_changed(struct acpi_processor *pr, int event_flag); extern int acpi_processor_get_bios_limit(int cpu, unsigned int *limit); #else @@ -311,11 +311,11 @@ static inline void acpi_processor_ignore_ppc_init(void) { return; } -static inline void acpi_processor_ppc_init(int cpu) +static inline void acpi_processor_ppc_init(struct cpufreq_policy *policy) { return; } -static inline void acpi_processor_ppc_exit(int cpu) +static inline void acpi_processor_ppc_exit(struct cpufreq_policy *policy) { return; } @@ -431,14 +431,14 @@ static inline int acpi_processor_hotplug(struct acpi_processor *pr) int acpi_processor_get_limit_info(struct acpi_processor *pr); extern const struct thermal_cooling_device_ops processor_cooling_ops; #if defined(CONFIG_ACPI_CPU_FREQ_PSS) & defined(CONFIG_CPU_FREQ) -void acpi_thermal_cpufreq_init(int cpu); -void acpi_thermal_cpufreq_exit(int cpu); +void acpi_thermal_cpufreq_init(struct cpufreq_policy *policy); +void acpi_thermal_cpufreq_exit(struct cpufreq_policy *policy); #else -static inline void acpi_thermal_cpufreq_init(int cpu) +static inline void acpi_thermal_cpufreq_init(struct cpufreq_policy *policy) { return; } -static inline void acpi_thermal_cpufreq_exit(int cpu) +static inline void acpi_thermal_cpufreq_exit(struct cpufreq_policy *policy) { return; } diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index c57e88e85c41..92d5fdc8154e 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -76,8 +77,10 @@ struct cpufreq_policy { struct work_struct update; /* if update_policy() needs to be * called, but you're in IRQ context */ - struct dev_pm_qos_request *min_freq_req; - struct dev_pm_qos_request *max_freq_req; + struct freq_constraints constraints; + struct freq_qos_request *min_freq_req; + struct freq_qos_request *max_freq_req; + struct cpufreq_frequency_table *freq_table; enum cpufreq_table_sorting freq_table_sorted; From 2aac8bdf7a0fbd3e2a34141d28b57a7e21482cf7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 16 Oct 2019 12:47:53 +0200 Subject: [PATCH 490/572] PM: QoS: Drop frequency QoS types from device PM QoS There are no more active users of DEV_PM_QOS_MIN_FREQUENCY and DEV_PM_QOS_MAX_FREQUENCY device PM QoS request types, so drop them along with the code supporting them. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/base/power/qos.c | 70 ++-------------------------------------- include/linux/pm_qos.h | 12 ------- 2 files changed, 2 insertions(+), 80 deletions(-) diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c index 6c90fd7e2ff8..350dcafd751f 100644 --- a/drivers/base/power/qos.c +++ b/drivers/base/power/qos.c @@ -115,20 +115,10 @@ s32 dev_pm_qos_read_value(struct device *dev, enum dev_pm_qos_req_type type) spin_lock_irqsave(&dev->power.lock, flags); - switch (type) { - case DEV_PM_QOS_RESUME_LATENCY: + if (type == DEV_PM_QOS_RESUME_LATENCY) { ret = IS_ERR_OR_NULL(qos) ? PM_QOS_RESUME_LATENCY_NO_CONSTRAINT : pm_qos_read_value(&qos->resume_latency); - break; - case DEV_PM_QOS_MIN_FREQUENCY: - ret = IS_ERR_OR_NULL(qos) ? PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE - : pm_qos_read_value(&qos->min_frequency); - break; - case DEV_PM_QOS_MAX_FREQUENCY: - ret = IS_ERR_OR_NULL(qos) ? PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE - : pm_qos_read_value(&qos->max_frequency); - break; - default: + } else { WARN_ON(1); ret = 0; } @@ -169,14 +159,6 @@ static int apply_constraint(struct dev_pm_qos_request *req, req->dev->power.set_latency_tolerance(req->dev, value); } break; - case DEV_PM_QOS_MIN_FREQUENCY: - ret = pm_qos_update_target(&qos->min_frequency, - &req->data.pnode, action, value); - break; - case DEV_PM_QOS_MAX_FREQUENCY: - ret = pm_qos_update_target(&qos->max_frequency, - &req->data.pnode, action, value); - break; case DEV_PM_QOS_FLAGS: ret = pm_qos_update_flags(&qos->flags, &req->data.flr, action, value); @@ -227,24 +209,6 @@ static int dev_pm_qos_constraints_allocate(struct device *dev) c->no_constraint_value = PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT; c->type = PM_QOS_MIN; - c = &qos->min_frequency; - plist_head_init(&c->list); - c->target_value = PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE; - c->default_value = PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE; - c->no_constraint_value = PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE; - c->type = PM_QOS_MAX; - c->notifiers = ++n; - BLOCKING_INIT_NOTIFIER_HEAD(n); - - c = &qos->max_frequency; - plist_head_init(&c->list); - c->target_value = PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE; - c->default_value = PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE; - c->no_constraint_value = PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE; - c->type = PM_QOS_MIN; - c->notifiers = ++n; - BLOCKING_INIT_NOTIFIER_HEAD(n); - INIT_LIST_HEAD(&qos->flags.list); spin_lock_irq(&dev->power.lock); @@ -305,18 +269,6 @@ void dev_pm_qos_constraints_destroy(struct device *dev) memset(req, 0, sizeof(*req)); } - c = &qos->min_frequency; - plist_for_each_entry_safe(req, tmp, &c->list, data.pnode) { - apply_constraint(req, PM_QOS_REMOVE_REQ, PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE); - memset(req, 0, sizeof(*req)); - } - - c = &qos->max_frequency; - plist_for_each_entry_safe(req, tmp, &c->list, data.pnode) { - apply_constraint(req, PM_QOS_REMOVE_REQ, PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE); - memset(req, 0, sizeof(*req)); - } - f = &qos->flags; list_for_each_entry_safe(req, tmp, &f->list, data.flr.node) { apply_constraint(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE); @@ -428,8 +380,6 @@ static int __dev_pm_qos_update_request(struct dev_pm_qos_request *req, switch(req->type) { case DEV_PM_QOS_RESUME_LATENCY: case DEV_PM_QOS_LATENCY_TOLERANCE: - case DEV_PM_QOS_MIN_FREQUENCY: - case DEV_PM_QOS_MAX_FREQUENCY: curr_value = req->data.pnode.prio; break; case DEV_PM_QOS_FLAGS: @@ -557,14 +507,6 @@ int dev_pm_qos_add_notifier(struct device *dev, struct notifier_block *notifier, ret = blocking_notifier_chain_register(dev->power.qos->resume_latency.notifiers, notifier); break; - case DEV_PM_QOS_MIN_FREQUENCY: - ret = blocking_notifier_chain_register(dev->power.qos->min_frequency.notifiers, - notifier); - break; - case DEV_PM_QOS_MAX_FREQUENCY: - ret = blocking_notifier_chain_register(dev->power.qos->max_frequency.notifiers, - notifier); - break; default: WARN_ON(1); ret = -EINVAL; @@ -604,14 +546,6 @@ int dev_pm_qos_remove_notifier(struct device *dev, ret = blocking_notifier_chain_unregister(dev->power.qos->resume_latency.notifiers, notifier); break; - case DEV_PM_QOS_MIN_FREQUENCY: - ret = blocking_notifier_chain_unregister(dev->power.qos->min_frequency.notifiers, - notifier); - break; - case DEV_PM_QOS_MAX_FREQUENCY: - ret = blocking_notifier_chain_unregister(dev->power.qos->max_frequency.notifiers, - notifier); - break; default: WARN_ON(1); ret = -EINVAL; diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 57b93e3d8f29..ebf5ef17cc2a 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -34,8 +34,6 @@ enum pm_qos_flags_status { #define PM_QOS_RESUME_LATENCY_NO_CONSTRAINT PM_QOS_LATENCY_ANY #define PM_QOS_RESUME_LATENCY_NO_CONSTRAINT_NS PM_QOS_LATENCY_ANY_NS #define PM_QOS_LATENCY_TOLERANCE_DEFAULT_VALUE 0 -#define PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE 0 -#define PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE (-1) #define PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT (-1) #define PM_QOS_FLAG_NO_POWER_OFF (1 << 0) @@ -54,8 +52,6 @@ struct pm_qos_flags_request { enum dev_pm_qos_req_type { DEV_PM_QOS_RESUME_LATENCY = 1, DEV_PM_QOS_LATENCY_TOLERANCE, - DEV_PM_QOS_MIN_FREQUENCY, - DEV_PM_QOS_MAX_FREQUENCY, DEV_PM_QOS_FLAGS, }; @@ -97,14 +93,10 @@ struct pm_qos_flags { struct dev_pm_qos { struct pm_qos_constraints resume_latency; struct pm_qos_constraints latency_tolerance; - struct pm_qos_constraints min_frequency; - struct pm_qos_constraints max_frequency; struct pm_qos_flags flags; struct dev_pm_qos_request *resume_latency_req; struct dev_pm_qos_request *latency_tolerance_req; struct dev_pm_qos_request *flags_req; - struct dev_pm_qos_request *min_frequency_req; - struct dev_pm_qos_request *max_frequency_req; }; /* Action requested to pm_qos_update_target */ @@ -199,10 +191,6 @@ static inline s32 dev_pm_qos_read_value(struct device *dev, switch (type) { case DEV_PM_QOS_RESUME_LATENCY: return PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; - case DEV_PM_QOS_MIN_FREQUENCY: - return PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE; - case DEV_PM_QOS_MAX_FREQUENCY: - return PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE; default: WARN_ON(1); return 0; From d532cc7efdfd7bf4b9e1c287d823e584843f1de1 Mon Sep 17 00:00:00 2001 From: "Paulo Alcantara (SUSE)" Date: Thu, 10 Oct 2019 12:31:58 -0300 Subject: [PATCH 491/572] cifs: Handle -EINPROGRESS only when noblockcnt is set We only want to avoid blocking in connect when mounting SMB root filesystems, otherwise bail out from generic_ip_connect() so cifs.ko can perform any reconnect failover appropriately. This fixes DFS failover/reconnection tests in upstream buildbot. Fixes: 8eecd1c2e5bc ("cifs: Add support for root file systems") Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/connect.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index a64dfa95a925..bdea4b3e8005 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3882,8 +3882,12 @@ generic_ip_connect(struct TCP_Server_Info *server) rc = socket->ops->connect(socket, saddr, slen, server->noblockcnt ? O_NONBLOCK : 0); - - if (rc == -EINPROGRESS) + /* + * When mounting SMB root file systems, we do not want to block in + * connect. Otherwise bail out and then let cifs_reconnect() perform + * reconnect failover - if possible. + */ + if (server->noblockcnt && rc == -EINPROGRESS) rc = 0; if (rc < 0) { cifs_dbg(FYI, "Error %d connecting to server\n", rc); From 553292a6342bc9e5636953ac6e20bccedaacbd1c Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 11 Oct 2019 17:36:13 -0700 Subject: [PATCH 492/572] cifs: clarify comment about timestamp granularity for old servers It could be confusing why we set granularity to 1 seconds rather than 2 seconds (1 second is the max the VFS allows) for these mounts to very old servers ... Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index c049c7b3aa87..1a135d1b85bd 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -169,7 +169,13 @@ cifs_read_super(struct super_block *sb) else sb->s_maxbytes = MAX_NON_LFS; - /* Some very old servers like DOS and OS/2 used 2 second granularity */ + /* + * Some very old servers like DOS and OS/2 used 2 second granularity + * (while all current servers use 100ns granularity - see MS-DTYP) + * but 1 second is the maximum allowed granularity for the VFS + * so for old servers set time granularity to 1 second while for + * everything else (current servers) set it to 100ns. + */ if ((tcon->ses->server->vals->protocol_id == SMB10_PROT_ID) && ((tcon->ses->capabilities & tcon->ses->server->vals->cap_nt_find) == 0) && From 03d9a9fe3f3aec508e485dd3dcfa1e99933b4bdb Mon Sep 17 00:00:00 2001 From: Roberto Bergantinos Corpas Date: Mon, 14 Oct 2019 10:59:23 +0200 Subject: [PATCH 493/572] CIFS: avoid using MID 0xFFFF According to MS-CIFS specification MID 0xFFFF should not be used by the CIFS client, but we actually do. Besides, this has proven to cause races leading to oops between SendReceive2/cifs_demultiplex_thread. On SMB1, MID is a 2 byte value easy to reach in CurrentMid which may conflict with an oplock break notification request coming from server Signed-off-by: Roberto Bergantinos Corpas Reviewed-by: Ronnie Sahlberg Reviewed-by: Aurelien Aptel Signed-off-by: Steve French CC: Stable --- fs/cifs/smb1ops.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index b7421a096319..514810694c0f 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -171,6 +171,9 @@ cifs_get_next_mid(struct TCP_Server_Info *server) /* we do not want to loop forever */ last_mid = cur_mid; cur_mid++; + /* avoid 0xFFFF MID */ + if (cur_mid == 0xffff) + cur_mid++; /* * This nested loop looks more expensive than it is. From 783bf7b8b641167fb6f3f4f787f60ae62bad41b3 Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Mon, 14 Oct 2019 15:15:31 +0800 Subject: [PATCH 494/572] cifs: Fix missed free operations cifs_setattr_nounix has two paths which miss free operations for xid and fullpath. Use goto cifs_setattr_exit like other paths to fix them. CC: Stable Fixes: aa081859b10c ("cifs: flush before set-info if we have writeable handles") Signed-off-by: Chuhong Yuan Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky --- fs/cifs/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 5dcc95b38310..df9377828e2f 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -2475,9 +2475,9 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) rc = tcon->ses->server->ops->flush(xid, tcon, &wfile->fid); cifsFileInfo_put(wfile); if (rc) - return rc; + goto cifs_setattr_exit; } else if (rc != -EBADF) - return rc; + goto cifs_setattr_exit; else rc = 0; } From 24957db1004353346583c9cc6d783db8f213e3ad Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 17 Oct 2019 12:27:58 +0200 Subject: [PATCH 495/572] opp: core: Revert "add regulators enable and disable" All the drivers, which use the OPP framework control regulators, which are already enabled. Typically those regulators are also system critical, due to providing power to CPU core or system buses. It turned out that there are cases, where calling regulator_enable() on such boot-enabled regulator has side-effects and might change its initial voltage due to performing initial voltage balancing without all restrictions from the consumers. Until this issue becomes finally solved in regulator core, avoid calling regulator_enable()/disable() from the OPP framework. This reverts commit 7f93ff73f7c8c8bfa6be33bcc16470b0b44682aa. Signed-off-by: Marek Szyprowski Reviewed-by: Mark Brown Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 3b7ffd0234e9..9ff0538ee83a 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -1626,12 +1626,6 @@ struct opp_table *dev_pm_opp_set_regulators(struct device *dev, goto free_regulators; } - ret = regulator_enable(reg); - if (ret < 0) { - regulator_put(reg); - goto free_regulators; - } - opp_table->regulators[i] = reg; } @@ -1645,10 +1639,8 @@ struct opp_table *dev_pm_opp_set_regulators(struct device *dev, return opp_table; free_regulators: - while (i--) { - regulator_disable(opp_table->regulators[i]); - regulator_put(opp_table->regulators[i]); - } + while (i != 0) + regulator_put(opp_table->regulators[--i]); kfree(opp_table->regulators); opp_table->regulators = NULL; @@ -1674,10 +1666,8 @@ void dev_pm_opp_put_regulators(struct opp_table *opp_table) /* Make sure there are no concurrent readers while updating opp_table */ WARN_ON(!list_empty(&opp_table->opp_list)); - for (i = opp_table->regulator_count - 1; i >= 0; i--) { - regulator_disable(opp_table->regulators[i]); + for (i = opp_table->regulator_count - 1; i >= 0; i--) regulator_put(opp_table->regulators[i]); - } _free_set_opp_data(opp_table); From 5e6c3c7b1ec217c1c4c95d9148182302b9969b97 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 21 Oct 2019 10:33:54 +0200 Subject: [PATCH 496/572] perf/aux: Fix tracking of auxiliary trace buffer allocation The following commit from the v5.4 merge window: d44248a41337 ("perf/core: Rework memory accounting in perf_mmap()") ... breaks auxiliary trace buffer tracking. If I run command 'perf record -e rbd000' to record samples and saving them in the **auxiliary** trace buffer then the value of 'locked_vm' becomes negative after all trace buffers have been allocated and released: During allocation the values increase: [52.250027] perf_mmap user->locked_vm:0x87 pinned_vm:0x0 ret:0 [52.250115] perf_mmap user->locked_vm:0x107 pinned_vm:0x0 ret:0 [52.250251] perf_mmap user->locked_vm:0x188 pinned_vm:0x0 ret:0 [52.250326] perf_mmap user->locked_vm:0x208 pinned_vm:0x0 ret:0 [52.250441] perf_mmap user->locked_vm:0x289 pinned_vm:0x0 ret:0 [52.250498] perf_mmap user->locked_vm:0x309 pinned_vm:0x0 ret:0 [52.250613] perf_mmap user->locked_vm:0x38a pinned_vm:0x0 ret:0 [52.250715] perf_mmap user->locked_vm:0x408 pinned_vm:0x2 ret:0 [52.250834] perf_mmap user->locked_vm:0x408 pinned_vm:0x83 ret:0 [52.250915] perf_mmap user->locked_vm:0x408 pinned_vm:0x103 ret:0 [52.251061] perf_mmap user->locked_vm:0x408 pinned_vm:0x184 ret:0 [52.251146] perf_mmap user->locked_vm:0x408 pinned_vm:0x204 ret:0 [52.251299] perf_mmap user->locked_vm:0x408 pinned_vm:0x285 ret:0 [52.251383] perf_mmap user->locked_vm:0x408 pinned_vm:0x305 ret:0 [52.251544] perf_mmap user->locked_vm:0x408 pinned_vm:0x386 ret:0 [52.251634] perf_mmap user->locked_vm:0x408 pinned_vm:0x406 ret:0 [52.253018] perf_mmap user->locked_vm:0x408 pinned_vm:0x487 ret:0 [52.253197] perf_mmap user->locked_vm:0x408 pinned_vm:0x508 ret:0 [52.253374] perf_mmap user->locked_vm:0x408 pinned_vm:0x589 ret:0 [52.253550] perf_mmap user->locked_vm:0x408 pinned_vm:0x60a ret:0 [52.253726] perf_mmap user->locked_vm:0x408 pinned_vm:0x68b ret:0 [52.253903] perf_mmap user->locked_vm:0x408 pinned_vm:0x70c ret:0 [52.254084] perf_mmap user->locked_vm:0x408 pinned_vm:0x78d ret:0 [52.254263] perf_mmap user->locked_vm:0x408 pinned_vm:0x80e ret:0 The value of user->locked_vm increases to a limit then the memory is tracked by pinned_vm. During deallocation the size is subtracted from pinned_vm until it hits a limit. Then a larger value is subtracted from locked_vm leading to a large number (because of type unsigned): [64.267797] perf_mmap_close mmap_user->locked_vm:0x408 pinned_vm:0x78d [64.267826] perf_mmap_close mmap_user->locked_vm:0x408 pinned_vm:0x70c [64.267848] perf_mmap_close mmap_user->locked_vm:0x408 pinned_vm:0x68b [64.267869] perf_mmap_close mmap_user->locked_vm:0x408 pinned_vm:0x60a [64.267891] perf_mmap_close mmap_user->locked_vm:0x408 pinned_vm:0x589 [64.267911] perf_mmap_close mmap_user->locked_vm:0x408 pinned_vm:0x508 [64.267933] perf_mmap_close mmap_user->locked_vm:0x408 pinned_vm:0x487 [64.267952] perf_mmap_close mmap_user->locked_vm:0x408 pinned_vm:0x406 [64.268883] perf_mmap_close mmap_user->locked_vm:0x307 pinned_vm:0x406 [64.269117] perf_mmap_close mmap_user->locked_vm:0x206 pinned_vm:0x406 [64.269433] perf_mmap_close mmap_user->locked_vm:0x105 pinned_vm:0x406 [64.269536] perf_mmap_close mmap_user->locked_vm:0x4 pinned_vm:0x404 [64.269797] perf_mmap_close mmap_user->locked_vm:0xffffffffffffff84 pinned_vm:0x303 [64.270105] perf_mmap_close mmap_user->locked_vm:0xffffffffffffff04 pinned_vm:0x202 [64.270374] perf_mmap_close mmap_user->locked_vm:0xfffffffffffffe84 pinned_vm:0x101 [64.270628] perf_mmap_close mmap_user->locked_vm:0xfffffffffffffe04 pinned_vm:0x0 This value sticks for the user until system is rebooted, causing follow-on system calls using locked_vm resource limit to fail. Note: There is no issue using the normal trace buffer. In fact the issue is in perf_mmap_close(). During allocation auxiliary trace buffer memory is either traced as 'extra' and added to 'pinned_vm' or trace as 'user_extra' and added to 'locked_vm'. This applies for normal trace buffers and auxiliary trace buffer. However in function perf_mmap_close() all auxiliary trace buffer is subtraced from 'locked_vm' and never from 'pinned_vm'. This breaks the ballance. Signed-off-by: Thomas Richter Acked-by: Peter Zijlstra Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@kernel.org Cc: gor@linux.ibm.com Cc: hechaol@fb.com Cc: heiko.carstens@de.ibm.com Cc: linux-perf-users@vger.kernel.org Cc: songliubraving@fb.com Fixes: d44248a41337 ("perf/core: Rework memory accounting in perf_mmap()") Link: https://lkml.kernel.org/r/20191021083354.67868-1-tmricht@linux.ibm.com [ Minor readability edits. ] Signed-off-by: Ingo Molnar --- kernel/events/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 9ec0b0bfddbd..f5d7950d1931 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5607,8 +5607,10 @@ static void perf_mmap_close(struct vm_area_struct *vma) perf_pmu_output_stop(event); /* now it's safe to free the pages */ - atomic_long_sub(rb->aux_nr_pages, &mmap_user->locked_vm); - atomic64_sub(rb->aux_mmap_locked, &vma->vm_mm->pinned_vm); + if (!rb->aux_mmap_locked) + atomic_long_sub(rb->aux_nr_pages, &mmap_user->locked_vm); + else + atomic64_sub(rb->aux_mmap_locked, &vma->vm_mm->pinned_vm); /* this has to be the last one */ rb_free_aux(rb); From 83629532ce45ef9df1f297b419b9ea112045685d Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Thu, 2 May 2019 16:03:26 +0800 Subject: [PATCH 497/572] ALSA: hda/realtek - Add support for ALC711 Support new codec ALC711. Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index ce4f11659765..085a2f95e076 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -393,6 +393,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0700: case 0x10ec0701: case 0x10ec0703: + case 0x10ec0711: alc_update_coef_idx(codec, 0x10, 1<<15, 0); break; case 0x10ec0662: @@ -8019,6 +8020,7 @@ static int patch_alc269(struct hda_codec *codec) case 0x10ec0700: case 0x10ec0701: case 0x10ec0703: + case 0x10ec0711: spec->codec_variant = ALC269_TYPE_ALC700; spec->gen.mixer_nid = 0; /* ALC700 does not have any loopback mixer path */ alc_update_coef_idx(codec, 0x4a, 1 << 15, 0); /* Combo jack auto trigger control */ @@ -9233,6 +9235,7 @@ static const struct hda_device_id snd_hda_id_realtek[] = { HDA_CODEC_ENTRY(0x10ec0700, "ALC700", patch_alc269), HDA_CODEC_ENTRY(0x10ec0701, "ALC701", patch_alc269), HDA_CODEC_ENTRY(0x10ec0703, "ALC703", patch_alc269), + HDA_CODEC_ENTRY(0x10ec0711, "ALC711", patch_alc269), HDA_CODEC_ENTRY(0x10ec0867, "ALC891", patch_alc662), HDA_CODEC_ENTRY(0x10ec0880, "ALC880", patch_alc880), HDA_CODEC_ENTRY(0x10ec0882, "ALC882", patch_alc882), From feb40824d78eac5e48f56498dca941754dff33d7 Mon Sep 17 00:00:00 2001 From: Faiz Abbas Date: Thu, 10 Oct 2019 16:22:30 +0530 Subject: [PATCH 498/572] mmc: sdhci-omap: Fix Tuning procedure for temperatures < -20C According to the App note[1] detailing the tuning algorithm, for temperatures < -20C, the initial tuning value should be min(largest value in LPW - 24, ceil(13/16 ratio of LPW)). The largest value in LPW is (max_window + 4 * (max_len - 1)) and not (max_window + 4 * max_len) itself. Fix this implementation. [1] http://www.ti.com/lit/an/spraca9b/spraca9b.pdf Fixes: 961de0a856e3 ("mmc: sdhci-omap: Workaround errata regarding SDR104/HS200 tuning failures (i929)") Cc: stable@vger.kernel.org Signed-off-by: Faiz Abbas Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-omap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-omap.c b/drivers/mmc/host/sdhci-omap.c index 41c2677c587f..083e7e053c95 100644 --- a/drivers/mmc/host/sdhci-omap.c +++ b/drivers/mmc/host/sdhci-omap.c @@ -372,7 +372,7 @@ static int sdhci_omap_execute_tuning(struct mmc_host *mmc, u32 opcode) * on temperature */ if (temperature < -20000) - phase_delay = min(max_window + 4 * max_len - 24, + phase_delay = min(max_window + 4 * (max_len - 1) - 24, max_window + DIV_ROUND_UP(13 * max_len, 16) * 4); else if (temperature < 20000) From c07d0073b9ec80a139d07ebf78e9c30d2a28279e Mon Sep 17 00:00:00 2001 From: Faiz Abbas Date: Tue, 15 Oct 2019 00:08:49 +0530 Subject: [PATCH 499/572] mmc: cqhci: Commit descriptors before setting the doorbell Add a write memory barrier to make sure that descriptors are actually written to memory, before ringing the doorbell. Signed-off-by: Faiz Abbas Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/cqhci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/cqhci.c b/drivers/mmc/host/cqhci.c index f7bdae5354c3..5047f7343ffc 100644 --- a/drivers/mmc/host/cqhci.c +++ b/drivers/mmc/host/cqhci.c @@ -611,7 +611,8 @@ static int cqhci_request(struct mmc_host *mmc, struct mmc_request *mrq) cq_host->slot[tag].flags = 0; cq_host->qcnt += 1; - + /* Make sure descriptors are ready before ringing the doorbell */ + wmb(); cqhci_writel(cq_host, 1 << tag, CQHCI_TDBR); if (!(cqhci_readl(cq_host, CQHCI_TDBR) & (1 << tag))) pr_debug("%s: cqhci: doorbell not set for tag %d\n", From 1f0d9cbeec9bb0a1c2013342836f2c9754d6502b Mon Sep 17 00:00:00 2001 From: Jae Hyun Yoo Date: Wed, 9 Oct 2019 14:20:34 -0700 Subject: [PATCH 500/572] i2c: aspeed: fix master pending state handling In case of master pending state, it should not trigger a master command, otherwise data could be corrupted because this H/W shares the same data buffer for slave and master operations. It also means that H/W command queue handling is unreliable because of the buffer sharing issue. To fix this issue, it clears command queue if a master command is queued in pending state to use S/W solution instead of H/W command queue handling. Also, it refines restarting mechanism of the pending master command. Fixes: 2e57b7cebb98 ("i2c: aspeed: Add multi-master use case support") Signed-off-by: Jae Hyun Yoo Reviewed-by: Brendan Higgins Acked-by: Joel Stanley Tested-by: Tao Ren Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-aspeed.c | 54 +++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 20 deletions(-) diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c index fa66951b05d0..7b098ff5f5dd 100644 --- a/drivers/i2c/busses/i2c-aspeed.c +++ b/drivers/i2c/busses/i2c-aspeed.c @@ -108,6 +108,12 @@ #define ASPEED_I2CD_S_TX_CMD BIT(2) #define ASPEED_I2CD_M_TX_CMD BIT(1) #define ASPEED_I2CD_M_START_CMD BIT(0) +#define ASPEED_I2CD_MASTER_CMDS_MASK \ + (ASPEED_I2CD_M_STOP_CMD | \ + ASPEED_I2CD_M_S_RX_CMD_LAST | \ + ASPEED_I2CD_M_RX_CMD | \ + ASPEED_I2CD_M_TX_CMD | \ + ASPEED_I2CD_M_START_CMD) /* 0x18 : I2CD Slave Device Address Register */ #define ASPEED_I2CD_DEV_ADDR_MASK GENMASK(6, 0) @@ -336,18 +342,19 @@ static void aspeed_i2c_do_start(struct aspeed_i2c_bus *bus) struct i2c_msg *msg = &bus->msgs[bus->msgs_index]; u8 slave_addr = i2c_8bit_addr_from_msg(msg); - bus->master_state = ASPEED_I2C_MASTER_START; - #if IS_ENABLED(CONFIG_I2C_SLAVE) /* * If it's requested in the middle of a slave session, set the master * state to 'pending' then H/W will continue handling this master * command when the bus comes back to the idle state. */ - if (bus->slave_state != ASPEED_I2C_SLAVE_INACTIVE) + if (bus->slave_state != ASPEED_I2C_SLAVE_INACTIVE) { bus->master_state = ASPEED_I2C_MASTER_PENDING; + return; + } #endif /* CONFIG_I2C_SLAVE */ + bus->master_state = ASPEED_I2C_MASTER_START; bus->buf_index = 0; if (msg->flags & I2C_M_RD) { @@ -422,20 +429,6 @@ static u32 aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus, u32 irq_status) } } -#if IS_ENABLED(CONFIG_I2C_SLAVE) - /* - * A pending master command will be started by H/W when the bus comes - * back to idle state after completing a slave operation so change the - * master state from 'pending' to 'start' at here if slave is inactive. - */ - if (bus->master_state == ASPEED_I2C_MASTER_PENDING) { - if (bus->slave_state != ASPEED_I2C_SLAVE_INACTIVE) - goto out_no_complete; - - bus->master_state = ASPEED_I2C_MASTER_START; - } -#endif /* CONFIG_I2C_SLAVE */ - /* Master is not currently active, irq was for someone else. */ if (bus->master_state == ASPEED_I2C_MASTER_INACTIVE || bus->master_state == ASPEED_I2C_MASTER_PENDING) @@ -462,11 +455,15 @@ static u32 aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus, u32 irq_status) #if IS_ENABLED(CONFIG_I2C_SLAVE) /* * If a peer master starts a xfer immediately after it queues a - * master command, change its state to 'pending' then H/W will - * continue the queued master xfer just after completing the - * slave mode session. + * master command, clear the queued master command and change + * its state to 'pending'. To simplify handling of pending + * cases, it uses S/W solution instead of H/W command queue + * handling. */ if (unlikely(irq_status & ASPEED_I2CD_INTR_SLAVE_MATCH)) { + writel(readl(bus->base + ASPEED_I2C_CMD_REG) & + ~ASPEED_I2CD_MASTER_CMDS_MASK, + bus->base + ASPEED_I2C_CMD_REG); bus->master_state = ASPEED_I2C_MASTER_PENDING; dev_dbg(bus->dev, "master goes pending due to a slave start\n"); @@ -629,6 +626,14 @@ static irqreturn_t aspeed_i2c_bus_irq(int irq, void *dev_id) irq_handled |= aspeed_i2c_master_irq(bus, irq_remaining); } + + /* + * Start a pending master command at here if a slave operation is + * completed. + */ + if (bus->master_state == ASPEED_I2C_MASTER_PENDING && + bus->slave_state == ASPEED_I2C_SLAVE_INACTIVE) + aspeed_i2c_do_start(bus); #else irq_handled = aspeed_i2c_master_irq(bus, irq_remaining); #endif /* CONFIG_I2C_SLAVE */ @@ -691,6 +696,15 @@ static int aspeed_i2c_master_xfer(struct i2c_adapter *adap, ASPEED_I2CD_BUS_BUSY_STS)) aspeed_i2c_recover_bus(bus); + /* + * If timed out and the state is still pending, drop the pending + * master command. + */ + spin_lock_irqsave(&bus->lock, flags); + if (bus->master_state == ASPEED_I2C_MASTER_PENDING) + bus->master_state = ASPEED_I2C_MASTER_INACTIVE; + spin_unlock_irqrestore(&bus->lock, flags); + return -ETIMEDOUT; } From b88639b8e3808c948169af390bd7e84e909bde8d Mon Sep 17 00:00:00 2001 From: Mihail Atanassov Date: Thu, 10 Oct 2019 10:30:07 +0000 Subject: [PATCH 501/572] drm/komeda: Don't flush inactive pipes HW doesn't allow flushing inactive pipes and raises an MERR interrupt if you try to do so. Stop triggering the MERR interrupt in the middle of a commit by calling drm_atomic_helper_commit_planes with the ACTIVE_ONLY flag. Reviewed-by: James Qian Wang (Arm Technology China) Signed-off-by: Mihail Atanassov Link: https://patchwork.freedesktop.org/patch/msgid/20191010102950.56253-1-mihail.atanassov@arm.com --- drivers/gpu/drm/arm/display/komeda/komeda_kms.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c index 8820ce15ce37..ae274902ff92 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c @@ -82,7 +82,8 @@ static void komeda_kms_commit_tail(struct drm_atomic_state *old_state) drm_atomic_helper_commit_modeset_disables(dev, old_state); - drm_atomic_helper_commit_planes(dev, old_state, 0); + drm_atomic_helper_commit_planes(dev, old_state, + DRM_PLANE_COMMIT_ACTIVE_ONLY); drm_atomic_helper_commit_modeset_enables(dev, old_state); From 8ae501e295cce9bc6e0dd82d5204a1d5faef44f8 Mon Sep 17 00:00:00 2001 From: Mihail Atanassov Date: Mon, 30 Sep 2019 12:23:07 +0000 Subject: [PATCH 502/572] drm/komeda: Fix typos in komeda_splitter_validate Fix both the string and the struct member being printed. Changes since v1: - Now with a bonus grammar fix, too. Fixes: 264b9436d23b ("drm/komeda: Enable writeback split support") Reviewed-by: James Qian Wang (Arm Technology China) Signed-off-by: Mihail Atanassov Link: https://patchwork.freedesktop.org/patch/msgid/20190930122231.33029-1-mihail.atanassov@arm.com --- drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c index ea26bc9c2d00..b848270e0a1f 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c @@ -564,8 +564,8 @@ komeda_splitter_validate(struct komeda_splitter *splitter, } if (!in_range(&splitter->vsize, dflow->in_h)) { - DRM_DEBUG_ATOMIC("split in_in: %d exceed the acceptable range.\n", - dflow->in_w); + DRM_DEBUG_ATOMIC("split in_h: %d exceeds the acceptable range.\n", + dflow->in_h); return -EINVAL; } From 2bb9f7566ba7ab3c2154964461e37b52cdc6b91b Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Fri, 18 Oct 2019 11:39:34 +0200 Subject: [PATCH 503/572] mmc: mxs: fix flags passed to dmaengine_prep_slave_sg Since ceeeb99cd821 we no longer abuse the DMA_CTRL_ACK flag for custom driver use and introduced the MXS_DMA_CTRL_WAIT4END instead. We have not changed all users to this flag though. This patch fixes it for the mxs-mmc driver. Fixes: ceeeb99cd821 ("dmaengine: mxs: rename custom flag") Signed-off-by: Sascha Hauer Tested-by: Fabio Estevam Reported-by: Bruno Thomsen Tested-by: Bruno Thomsen Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/mxs-mmc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/host/mxs-mmc.c b/drivers/mmc/host/mxs-mmc.c index 78e7e350655c..4031217d21c3 100644 --- a/drivers/mmc/host/mxs-mmc.c +++ b/drivers/mmc/host/mxs-mmc.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -266,7 +267,7 @@ static void mxs_mmc_bc(struct mxs_mmc_host *host) ssp->ssp_pio_words[2] = cmd1; ssp->dma_dir = DMA_NONE; ssp->slave_dirn = DMA_TRANS_NONE; - desc = mxs_mmc_prep_dma(host, DMA_CTRL_ACK); + desc = mxs_mmc_prep_dma(host, MXS_DMA_CTRL_WAIT4END); if (!desc) goto out; @@ -311,7 +312,7 @@ static void mxs_mmc_ac(struct mxs_mmc_host *host) ssp->ssp_pio_words[2] = cmd1; ssp->dma_dir = DMA_NONE; ssp->slave_dirn = DMA_TRANS_NONE; - desc = mxs_mmc_prep_dma(host, DMA_CTRL_ACK); + desc = mxs_mmc_prep_dma(host, MXS_DMA_CTRL_WAIT4END); if (!desc) goto out; @@ -441,7 +442,7 @@ static void mxs_mmc_adtc(struct mxs_mmc_host *host) host->data = data; ssp->dma_dir = dma_data_dir; ssp->slave_dirn = slave_dirn; - desc = mxs_mmc_prep_dma(host, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + desc = mxs_mmc_prep_dma(host, DMA_PREP_INTERRUPT | MXS_DMA_CTRL_WAIT4END); if (!desc) goto out; From b1a402e75a5f5127ff1ffff0615249f98df8b7b3 Mon Sep 17 00:00:00 2001 From: Dixit Parmar Date: Mon, 21 Oct 2019 09:32:47 -0700 Subject: [PATCH 504/572] Input: st1232 - fix reporting multitouch coordinates For Sitronix st1633 multi-touch controller driver the coordinates reported for multiple fingers were wrong, as it was always taking LSB of coordinates from the first contact data. Signed-off-by: Dixit Parmar Reviewed-by: Martin Kepplinger Cc: stable@vger.kernel.org Fixes: 351e0592bfea ("Input: st1232 - add support for st1633") Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=204561 Link: https://lore.kernel.org/r/1566209314-21767-1-git-send-email-dixitparmar19@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/st1232.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/input/touchscreen/st1232.c b/drivers/input/touchscreen/st1232.c index 34923399ece4..1139714e72e2 100644 --- a/drivers/input/touchscreen/st1232.c +++ b/drivers/input/touchscreen/st1232.c @@ -81,8 +81,10 @@ static int st1232_ts_read_data(struct st1232_ts_data *ts) for (i = 0, y = 0; i < ts->chip_info->max_fingers; i++, y += 3) { finger[i].is_valid = buf[i + y] >> 7; if (finger[i].is_valid) { - finger[i].x = ((buf[i + y] & 0x0070) << 4) | buf[i + 1]; - finger[i].y = ((buf[i + y] & 0x0007) << 8) | buf[i + 2]; + finger[i].x = ((buf[i + y] & 0x0070) << 4) | + buf[i + y + 1]; + finger[i].y = ((buf[i + y] & 0x0007) << 8) | + buf[i + y + 2]; /* st1232 includes a z-axis / touch strength */ if (ts->chip_info->have_z) From 2ecb287998a47cc0a766f6071f63bc185f338540 Mon Sep 17 00:00:00 2001 From: afzal mohammed Date: Mon, 21 Oct 2019 06:06:14 +0100 Subject: [PATCH 505/572] ARM: 8926/1: v7m: remove register save to stack before svc r0-r3 & r12 registers are saved & restored, before & after svc respectively. Intention was to preserve those registers across thread to handler mode switch. On v7-M, hardware saves the register context upon exception in AAPCS complaint way. Restoring r0-r3 & r12 is done from stack location where hardware saves it, not from the location on stack where these registers were saved. To clarify, on stm32f429 discovery board: 1. before svc, sp - 0x90009ff8 2. r0-r3,r12 saved to 0x90009ff8 - 0x9000a00b 3. upon svc, h/w decrements sp by 32 & pushes registers onto stack 4. after svc, sp - 0x90009fd8 5. r0-r3,r12 restored from 0x90009fd8 - 0x90009feb Above means r0-r3,r12 is not restored from the location where they are saved, but since hardware pushes the registers onto stack, the registers are restored correctly. Note that during register saving to stack (step 2), it goes past 0x9000a000. And it seems, based on objdump, there are global symbols residing there, and it perhaps can cause issues on a non-XIP Kernel (on XIP, data section is setup later). Based on the analysis above, manually saving registers onto stack is at best no-op and at worst can cause data section corruption. Hence remove storing of registers onto stack before svc. Fixes: b70cd406d7fe ("ARM: 8671/1: V7M: Preserve registers across switch from Thread to Handler mode") Signed-off-by: afzal mohammed Acked-by: Vladimir Murzin Signed-off-by: Russell King --- arch/arm/mm/proc-v7m.S | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S index efebf4120a0c..1a49d503eafc 100644 --- a/arch/arm/mm/proc-v7m.S +++ b/arch/arm/mm/proc-v7m.S @@ -132,7 +132,6 @@ __v7m_setup_cont: dsb mov r6, lr @ save LR ldr sp, =init_thread_union + THREAD_START_SP - stmia sp, {r0-r3, r12} cpsie i svc #0 1: cpsid i From db633a4e0e6eda69b6065e3e106f9ea13a0676c3 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Mon, 21 Oct 2019 19:24:02 +0200 Subject: [PATCH 506/572] x86/cpu/vmware: Use the full form of INL in VMWARE_HYPERCALL, for clang/llvm LLVM's assembler doesn't accept the short form INL instruction: inl (%%dx) but instead insists on the output register to be explicitly specified. This was previously fixed for the VMWARE_PORT macro. Fix it also for the VMWARE_HYPERCALL macro. Suggested-by: Sami Tolvanen Signed-off-by: Thomas Hellstrom Reviewed-by: Nick Desaulniers Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sean Christopherson Cc: Thomas Gleixner Cc: clang-built-linux@googlegroups.com Fixes: b4dd4f6e3648 ("Add a header file for hypercall definitions") Link: https://lkml.kernel.org/r/20191021172403.3085-2-thomas_os@shipmail.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/vmware.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/vmware.h b/arch/x86/include/asm/vmware.h index e00c9e875933..3caac90f9761 100644 --- a/arch/x86/include/asm/vmware.h +++ b/arch/x86/include/asm/vmware.h @@ -29,7 +29,8 @@ /* The low bandwidth call. The low word of edx is presumed clear. */ #define VMWARE_HYPERCALL \ - ALTERNATIVE_2("movw $" VMWARE_HYPERVISOR_PORT ", %%dx; inl (%%dx)", \ + ALTERNATIVE_2("movw $" VMWARE_HYPERVISOR_PORT ", %%dx; " \ + "inl (%%dx), %%eax", \ "vmcall", X86_FEATURE_VMCALL, \ "vmmcall", X86_FEATURE_VMW_VMMCALL) From 6fee2a0be0ecae939d4b6cd8297d88b5cbb61654 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Mon, 21 Oct 2019 19:24:03 +0200 Subject: [PATCH 507/572] x86/cpu/vmware: Fix platform detection VMWARE_PORT macro The platform detection VMWARE_PORT macro uses the VMWARE_HYPERVISOR_PORT definition, but expects it to be an integer. However, when it was moved to the new vmware.h include file, it was changed to be a string to better fit into the VMWARE_HYPERCALL set of macros. This obviously breaks the platform detection VMWARE_PORT functionality. Change the VMWARE_HYPERVISOR_PORT and VMWARE_HYPERVISOR_PORT_HB definitions to be integers, and use __stringify() for their stringified form when needed. Signed-off-by: Thomas Hellstrom Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sean Christopherson Cc: Thomas Gleixner Fixes: b4dd4f6e3648 ("Add a header file for hypercall definitions") Link: https://lkml.kernel.org/r/20191021172403.3085-3-thomas_os@shipmail.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/vmware.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/vmware.h b/arch/x86/include/asm/vmware.h index 3caac90f9761..ac9fc51e2b18 100644 --- a/arch/x86/include/asm/vmware.h +++ b/arch/x86/include/asm/vmware.h @@ -4,6 +4,7 @@ #include #include +#include /* * The hypercall definitions differ in the low word of the %edx argument @@ -20,8 +21,8 @@ */ /* Old port-based version */ -#define VMWARE_HYPERVISOR_PORT "0x5658" -#define VMWARE_HYPERVISOR_PORT_HB "0x5659" +#define VMWARE_HYPERVISOR_PORT 0x5658 +#define VMWARE_HYPERVISOR_PORT_HB 0x5659 /* Current vmcall / vmmcall version */ #define VMWARE_HYPERVISOR_HB BIT(0) @@ -29,7 +30,7 @@ /* The low bandwidth call. The low word of edx is presumed clear. */ #define VMWARE_HYPERCALL \ - ALTERNATIVE_2("movw $" VMWARE_HYPERVISOR_PORT ", %%dx; " \ + ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT) ", %%dx; " \ "inl (%%dx), %%eax", \ "vmcall", X86_FEATURE_VMCALL, \ "vmmcall", X86_FEATURE_VMW_VMMCALL) @@ -39,7 +40,8 @@ * HB and OUT bits set. */ #define VMWARE_HYPERCALL_HB_OUT \ - ALTERNATIVE_2("movw $" VMWARE_HYPERVISOR_PORT_HB ", %%dx; rep outsb", \ + ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT_HB) ", %%dx; " \ + "rep outsb", \ "vmcall", X86_FEATURE_VMCALL, \ "vmmcall", X86_FEATURE_VMW_VMMCALL) @@ -48,7 +50,8 @@ * HB bit set. */ #define VMWARE_HYPERCALL_HB_IN \ - ALTERNATIVE_2("movw $" VMWARE_HYPERVISOR_PORT_HB ", %%dx; rep insb", \ + ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT_HB) ", %%dx; " \ + "rep insb", \ "vmcall", X86_FEATURE_VMCALL, \ "vmmcall", X86_FEATURE_VMW_VMMCALL) #endif From 6b1340cc00edeadd52ebd8a45171f38c8de2a387 Mon Sep 17 00:00:00 2001 From: Prateek Sood Date: Tue, 15 Oct 2019 11:47:25 +0530 Subject: [PATCH 508/572] tracing: Fix race in perf_trace_buf initialization A race condition exists while initialiazing perf_trace_buf from perf_trace_init() and perf_kprobe_init(). CPU0 CPU1 perf_trace_init() mutex_lock(&event_mutex) perf_trace_event_init() perf_trace_event_reg() total_ref_count == 0 buf = alloc_percpu() perf_trace_buf[i] = buf tp_event->class->reg() //fails perf_kprobe_init() goto fail perf_trace_event_init() perf_trace_event_reg() fail: total_ref_count == 0 total_ref_count == 0 buf = alloc_percpu() perf_trace_buf[i] = buf tp_event->class->reg() total_ref_count++ free_percpu(perf_trace_buf[i]) perf_trace_buf[i] = NULL Any subsequent call to perf_trace_event_reg() will observe total_ref_count > 0, causing the perf_trace_buf to be always NULL. This can result in perf_trace_buf getting accessed from perf_trace_buf_alloc() without being initialized. Acquiring event_mutex in perf_kprobe_init() before calling perf_trace_event_init() should fix this race. The race caused the following bug: Unable to handle kernel paging request at virtual address 0000003106f2003c Mem abort info: ESR = 0x96000045 Exception class = DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 Data abort info: ISV = 0, ISS = 0x00000045 CM = 0, WnR = 1 user pgtable: 4k pages, 39-bit VAs, pgdp = ffffffc034b9b000 [0000003106f2003c] pgd=0000000000000000, pud=0000000000000000 Internal error: Oops: 96000045 [#1] PREEMPT SMP Process syz-executor (pid: 18393, stack limit = 0xffffffc093190000) pstate: 80400005 (Nzcv daif +PAN -UAO) pc : __memset+0x20/0x1ac lr : memset+0x3c/0x50 sp : ffffffc09319fc50 __memset+0x20/0x1ac perf_trace_buf_alloc+0x140/0x1a0 perf_trace_sys_enter+0x158/0x310 syscall_trace_enter+0x348/0x7c0 el0_svc_common+0x11c/0x368 el0_svc_handler+0x12c/0x198 el0_svc+0x8/0xc Ramdumps showed the following: total_ref_count = 3 perf_trace_buf = ( 0x0 -> NULL, 0x0 -> NULL, 0x0 -> NULL, 0x0 -> NULL) Link: http://lkml.kernel.org/r/1571120245-4186-1-git-send-email-prsood@codeaurora.org Cc: stable@vger.kernel.org Fixes: e12f03d7031a9 ("perf/core: Implement the 'perf_kprobe' PMU") Acked-by: Song Liu Signed-off-by: Prateek Sood Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_event_perf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 0892e38ed6fb..a9dfa04ffa44 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -272,9 +272,11 @@ int perf_kprobe_init(struct perf_event *p_event, bool is_retprobe) goto out; } + mutex_lock(&event_mutex); ret = perf_trace_event_init(tp_event, p_event); if (ret) destroy_local_trace_kprobe(tp_event); + mutex_unlock(&event_mutex); out: kfree(func); return ret; @@ -282,8 +284,10 @@ out: void perf_kprobe_destroy(struct perf_event *p_event) { + mutex_lock(&event_mutex); perf_trace_event_close(p_event); perf_trace_event_unreg(p_event); + mutex_unlock(&event_mutex); destroy_local_trace_kprobe(p_event->tp_event); } From edffc70f505abdab885f4b4212438b4298dec78f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 18 Oct 2019 15:35:34 +0300 Subject: [PATCH 509/572] ACPI: NFIT: Fix unlock on error in scrub_show() We change the locking in this function and forgot to update this error path so we are accidentally still holding the "dev->lockdep_mutex". Fixes: 87a30e1f05d7 ("driver-core, libnvdimm: Let device subsystems add local lockdep coverage") Signed-off-by: Dan Carpenter Reviewed-by: Ira Weiny Acked-by: Dan Williams Cc: 5.3+ # 5.3+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/nfit/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 1413324982f0..14e68f202f81 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1322,7 +1322,7 @@ static ssize_t scrub_show(struct device *dev, nfit_device_lock(dev); nd_desc = dev_get_drvdata(dev); if (!nd_desc) { - device_unlock(dev); + nfit_device_unlock(dev); return rc; } acpi_desc = to_acpi_desc(nd_desc); From 31d851407f90076c58291fb5eadc82c1dd613cee Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Fri, 18 Oct 2019 08:49:29 +0800 Subject: [PATCH 510/572] cpuidle: haltpoll: Take 'idle=' override into account Currenly haltpoll isn't aware of the 'idle=' override, the priority is 'idle=poll' > haltpoll > 'idle=halt'. When 'idle=poll' is used, cpuidle driver is bypassed but current_driver in sys still shows 'haltpoll'. When 'idle=halt' is used, haltpoll takes precedence and makes 'idle=halt' have no effect. Add a check to prevent the haltpoll driver from loading if 'idle=' is present. Signed-off-by: Zhenzhong Duan Co-developed-by: Joao Martins [ rjw: Subject ] Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle-haltpoll.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c index 932390b028f1..b0ce9bc78113 100644 --- a/drivers/cpuidle/cpuidle-haltpoll.c +++ b/drivers/cpuidle/cpuidle-haltpoll.c @@ -95,6 +95,10 @@ static int __init haltpoll_init(void) int ret; struct cpuidle_driver *drv = &haltpoll_driver; + /* Do not load haltpoll if idle= is passed */ + if (boot_option_idle_override != IDLE_NO_OVERRIDE) + return -ENODEV; + cpuidle_poll_state_init(drv); if (!kvm_para_available() || From 41cd02c6f7f6e66e7abf02a4379e355a7db89f78 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Fri, 4 Oct 2019 13:22:47 -0700 Subject: [PATCH 511/572] kvm: x86: Expose RDPID in KVM_GET_SUPPORTED_CPUID When the RDPID instruction is supported on the host, enumerate it in KVM_GET_SUPPORTED_CPUID. Signed-off-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 9c5029cf6f3f..f68c0c753c38 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -363,7 +363,7 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index) /* cpuid 7.0.ecx*/ const u32 kvm_cpuid_7_0_ecx_x86_features = - F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | + F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(RDPID) | F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) | F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) | F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/; From 9de25d182b806d63412f6827e3066c031d4be500 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 7 Oct 2019 15:26:56 +0200 Subject: [PATCH 512/572] selftests: kvm: synchronize .gitignore to Makefile Because "Untracked files:" are annoying. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index b35da375530a..409c1fa75e03 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -1,4 +1,5 @@ /s390x/sync_regs_test +/s390x/memop /x86_64/cr4_cpuid_sync_test /x86_64/evmcs_test /x86_64/hyperv_cpuid @@ -9,6 +10,7 @@ /x86_64/state_test /x86_64/sync_regs_test /x86_64/vmx_close_while_nested_test +/x86_64/vmx_dirty_log_test /x86_64/vmx_set_nested_state_test /x86_64/vmx_tsc_adjust_test /clear_dirty_log_test From 44551b2f693d1ddcab4ca9895074f8f61c3a72af Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Sun, 29 Sep 2019 09:06:56 +0800 Subject: [PATCH 513/572] KVM: Don't shrink/grow vCPU halt_poll_ns if host side polling is disabled Don't waste cycles to shrink/grow vCPU halt_poll_ns if host side polling is disabled. Acked-by: Marcelo Tosatti Cc: Marcelo Tosatti Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index fd68fbe0a75d..67ef3f2e19e8 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2360,20 +2360,23 @@ out: kvm_arch_vcpu_unblocking(vcpu); block_ns = ktime_to_ns(cur) - ktime_to_ns(start); - if (!vcpu_valid_wakeup(vcpu)) - shrink_halt_poll_ns(vcpu); - else if (halt_poll_ns) { - if (block_ns <= vcpu->halt_poll_ns) - ; - /* we had a long block, shrink polling */ - else if (vcpu->halt_poll_ns && block_ns > halt_poll_ns) + if (!kvm_arch_no_poll(vcpu)) { + if (!vcpu_valid_wakeup(vcpu)) { shrink_halt_poll_ns(vcpu); - /* we had a short halt and our poll time is too small */ - else if (vcpu->halt_poll_ns < halt_poll_ns && - block_ns < halt_poll_ns) - grow_halt_poll_ns(vcpu); - } else - vcpu->halt_poll_ns = 0; + } else if (halt_poll_ns) { + if (block_ns <= vcpu->halt_poll_ns) + ; + /* we had a long block, shrink polling */ + else if (vcpu->halt_poll_ns && block_ns > halt_poll_ns) + shrink_halt_poll_ns(vcpu); + /* we had a short halt and our poll time is too small */ + else if (vcpu->halt_poll_ns < halt_poll_ns && + block_ns < halt_poll_ns) + grow_halt_poll_ns(vcpu); + } else { + vcpu->halt_poll_ns = 0; + } + } trace_kvm_vcpu_wakeup(block_ns, waited, vcpu_valid_wakeup(vcpu)); kvm_arch_vcpu_block_finish(vcpu); From 700c17d9cec8712f4091692488fb63e2680f7a5d Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 8 Oct 2019 21:43:36 +0200 Subject: [PATCH 514/572] selftests: kvm: vmx_set_nested_state_test: don't check for VMX support twice vmx_set_nested_state_test() checks if VMX is supported twice: in the very beginning (and skips the whole test if it's not) and before doing test_vmx_nested_state(). One should be enough. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- .../selftests/kvm/x86_64/vmx_set_nested_state_test.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c index 853e370e8a39..a6d85614ae4d 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c @@ -271,12 +271,7 @@ int main(int argc, char *argv[]) state.flags = KVM_STATE_NESTED_RUN_PENDING; test_nested_state_expect_einval(vm, &state); - /* - * TODO: When SVM support is added for KVM_SET_NESTED_STATE - * add tests here to support it like VMX. - */ - if (entry->ecx & CPUID_VMX) - test_vmx_nested_state(vm); + test_vmx_nested_state(vm); kvm_vm_free(vm); return 0; From 9143613ef0ba9f88d2fef9038930637a0909d35a Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 8 Oct 2019 21:43:37 +0200 Subject: [PATCH 515/572] selftests: kvm: consolidate VMX support checks vmx_* tests require VMX and three of them implement the same check. Move it to vmx library. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/include/x86_64/vmx.h | 2 ++ tools/testing/selftests/kvm/lib/x86_64/vmx.c | 10 ++++++++++ .../selftests/kvm/x86_64/vmx_close_while_nested_test.c | 6 +----- .../selftests/kvm/x86_64/vmx_set_nested_state_test.c | 6 +----- .../testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c | 6 +----- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h index 6ae5a47fe067..f52e0ba84fed 100644 --- a/tools/testing/selftests/kvm/include/x86_64/vmx.h +++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h @@ -580,6 +580,8 @@ bool prepare_for_vmx_operation(struct vmx_pages *vmx); void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp); bool load_vmcs(struct vmx_pages *vmx); +void nested_vmx_check_supported(void); + void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot); void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c index fab8f6b0bf52..f6ec97b7eaef 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c @@ -376,6 +376,16 @@ void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp) init_vmcs_guest_state(guest_rip, guest_rsp); } +void nested_vmx_check_supported(void) +{ + struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); + + if (!(entry->ecx & CPUID_VMX)) { + fprintf(stderr, "nested VMX not enabled, skipping test\n"); + exit(KSFT_SKIP); + } +} + void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot) { diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c index 3b0ffe01dacd..5dfb53546a26 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c @@ -53,12 +53,8 @@ static void l1_guest_code(struct vmx_pages *vmx_pages) int main(int argc, char *argv[]) { vm_vaddr_t vmx_pages_gva; - struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); - if (!(entry->ecx & CPUID_VMX)) { - fprintf(stderr, "nested VMX not enabled, skipping test\n"); - exit(KSFT_SKIP); - } + nested_vmx_check_supported(); vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code); vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c index a6d85614ae4d..9ef7fab39d48 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c @@ -224,7 +224,6 @@ int main(int argc, char *argv[]) { struct kvm_vm *vm; struct kvm_nested_state state; - struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); have_evmcs = kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS); @@ -237,10 +236,7 @@ int main(int argc, char *argv[]) * AMD currently does not implement set_nested_state, so for now we * just early out. */ - if (!(entry->ecx & CPUID_VMX)) { - fprintf(stderr, "nested VMX not enabled, skipping test\n"); - exit(KSFT_SKIP); - } + nested_vmx_check_supported(); vm = vm_create_default(VCPU_ID, 0, 0); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c index f36c10eba71e..5590fd2bcf87 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c @@ -128,12 +128,8 @@ static void report(int64_t val) int main(int argc, char *argv[]) { vm_vaddr_t vmx_pages_gva; - struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); - if (!(entry->ecx & CPUID_VMX)) { - fprintf(stderr, "nested VMX not enabled, skipping test\n"); - exit(KSFT_SKIP); - } + nested_vmx_check_supported(); vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code); vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); From 11eada4718a352a6a588de9908200c1e348530f1 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 8 Oct 2019 21:43:38 +0200 Subject: [PATCH 516/572] selftests: kvm: vmx_dirty_log_test: skip the test when VMX is not supported vmx_dirty_log_test fails on AMD and this is no surprise as it is VMX specific. Bail early when nested VMX is unsupported. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c index 0bca1cfe2c1e..a223a6401258 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c @@ -78,6 +78,8 @@ int main(int argc, char *argv[]) struct ucall uc; bool done = false; + nested_vmx_check_supported(); + /* Create VM */ vm = vm_create_default(VCPU_ID, 0, l1_guest_code); vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); From ef4059809890f732c69cc1726d3a9a108a832a2f Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 8 Oct 2019 20:08:08 +0200 Subject: [PATCH 517/572] selftests: kvm: fix sync_regs_test with newer gccs Commit 204c91eff798a ("KVM: selftests: do not blindly clobber registers in guest asm") was intended to make test more gcc-proof, however, the result is exactly the opposite: on newer gccs (e.g. 8.2.1) the test breaks with ==== Test Assertion Failure ==== x86_64/sync_regs_test.c:168: run->s.regs.regs.rbx == 0xBAD1DEA + 1 pid=14170 tid=14170 - Invalid argument 1 0x00000000004015b3: main at sync_regs_test.c:166 (discriminator 6) 2 0x00007f413fb66412: ?? ??:0 3 0x000000000040191d: _start at ??:? rbx sync regs value incorrect 0x1. Apparently, compile is still free to play games with registers even when they have variables attached. Re-write guest code with 'asm volatile' by embedding ucall there and making sure rbx is preserved. Fixes: 204c91eff798a ("KVM: selftests: do not blindly clobber registers in guest asm") Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- .../selftests/kvm/x86_64/sync_regs_test.c | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c index 11c2a70a7b87..5c8224256294 100644 --- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c +++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c @@ -22,18 +22,19 @@ #define VCPU_ID 5 +#define UCALL_PIO_PORT ((uint16_t)0x1000) + +/* + * ucall is embedded here to protect against compiler reshuffling registers + * before calling a function. In this test we only need to get KVM_EXIT_IO + * vmexit and preserve RBX, no additional information is needed. + */ void guest_code(void) { - /* - * use a callee-save register, otherwise the compiler - * saves it around the call to GUEST_SYNC. - */ - register u32 stage asm("rbx"); - for (;;) { - GUEST_SYNC(0); - stage++; - asm volatile ("" : : "r" (stage)); - } + asm volatile("1: in %[port], %%al\n" + "add $0x1, %%rbx\n" + "jmp 1b" + : : [port] "d" (UCALL_PIO_PORT) : "rax", "rbx"); } static void compare_regs(struct kvm_regs *left, struct kvm_regs *right) From 1a8211c7d8717b19c1e9fa41d19fe6a55409765e Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Sun, 29 Sep 2019 17:50:18 +0300 Subject: [PATCH 518/572] KVM: VMX: Remove specialized handling of unexpected exit-reasons Commit bf653b78f960 ("KVM: vmx: Introduce handle_unexpected_vmexit and handle WAITPKG vmexit") introduced specialized handling of specific exit-reasons that should not be raised by CPU because KVM configures VMCS such that they should never be raised. However, since commit 7396d337cfad ("KVM: x86: Return to userspace with internal error on unexpected exit reason"), VMX & SVM exit handlers were modified to generically handle all unexpected exit-reasons by returning to userspace with internal error. Therefore, there is no need for specialized handling of specific unexpected exit-reasons (This specialized handling also introduced inconsistency for these exit-reasons to silently skip guest instruction instead of return to userspace on internal-error). Fixes: bf653b78f960 ("KVM: vmx: Introduce handle_unexpected_vmexit and handle WAITPKG vmexit") Signed-off-by: Liran Alon Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index e7970a2e8eae..8f01019295a1 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5543,14 +5543,6 @@ static int handle_encls(struct kvm_vcpu *vcpu) return 1; } -static int handle_unexpected_vmexit(struct kvm_vcpu *vcpu) -{ - kvm_skip_emulated_instruction(vcpu); - WARN_ONCE(1, "Unexpected VM-Exit Reason = 0x%x", - vmcs_read32(VM_EXIT_REASON)); - return 1; -} - /* * The exit handlers return 1 if the exit was handled fully and guest execution * may resume. Otherwise they set the kvm_run parameter to indicate what needs @@ -5602,15 +5594,11 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_INVVPID] = handle_vmx_instruction, [EXIT_REASON_RDRAND] = handle_invalid_op, [EXIT_REASON_RDSEED] = handle_invalid_op, - [EXIT_REASON_XSAVES] = handle_unexpected_vmexit, - [EXIT_REASON_XRSTORS] = handle_unexpected_vmexit, [EXIT_REASON_PML_FULL] = handle_pml_full, [EXIT_REASON_INVPCID] = handle_invpcid, [EXIT_REASON_VMFUNC] = handle_vmx_instruction, [EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer, [EXIT_REASON_ENCLS] = handle_encls, - [EXIT_REASON_UMWAIT] = handle_unexpected_vmexit, - [EXIT_REASON_TPAUSE] = handle_unexpected_vmexit, }; static const int kvm_vmx_max_exit_handlers = From b4fdcf6056d9057fe762bd6d8060e3ab9949efea Mon Sep 17 00:00:00 2001 From: kbuild test robot Date: Sun, 29 Sep 2019 18:43:28 +0200 Subject: [PATCH 519/572] KVM: x86: fix bugon.cocci warnings Use BUG_ON instead of a if condition followed by BUG. Generated by: scripts/coccinelle/misc/bugon.cocci Fixes: 4b526de50e39 ("KVM: x86: Check kvm_rebooting in kvm_spurious_fault()") CC: Sean Christopherson Signed-off-by: kbuild test robot Signed-off-by: Julia Lawall Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 661e2bf38526..41aecc4c52d9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -360,8 +360,7 @@ EXPORT_SYMBOL_GPL(kvm_set_apic_base); asmlinkage __visible void kvm_spurious_fault(void) { /* Fault while not rebooting. We want the trace. */ - if (!kvm_rebooting) - BUG(); + BUG_ON(!kvm_rebooting); } EXPORT_SYMBOL_GPL(kvm_spurious_fault); From 49dedf0dd0da073b3a0146a62c768887aea13508 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 10 Oct 2019 12:49:22 +0200 Subject: [PATCH 520/572] kvm: clear kvmclock MSR on reset After resetting the vCPU, the kvmclock MSR keeps the previous value but it is not enabled. This can be confusing, so fix it. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 41aecc4c52d9..5863c38108d9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2536,6 +2536,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) static void kvmclock_reset(struct kvm_vcpu *vcpu) { vcpu->arch.pv_time_enabled = false; + vcpu->arch.time = 0; } static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) @@ -2701,8 +2702,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_KVM_SYSTEM_TIME: { struct kvm_arch *ka = &vcpu->kvm->arch; - kvmclock_reset(vcpu); - if (vcpu->vcpu_id == 0 && !msr_info->host_initiated) { bool tmp = (msr == MSR_KVM_SYSTEM_TIME); @@ -2716,14 +2715,13 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu); /* we verify if the enable bit is set... */ + vcpu->arch.pv_time_enabled = false; if (!(data & 1)) break; - if (kvm_gfn_to_hva_cache_init(vcpu->kvm, + if (!kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_time, data & ~1ULL, sizeof(struct pvclock_vcpu_time_info))) - vcpu->arch.pv_time_enabled = false; - else vcpu->arch.pv_time_enabled = true; break; From f3a519e4add93b7b31a6616f0b09635ff2e6a159 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 22 Oct 2019 10:39:40 +0300 Subject: [PATCH 521/572] perf/aux: Fix AUX output stopping Commit: 8a58ddae2379 ("perf/core: Fix exclusive events' grouping") allows CAP_EXCLUSIVE events to be grouped with other events. Since all of those also happen to be AUX events (which is not the case the other way around, because arch/s390), this changes the rules for stopping the output: the AUX event may not be on its PMU's context any more, if it's grouped with a HW event, in which case it will be on that HW event's context instead. If that's the case, munmap() of the AUX buffer can't find and stop the AUX event, potentially leaving the last reference with the atomic context, which will then end up freeing the AUX buffer. This will then trip warnings: Fix this by using the context's PMU context when looking for events to stop, instead of the event's PMU context. Signed-off-by: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20191022073940.61814-1-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index f5d7950d1931..bb3748d29b04 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6949,7 +6949,7 @@ static void __perf_event_output_stop(struct perf_event *event, void *data) static int __perf_pmu_output_stop(void *info) { struct perf_event *event = info; - struct pmu *pmu = event->pmu; + struct pmu *pmu = event->ctx->pmu; struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); struct remote_output ro = { .rb = event->rb, From ba8bf0967a154796be15c4983603aad0b05c3138 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 22 Oct 2019 17:45:14 +0200 Subject: [PATCH 522/572] ALSA: usb-audio: Fix copy&paste error in the validator The recently introduced USB-audio descriptor validator had a stupid copy&paste error that may lead to an unexpected overlook of too short descriptors for processing and extension units. It's likely the cause of the report triggered by syzkaller fuzzer. Let's fix it. Fixes: 57f8770620e9 ("ALSA: usb-audio: More validations of descriptor units") Reported-by: syzbot+0620f79a1978b1133fd7@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/s5hsgnkdbsl.wl-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/validate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/validate.c b/sound/usb/validate.c index 3c8f73a0eb12..a5e584b60dcd 100644 --- a/sound/usb/validate.c +++ b/sound/usb/validate.c @@ -75,7 +75,7 @@ static bool validate_processing_unit(const void *p, if (d->bLength < sizeof(*d)) return false; - len = d->bLength < sizeof(*d) + d->bNrInPins; + len = sizeof(*d) + d->bNrInPins; if (d->bLength < len) return false; switch (v->protocol) { From 388bb19be8eab4674a660e0c97eaf60775362bc7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 10 Oct 2019 15:13:33 +0200 Subject: [PATCH 523/572] s390/zcrypt: fix memleak at release If a process is interrupted while accessing the crypto device and the global ap_perms_mutex is contented, release() could return early and fail to free related resources. Fixes: 00fab2350e6b ("s390/zcrypt: multiple zcrypt device nodes support") Cc: # 4.19 Cc: Harald Freudenberger Signed-off-by: Johan Hovold Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/zcrypt_api.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index 45bdb47f84c1..9157e728a362 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -522,8 +522,7 @@ static int zcrypt_release(struct inode *inode, struct file *filp) if (filp->f_inode->i_cdev == &zcrypt_cdev) { struct zcdn_device *zcdndev; - if (mutex_lock_interruptible(&ap_perms_mutex)) - return -ERESTARTSYS; + mutex_lock(&ap_perms_mutex); zcdndev = find_zcdndev_by_devt(filp->f_inode->i_rdev); mutex_unlock(&ap_perms_mutex); if (zcdndev) { From ac49303d9ef0ad98b79867a380ef23480e48870b Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Mon, 21 Oct 2019 19:56:00 +0200 Subject: [PATCH 524/572] s390/kaslr: add support for R_390_GLOB_DAT relocation type Commit "bpf: Process in-kernel BTF" in linux-next introduced an undefined __weak symbol, which results in an R_390_GLOB_DAT relocation type. That is not yet handled by the KASLR relocation code, and the kernel stops with the message "Unknown relocation type". Add code to detect and handle R_390_GLOB_DAT relocation types and undefined symbols. Fixes: 805bc0bc238f ("s390/kernel: build a relocatable kernel") Cc: # v5.2+ Acked-by: Heiko Carstens Signed-off-by: Gerald Schaefer Signed-off-by: Vasily Gorbik --- arch/s390/boot/startup.c | 14 +++++++++++--- arch/s390/kernel/machine_kexec_reloc.c | 1 + 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 596ca7cc4d7b..5367950510f6 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -101,10 +101,18 @@ static void handle_relocs(unsigned long offset) dynsym = (Elf64_Sym *) vmlinux.dynsym_start; for (rela = rela_start; rela < rela_end; rela++) { loc = rela->r_offset + offset; - val = rela->r_addend + offset; + val = rela->r_addend; r_sym = ELF64_R_SYM(rela->r_info); - if (r_sym) - val += dynsym[r_sym].st_value; + if (r_sym) { + if (dynsym[r_sym].st_shndx != SHN_UNDEF) + val += dynsym[r_sym].st_value + offset; + } else { + /* + * 0 == undefined symbol table index (STN_UNDEF), + * used for R_390_RELATIVE, only add KASLR offset + */ + val += offset; + } r_type = ELF64_R_TYPE(rela->r_info); rc = arch_kexec_do_relocs(r_type, (void *) loc, val, 0); if (rc) diff --git a/arch/s390/kernel/machine_kexec_reloc.c b/arch/s390/kernel/machine_kexec_reloc.c index 3b664cb3ec4d..d5035de9020e 100644 --- a/arch/s390/kernel/machine_kexec_reloc.c +++ b/arch/s390/kernel/machine_kexec_reloc.c @@ -27,6 +27,7 @@ int arch_kexec_do_relocs(int r_type, void *loc, unsigned long val, *(u32 *)loc = val; break; case R_390_64: /* Direct 64 bit. */ + case R_390_GLOB_DAT: *(u64 *)loc = val; break; case R_390_PC16: /* PC relative 16 bit. */ From 6941051d3028963c3b0b3fcdd0815f2b51b957bb Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Fri, 18 Oct 2019 11:58:15 +0100 Subject: [PATCH 525/572] cpufreq: Cancel policy update work scheduled before freeing Scheduled policy update work may end up racing with the freeing of the policy and unregistering the driver. One possible race is as below, where the cpufreq_driver is unregistered, but the scheduled work gets executed at later stage when, cpufreq_driver is NULL (i.e. after freeing the policy and driver). Unable to handle kernel NULL pointer dereference at virtual address 0000001c pgd = (ptrval) [0000001c] *pgd=80000080204003, *pmd=00000000 Internal error: Oops: 206 [#1] SMP THUMB2 Modules linked in: CPU: 0 PID: 34 Comm: kworker/0:1 Not tainted 5.4.0-rc3-00006-g67f5a8081a4b #86 Hardware name: ARM-Versatile Express Workqueue: events handle_update PC is at cpufreq_set_policy+0x58/0x228 LR is at dev_pm_qos_read_value+0x77/0xac Control: 70c5387d Table: 80203000 DAC: fffffffd Process kworker/0:1 (pid: 34, stack limit = 0x(ptrval)) (cpufreq_set_policy) from (refresh_frequency_limits.part.24+0x37/0x48) (refresh_frequency_limits.part.24) from (handle_update+0x2f/0x38) (handle_update) from (process_one_work+0x16d/0x3cc) (process_one_work) from (worker_thread+0xff/0x414) (worker_thread) from (kthread+0xff/0x100) (kthread) from (ret_from_fork+0x11/0x28) Fixes: 67d874c3b2c6 ("cpufreq: Register notifiers with the PM QoS framework") Signed-off-by: Sudeep Holla [ rjw: Cancel the work before dropping the QoS requests ] Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 8478ff6f3045..48a224a6b178 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1268,6 +1268,9 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy) freq_qos_remove_notifier(&policy->constraints, FREQ_QOS_MIN, &policy->nb_min); + /* Cancel any pending policy->update work before freeing the policy. */ + cancel_work_sync(&policy->update); + if (policy->max_freq_req) { /* * CPUFREQ_CREATE_POLICY notification is sent only after From 5c94ac5d0f9e3c8791d1686e0bf22a2a341d1597 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Fri, 18 Oct 2019 10:50:31 +0800 Subject: [PATCH 526/572] KVM: SVM: Fix potential wrong physical id in avic_handle_ldr_update Guest physical APIC ID may not equal to vcpu->vcpu_id in some case. We may set the wrong physical id in avic_handle_ldr_update as we always use vcpu->vcpu_id. Get physical APIC ID from vAPIC page instead. Export and use kvm_xapic_id here and in avic_handle_apic_id_update as suggested by Vitaly. Signed-off-by: Miaohe Lin Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 5 ----- arch/x86/kvm/lapic.h | 5 +++++ arch/x86/kvm/svm.c | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 87b0fcc23ef8..b29d00b661ff 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -111,11 +111,6 @@ static inline int apic_enabled(struct kvm_lapic *apic) (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \ APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER) -static inline u8 kvm_xapic_id(struct kvm_lapic *apic) -{ - return kvm_lapic_get_reg(apic, APIC_ID) >> 24; -} - static inline u32 kvm_x2apic_id(struct kvm_lapic *apic) { return apic->vcpu->vcpu_id; diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 2aad7e226fc0..1f5014852e20 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -242,4 +242,9 @@ static inline enum lapic_mode kvm_apic_mode(u64 apic_base) return apic_base & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE); } +static inline u8 kvm_xapic_id(struct kvm_lapic *apic) +{ + return kvm_lapic_get_reg(apic, APIC_ID) >> 24; +} + #endif diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index f8ecb6df5106..ca200b50cde4 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4591,6 +4591,7 @@ static int avic_handle_ldr_update(struct kvm_vcpu *vcpu) int ret = 0; struct vcpu_svm *svm = to_svm(vcpu); u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR); + u32 id = kvm_xapic_id(vcpu->arch.apic); if (ldr == svm->ldr_reg) return 0; @@ -4598,7 +4599,7 @@ static int avic_handle_ldr_update(struct kvm_vcpu *vcpu) avic_invalidate_logical_id_entry(vcpu); if (ldr) - ret = avic_ldr_write(vcpu, vcpu->vcpu_id, ldr); + ret = avic_ldr_write(vcpu, id, ldr); if (!ret) svm->ldr_reg = ldr; @@ -4610,8 +4611,7 @@ static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu) { u64 *old, *new; struct vcpu_svm *svm = to_svm(vcpu); - u32 apic_id_reg = kvm_lapic_get_reg(vcpu->arch.apic, APIC_ID); - u32 id = (apic_id_reg >> 24) & 0xff; + u32 id = kvm_xapic_id(vcpu->arch.apic); if (vcpu->vcpu_id == id) return 0; From 671ddc700fd08b94967b1e2a937020e30c838609 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Tue, 15 Oct 2019 10:44:05 -0700 Subject: [PATCH 527/572] KVM: nVMX: Don't leak L1 MMIO regions to L2 If the "virtualize APIC accesses" VM-execution control is set in the VMCS, the APIC virtualization hardware is triggered when a page walk in VMX non-root mode terminates at a PTE wherein the address of the 4k page frame matches the APIC-access address specified in the VMCS. On hardware, the APIC-access address may be any valid 4k-aligned physical address. KVM's nVMX implementation enforces the additional constraint that the APIC-access address specified in the vmcs12 must be backed by a "struct page" in L1. If not, L0 will simply clear the "virtualize APIC accesses" VM-execution control in the vmcs02. The problem with this approach is that the L1 guest has arranged the vmcs12 EPT tables--or shadow page tables, if the "enable EPT" VM-execution control is clear in the vmcs12--so that the L2 guest physical address(es)--or L2 guest linear address(es)--that reference the L2 APIC map to the APIC-access address specified in the vmcs12. Without the "virtualize APIC accesses" VM-execution control in the vmcs02, the APIC accesses in the L2 guest will directly access the APIC-access page in L1. When there is no mapping whatsoever for the APIC-access address in L1, the L2 VM just loses the intended APIC virtualization. However, when the APIC-access address is mapped to an MMIO region in L1, the L2 guest gets direct access to the L1 MMIO device. For example, if the APIC-access address specified in the vmcs12 is 0xfee00000, then L2 gets direct access to L1's APIC. Since this vmcs12 configuration is something that KVM cannot faithfully emulate, the appropriate response is to exit to userspace with KVM_INTERNAL_ERROR_EMULATION. Fixes: fe3ef05c7572 ("KVM: nVMX: Prepare vmcs02 from vmcs01 and vmcs12") Reported-by: Dan Cross Signed-off-by: Jim Mattson Reviewed-by: Peter Shier Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/vmx/nested.c | 64 ++++++++++++++++++--------------- arch/x86/kvm/vmx/nested.h | 13 ++++++- arch/x86/kvm/x86.c | 8 +++-- 4 files changed, 55 insertions(+), 32 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 50eb430b0ad8..24d6598dea29 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1189,7 +1189,7 @@ struct kvm_x86_ops { int (*set_nested_state)(struct kvm_vcpu *vcpu, struct kvm_nested_state __user *user_kvm_nested_state, struct kvm_nested_state *kvm_state); - void (*get_vmcs12_pages)(struct kvm_vcpu *vcpu); + bool (*get_vmcs12_pages)(struct kvm_vcpu *vcpu); int (*smi_allowed)(struct kvm_vcpu *vcpu); int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate); diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index e76eb4f07f6c..0e7c9301fe86 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2917,7 +2917,7 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12); -static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) +static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) { struct vmcs12 *vmcs12 = get_vmcs12(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -2937,19 +2937,18 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) vmx->nested.apic_access_page = NULL; } page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->apic_access_addr); - /* - * If translation failed, no matter: This feature asks - * to exit when accessing the given address, and if it - * can never be accessed, this feature won't do - * anything anyway. - */ if (!is_error_page(page)) { vmx->nested.apic_access_page = page; hpa = page_to_phys(vmx->nested.apic_access_page); vmcs_write64(APIC_ACCESS_ADDR, hpa); } else { - secondary_exec_controls_clearbit(vmx, - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); + pr_debug_ratelimited("%s: no backing 'struct page' for APIC-access address in vmcs12\n", + __func__); + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = + KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 0; + return false; } } @@ -2994,6 +2993,7 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) exec_controls_setbit(vmx, CPU_BASED_USE_MSR_BITMAPS); else exec_controls_clearbit(vmx, CPU_BASED_USE_MSR_BITMAPS); + return true; } /* @@ -3032,13 +3032,15 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, /* * If from_vmentry is false, this is being called from state restore (either RSM * or KVM_SET_NESTED_STATE). Otherwise it's called from vmlaunch/vmresume. -+ * -+ * Returns: -+ * 0 - success, i.e. proceed with actual VMEnter -+ * 1 - consistency check VMExit -+ * -1 - consistency check VMFail + * + * Returns: + * NVMX_ENTRY_SUCCESS: Entered VMX non-root mode + * NVMX_ENTRY_VMFAIL: Consistency check VMFail + * NVMX_ENTRY_VMEXIT: Consistency check VMExit + * NVMX_ENTRY_KVM_INTERNAL_ERROR: KVM internal error */ -int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) +enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, + bool from_vmentry) { struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs12 *vmcs12 = get_vmcs12(vcpu); @@ -3081,11 +3083,12 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) prepare_vmcs02_early(vmx, vmcs12); if (from_vmentry) { - nested_get_vmcs12_pages(vcpu); + if (unlikely(!nested_get_vmcs12_pages(vcpu))) + return NVMX_VMENTRY_KVM_INTERNAL_ERROR; if (nested_vmx_check_vmentry_hw(vcpu)) { vmx_switch_vmcs(vcpu, &vmx->vmcs01); - return -1; + return NVMX_VMENTRY_VMFAIL; } if (nested_vmx_check_guest_state(vcpu, vmcs12, &exit_qual)) @@ -3149,7 +3152,7 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) * returned as far as L1 is concerned. It will only return (and set * the success flag) when L2 exits (see nested_vmx_vmexit()). */ - return 0; + return NVMX_VMENTRY_SUCCESS; /* * A failed consistency check that leads to a VMExit during L1's @@ -3165,14 +3168,14 @@ vmentry_fail_vmexit: vmx_switch_vmcs(vcpu, &vmx->vmcs01); if (!from_vmentry) - return 1; + return NVMX_VMENTRY_VMEXIT; load_vmcs12_host_state(vcpu, vmcs12); vmcs12->vm_exit_reason = exit_reason | VMX_EXIT_REASONS_FAILED_VMENTRY; vmcs12->exit_qualification = exit_qual; if (enable_shadow_vmcs || vmx->nested.hv_evmcs) vmx->nested.need_vmcs12_to_shadow_sync = true; - return 1; + return NVMX_VMENTRY_VMEXIT; } /* @@ -3182,9 +3185,9 @@ vmentry_fail_vmexit: static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) { struct vmcs12 *vmcs12; + enum nvmx_vmentry_status status; struct vcpu_vmx *vmx = to_vmx(vcpu); u32 interrupt_shadow = vmx_get_interrupt_shadow(vcpu); - int ret; if (!nested_vmx_check_permission(vcpu)) return 1; @@ -3244,13 +3247,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) * the nested entry. */ vmx->nested.nested_run_pending = 1; - ret = nested_vmx_enter_non_root_mode(vcpu, true); - vmx->nested.nested_run_pending = !ret; - if (ret > 0) - return 1; - else if (ret) - return nested_vmx_failValid(vcpu, - VMXERR_ENTRY_INVALID_CONTROL_FIELD); + status = nested_vmx_enter_non_root_mode(vcpu, true); + if (unlikely(status != NVMX_VMENTRY_SUCCESS)) + goto vmentry_failed; /* Hide L1D cache contents from the nested guest. */ vmx->vcpu.arch.l1tf_flush_l1d = true; @@ -3281,6 +3280,15 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) return kvm_vcpu_halt(vcpu); } return 1; + +vmentry_failed: + vmx->nested.nested_run_pending = 0; + if (status == NVMX_VMENTRY_KVM_INTERNAL_ERROR) + return 0; + if (status == NVMX_VMENTRY_VMEXIT) + return 1; + WARN_ON_ONCE(status != NVMX_VMENTRY_VMFAIL); + return nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); } /* diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h index 187d39bf0bf1..6280f33e5fa6 100644 --- a/arch/x86/kvm/vmx/nested.h +++ b/arch/x86/kvm/vmx/nested.h @@ -6,6 +6,16 @@ #include "vmcs12.h" #include "vmx.h" +/* + * Status returned by nested_vmx_enter_non_root_mode(): + */ +enum nvmx_vmentry_status { + NVMX_VMENTRY_SUCCESS, /* Entered VMX non-root mode */ + NVMX_VMENTRY_VMFAIL, /* Consistency check VMFail */ + NVMX_VMENTRY_VMEXIT, /* Consistency check VMExit */ + NVMX_VMENTRY_KVM_INTERNAL_ERROR,/* KVM internal error */ +}; + void vmx_leave_nested(struct kvm_vcpu *vcpu); void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps, bool apicv); @@ -13,7 +23,8 @@ void nested_vmx_hardware_unsetup(void); __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)); void nested_vmx_vcpu_setup(void); void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu); -int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry); +enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, + bool from_vmentry); bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason); void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, u32 exit_intr_info, unsigned long exit_qualification); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5863c38108d9..ff395f812719 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7938,8 +7938,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) bool req_immediate_exit = false; if (kvm_request_pending(vcpu)) { - if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu)) - kvm_x86_ops->get_vmcs12_pages(vcpu); + if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu)) { + if (unlikely(!kvm_x86_ops->get_vmcs12_pages(vcpu))) { + r = 0; + goto out; + } + } if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) kvm_mmu_unload(vcpu); if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu)) From 4750c212174892d26645cdf5ad73fb0e9d594ed3 Mon Sep 17 00:00:00 2001 From: Pan Xiuli Date: Tue, 22 Oct 2019 14:44:02 -0500 Subject: [PATCH 528/572] ALSA: hda: Add Tigerlake/Jasperlake PCI ID Add HD Audio Device PCI ID for the Intel Tigerlake and Jasperlake platform. Signed-off-by: Pan Xiuli Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20191022194402.23178-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 240f4ca76391..a815bc811799 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2399,6 +2399,12 @@ static const struct pci_device_id azx_ids[] = { /* Icelake */ { PCI_DEVICE(0x8086, 0x34c8), .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, + /* Jasperlake */ + { PCI_DEVICE(0x8086, 0x38c8), + .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, + /* Tigerlake */ + { PCI_DEVICE(0x8086, 0xa0c8), + .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, /* Elkhart Lake */ { PCI_DEVICE(0x8086, 0x4b55), .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, From b19c23551be8de0d4e59fe6af70f10763e3cc595 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 18 Oct 2019 14:11:58 +0530 Subject: [PATCH 529/572] opp: Reinitialize the list_kref before adding the static OPPs again The list_kref reaches a count of 0 when all the static OPPs are removed, for example when dev_pm_opp_of_cpumask_remove_table() is called, though the actual OPP table may not get freed as it may still be referenced by other parts of the kernel, like from a call to dev_pm_opp_set_supported_hw(). And if we call dev_pm_opp_of_cpumask_add_table() again at this point, we must reinitialize the list_kref otherwise the kernel will hit a WARN() in kref infrastructure for incrementing a kref with value 0. Fixes: 11e1a1648298 ("opp: Don't decrement uninitialized list_kref") Reported-by: Dmitry Osipenko Tested-by: Dmitry Osipenko Signed-off-by: Viresh Kumar --- drivers/opp/of.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 6dc41faf74b5..1cbb58240b80 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -663,6 +663,13 @@ static int _of_add_opp_table_v2(struct device *dev, struct opp_table *opp_table) return 0; } + /* + * Re-initialize list_kref every time we add static OPPs to the OPP + * table as the reference count may be 0 after the last tie static OPPs + * were removed. + */ + kref_init(&opp_table->list_kref); + /* We have opp-table node now, iterate over it and add OPPs */ for_each_available_child_of_node(opp_table->np, np) { opp = _opp_add_static_v2(opp_table, dev, np); From 6370740e5f8ef12de7f9a9bf48a0393d202cd827 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 21 Oct 2019 09:29:20 -0700 Subject: [PATCH 530/572] fs/dax: Fix pmd vs pte conflict detection Users reported a v5.3 performance regression and inability to establish huge page mappings. A revised version of the ndctl "dax.sh" huge page unit test identifies commit 23c84eb78375 "dax: Fix missed wakeup with PMD faults" as the source. Update get_unlocked_entry() to check for NULL entries before checking the entry order, otherwise NULL is misinterpreted as a present pte conflict. The 'order' check needs to happen before the locked check as an unlocked entry at the wrong order must fallback to lookup the correct order. Reported-by: Jeff Smits Reported-by: Doug Nelson Cc: Fixes: 23c84eb78375 ("dax: Fix missed wakeup with PMD faults") Reviewed-by: Jan Kara Cc: Jeff Moyer Cc: Matthew Wilcox (Oracle) Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/157167532455.3945484.11971474077040503994.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- fs/dax.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index 6bf81f931de3..2cc43cd914eb 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -220,10 +220,11 @@ static void *get_unlocked_entry(struct xa_state *xas, unsigned int order) for (;;) { entry = xas_find_conflict(xas); + if (!entry || WARN_ON_ONCE(!xa_is_value(entry))) + return entry; if (dax_entry_order(entry) < order) return XA_RETRY_ENTRY; - if (!entry || WARN_ON_ONCE(!xa_is_value(entry)) || - !dax_is_locked(entry)) + if (!dax_is_locked(entry)) return entry; wq = dax_entry_waitqueue(xas, entry, &ewait.key); From 09684950050be09ed6cd591e6fbf0c71d3473237 Mon Sep 17 00:00:00 2001 From: Jessica Yu Date: Mon, 21 Oct 2019 15:34:26 +0200 Subject: [PATCH 531/572] scripts/nsdeps: use alternative sed delimiter When doing an out of tree build with O=, the nsdeps script constructs the absolute pathname of the module source file so that it can insert MODULE_IMPORT_NS statements in the right place. However, ${srctree} contains an unescaped path to the source tree, which, when used in a sed substitution, makes sed complain: ++ sed 's/[^ ]* *//home/jeyu/jeyu-linux\/&/g' sed: -e expression #1, char 12: unknown option to `s' The sed substitution command 's' ends prematurely with the forward slashes in the pathname, and sed errors out when it encounters the 'h', which is an invalid sed substitution option. To avoid escaping forward slashes ${srctree}, we can use '|' as an alternative delimiter for sed instead to avoid this error. Reviewed-by: Masahiro Yamada Reviewed-by: Matthias Maennich Tested-by: Matthias Maennich Signed-off-by: Jessica Yu --- scripts/nsdeps | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/nsdeps b/scripts/nsdeps index 3754dac13b31..dda6fbac016e 100644 --- a/scripts/nsdeps +++ b/scripts/nsdeps @@ -33,7 +33,7 @@ generate_deps() { if [ ! -f "$ns_deps_file" ]; then return; fi local mod_source_files=`cat $mod_file | sed -n 1p \ | sed -e 's/\.o/\.c/g' \ - | sed "s/[^ ]* */${srctree}\/&/g"` + | sed "s|[^ ]* *|${srctree}/&|g"` for ns in `cat $ns_deps_file`; do echo "Adding namespace $ns to module $mod_name (if needed)." generate_deps_for_ns $ns $mod_source_files From 1638b8f096ca165965189b9626564c933c79fe63 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 21 Oct 2019 12:07:15 +0200 Subject: [PATCH 532/572] lib/vdso: Make clock_getres() POSIX compliant again A recent commit removed the NULL pointer check from the clock_getres() implementation causing a test case to fault. POSIX requires an explicit NULL pointer check for clock_getres() aside of the validity check of the clock_id argument for obscure reasons. Add it back for both 32bit and 64bit. Note, this is only a partial revert of the offending commit which does not bring back the broken fallback invocation in the the 32bit compat implementations of clock_getres() and clock_gettime(). Fixes: a9446a906f52 ("lib/vdso/32: Remove inconsistent NULL pointer checks") Reported-by: Andreas Schwab Signed-off-by: Thomas Gleixner Tested-by: Christophe Leroy Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1910211202260.1904@nanos.tec.linutronix.de --- lib/vdso/gettimeofday.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index e630e7ff57f1..45f57fd2db64 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -214,9 +214,10 @@ int __cvdso_clock_getres_common(clockid_t clock, struct __kernel_timespec *res) return -1; } - res->tv_sec = 0; - res->tv_nsec = ns; - + if (likely(res)) { + res->tv_sec = 0; + res->tv_nsec = ns; + } return 0; } @@ -245,7 +246,7 @@ __cvdso_clock_getres_time32(clockid_t clock, struct old_timespec32 *res) ret = clock_getres_fallback(clock, &ts); #endif - if (likely(!ret)) { + if (likely(!ret && res)) { res->tv_sec = ts.tv_sec; res->tv_nsec = ts.tv_nsec; } From 086ee46b08634a999bcd1707eabe3b0dc1806674 Mon Sep 17 00:00:00 2001 From: "Ben Dooks (Codethink)" Date: Tue, 22 Oct 2019 14:12:26 +0100 Subject: [PATCH 533/572] timers/sched_clock: Include local timekeeping.h for missing declarations Include the timekeeping.h header to get the declaration of the sched_clock_{suspend,resume} functions. Fixes the following sparse warnings: kernel/time/sched_clock.c:275:5: warning: symbol 'sched_clock_suspend' was not declared. Should it be static? kernel/time/sched_clock.c:286:6: warning: symbol 'sched_clock_resume' was not declared. Should it be static? Signed-off-by: Ben Dooks (Codethink) Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20191022131226.11465-1-ben.dooks@codethink.co.uk --- kernel/time/sched_clock.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index 142b07619918..dbd69052eaa6 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -17,6 +17,8 @@ #include #include +#include "timekeeping.h" + /** * struct clock_read_data - data required to read from sched_clock() * From 7f2cbcbcafbca5360efbd175b3320852b8f7c637 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Mon, 21 Oct 2019 15:44:12 +0800 Subject: [PATCH 534/572] posix-cpu-timers: Fix two trivial comments Recent changes modified the function arguments of thread_group_sample_cputime() and task_cputimers_expired(), but forgot to update the comments. Fix it up. [ tglx: Changed the argument name of task_cputimers_expired() as the pointer points to an array of samples. ] Fixes: b7be4ef1365d ("posix-cpu-timers: Switch thread group sampling to array") Fixes: 001f7971433a ("posix-cpu-timers: Make expiry checks array based") Signed-off-by: Yi Wang Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/1571643852-21848-1-git-send-email-wang.yi59@zte.com.cn --- kernel/time/posix-cpu-timers.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 92a431981b1c..42d512fcfda2 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -266,7 +266,7 @@ static void update_gt_cputime(struct task_cputime_atomic *cputime_atomic, /** * thread_group_sample_cputime - Sample cputime for a given task * @tsk: Task for which cputime needs to be started - * @iimes: Storage for time samples + * @samples: Storage for time samples * * Called from sys_getitimer() to calculate the expiry time of an active * timer. That means group cputime accounting is already active. Called @@ -1038,12 +1038,12 @@ unlock: * member of @pct->bases[CLK].nextevt. False otherwise */ static inline bool -task_cputimers_expired(const u64 *sample, struct posix_cputimers *pct) +task_cputimers_expired(const u64 *samples, struct posix_cputimers *pct) { int i; for (i = 0; i < CPUCLOCK_MAX; i++) { - if (sample[i] >= pct->bases[i].nextevt) + if (samples[i] >= pct->bases[i].nextevt) return true; } return false; From 0d660ffbca1a5f16f6db8f6ccbea5c207ec7e361 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 18 Oct 2019 10:18:35 -0700 Subject: [PATCH 535/572] MAINTAINERS: Remove Gregory and Brian for ARCH_BRCMSTB The last time Gregory and Brian did a review was sometime around 2015, since then, they have not been active for ARCH_BRCMSTB changes. Following the position of other maintainers and Harald Welte's position here: [1] http://laforge.gnumonks.org/blog/20180307-mchardy-gpl/ remove both of them. Acked-by: Brian Norris Signed-off-by: Florian Fainelli --- MAINTAINERS | 2 -- 1 file changed, 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 14a939a9e1e7..e3f50d8ccac0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3242,8 +3242,6 @@ S: Maintained F: drivers/usb/gadget/udc/bcm63xx_udc.* BROADCOM BCM7XXX ARM ARCHITECTURE -M: Brian Norris -M: Gregory Fong M: Florian Fainelli M: bcm-kernel-feedback-list@broadcom.com L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) From 9af865d95bd730c1d1035acd5dd6df105da98d0c Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 24 Sep 2019 14:37:56 -0500 Subject: [PATCH 536/572] dt-bindings: riscv: Fix CPU schema errors Fix the errors in the RiscV CPU DT schema: Documentation/devicetree/bindings/riscv/cpus.example.dt.yaml: cpu@0: 'timebase-frequency' is a required property Documentation/devicetree/bindings/riscv/cpus.example.dt.yaml: cpu@1: 'timebase-frequency' is a required property Documentation/devicetree/bindings/riscv/cpus.example.dt.yaml: cpu@0: compatible:0: 'riscv' is not one of ['sifive,rocket0', 'sifive,e5', 'sifive,e51', 'sifive,u54-mc', 'sifive,u54', 'sifive,u5'] Documentation/devicetree/bindings/riscv/cpus.example.dt.yaml: cpu@0: compatible: ['riscv'] is too short Documentation/devicetree/bindings/riscv/cpus.example.dt.yaml: cpu@0: 'timebase-frequency' is a required property The DT spec allows for 'timebase-frequency' to be in 'cpu' or 'cpus' node and RiscV requires it in /cpus node, so make it disallowed in cpu nodes. Fixes: 4fd669a8c487 ("dt-bindings: riscv: convert cpu binding to json-schema") Cc: Palmer Dabbelt Cc: Albert Ou Cc: linux-riscv@lists.infradead.org Acked-by: Paul Walmsley Signed-off-by: Rob Herring --- .../devicetree/bindings/riscv/cpus.yaml | 29 +++++++++---------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/Documentation/devicetree/bindings/riscv/cpus.yaml b/Documentation/devicetree/bindings/riscv/cpus.yaml index b261a3015f84..04819ad379c2 100644 --- a/Documentation/devicetree/bindings/riscv/cpus.yaml +++ b/Documentation/devicetree/bindings/riscv/cpus.yaml @@ -24,15 +24,17 @@ description: | properties: compatible: - items: - - enum: - - sifive,rocket0 - - sifive,e5 - - sifive,e51 - - sifive,u54-mc - - sifive,u54 - - sifive,u5 - - const: riscv + oneOf: + - items: + - enum: + - sifive,rocket0 + - sifive,e5 + - sifive,e51 + - sifive,u54-mc + - sifive,u54 + - sifive,u5 + - const: riscv + - const: riscv # Simulator only description: Identifies that the hart uses the RISC-V instruction set and identifies the type of the hart. @@ -66,12 +68,8 @@ properties: insensitive, letters in the riscv,isa string must be all lowercase to simplify parsing. - timebase-frequency: - type: integer - minimum: 1 - description: - Specifies the clock frequency of the system timer in Hz. - This value is common to all harts on a single system image. + # RISC-V requires 'timebase-frequency' in /cpus, so disallow it here + timebase-frequency: false interrupt-controller: type: object @@ -93,7 +91,6 @@ properties: required: - riscv,isa - - timebase-frequency - interrupt-controller examples: From e13de8fe0d6a51341671bbe384826d527afe8d44 Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Fri, 4 Oct 2019 13:58:43 -0500 Subject: [PATCH 537/572] of: unittest: fix memory leak in unittest_data_add In unittest_data_add, a copy buffer is created via kmemdup. This buffer is leaked if of_fdt_unflatten_tree fails. The release for the unittest_data buffer is added. Fixes: b951f9dc7f25 ("Enabling OF selftest to run without machine's devicetree") Signed-off-by: Navid Emamdoost Reviewed-by: Frank Rowand Signed-off-by: Rob Herring --- drivers/of/unittest.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 480a21e2ed39..92e895d86458 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -1207,6 +1207,7 @@ static int __init unittest_data_add(void) of_fdt_unflatten_tree(unittest_data, NULL, &unittest_data_node); if (!unittest_data_node) { pr_warn("%s: No tree to attach; not running tests\n", __func__); + kfree(unittest_data); return -ENODATA; } From 5dba51754b04a941a1064f584e7a7f607df3f9bc Mon Sep 17 00:00:00 2001 From: Chris Goldsworthy Date: Sat, 19 Oct 2019 18:57:24 -0700 Subject: [PATCH 538/572] of: reserved_mem: add missing of_node_put() for proper ref-counting Commit d698a388146c ("of: reserved-memory: ignore disabled memory-region nodes") added an early return in of_reserved_mem_device_init_by_idx(), but didn't call of_node_put() on a device_node whose ref-count was incremented in the call to of_parse_phandle() preceding the early exit. Fixes: d698a388146c ("of: reserved-memory: ignore disabled memory-region nodes") Signed-off-by: Chris Goldsworthy Cc: stable@vger.kernel.org Reviewed-by: Bjorn Andersson Signed-off-by: Rob Herring --- drivers/of/of_reserved_mem.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index 7989703b883c..6bd610ee2cd7 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -324,8 +324,10 @@ int of_reserved_mem_device_init_by_idx(struct device *dev, if (!target) return -ENODEV; - if (!of_device_is_available(target)) + if (!of_device_is_available(target)) { + of_node_put(target); return 0; + } rmem = __find_rmem(target); of_node_put(target); From 90db7b220c9aea91948e82d51b99514f9398f9b5 Mon Sep 17 00:00:00 2001 From: David Abdurachmanov Date: Tue, 22 Oct 2019 19:21:35 +0300 Subject: [PATCH 539/572] riscv: fix fs/proc/kcore.c compilation with sparsemem enabled Failed to compile Fedora/RISCV kernel (5.4-rc3+) with sparsemem enabled: fs/proc/kcore.c: In function 'read_kcore': fs/proc/kcore.c:510:8: error: implicit declaration of function 'kern_addr_valid'; did you mean 'virt_addr_valid'? [-Werror=implicit-function-declaration] 510 | if (kern_addr_valid(start)) { | ^~~~~~~~~~~~~~~ | virt_addr_valid Looking at other architectures I don't see kern_addr_valid being guarded by CONFIG_FLATMEM. Fixes: d95f1a542c3d ("RISC-V: Implement sparsemem") Signed-off-by: David Abdurachmanov Tested-by: David Abdurachmanov Reviewed-by: Logan Gunthorpe Signed-off-by: Paul Walmsley --- arch/riscv/include/asm/pgtable.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 42292d99cc74..7110879358b8 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -428,9 +428,7 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -#ifdef CONFIG_FLATMEM #define kern_addr_valid(addr) (1) /* FIXME */ -#endif extern void *dtb_early_va; extern void setup_bootmem(void); From 62103ece52360992a6799e9ec85628166a8477e8 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 23 Oct 2019 11:23:01 +0800 Subject: [PATCH 540/572] riscv: Fix implicit declaration of 'page_to_section' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With CONFIG_SPARSEMEM and !CONFIG_SPARSEMEM_VMEMMAP, arch/riscv/include/asm/pgtable.h: In function ‘mk_pte’: include/asm-generic/memory_model.h:64:14: error: implicit declaration of function ‘page_to_section’; did you mean ‘present_section’? [-Werror=implicit-function-declaration] int __sec = page_to_section(__pg); \ ^~~~~~~~~~~~~~~ Fixed by changing mk_pte() from inline function to macro. Cc: Logan Gunthorpe Fixes: d95f1a542c3d ("RISC-V: Implement sparsemem") Signed-off-by: Kefeng Wang [paul.walmsley@sifive.com: fixed checkpatch errors] Signed-off-by: Paul Walmsley --- arch/riscv/include/asm/pgtable.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 7110879358b8..0352f20c29f4 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -184,10 +184,7 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot) return __pte((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot)); } -static inline pte_t mk_pte(struct page *page, pgprot_t prot) -{ - return pfn_pte(page_to_pfn(page), prot); -} +#define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot) #define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) From 9fe57d8c575d7dc012bbfcf5698048304206a99d Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 23 Oct 2019 11:23:02 +0800 Subject: [PATCH 541/572] riscv: Fix undefined reference to vmemmap_populate_basepages Using CONFIG_SPARSEMEM_VMEMMAP instead of CONFIG_SPARSEMEM to fix following build issue. riscv64-linux-ld: arch/riscv/mm/init.o: in function 'vmemmap_populate': init.c:(.meminit.text+0x8): undefined reference to 'vmemmap_populate_basepages' Cc: Logan Gunthorpe Fixes: d95f1a542c3d ("RISC-V: Implement sparsemem") Signed-off-by: Kefeng Wang Reviewed-by: Logan Gunthorpe Signed-off-by: Paul Walmsley --- arch/riscv/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 83f7d12042fb..a1ca6200c31f 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -458,7 +458,7 @@ void __init paging_init(void) zone_sizes_init(); } -#ifdef CONFIG_SPARSEMEM +#ifdef CONFIG_SPARSEMEM_VMEMMAP int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap) { From a6d9e2672609653146cd9f4063f3d30619828787 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 17 Oct 2019 19:37:29 +0200 Subject: [PATCH 542/572] riscv: cleanup Remove various not required ifdefs and externs. Signed-off-by: Christoph Hellwig Reviewed-by: Anup Patel Signed-off-by: Paul Walmsley --- arch/riscv/include/asm/bug.h | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/arch/riscv/include/asm/bug.h b/arch/riscv/include/asm/bug.h index 07ceee8b1747..75604fec1b1b 100644 --- a/arch/riscv/include/asm/bug.h +++ b/arch/riscv/include/asm/bug.h @@ -12,7 +12,6 @@ #include -#ifdef CONFIG_GENERIC_BUG #define __INSN_LENGTH_MASK _UL(0x3) #define __INSN_LENGTH_32 _UL(0x3) #define __COMPRESSED_INSN_MASK _UL(0xffff) @@ -20,7 +19,6 @@ #define __BUG_INSN_32 _UL(0x00100073) /* ebreak */ #define __BUG_INSN_16 _UL(0x9002) /* c.ebreak */ -#ifndef __ASSEMBLY__ typedef u32 bug_insn_t; #ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS @@ -43,6 +41,7 @@ typedef u32 bug_insn_t; RISCV_SHORT " %2" #endif +#ifdef CONFIG_GENERIC_BUG #define __BUG_FLAGS(flags) \ do { \ __asm__ __volatile__ ( \ @@ -58,14 +57,10 @@ do { \ "i" (flags), \ "i" (sizeof(struct bug_entry))); \ } while (0) - -#endif /* !__ASSEMBLY__ */ #else /* CONFIG_GENERIC_BUG */ -#ifndef __ASSEMBLY__ #define __BUG_FLAGS(flags) do { \ __asm__ __volatile__ ("ebreak\n"); \ } while (0) -#endif /* !__ASSEMBLY__ */ #endif /* CONFIG_GENERIC_BUG */ #define BUG() do { \ @@ -79,15 +74,10 @@ do { \ #include -#ifndef __ASSEMBLY__ - struct pt_regs; struct task_struct; -extern void die(struct pt_regs *regs, const char *str); -extern void do_trap(struct pt_regs *regs, int signo, int code, - unsigned long addr); - -#endif /* !__ASSEMBLY__ */ +void die(struct pt_regs *regs, const char *str); +void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr); #endif /* _ASM_RISCV_BUG_H */ From d4267a57d384334c4b4937cac292bfb150439fc8 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Thu, 24 Oct 2019 10:05:10 +0800 Subject: [PATCH 543/572] MAINTAINERS: Update the Spreadtrum SoC maintainer Change my email address, and add more Spreadtrum SC27xx series PMIC drivers to maintain. Link: https://lore.kernel.org/r/a48483d13243450ecf3b777d49e741b6367f2c6b.1571881956.git.baolin.wang@linaro.org Signed-off-by: Baolin Wang Signed-off-by: Olof Johansson --- MAINTAINERS | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 296de2b51c83..baac17ad16d6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2327,11 +2327,13 @@ F: drivers/edac/altera_edac. ARM/SPREADTRUM SoC SUPPORT M: Orson Zhai -M: Baolin Wang +M: Baolin Wang M: Chunyan Zhang S: Maintained F: arch/arm64/boot/dts/sprd N: sprd +N: sc27xx +N: sc2731 ARM/STI ARCHITECTURE M: Patrice Chotard From e4f5cb1a9b27c0f94ef4f5a0178a3fde2d3d0e9e Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 22 Oct 2019 21:11:00 +0200 Subject: [PATCH 544/572] MIPS: bmips: mark exception vectors as char arrays The vectors span more than one byte, so mark them as arrays. Fixes the following build error when building when using GCC 8.3: In file included from ./include/linux/string.h:19, from ./include/linux/bitmap.h:9, from ./include/linux/cpumask.h:12, from ./arch/mips/include/asm/processor.h:15, from ./arch/mips/include/asm/thread_info.h:16, from ./include/linux/thread_info.h:38, from ./include/asm-generic/preempt.h:5, from ./arch/mips/include/generated/asm/preempt.h:1, from ./include/linux/preempt.h:81, from ./include/linux/spinlock.h:51, from ./include/linux/mmzone.h:8, from ./include/linux/bootmem.h:8, from arch/mips/bcm63xx/prom.c:10: arch/mips/bcm63xx/prom.c: In function 'prom_init': ./arch/mips/include/asm/string.h:162:11: error: '__builtin_memcpy' forming offset [2, 32] is out of the bounds [0, 1] of object 'bmips_smp_movevec' with type 'char' [-Werror=array-bounds] __ret = __builtin_memcpy((dst), (src), __len); \ ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arch/mips/bcm63xx/prom.c:97:3: note: in expansion of macro 'memcpy' memcpy((void *)0xa0000200, &bmips_smp_movevec, 0x20); ^~~~~~ In file included from arch/mips/bcm63xx/prom.c:14: ./arch/mips/include/asm/bmips.h:80:13: note: 'bmips_smp_movevec' declared here extern char bmips_smp_movevec; Fixes: 18a1eef92dcd ("MIPS: BMIPS: Introduce bmips.h") Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Ralf Baechle Cc: James Hogan --- arch/mips/bcm63xx/prom.c | 2 +- arch/mips/include/asm/bmips.h | 10 +++++----- arch/mips/kernel/smp-bmips.c | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/mips/bcm63xx/prom.c b/arch/mips/bcm63xx/prom.c index 77a836e661c9..df69eaa453a1 100644 --- a/arch/mips/bcm63xx/prom.c +++ b/arch/mips/bcm63xx/prom.c @@ -84,7 +84,7 @@ void __init prom_init(void) * Here we will start up CPU1 in the background and ask it to * reconfigure itself then go back to sleep. */ - memcpy((void *)0xa0000200, &bmips_smp_movevec, 0x20); + memcpy((void *)0xa0000200, bmips_smp_movevec, 0x20); __sync(); set_c0_cause(C_SW0); cpumask_set_cpu(1, &bmips_booted_mask); diff --git a/arch/mips/include/asm/bmips.h b/arch/mips/include/asm/bmips.h index bf6a8afd7ad2..581a6a3c66e4 100644 --- a/arch/mips/include/asm/bmips.h +++ b/arch/mips/include/asm/bmips.h @@ -75,11 +75,11 @@ static inline int register_bmips_smp_ops(void) #endif } -extern char bmips_reset_nmi_vec; -extern char bmips_reset_nmi_vec_end; -extern char bmips_smp_movevec; -extern char bmips_smp_int_vec; -extern char bmips_smp_int_vec_end; +extern char bmips_reset_nmi_vec[]; +extern char bmips_reset_nmi_vec_end[]; +extern char bmips_smp_movevec[]; +extern char bmips_smp_int_vec[]; +extern char bmips_smp_int_vec_end[]; extern int bmips_smp_enabled; extern int bmips_cpu_offset; diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index 76fae9b79f13..712c15de6ab9 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -464,10 +464,10 @@ static void bmips_wr_vec(unsigned long dst, char *start, char *end) static inline void bmips_nmi_handler_setup(void) { - bmips_wr_vec(BMIPS_NMI_RESET_VEC, &bmips_reset_nmi_vec, - &bmips_reset_nmi_vec_end); - bmips_wr_vec(BMIPS_WARM_RESTART_VEC, &bmips_smp_int_vec, - &bmips_smp_int_vec_end); + bmips_wr_vec(BMIPS_NMI_RESET_VEC, bmips_reset_nmi_vec, + bmips_reset_nmi_vec_end); + bmips_wr_vec(BMIPS_WARM_RESTART_VEC, bmips_smp_int_vec, + bmips_smp_int_vec_end); } struct reset_vec_info { From bc808bced39f4e4b626c5ea8c63d5e41fce7205a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 22 Oct 2019 13:14:37 -0600 Subject: [PATCH 545/572] io_uring: revert "io_uring: optimize submit_and_wait API" There are cases where it isn't always safe to block for submission, even if the caller asked to wait for events as well. Revert the previous optimization of doing that. This reverts two commits: bf7ec93c644cb c576666863b78 Fixes: c576666863b78 ("io_uring: optimize submit_and_wait API") Signed-off-by: Jens Axboe --- fs/io_uring.c | 63 ++++++++++++++------------------------------------- 1 file changed, 17 insertions(+), 46 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 67dbe0201e0d..08c2c428e212 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2292,11 +2292,11 @@ static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s, } static int __io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, - struct sqe_submit *s, bool force_nonblock) + struct sqe_submit *s) { int ret; - ret = __io_submit_sqe(ctx, req, s, force_nonblock); + ret = __io_submit_sqe(ctx, req, s, true); /* * We async punt it if the file wasn't marked NOWAIT, or if the file @@ -2343,7 +2343,7 @@ static int __io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, } static int io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, - struct sqe_submit *s, bool force_nonblock) + struct sqe_submit *s) { int ret; @@ -2356,18 +2356,17 @@ static int io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, return 0; } - return __io_queue_sqe(ctx, req, s, force_nonblock); + return __io_queue_sqe(ctx, req, s); } static int io_queue_link_head(struct io_ring_ctx *ctx, struct io_kiocb *req, - struct sqe_submit *s, struct io_kiocb *shadow, - bool force_nonblock) + struct sqe_submit *s, struct io_kiocb *shadow) { int ret; int need_submit = false; if (!shadow) - return io_queue_sqe(ctx, req, s, force_nonblock); + return io_queue_sqe(ctx, req, s); /* * Mark the first IO in link list as DRAIN, let all the following @@ -2396,7 +2395,7 @@ static int io_queue_link_head(struct io_ring_ctx *ctx, struct io_kiocb *req, spin_unlock_irq(&ctx->completion_lock); if (need_submit) - return __io_queue_sqe(ctx, req, s, force_nonblock); + return __io_queue_sqe(ctx, req, s); return 0; } @@ -2404,8 +2403,7 @@ static int io_queue_link_head(struct io_ring_ctx *ctx, struct io_kiocb *req, #define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK) static void io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s, - struct io_submit_state *state, struct io_kiocb **link, - bool force_nonblock) + struct io_submit_state *state, struct io_kiocb **link) { struct io_uring_sqe *sqe_copy; struct io_kiocb *req; @@ -2458,7 +2456,7 @@ err: INIT_LIST_HEAD(&req->link_list); *link = req; } else { - io_queue_sqe(ctx, req, s, force_nonblock); + io_queue_sqe(ctx, req, s); } } @@ -2562,8 +2560,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes, * that's the end of the chain. Submit the previous link. */ if (!prev_was_link && link) { - io_queue_link_head(ctx, link, &link->submit, shadow_req, - true); + io_queue_link_head(ctx, link, &link->submit, shadow_req); link = NULL; shadow_req = NULL; } @@ -2588,13 +2585,13 @@ out: sqes[i].has_user = has_user; sqes[i].needs_lock = true; sqes[i].needs_fixed_file = true; - io_submit_sqe(ctx, &sqes[i], statep, &link, true); + io_submit_sqe(ctx, &sqes[i], statep, &link); submitted++; } } if (link) - io_queue_link_head(ctx, link, &link->submit, shadow_req, true); + io_queue_link_head(ctx, link, &link->submit, shadow_req); if (statep) io_submit_state_end(&state); @@ -2726,8 +2723,7 @@ static int io_sq_thread(void *data) return 0; } -static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit, - bool block_for_last) +static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit) { struct io_submit_state state, *statep = NULL; struct io_kiocb *link = NULL; @@ -2741,7 +2737,6 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit, } for (i = 0; i < to_submit; i++) { - bool force_nonblock = true; struct sqe_submit s; if (!io_get_sqring(ctx, &s)) @@ -2752,8 +2747,7 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit, * that's the end of the chain. Submit the previous link. */ if (!prev_was_link && link) { - io_queue_link_head(ctx, link, &link->submit, shadow_req, - force_nonblock); + io_queue_link_head(ctx, link, &link->submit, shadow_req); link = NULL; shadow_req = NULL; } @@ -2775,24 +2769,12 @@ out: s.needs_lock = false; s.needs_fixed_file = false; submit++; - - /* - * The caller will block for events after submit, submit the - * last IO non-blocking. This is either the only IO it's - * submitting, or it already submitted the previous ones. This - * improves performance by avoiding an async punt that we don't - * need to do. - */ - if (block_for_last && submit == to_submit) - force_nonblock = false; - - io_submit_sqe(ctx, &s, statep, &link, force_nonblock); + io_submit_sqe(ctx, &s, statep, &link); } io_commit_sqring(ctx); if (link) - io_queue_link_head(ctx, link, &link->submit, shadow_req, - !block_for_last); + io_queue_link_head(ctx, link, &link->submit, shadow_req); if (statep) io_submit_state_end(statep); @@ -3636,21 +3618,10 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, wake_up(&ctx->sqo_wait); submitted = to_submit; } else if (to_submit) { - bool block_for_last = false; - to_submit = min(to_submit, ctx->sq_entries); - /* - * Allow last submission to block in a series, IFF the caller - * asked to wait for events and we don't currently have - * enough. This potentially avoids an async punt. - */ - if (to_submit == min_complete && - io_cqring_events(ctx->rings) < min_complete) - block_for_last = true; - mutex_lock(&ctx->uring_lock); - submitted = io_ring_submit(ctx, to_submit, block_for_last); + submitted = io_ring_submit(ctx, to_submit); mutex_unlock(&ctx->uring_lock); } if (flags & IORING_ENTER_GETEVENTS) { From ef03681ae8df770745978148a7fb84796ae99cba Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Wed, 23 Oct 2019 15:10:08 +0800 Subject: [PATCH 546/572] io_uring : correct timeout req sequence when waiting timeout The sequence number of reqs on the timeout_list before the timeout req should be adjusted in io_timeout_fn(), because the current timeout req will consumes a slot in the cq_ring and cq_tail pointer will be increased, otherwise other timeout reqs may return in advance without waiting for enough wait_nr. Signed-off-by: zhangyi (F) Signed-off-by: Jens Axboe --- fs/io_uring.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 08c2c428e212..b65a68582a7c 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1877,7 +1877,7 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe) static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer) { struct io_ring_ctx *ctx; - struct io_kiocb *req; + struct io_kiocb *req, *prev; unsigned long flags; req = container_of(timer, struct io_kiocb, timeout.timer); @@ -1885,6 +1885,15 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer) atomic_inc(&ctx->cq_timeouts); spin_lock_irqsave(&ctx->completion_lock, flags); + /* + * Adjust the reqs sequence before the current one because it + * will consume a slot in the cq_ring and the the cq_tail pointer + * will be increased, otherwise other timeout reqs may return in + * advance without waiting for enough wait_nr. + */ + prev = req; + list_for_each_entry_continue_reverse(prev, &ctx->timeout_list, list) + prev->sequence++; list_del(&req->list); io_cqring_fill_event(ctx, req->user_data, -ETIME); From a1f58ba46f794b1168d1107befcf3d4b9f9fd453 Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Wed, 23 Oct 2019 15:10:09 +0800 Subject: [PATCH 547/572] io_uring: correct timeout req sequence when inserting a new entry The sequence number of the timeout req (req->sequence) indicate the expected completion request. Because of each timeout req consume a sequence number, so the sequence of each timeout req on the timeout list shouldn't be the same. But now, we may get the same number (also incorrect) if we insert a new entry before the last one, such as submit such two timeout reqs on a new ring instance below. req->sequence req_1 (count = 2): 2 req_2 (count = 1): 2 Then, if we submit a nop req, req_2 will still timeout even the nop req finished. This patch fix this problem by adjust the sequence number of each reordered reqs when inserting a new entry. Signed-off-by: zhangyi (F) Signed-off-by: Jens Axboe --- fs/io_uring.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index b65a68582a7c..1b46c72f8975 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1912,6 +1912,7 @@ static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe) struct io_ring_ctx *ctx = req->ctx; struct list_head *entry; struct timespec64 ts; + unsigned span = 0; if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; @@ -1960,9 +1961,17 @@ static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (ctx->cached_sq_head < nxt_sq_head) tmp += UINT_MAX; - if (tmp >= tmp_nxt) + if (tmp > tmp_nxt) break; + + /* + * Sequence of reqs after the insert one and itself should + * be adjusted because each timeout req consumes a slot. + */ + span++; + nxt->sequence++; } + req->sequence -= span; list_add(&req->list, entry); spin_unlock_irq(&ctx->completion_lock); From b42aa3fd5957e4daf4b69129e5ce752a2a53e7d6 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 18 Oct 2019 15:38:48 -0700 Subject: [PATCH 548/572] MIPS: tlbex: Fix build_restore_pagemask KScratch restore build_restore_pagemask() will restore the value of register $1/$at when its restore_scratch argument is non-zero, and aims to do so by filling a branch delay slot. Commit 0b24cae4d535 ("MIPS: Add missing EHB in mtc0 -> mfc0 sequence.") added an EHB instruction (Execution Hazard Barrier) prior to restoring $1 from a KScratch register, in order to resolve a hazard that can result in stale values of the KScratch register being observed. In particular, P-class CPUs from MIPS with out of order execution pipelines such as the P5600 & P6600 are affected. Unfortunately this EHB instruction was inserted in the branch delay slot causing the MFC0 instruction which performs the restoration to no longer execute along with the branch. The result is that the $1 register isn't actually restored, ie. the TLB refill exception handler clobbers it - which is exactly the problem the EHB is meant to avoid for the P-class CPUs. Similarly build_get_pgd_vmalloc() will restore the value of $1/$at when its mode argument equals refill_scratch, and suffers from the same problem. Fix this by in both cases moving the EHB earlier in the emitted code. There's no reason it needs to immediately precede the MFC0 - it simply needs to be between the MTC0 & MFC0. This bug only affects Cavium Octeon systems which use build_fast_tlb_refill_handler(). Signed-off-by: Paul Burton Fixes: 0b24cae4d535 ("MIPS: Add missing EHB in mtc0 -> mfc0 sequence.") Cc: Dmitry Korotin Cc: stable@vger.kernel.org # v3.15+ Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- arch/mips/mm/tlbex.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index e01cb33bfa1a..41bb91f05688 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -653,6 +653,13 @@ static void build_restore_pagemask(u32 **p, struct uasm_reloc **r, int restore_scratch) { if (restore_scratch) { + /* + * Ensure the MFC0 below observes the value written to the + * KScratch register by the prior MTC0. + */ + if (scratch_reg >= 0) + uasm_i_ehb(p); + /* Reset default page size */ if (PM_DEFAULT_MASK >> 16) { uasm_i_lui(p, tmp, PM_DEFAULT_MASK >> 16); @@ -667,12 +674,10 @@ static void build_restore_pagemask(u32 **p, struct uasm_reloc **r, uasm_i_mtc0(p, 0, C0_PAGEMASK); uasm_il_b(p, r, lid); } - if (scratch_reg >= 0) { - uasm_i_ehb(p); + if (scratch_reg >= 0) UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg); - } else { + else UASM_i_LW(p, 1, scratchpad_offset(0), 0); - } } else { /* Reset default page size */ if (PM_DEFAULT_MASK >> 16) { @@ -921,6 +926,10 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, } if (mode != not_refill && check_for_high_segbits) { uasm_l_large_segbits_fault(l, *p); + + if (mode == refill_scratch && scratch_reg >= 0) + uasm_i_ehb(p); + /* * We get here if we are an xsseg address, or if we are * an xuseg address above (PGDIR_SHIFT+PGDIR_BITS) boundary. @@ -939,12 +948,10 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, uasm_i_jr(p, ptr); if (mode == refill_scratch) { - if (scratch_reg >= 0) { - uasm_i_ehb(p); + if (scratch_reg >= 0) UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg); - } else { + else UASM_i_LW(p, 1, scratchpad_offset(0), 0); - } } else { uasm_i_nop(p); } From 603d9299da32955d49995738541f750f2ae74839 Mon Sep 17 00:00:00 2001 From: Frank Wunderlich Date: Thu, 3 Oct 2019 20:53:23 +0200 Subject: [PATCH 549/572] mfd: mt6397: Fix probe after changing mt6397-core Part 3 from this series [1] was not merged due to wrong splitting and breaks mt6323 pmic on bananapi-r2 dmesg prints this line and at least switch is not initialized on bananapi-r2 mt6397 1000d000.pwrap:mt6323: unsupported chip: 0x0 this patch contains only the probe-changes and chip_data structs from original part 3 by Hsin-Hsiung Wang [1] https://patchwork.kernel.org/project/linux-mediatek/list/?series=164155 Fixes: a4872e80ce7d ("mfd: mt6397: Extract IRQ related code from core driver") Signed-off-by: Frank Wunderlich Signed-off-by: Lee Jones --- drivers/mfd/mt6397-core.c | 64 ++++++++++++++++++++++++--------------- 1 file changed, 40 insertions(+), 24 deletions(-) diff --git a/drivers/mfd/mt6397-core.c b/drivers/mfd/mt6397-core.c index 310dae26ddff..b2c325ead1c8 100644 --- a/drivers/mfd/mt6397-core.c +++ b/drivers/mfd/mt6397-core.c @@ -129,11 +129,27 @@ static int mt6397_irq_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(mt6397_pm_ops, mt6397_irq_suspend, mt6397_irq_resume); +struct chip_data { + u32 cid_addr; + u32 cid_shift; +}; + +static const struct chip_data mt6323_core = { + .cid_addr = MT6323_CID, + .cid_shift = 0, +}; + +static const struct chip_data mt6397_core = { + .cid_addr = MT6397_CID, + .cid_shift = 0, +}; + static int mt6397_probe(struct platform_device *pdev) { int ret; unsigned int id; struct mt6397_chip *pmic; + const struct chip_data *pmic_core; pmic = devm_kzalloc(&pdev->dev, sizeof(*pmic), GFP_KERNEL); if (!pmic) @@ -149,28 +165,30 @@ static int mt6397_probe(struct platform_device *pdev) if (!pmic->regmap) return -ENODEV; - platform_set_drvdata(pdev, pmic); + pmic_core = of_device_get_match_data(&pdev->dev); + if (!pmic_core) + return -ENODEV; - ret = regmap_read(pmic->regmap, MT6397_CID, &id); + ret = regmap_read(pmic->regmap, pmic_core->cid_addr, &id); if (ret) { - dev_err(pmic->dev, "Failed to read chip id: %d\n", ret); + dev_err(&pdev->dev, "Failed to read chip id: %d\n", ret); return ret; } + pmic->chip_id = (id >> pmic_core->cid_shift) & 0xff; + + platform_set_drvdata(pdev, pmic); + pmic->irq = platform_get_irq(pdev, 0); if (pmic->irq <= 0) return pmic->irq; - switch (id & 0xff) { - case MT6323_CHIP_ID: - pmic->int_con[0] = MT6323_INT_CON0; - pmic->int_con[1] = MT6323_INT_CON1; - pmic->int_status[0] = MT6323_INT_STATUS0; - pmic->int_status[1] = MT6323_INT_STATUS1; - ret = mt6397_irq_init(pmic); - if (ret) - return ret; + ret = mt6397_irq_init(pmic); + if (ret) + return ret; + switch (pmic->chip_id) { + case MT6323_CHIP_ID: ret = devm_mfd_add_devices(&pdev->dev, -1, mt6323_devs, ARRAY_SIZE(mt6323_devs), NULL, 0, pmic->irq_domain); @@ -178,21 +196,13 @@ static int mt6397_probe(struct platform_device *pdev) case MT6391_CHIP_ID: case MT6397_CHIP_ID: - pmic->int_con[0] = MT6397_INT_CON0; - pmic->int_con[1] = MT6397_INT_CON1; - pmic->int_status[0] = MT6397_INT_STATUS0; - pmic->int_status[1] = MT6397_INT_STATUS1; - ret = mt6397_irq_init(pmic); - if (ret) - return ret; - ret = devm_mfd_add_devices(&pdev->dev, -1, mt6397_devs, ARRAY_SIZE(mt6397_devs), NULL, 0, pmic->irq_domain); break; default: - dev_err(&pdev->dev, "unsupported chip: %d\n", id); + dev_err(&pdev->dev, "unsupported chip: %d\n", pmic->chip_id); return -ENODEV; } @@ -205,9 +215,15 @@ static int mt6397_probe(struct platform_device *pdev) } static const struct of_device_id mt6397_of_match[] = { - { .compatible = "mediatek,mt6397" }, - { .compatible = "mediatek,mt6323" }, - { } + { + .compatible = "mediatek,mt6323", + .data = &mt6323_core, + }, { + .compatible = "mediatek,mt6397", + .data = &mt6397_core, + }, { + /* sentinel */ + } }; MODULE_DEVICE_TABLE(of, mt6397_of_match); From 30aecae86e914f526a2ca8d552011960ef6a2615 Mon Sep 17 00:00:00 2001 From: Andrew Price Date: Fri, 4 Oct 2019 11:51:58 -0500 Subject: [PATCH 550/572] gfs2: Fix memory leak when gfs2meta's fs_context is freed gfs2 and gfs2meta share an ->init_fs_context function which allocates an args structure stored in fc->fs_private. gfs2 registers a ->free function to free this memory when the fs_context is cleaned up, but there was not one registered for gfs2meta, causing a leak. Register a ->free function for gfs2meta. The existing gfs2_fc_free function does what we need. Reported-by: syzbot+c2fdfd2b783754878fb6@syzkaller.appspotmail.com Fixes: 1f52aa08d12f ("gfs2: Convert gfs2 to fs_context") Signed-off-by: Andrew Price Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/ops_fstype.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 681b44682b0d..dc61af2c4d5e 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1600,6 +1600,7 @@ static int gfs2_meta_get_tree(struct fs_context *fc) } static const struct fs_context_operations gfs2_meta_context_ops = { + .free = gfs2_fc_free, .get_tree = gfs2_meta_get_tree, }; From 8424312516e5d9baeeb0a95d0e4523579b7aa395 Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Wed, 23 Oct 2019 03:46:26 +0000 Subject: [PATCH 551/572] irqchip/gic-v3-its: Use the exact ITSList for VMOVP On a system without Single VMOVP support (say GITS_TYPER.VMOVP == 0), we will map vPEs only on ITSs that will actually control interrupts for the given VM. And when moving a vPE, the VMOVP command will be issued only for those ITSs. But when issuing VMOVPs we seemed fail to present the exact ITSList to ITSs who are actually included in the synchronization operation. The its_list_map we're currently using includes all ITSs in the system, even though some of them don't have the corresponding vPE mapping at all. Introduce get_its_list() to get the per-VM its_list_map, to indicate which ITSs have vPE mappings for the given VM, and use this map as the expected ITSList when building VMOVP. This is hopefully a performance gain not to do some synchronization with those unsuspecting ITSs. And initialize the whole command descriptor to zero at beginning, since the seq_num and its_list should be RES0 when GITS_TYPER.VMOVP == 1. Signed-off-by: Zenghui Yu Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/1571802386-2680-1-git-send-email-yuzenghui@huawei.com --- drivers/irqchip/irq-gic-v3-its.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 62e54f1a248b..787e8eec9a7f 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -175,6 +175,22 @@ static DEFINE_IDA(its_vpeid_ida); #define gic_data_rdist_rd_base() (gic_data_rdist()->rd_base) #define gic_data_rdist_vlpi_base() (gic_data_rdist_rd_base() + SZ_128K) +static u16 get_its_list(struct its_vm *vm) +{ + struct its_node *its; + unsigned long its_list = 0; + + list_for_each_entry(its, &its_nodes, entry) { + if (!its->is_v4) + continue; + + if (vm->vlpi_count[its->list_nr]) + __set_bit(its->list_nr, &its_list); + } + + return (u16)its_list; +} + static struct its_collection *dev_event_to_col(struct its_device *its_dev, u32 event) { @@ -976,17 +992,15 @@ static void its_send_vmapp(struct its_node *its, static void its_send_vmovp(struct its_vpe *vpe) { - struct its_cmd_desc desc; + struct its_cmd_desc desc = {}; struct its_node *its; unsigned long flags; int col_id = vpe->col_idx; desc.its_vmovp_cmd.vpe = vpe; - desc.its_vmovp_cmd.its_list = (u16)its_list_map; if (!its_list_map) { its = list_first_entry(&its_nodes, struct its_node, entry); - desc.its_vmovp_cmd.seq_num = 0; desc.its_vmovp_cmd.col = &its->collections[col_id]; its_send_single_vcommand(its, its_build_vmovp_cmd, &desc); return; @@ -1003,6 +1017,7 @@ static void its_send_vmovp(struct its_vpe *vpe) raw_spin_lock_irqsave(&vmovp_lock, flags); desc.its_vmovp_cmd.seq_num = vmovp_seq_num++; + desc.its_vmovp_cmd.its_list = get_its_list(vpe->its_vm); /* Emit VMOVPs */ list_for_each_entry(its, &its_nodes, entry) { From 62931ac2f9015ea38d80494ec37658ab3df6a6d7 Mon Sep 17 00:00:00 2001 From: Fabien Parent Date: Fri, 18 Oct 2019 19:32:13 +0200 Subject: [PATCH 552/572] i2c: mt65xx: fix NULL ptr dereference Since commit abf4923e97c3 ("i2c: mediatek: disable zero-length transfers for mt8183"), there is a NULL pointer dereference for all the SoCs that don't have any quirk. mtk_i2c_functionality is not checking that the quirks pointer is not NULL before starting to use it. This commit add a call to i2c_check_quirks which will check whether the quirks pointer is set, and if so will check if the IP has the NO_ZERO_LEN quirk. Fixes: abf4923e97c3 ("i2c: mediatek: disable zero-length transfers for mt8183") Signed-off-by: Fabien Parent Reviewed-by: Cengiz Can Reviewed-by: Hsin-Yi Wang Tested-by: Ulrich Hecht Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mt65xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c index 29eae1bf4f86..2152ec5f535c 100644 --- a/drivers/i2c/busses/i2c-mt65xx.c +++ b/drivers/i2c/busses/i2c-mt65xx.c @@ -875,7 +875,7 @@ static irqreturn_t mtk_i2c_irq(int irqno, void *dev_id) static u32 mtk_i2c_functionality(struct i2c_adapter *adap) { - if (adap->quirks->flags & I2C_AQ_NO_ZERO_LEN) + if (i2c_check_quirks(adap, I2C_AQ_NO_ZERO_LEN)) return I2C_FUNC_I2C | (I2C_FUNC_SMBUS_EMUL & ~I2C_FUNC_SMBUS_QUICK); else From 02e64276c6dbcc4c5f39844f33d18180832a58f3 Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Mon, 30 Sep 2019 17:28:01 +0200 Subject: [PATCH 553/572] i2c: stm32f7: fix first byte to send in slave mode The slave-interface documentation [1] states "the bus driver should transmit the first byte" upon I2C_SLAVE_READ_REQUESTED slave event: - 'val': backend returns first byte to be sent The driver currently ignores the 1st byte to send on this event. [1] https://www.kernel.org/doc/Documentation/i2c/slave-interface Fixes: 60d609f30de2 ("i2c: i2c-stm32f7: Add slave support") Signed-off-by: Fabrice Gasnier Reviewed-by: Pierre-Yves MORDRET Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-stm32f7.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index d36cf08461f7..c8ce6c8dd404 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c +++ b/drivers/i2c/busses/i2c-stm32f7.c @@ -1192,6 +1192,8 @@ static void stm32f7_i2c_slave_start(struct stm32f7_i2c_dev *i2c_dev) STM32F7_I2C_CR1_TXIE; stm32f7_i2c_set_bits(base + STM32F7_I2C_CR1, mask); + /* Write 1st data byte */ + writel_relaxed(value, base + STM32F7_I2C_TXDR); } else { /* Notify i2c slave that new write transfer is starting */ i2c_slave_event(slave, I2C_SLAVE_WRITE_REQUESTED, &value); From 6d6b0d0d5afc8c4c84b08261260ba11dfa5206f2 Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Tue, 1 Oct 2019 10:51:09 +0200 Subject: [PATCH 554/572] i2c: stm32f7: fix a race in slave mode with arbitration loss irq When in slave mode, an arbitration loss (ARLO) may be detected before the slave had a chance to detect the stop condition (STOPF in ISR). This is seen when two master + slave adapters switch their roles. It provokes the i2c bus to be stuck, busy as SCL line is stretched. - the I2C_SLAVE_STOP event is never generated due to STOPF flag is set but don't generate an irq (race with ARLO irq, STOPIE is masked). STOPF flag remains set until next master xfer (e.g. when STOPIE irq get unmasked). In this case, completion is generated too early: immediately upon new transfer request (then it doesn't send all data). - Some data get stuck in TXDR register. As a consequence, the controller stretches the SCL line: the bus gets busy until a future master transfer triggers the bus busy / recovery mechanism (this can take time... and may never happen at all) So choice is to let the STOPF being detected by the slave isr handler, to properly handle this stop condition. E.g. don't mask IRQs in error handler, when the slave is running. Fixes: 60d609f30de2 ("i2c: i2c-stm32f7: Add slave support") Signed-off-by: Fabrice Gasnier Reviewed-by: Pierre-Yves MORDRET Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-stm32f7.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index c8ce6c8dd404..073bf57a9821 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c +++ b/drivers/i2c/busses/i2c-stm32f7.c @@ -1503,7 +1503,7 @@ static irqreturn_t stm32f7_i2c_isr_error(int irq, void *data) void __iomem *base = i2c_dev->base; struct device *dev = i2c_dev->dev; struct stm32_i2c_dma *dma = i2c_dev->dma; - u32 mask, status; + u32 status; status = readl_relaxed(i2c_dev->base + STM32F7_I2C_ISR); @@ -1528,12 +1528,15 @@ static irqreturn_t stm32f7_i2c_isr_error(int irq, void *data) f7_msg->result = -EINVAL; } - /* Disable interrupts */ - if (stm32f7_i2c_is_slave_registered(i2c_dev)) - mask = STM32F7_I2C_XFER_IRQ_MASK; - else - mask = STM32F7_I2C_ALL_IRQ_MASK; - stm32f7_i2c_disable_irq(i2c_dev, mask); + if (!i2c_dev->slave_running) { + u32 mask; + /* Disable interrupts */ + if (stm32f7_i2c_is_slave_registered(i2c_dev)) + mask = STM32F7_I2C_XFER_IRQ_MASK; + else + mask = STM32F7_I2C_ALL_IRQ_MASK; + stm32f7_i2c_disable_irq(i2c_dev, mask); + } /* Disable dma */ if (i2c_dev->use_dma) { From 348e46fbb4cdb2aead79aee1fd8bb25ec5fd25db Mon Sep 17 00:00:00 2001 From: Alain Volmat Date: Tue, 15 Oct 2019 15:11:58 +0200 Subject: [PATCH 555/572] i2c: stm32f7: remove warning when compiling with W=1 Remove the following warning: drivers/i2c/busses/i2c-stm32f7.c:315: warning: cannot understand function prototype: 'struct stm32f7_i2c_spec i2c_specs[] = Replace a comment starting with /** by simply /* to avoid having it interpreted as a kernel-doc comment. Fixes: aeb068c57214 ("i2c: i2c-stm32f7: add driver") Signed-off-by: Alain Volmat Reviewed-by: Pierre-Yves MORDRET Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-stm32f7.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index 073bf57a9821..b24e7b937f21 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c +++ b/drivers/i2c/busses/i2c-stm32f7.c @@ -305,7 +305,7 @@ struct stm32f7_i2c_dev { struct regmap *regmap; }; -/** +/* * All these values are coming from I2C Specification, Version 6.0, 4th of * April 2014. * From abe57073d08c13b95a46ccf48cc9dc957d5c6fdb Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 22 Oct 2019 08:41:42 -0700 Subject: [PATCH 556/572] CIFS: Fix retry mid list corruption on reconnects When the client hits reconnect it iterates over the mid pending queue marking entries for retry and moving them to a temporary list to issue callbacks later without holding GlobalMid_Lock. In the same time there is no guarantee that mids can't be removed from the temporary list or even freed completely by another thread. It may cause a temporary list corruption: [ 430.454897] list_del corruption. prev->next should be ffff98d3a8f316c0, but was 2e885cb266355469 [ 430.464668] ------------[ cut here ]------------ [ 430.466569] kernel BUG at lib/list_debug.c:51! [ 430.468476] invalid opcode: 0000 [#1] SMP PTI [ 430.470286] CPU: 0 PID: 13267 Comm: cifsd Kdump: loaded Not tainted 5.4.0-rc3+ #19 [ 430.473472] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 [ 430.475872] RIP: 0010:__list_del_entry_valid.cold+0x31/0x55 ... [ 430.510426] Call Trace: [ 430.511500] cifs_reconnect+0x25e/0x610 [cifs] [ 430.513350] cifs_readv_from_socket+0x220/0x250 [cifs] [ 430.515464] cifs_read_from_socket+0x4a/0x70 [cifs] [ 430.517452] ? try_to_wake_up+0x212/0x650 [ 430.519122] ? cifs_small_buf_get+0x16/0x30 [cifs] [ 430.521086] ? allocate_buffers+0x66/0x120 [cifs] [ 430.523019] cifs_demultiplex_thread+0xdc/0xc30 [cifs] [ 430.525116] kthread+0xfb/0x130 [ 430.526421] ? cifs_handle_standard+0x190/0x190 [cifs] [ 430.528514] ? kthread_park+0x90/0x90 [ 430.530019] ret_from_fork+0x35/0x40 Fix this by obtaining extra references for mids being retried and marking them as MID_DELETED which indicates that such a mid has been dequeued from the pending list. Also move mid cleanup logic from DeleteMidQEntry to _cifs_mid_q_entry_release which is called when the last reference to a particular mid is put. This allows to avoid any use-after-free of response buffers. The patch needs to be backported to stable kernels. A stable tag is not mentioned below because the patch doesn't apply cleanly to any actively maintained stable kernel. Reviewed-by: Ronnie Sahlberg Reviewed-and-tested-by: David Wysochanski Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/connect.c | 10 +++++++++- fs/cifs/transport.c | 42 +++++++++++++++++++++++------------------- 2 files changed, 32 insertions(+), 20 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index bdea4b3e8005..ccaa8bad336f 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -564,9 +564,11 @@ cifs_reconnect(struct TCP_Server_Info *server) spin_lock(&GlobalMid_Lock); list_for_each_safe(tmp, tmp2, &server->pending_mid_q) { mid_entry = list_entry(tmp, struct mid_q_entry, qhead); + kref_get(&mid_entry->refcount); if (mid_entry->mid_state == MID_REQUEST_SUBMITTED) mid_entry->mid_state = MID_RETRY_NEEDED; list_move(&mid_entry->qhead, &retry_list); + mid_entry->mid_flags |= MID_DELETED; } spin_unlock(&GlobalMid_Lock); mutex_unlock(&server->srv_mutex); @@ -576,6 +578,7 @@ cifs_reconnect(struct TCP_Server_Info *server) mid_entry = list_entry(tmp, struct mid_q_entry, qhead); list_del_init(&mid_entry->qhead); mid_entry->callback(mid_entry); + cifs_mid_q_entry_release(mid_entry); } if (cifs_rdma_enabled(server)) { @@ -895,8 +898,10 @@ dequeue_mid(struct mid_q_entry *mid, bool malformed) if (mid->mid_flags & MID_DELETED) printk_once(KERN_WARNING "trying to dequeue a deleted mid\n"); - else + else { list_del_init(&mid->qhead); + mid->mid_flags |= MID_DELETED; + } spin_unlock(&GlobalMid_Lock); } @@ -966,8 +971,10 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server) list_for_each_safe(tmp, tmp2, &server->pending_mid_q) { mid_entry = list_entry(tmp, struct mid_q_entry, qhead); cifs_dbg(FYI, "Clearing mid 0x%llx\n", mid_entry->mid); + kref_get(&mid_entry->refcount); mid_entry->mid_state = MID_SHUTDOWN; list_move(&mid_entry->qhead, &dispose_list); + mid_entry->mid_flags |= MID_DELETED; } spin_unlock(&GlobalMid_Lock); @@ -977,6 +984,7 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server) cifs_dbg(FYI, "Callback mid 0x%llx\n", mid_entry->mid); list_del_init(&mid_entry->qhead); mid_entry->callback(mid_entry); + cifs_mid_q_entry_release(mid_entry); } /* 1/8th of sec is more than enough time for them to exit */ msleep(125); diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 308ad0f495e1..ca3de62688d6 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -86,22 +86,8 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) static void _cifs_mid_q_entry_release(struct kref *refcount) { - struct mid_q_entry *mid = container_of(refcount, struct mid_q_entry, - refcount); - - mempool_free(mid, cifs_mid_poolp); -} - -void cifs_mid_q_entry_release(struct mid_q_entry *midEntry) -{ - spin_lock(&GlobalMid_Lock); - kref_put(&midEntry->refcount, _cifs_mid_q_entry_release); - spin_unlock(&GlobalMid_Lock); -} - -void -DeleteMidQEntry(struct mid_q_entry *midEntry) -{ + struct mid_q_entry *midEntry = + container_of(refcount, struct mid_q_entry, refcount); #ifdef CONFIG_CIFS_STATS2 __le16 command = midEntry->server->vals->lock_cmd; __u16 smb_cmd = le16_to_cpu(midEntry->command); @@ -166,6 +152,19 @@ DeleteMidQEntry(struct mid_q_entry *midEntry) } } #endif + + mempool_free(midEntry, cifs_mid_poolp); +} + +void cifs_mid_q_entry_release(struct mid_q_entry *midEntry) +{ + spin_lock(&GlobalMid_Lock); + kref_put(&midEntry->refcount, _cifs_mid_q_entry_release); + spin_unlock(&GlobalMid_Lock); +} + +void DeleteMidQEntry(struct mid_q_entry *midEntry) +{ cifs_mid_q_entry_release(midEntry); } @@ -173,8 +172,10 @@ void cifs_delete_mid(struct mid_q_entry *mid) { spin_lock(&GlobalMid_Lock); - list_del_init(&mid->qhead); - mid->mid_flags |= MID_DELETED; + if (!(mid->mid_flags & MID_DELETED)) { + list_del_init(&mid->qhead); + mid->mid_flags |= MID_DELETED; + } spin_unlock(&GlobalMid_Lock); DeleteMidQEntry(mid); @@ -872,7 +873,10 @@ cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server) rc = -EHOSTDOWN; break; default: - list_del_init(&mid->qhead); + if (!(mid->mid_flags & MID_DELETED)) { + list_del_init(&mid->qhead); + mid->mid_flags |= MID_DELETED; + } cifs_server_dbg(VFS, "%s: invalid mid state mid=%llu state=%d\n", __func__, mid->mid, mid->mid_state); rc = -EIO; From 1a67c415965752879e2e9fad407bc44fc7f25f23 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Wed, 23 Oct 2019 15:37:19 -0700 Subject: [PATCH 557/572] CIFS: Fix use after free of file info structures Currently the code assumes that if a file info entry belongs to lists of open file handles of an inode and a tcon then it has non-zero reference. The recent changes broke that assumption when putting the last reference of the file info. There may be a situation when a file is being deleted but nothing prevents another thread to reference it again and start using it. This happens because we do not hold the inode list lock while checking the number of references of the file info structure. Fix this by doing the proper locking when doing the check. Fixes: 487317c99477d ("cifs: add spinlock for the openFileList to cifsInodeInfo") Fixes: cb248819d209d ("cifs: use cifsInodeInfo->open_file_lock while iterating to avoid a panic") Cc: Stable Reviewed-by: Ronnie Sahlberg Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/file.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 5ad15de2bb4f..64827938ecf7 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -405,10 +405,11 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler) bool oplock_break_cancelled; spin_lock(&tcon->open_file_lock); - + spin_lock(&cifsi->open_file_lock); spin_lock(&cifs_file->file_info_lock); if (--cifs_file->count > 0) { spin_unlock(&cifs_file->file_info_lock); + spin_unlock(&cifsi->open_file_lock); spin_unlock(&tcon->open_file_lock); return; } @@ -421,9 +422,7 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler) cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open); /* remove it from the lists */ - spin_lock(&cifsi->open_file_lock); list_del(&cifs_file->flist); - spin_unlock(&cifsi->open_file_lock); list_del(&cifs_file->tlist); atomic_dec(&tcon->num_local_opens); @@ -440,6 +439,7 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler) cifs_set_oplock_level(cifsi, 0); } + spin_unlock(&cifsi->open_file_lock); spin_unlock(&tcon->open_file_lock); oplock_break_cancelled = wait_oplock_handler ? From d46b0da7a33dd8c99d969834f682267a45444ab3 Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Wed, 23 Oct 2019 05:02:33 -0400 Subject: [PATCH 558/572] cifs: Fix cifsInodeInfo lock_sem deadlock when reconnect occurs There's a deadlock that is possible and can easily be seen with a test where multiple readers open/read/close of the same file and a disruption occurs causing reconnect. The deadlock is due a reader thread inside cifs_strict_readv calling down_read and obtaining lock_sem, and then after reconnect inside cifs_reopen_file calling down_read a second time. If in between the two down_read calls, a down_write comes from another process, deadlock occurs. CPU0 CPU1 ---- ---- cifs_strict_readv() down_read(&cifsi->lock_sem); _cifsFileInfo_put OR cifs_new_fileinfo down_write(&cifsi->lock_sem); cifs_reopen_file() down_read(&cifsi->lock_sem); Fix the above by changing all down_write(lock_sem) calls to down_write_trylock(lock_sem)/msleep() loop, which in turn makes the second down_read call benign since it will never block behind the writer while holding lock_sem. Signed-off-by: Dave Wysochanski Suggested-by: Ronnie Sahlberg Reviewed--by: Ronnie Sahlberg Reviewed-by: Pavel Shilovsky --- fs/cifs/cifsglob.h | 5 +++++ fs/cifs/cifsproto.h | 1 + fs/cifs/file.c | 23 +++++++++++++++-------- fs/cifs/smb2file.c | 2 +- 4 files changed, 22 insertions(+), 9 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 50dfd9049370..d78bfcc19156 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1391,6 +1391,11 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file); struct cifsInodeInfo { bool can_cache_brlcks; struct list_head llist; /* locks helb by this inode */ + /* + * NOTE: Some code paths call down_read(lock_sem) twice, so + * we must always use use cifs_down_write() instead of down_write() + * for this semaphore to avoid deadlocks. + */ struct rw_semaphore lock_sem; /* protect the fields above */ /* BB add in lists for dirty pages i.e. write caching info for oplock */ struct list_head openFileList; diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index e53e9f62b87b..fe597d3d5208 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -170,6 +170,7 @@ extern int cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, const unsigned int xid); extern int cifs_push_mandatory_locks(struct cifsFileInfo *cfile); +extern void cifs_down_write(struct rw_semaphore *sem); extern struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, struct tcon_link *tlink, diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 64827938ecf7..fa7b0fa72bb3 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -281,6 +281,13 @@ cifs_has_mand_locks(struct cifsInodeInfo *cinode) return has_locks; } +void +cifs_down_write(struct rw_semaphore *sem) +{ + while (!down_write_trylock(sem)) + msleep(10); +} + struct cifsFileInfo * cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, struct tcon_link *tlink, __u32 oplock) @@ -306,7 +313,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, INIT_LIST_HEAD(&fdlocks->locks); fdlocks->cfile = cfile; cfile->llist = fdlocks; - down_write(&cinode->lock_sem); + cifs_down_write(&cinode->lock_sem); list_add(&fdlocks->llist, &cinode->llist); up_write(&cinode->lock_sem); @@ -464,7 +471,7 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler) * Delete any outstanding lock records. We'll lose them when the file * is closed anyway. */ - down_write(&cifsi->lock_sem); + cifs_down_write(&cifsi->lock_sem); list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) { list_del(&li->llist); cifs_del_lock_waiters(li); @@ -1027,7 +1034,7 @@ static void cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock) { struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); - down_write(&cinode->lock_sem); + cifs_down_write(&cinode->lock_sem); list_add_tail(&lock->llist, &cfile->llist->locks); up_write(&cinode->lock_sem); } @@ -1049,7 +1056,7 @@ cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock, try_again: exist = false; - down_write(&cinode->lock_sem); + cifs_down_write(&cinode->lock_sem); exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length, lock->type, lock->flags, &conf_lock, @@ -1072,7 +1079,7 @@ try_again: (lock->blist.next == &lock->blist)); if (!rc) goto try_again; - down_write(&cinode->lock_sem); + cifs_down_write(&cinode->lock_sem); list_del_init(&lock->blist); } @@ -1125,7 +1132,7 @@ cifs_posix_lock_set(struct file *file, struct file_lock *flock) return rc; try_again: - down_write(&cinode->lock_sem); + cifs_down_write(&cinode->lock_sem); if (!cinode->can_cache_brlcks) { up_write(&cinode->lock_sem); return rc; @@ -1331,7 +1338,7 @@ cifs_push_locks(struct cifsFileInfo *cfile) int rc = 0; /* we are going to update can_cache_brlcks here - need a write access */ - down_write(&cinode->lock_sem); + cifs_down_write(&cinode->lock_sem); if (!cinode->can_cache_brlcks) { up_write(&cinode->lock_sem); return rc; @@ -1522,7 +1529,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, if (!buf) return -ENOMEM; - down_write(&cinode->lock_sem); + cifs_down_write(&cinode->lock_sem); for (i = 0; i < 2; i++) { cur = buf; num = 0; diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index e6a1fc72018f..8b0b512c5792 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -145,7 +145,7 @@ smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, cur = buf; - down_write(&cinode->lock_sem); + cifs_down_write(&cinode->lock_sem); list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) { if (flock->fl_start > li->offset || (flock->fl_start + length) < From 41860cc447045c811ce6d5a92f93a065a691fe8e Mon Sep 17 00:00:00 2001 From: Alan Mikhak Date: Thu, 24 Oct 2019 09:11:43 -0700 Subject: [PATCH 559/572] irqchip/sifive-plic: Skip contexts except supervisor in plic_init() Modify plic_init() to skip .dts interrupt contexts other than supervisor external interrupt. The .dts entry for plic may specify multiple interrupt contexts. For example, it may assign two entries IRQ_M_EXT and IRQ_S_EXT, in that order, to the same interrupt controller. This patch modifies plic_init() to skip the IRQ_M_EXT context since IRQ_S_EXT is currently the only supported context. If IRQ_M_EXT is not skipped, plic_init() will report "handler already present for context" when it comes across the IRQ_S_EXT context in the next iteration of its loop. Without this patch, .dts would have to be edited to replace the value of IRQ_M_EXT with -1 for it to be skipped. Signed-off-by: Alan Mikhak Signed-off-by: Marc Zyngier Reviewed-by: Christoph Hellwig Acked-by: Paul Walmsley # arch/riscv Link: https://lkml.kernel.org/r/1571933503-21504-1-git-send-email-alan.mikhak@sifive.com --- drivers/irqchip/irq-sifive-plic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c index 3e51deedbcc8..b1a33f97db03 100644 --- a/drivers/irqchip/irq-sifive-plic.c +++ b/drivers/irqchip/irq-sifive-plic.c @@ -251,8 +251,8 @@ static int __init plic_init(struct device_node *node, continue; } - /* skip context holes */ - if (parent.args[0] == -1) + /* skip contexts other than supervisor external interrupt */ + if (parent.args[0] != IRQ_S_EXT) continue; hartid = plic_find_hart_id(parent.np); From 6ccae60d014d5d1f89c40e7e4b619f343ca24b03 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 25 Oct 2019 09:38:58 +0200 Subject: [PATCH 560/572] xen: issue deprecation warning for 32-bit pv guest Support for the kernel as Xen 32-bit PV guest will soon be removed. Issue a warning when booted as such. Signed-off-by: Juergen Gross Signed-off-by: Boris Ostrovsky --- arch/x86/xen/enlighten_pv.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 7ceb32821093..bd5a61f2c9f1 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -117,6 +117,14 @@ static void __init xen_banner(void) printk(KERN_INFO "Xen version: %d.%d%s%s\n", version >> 16, version & 0xffff, extra.extraversion, xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); + +#ifdef CONFIG_X86_32 + pr_warn("WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!\n" + "Support for running as 32-bit PV-guest under Xen will soon be removed\n" + "from the Linux kernel!\n" + "Please use either a 64-bit kernel or switch to HVM or PVH mode!\n" + "WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!\n"); +#endif } static void __init xen_pv_init_platform(void) From 84d55dc5b9e57b513a702fbc358e1b5489651590 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 25 Oct 2019 12:31:29 +0300 Subject: [PATCH 561/572] io_uring: Fix corrupted user_data There is a bug, where failed linked requests are returned not with specified @user_data, but with garbage from a kernel stack. The reason is that io_fail_links() uses req->user_data, which is uninitialised when called from io_queue_sqe() on fail path. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 1b46c72f8975..0e141d905a5b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2448,6 +2448,8 @@ err: return; } + req->user_data = s->sqe->user_data; + /* * If we already have a head request, queue this one for async * submittal once the head completes. If we don't have a head but From fb5ccc98782f654778cb8d96ba8a998304f9a51f Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 25 Oct 2019 12:31:30 +0300 Subject: [PATCH 562/572] io_uring: Fix broken links with offloading io_sq_thread() processes sqes by 8 without considering links. As a result, links will be randomely subdivided. The easiest way to fix it is to call io_get_sqring() inside io_submit_sqes() as do io_ring_submit(). Downsides: 1. This removes optimisation of not grabbing mm_struct for fixed files 2. It submitting all sqes in one go, without finer-grained sheduling with cq processing. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 62 +++++++++++++++++++++++++++------------------------ 1 file changed, 33 insertions(+), 29 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 0e141d905a5b..949c82a40d16 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -735,6 +735,14 @@ static unsigned io_cqring_events(struct io_rings *rings) return READ_ONCE(rings->cq.tail) - READ_ONCE(rings->cq.head); } +static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx) +{ + struct io_rings *rings = ctx->rings; + + /* make sure SQ entry isn't read before tail */ + return smp_load_acquire(&rings->sq.tail) - ctx->cached_sq_head; +} + /* * Find and free completed poll iocbs */ @@ -2560,8 +2568,8 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s) return false; } -static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes, - unsigned int nr, bool has_user, bool mm_fault) +static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr, + bool has_user, bool mm_fault) { struct io_submit_state state, *statep = NULL; struct io_kiocb *link = NULL; @@ -2575,6 +2583,11 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes, } for (i = 0; i < nr; i++) { + struct sqe_submit s; + + if (!io_get_sqring(ctx, &s)) + break; + /* * If previous wasn't linked and we have a linked command, * that's the end of the chain. Submit the previous link. @@ -2584,9 +2597,9 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes, link = NULL; shadow_req = NULL; } - prev_was_link = (sqes[i].sqe->flags & IOSQE_IO_LINK) != 0; + prev_was_link = (s.sqe->flags & IOSQE_IO_LINK) != 0; - if (link && (sqes[i].sqe->flags & IOSQE_IO_DRAIN)) { + if (link && (s.sqe->flags & IOSQE_IO_DRAIN)) { if (!shadow_req) { shadow_req = io_get_req(ctx, NULL); if (unlikely(!shadow_req)) @@ -2594,18 +2607,18 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes, shadow_req->flags |= (REQ_F_IO_DRAIN | REQ_F_SHADOW_DRAIN); refcount_dec(&shadow_req->refs); } - shadow_req->sequence = sqes[i].sequence; + shadow_req->sequence = s.sequence; } out: if (unlikely(mm_fault)) { - io_cqring_add_event(ctx, sqes[i].sqe->user_data, + io_cqring_add_event(ctx, s.sqe->user_data, -EFAULT); } else { - sqes[i].has_user = has_user; - sqes[i].needs_lock = true; - sqes[i].needs_fixed_file = true; - io_submit_sqe(ctx, &sqes[i], statep, &link); + s.has_user = has_user; + s.needs_lock = true; + s.needs_fixed_file = true; + io_submit_sqe(ctx, &s, statep, &link); submitted++; } } @@ -2620,7 +2633,6 @@ out: static int io_sq_thread(void *data) { - struct sqe_submit sqes[IO_IOPOLL_BATCH]; struct io_ring_ctx *ctx = data; struct mm_struct *cur_mm = NULL; mm_segment_t old_fs; @@ -2635,8 +2647,8 @@ static int io_sq_thread(void *data) timeout = inflight = 0; while (!kthread_should_park()) { - bool all_fixed, mm_fault = false; - int i; + bool mm_fault = false; + unsigned int to_submit; if (inflight) { unsigned nr_events = 0; @@ -2656,7 +2668,8 @@ static int io_sq_thread(void *data) timeout = jiffies + ctx->sq_thread_idle; } - if (!io_get_sqring(ctx, &sqes[0])) { + to_submit = io_sqring_entries(ctx); + if (!to_submit) { /* * We're polling. If we're within the defined idle * period, then let us spin without work before going @@ -2687,7 +2700,8 @@ static int io_sq_thread(void *data) /* make sure to read SQ tail after writing flags */ smp_mb(); - if (!io_get_sqring(ctx, &sqes[0])) { + to_submit = io_sqring_entries(ctx); + if (!to_submit) { if (kthread_should_park()) { finish_wait(&ctx->sqo_wait, &wait); break; @@ -2705,19 +2719,8 @@ static int io_sq_thread(void *data) ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP; } - i = 0; - all_fixed = true; - do { - if (all_fixed && io_sqe_needs_user(sqes[i].sqe)) - all_fixed = false; - - i++; - if (i == ARRAY_SIZE(sqes)) - break; - } while (io_get_sqring(ctx, &sqes[i])); - /* Unless all new commands are FIXED regions, grab mm */ - if (!all_fixed && !cur_mm) { + if (!cur_mm) { mm_fault = !mmget_not_zero(ctx->sqo_mm); if (!mm_fault) { use_mm(ctx->sqo_mm); @@ -2725,8 +2728,9 @@ static int io_sq_thread(void *data) } } - inflight += io_submit_sqes(ctx, sqes, i, cur_mm != NULL, - mm_fault); + to_submit = min(to_submit, ctx->sq_entries); + inflight += io_submit_sqes(ctx, to_submit, cur_mm != NULL, + mm_fault); /* Commit SQ ring head once we've consumed all SQEs */ io_commit_sqring(ctx); From 935d1e45908afb8853c497f2c2bbbb685dec51dc Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 25 Oct 2019 12:31:31 +0300 Subject: [PATCH 563/572] io_uring: Fix race for sqes with userspace io_ring_submit() finalises with 1. io_commit_sqring(), which releases sqes to the userspace 2. Then calls to io_queue_link_head(), accessing released head's sqe Reorder them. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 949c82a40d16..32f6598ecae9 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2795,13 +2795,14 @@ out: submit++; io_submit_sqe(ctx, &s, statep, &link); } - io_commit_sqring(ctx); if (link) io_queue_link_head(ctx, link, &link->submit, shadow_req); if (statep) io_submit_state_end(statep); + io_commit_sqring(ctx); + return submit; } From 86ec2e1739aa1d6565888b4b2059fa47354e1a89 Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Fri, 25 Oct 2019 15:01:22 +0200 Subject: [PATCH 564/572] ARM: dts: stm32: relax qspi pins slew-rate for stm32mp157 Relax qspi pins slew-rate to minimize peak currents. Fixes: 844030057339 ("ARM: dts: stm32: add flash nor support on stm32mp157c eval board") Link: https://lore.kernel.org/r/20191025130122.11407-1-alexandre.torgue@st.com Signed-off-by: Patrice Chotard Signed-off-by: Alexandre Torgue Signed-off-by: Olof Johansson --- arch/arm/boot/dts/stm32mp157-pinctrl.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/stm32mp157-pinctrl.dtsi b/arch/arm/boot/dts/stm32mp157-pinctrl.dtsi index e4a0d51ec3a8..0a3a7d66737b 100644 --- a/arch/arm/boot/dts/stm32mp157-pinctrl.dtsi +++ b/arch/arm/boot/dts/stm32mp157-pinctrl.dtsi @@ -609,13 +609,13 @@ ; /* QSPI_BK1_IO3 */ bias-disable; drive-push-pull; - slew-rate = <3>; + slew-rate = <1>; }; pins2 { pinmux = ; /* QSPI_BK1_NCS */ bias-pull-up; drive-push-pull; - slew-rate = <3>; + slew-rate = <1>; }; }; @@ -637,13 +637,13 @@ ; /* QSPI_BK2_IO3 */ bias-disable; drive-push-pull; - slew-rate = <3>; + slew-rate = <1>; }; pins2 { pinmux = ; /* QSPI_BK2_NCS */ bias-pull-up; drive-push-pull; - slew-rate = <3>; + slew-rate = <1>; }; }; From 498ccd9eda49117c34e0041563d0da6ac40e52b8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 25 Oct 2019 10:04:25 -0600 Subject: [PATCH 565/572] io_uring: used cached copies of sq->dropped and cq->overflow We currently use the ring values directly, but that can lead to issues if the application is malicious and changes these values on our behalf. Created in-kernel cached versions of them, and just overwrite the user side when we update them. This is similar to how we treat the sq/cq ring tail/head updates. Reported-by: Pavel Begunkov Reviewed-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 32f6598ecae9..292c4c733cbe 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -197,6 +197,7 @@ struct io_ring_ctx { unsigned sq_entries; unsigned sq_mask; unsigned sq_thread_idle; + unsigned cached_sq_dropped; struct io_uring_sqe *sq_sqes; struct list_head defer_list; @@ -212,6 +213,7 @@ struct io_ring_ctx { struct { unsigned cached_cq_tail; + atomic_t cached_cq_overflow; unsigned cq_entries; unsigned cq_mask; struct wait_queue_head cq_wait; @@ -420,7 +422,8 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) static inline bool __io_sequence_defer(struct io_ring_ctx *ctx, struct io_kiocb *req) { - return req->sequence != ctx->cached_cq_tail + ctx->rings->sq_dropped; + return req->sequence != ctx->cached_cq_tail + ctx->cached_sq_dropped + + atomic_read(&ctx->cached_cq_overflow); } static inline bool io_sequence_defer(struct io_ring_ctx *ctx, @@ -567,9 +570,8 @@ static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data, WRITE_ONCE(cqe->res, res); WRITE_ONCE(cqe->flags, 0); } else { - unsigned overflow = READ_ONCE(ctx->rings->cq_overflow); - - WRITE_ONCE(ctx->rings->cq_overflow, overflow + 1); + WRITE_ONCE(ctx->rings->cq_overflow, + atomic_inc_return(&ctx->cached_cq_overflow)); } } @@ -2564,7 +2566,8 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s) /* drop invalid entries */ ctx->cached_sq_head++; - rings->sq_dropped++; + ctx->cached_sq_dropped++; + WRITE_ONCE(rings->sq_dropped, ctx->cached_sq_dropped); return false; } From 2b2ed9750fc9d040b9f6d076afcef6f00b6f1f7c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 25 Oct 2019 10:06:15 -0600 Subject: [PATCH 566/572] io_uring: fix bad inflight accounting for SETUP_IOPOLL|SETUP_SQTHREAD We currently assume that submissions from the sqthread are successful, and if IO polling is enabled, we use that value for knowing how many completions to look for. But if we overflowed the CQ ring or some requests simply got errored and already completed, they won't be available for polling. For the case of IO polling and SQTHREAD usage, look at the pending poll list. If it ever hits empty then we know that we don't have anymore pollable requests inflight. For that case, simply reset the inflight count to zero. Reported-by: Pavel Begunkov Reviewed-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 44 ++++++++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 292c4c733cbe..a30c4f622cb3 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -874,19 +874,11 @@ static void io_iopoll_reap_events(struct io_ring_ctx *ctx) mutex_unlock(&ctx->uring_lock); } -static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events, - long min) +static int __io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events, + long min) { - int iters, ret = 0; + int iters = 0, ret = 0; - /* - * We disallow the app entering submit/complete with polling, but we - * still need to lock the ring to prevent racing with polled issue - * that got punted to a workqueue. - */ - mutex_lock(&ctx->uring_lock); - - iters = 0; do { int tmin = 0; @@ -922,6 +914,21 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events, ret = 0; } while (min && !*nr_events && !need_resched()); + return ret; +} + +static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events, + long min) +{ + int ret; + + /* + * We disallow the app entering submit/complete with polling, but we + * still need to lock the ring to prevent racing with polled issue + * that got punted to a workqueue. + */ + mutex_lock(&ctx->uring_lock); + ret = __io_iopoll_check(ctx, nr_events, min); mutex_unlock(&ctx->uring_lock); return ret; } @@ -2657,7 +2664,20 @@ static int io_sq_thread(void *data) unsigned nr_events = 0; if (ctx->flags & IORING_SETUP_IOPOLL) { - io_iopoll_check(ctx, &nr_events, 0); + /* + * inflight is the count of the maximum possible + * entries we submitted, but it can be smaller + * if we dropped some of them. If we don't have + * poll entries available, then we know that we + * have nothing left to poll for. Reset the + * inflight count to zero in that case. + */ + mutex_lock(&ctx->uring_lock); + if (!list_empty(&ctx->poll_list)) + __io_iopoll_check(ctx, &nr_events, 0); + else + inflight = 0; + mutex_unlock(&ctx->uring_lock); } else { /* * Normal IO, just pretend everything completed. From de6346ecbc8f5591ebd6c44ac164e8b8671d71d7 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 21 Oct 2019 15:56:27 -0400 Subject: [PATCH 567/572] nbd: protect cmd->status with cmd->lock We already do this for the most part, except in timeout and clear_req. For the timeout case we take the lock after we grab a ref on the config, but that isn't really necessary because we're safe to touch the cmd at this point, so just move the order around. For the clear_req cause this is initiated by the user, so again is safe. Reviewed-by: Mike Christie Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 478aa86fc1f2..f9e189534304 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -385,17 +385,16 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, struct nbd_device *nbd = cmd->nbd; struct nbd_config *config; + if (!mutex_trylock(&cmd->lock)) + return BLK_EH_RESET_TIMER; + if (!refcount_inc_not_zero(&nbd->config_refs)) { cmd->status = BLK_STS_TIMEOUT; + mutex_unlock(&cmd->lock); goto done; } config = nbd->config; - if (!mutex_trylock(&cmd->lock)) { - nbd_config_put(nbd); - return BLK_EH_RESET_TIMER; - } - if (config->num_connections > 1) { dev_err_ratelimited(nbd_to_dev(nbd), "Connection timed out, retrying (%d/%d alive)\n", @@ -792,7 +791,10 @@ static bool nbd_clear_req(struct request *req, void *data, bool reserved) { struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); + mutex_lock(&cmd->lock); cmd->status = BLK_STS_IOERR; + mutex_unlock(&cmd->lock); + blk_mq_complete_request(req); return true; } From 7ce23e8e0a9cd38338fc8316ac5772666b565ca9 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 21 Oct 2019 15:56:28 -0400 Subject: [PATCH 568/572] nbd: handle racing with error'ed out commands We hit the following warning in production print_req_error: I/O error, dev nbd0, sector 7213934408 flags 80700 ------------[ cut here ]------------ refcount_t: underflow; use-after-free. WARNING: CPU: 25 PID: 32407 at lib/refcount.c:190 refcount_sub_and_test_checked+0x53/0x60 Workqueue: knbd-recv recv_work [nbd] RIP: 0010:refcount_sub_and_test_checked+0x53/0x60 Call Trace: blk_mq_free_request+0xb7/0xf0 blk_mq_complete_request+0x62/0xf0 recv_work+0x29/0xa1 [nbd] process_one_work+0x1f5/0x3f0 worker_thread+0x2d/0x3d0 ? rescuer_thread+0x340/0x340 kthread+0x111/0x130 ? kthread_create_on_node+0x60/0x60 ret_from_fork+0x1f/0x30 ---[ end trace b079c3c67f98bb7c ]--- This was preceded by us timing out everything and shutting down the sockets for the device. The problem is we had a request in the queue at the same time, so we completed the request twice. This can actually happen in a lot of cases, we fail to get a ref on our config, we only have one connection and just error out the command, etc. Fix this by checking cmd->status in nbd_read_stat. We only change this under the cmd->lock, so we are safe to check this here and see if we've already error'ed this command out, which would indicate that we've completed it as well. Reviewed-by: Mike Christie Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index f9e189534304..e200c7c9ddc4 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -710,6 +710,12 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) ret = -ENOENT; goto out; } + if (cmd->status != BLK_STS_OK) { + dev_err(disk_to_dev(nbd->disk), "Command already handled %p\n", + req); + ret = -ENOENT; + goto out; + } if (test_bit(NBD_CMD_REQUEUED, &cmd->flags)) { dev_err(disk_to_dev(nbd->disk), "Raced with timeout on req %p\n", req); From 962399bb7fbf5ce0c5b768ca7115614f31ff8f3f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 16 Oct 2019 11:51:05 +0100 Subject: [PATCH 569/572] ata: libahci_platform: Fix regulator_get_optional() misuse This driver is using regulator_get_optional() to handle all the supplies that it handles, and only ever enables and disables all supplies en masse without ever doing any other configuration of the device to handle missing power. These are clear signs that the API is being misused - it should only be used for supplies that may be physically absent from the system and in these cases the hardware usually needs different configuration if the supply is missing. Instead use normal regualtor_get(), if the supply is not described in DT then the framework will substitute a dummy regulator in so no special handling is needed by the consumer driver. In the case of the PHY regulator the handling in the driver is a hack to deal with integrated PHYs; the supplies are only optional in the sense that that there's some confusion in the code about where they're bound to. From a code point of view they function exactly as normal supplies so can be treated as such. It'd probably be better to model this by instantiating a PHY object for integrated PHYs. Reviewed-by: Hans de Goede Signed-off-by: Mark Brown Signed-off-by: Jens Axboe --- drivers/ata/libahci_platform.c | 38 +++++++++++++--------------------- 1 file changed, 14 insertions(+), 24 deletions(-) diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c index e742780950de..8befce036af8 100644 --- a/drivers/ata/libahci_platform.c +++ b/drivers/ata/libahci_platform.c @@ -153,17 +153,13 @@ int ahci_platform_enable_regulators(struct ahci_host_priv *hpriv) { int rc, i; - if (hpriv->ahci_regulator) { - rc = regulator_enable(hpriv->ahci_regulator); - if (rc) - return rc; - } + rc = regulator_enable(hpriv->ahci_regulator); + if (rc) + return rc; - if (hpriv->phy_regulator) { - rc = regulator_enable(hpriv->phy_regulator); - if (rc) - goto disable_ahci_pwrs; - } + rc = regulator_enable(hpriv->phy_regulator); + if (rc) + goto disable_ahci_pwrs; for (i = 0; i < hpriv->nports; i++) { if (!hpriv->target_pwrs[i]) @@ -181,11 +177,9 @@ disable_target_pwrs: if (hpriv->target_pwrs[i]) regulator_disable(hpriv->target_pwrs[i]); - if (hpriv->phy_regulator) - regulator_disable(hpriv->phy_regulator); + regulator_disable(hpriv->phy_regulator); disable_ahci_pwrs: - if (hpriv->ahci_regulator) - regulator_disable(hpriv->ahci_regulator); + regulator_disable(hpriv->ahci_regulator); return rc; } EXPORT_SYMBOL_GPL(ahci_platform_enable_regulators); @@ -207,10 +201,8 @@ void ahci_platform_disable_regulators(struct ahci_host_priv *hpriv) regulator_disable(hpriv->target_pwrs[i]); } - if (hpriv->ahci_regulator) - regulator_disable(hpriv->ahci_regulator); - if (hpriv->phy_regulator) - regulator_disable(hpriv->phy_regulator); + regulator_disable(hpriv->ahci_regulator); + regulator_disable(hpriv->phy_regulator); } EXPORT_SYMBOL_GPL(ahci_platform_disable_regulators); /** @@ -359,7 +351,7 @@ static int ahci_platform_get_regulator(struct ahci_host_priv *hpriv, u32 port, struct regulator *target_pwr; int rc = 0; - target_pwr = regulator_get_optional(dev, "target"); + target_pwr = regulator_get(dev, "target"); if (!IS_ERR(target_pwr)) hpriv->target_pwrs[port] = target_pwr; @@ -436,16 +428,14 @@ struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev, hpriv->clks[i] = clk; } - hpriv->ahci_regulator = devm_regulator_get_optional(dev, "ahci"); + hpriv->ahci_regulator = devm_regulator_get(dev, "ahci"); if (IS_ERR(hpriv->ahci_regulator)) { rc = PTR_ERR(hpriv->ahci_regulator); - if (rc == -EPROBE_DEFER) + if (rc != 0) goto err_out; - rc = 0; - hpriv->ahci_regulator = NULL; } - hpriv->phy_regulator = devm_regulator_get_optional(dev, "phy"); + hpriv->phy_regulator = devm_regulator_get(dev, "phy"); if (IS_ERR(hpriv->phy_regulator)) { rc = PTR_ERR(hpriv->phy_regulator); if (rc == -EPROBE_DEFER) From cf1b2326b734896734c6e167e41766f9cee7686a Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 17 Oct 2019 16:27:34 -0500 Subject: [PATCH 570/572] nbd: verify socket is supported during setup nbd requires socket families to support the shutdown method so the nbd recv workqueue can be woken up from its sock_recvmsg call. If the socket does not support the callout we will leave recv works running or get hangs later when the device or module is removed. This adds a check during socket connection/reconnection to make sure the socket being passed in supports the needed callout. Reported-by: syzbot+24c12fa8d218ed26011a@syzkaller.appspotmail.com Fixes: e9e006f5fcf2 ("nbd: fix max number of supported devs") Tested-by: Richard W.M. Jones Signed-off-by: Mike Christie Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index e200c7c9ddc4..a94ee45440b3 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -980,6 +980,25 @@ static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx, return ret; } +static struct socket *nbd_get_socket(struct nbd_device *nbd, unsigned long fd, + int *err) +{ + struct socket *sock; + + *err = 0; + sock = sockfd_lookup(fd, err); + if (!sock) + return NULL; + + if (sock->ops->shutdown == sock_no_shutdown) { + dev_err(disk_to_dev(nbd->disk), "Unsupported socket: shutdown callout must be supported.\n"); + *err = -EINVAL; + return NULL; + } + + return sock; +} + static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, bool netlink) { @@ -989,7 +1008,7 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, struct nbd_sock *nsock; int err; - sock = sockfd_lookup(arg, &err); + sock = nbd_get_socket(nbd, arg, &err); if (!sock) return err; @@ -1041,7 +1060,7 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg) int i; int err; - sock = sockfd_lookup(arg, &err); + sock = nbd_get_socket(nbd, arg, &err); if (!sock) return err; From e8f44c50dfe75315d1ff6efc837d62cbe7368c9b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 17 Oct 2019 19:37:30 +0200 Subject: [PATCH 571/572] riscv: cleanup do_trap_break If we always compile the get_break_insn_length inline function we can remove the ifdefs and let dead code elimination take care of the warn branch that is now unreadable because the report_bug stub always returns BUG_TRAP_TYPE_BUG. Signed-off-by: Christoph Hellwig Reviewed-by: Anup Patel Signed-off-by: Paul Walmsley --- arch/riscv/kernel/traps.c | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 1ac75f7d0bff..10a17e545f43 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -111,7 +111,6 @@ DO_ERROR_INFO(do_trap_ecall_s, DO_ERROR_INFO(do_trap_ecall_m, SIGILL, ILL_ILLTRP, "environment call from M-mode"); -#ifdef CONFIG_GENERIC_BUG static inline unsigned long get_break_insn_length(unsigned long pc) { bug_insn_t insn; @@ -120,28 +119,15 @@ static inline unsigned long get_break_insn_length(unsigned long pc) return 0; return (((insn & __INSN_LENGTH_MASK) == __INSN_LENGTH_32) ? 4UL : 2UL); } -#endif /* CONFIG_GENERIC_BUG */ asmlinkage void do_trap_break(struct pt_regs *regs) { - if (user_mode(regs)) { - force_sig_fault(SIGTRAP, TRAP_BRKPT, - (void __user *)(regs->sepc)); - return; - } -#ifdef CONFIG_GENERIC_BUG - { - enum bug_trap_type type; - - type = report_bug(regs->sepc, regs); - if (type == BUG_TRAP_TYPE_WARN) { - regs->sepc += get_break_insn_length(regs->sepc); - return; - } - } -#endif /* CONFIG_GENERIC_BUG */ - - die(regs, "Kernel BUG"); + if (user_mode(regs)) + force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->sepc); + else if (report_bug(regs->sepc, regs) == BUG_TRAP_TYPE_WARN) + regs->sepc += get_break_insn_length(regs->sepc); + else + die(regs, "Kernel BUG"); } #ifdef CONFIG_GENERIC_BUG From d6d5df1db6e9d7f8f76d2911707f7d5877251b02 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 27 Oct 2019 13:19:19 -0400 Subject: [PATCH 572/572] Linux 5.4-rc5 --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 5475cdb6d57d..79be70bf2899 100644 --- a/Makefile +++ b/Makefile @@ -2,8 +2,8 @@ VERSION = 5 PATCHLEVEL = 4 SUBLEVEL = 0 -EXTRAVERSION = -rc4 -NAME = Nesting Opossum +EXTRAVERSION = -rc5 +NAME = Kleptomaniac Octopus # *DOCUMENTATION* # To see a list of typical targets execute "make help"