From 923a8c1d8069104726bde55c37cec66324ccc328 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 31 May 2015 21:44:22 +0300 Subject: [PATCH 01/84] iwlwifi: mvm: fix antenna selection when BT is active When BT is active, we want to avoid the shared antenna for management frames to make sure we don't disturb BT. There was a bug in that code because it chose the antenna BIT(ANT_A) where ANT_A is already a bitmap (0x1). This means that the antenna chosen in the end was ANT_B. While this is not optimal on devices with 2 antennas (it'd disturb BT), it is critical on single antenna devices like 3160 which couldn't connect at all when BT was active. This fixes: https://bugzilla.kernel.org/show_bug.cgi?id=97181 CC: [3.17+] Fixes: 34c8b24ff284 ("iwlwifi: mvm: BT Coex - avoid the shared antenna for management frames") Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/mvm/tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/tx.c b/drivers/net/wireless/iwlwifi/mvm/tx.c index 7ba7a118ff5c..89116864d2a0 100644 --- a/drivers/net/wireless/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/iwlwifi/mvm/tx.c @@ -252,7 +252,7 @@ void iwl_mvm_set_tx_cmd_rate(struct iwl_mvm *mvm, struct iwl_tx_cmd *tx_cmd, if (info->band == IEEE80211_BAND_2GHZ && !iwl_mvm_bt_coex_is_shared_ant_avail(mvm)) - rate_flags = BIT(mvm->cfg->non_shared_ant) << RATE_MCS_ANT_POS; + rate_flags = mvm->cfg->non_shared_ant << RATE_MCS_ANT_POS; else rate_flags = BIT(mvm->mgmt_last_antenna_idx) << RATE_MCS_ANT_POS; From 11828dbce6f769ed631919aa378b645b1af6bda6 Mon Sep 17 00:00:00 2001 From: Matti Gottlieb Date: Mon, 1 Jun 2015 15:15:11 +0300 Subject: [PATCH 02/84] iwlwifi: mvm: Avoid accessing Null pointer when setting igtk Sometimes when setting an igtk key the station may be NULL. In the case of igtk, the function will skip to the end and try to print sta->addr; if sta is NULL, we will access a NULL pointer. Avoid accessing a NULL pointer when setting an igtk key and sta == NULL, and print a default MAC address instead. Signed-off-by: Matti Gottlieb Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/mvm/sta.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/sta.c b/drivers/net/wireless/iwlwifi/mvm/sta.c index d68dc697a4a0..26f076e82149 100644 --- a/drivers/net/wireless/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/iwlwifi/mvm/sta.c @@ -1401,6 +1401,7 @@ int iwl_mvm_set_sta_key(struct iwl_mvm *mvm, bool mcast = !(keyconf->flags & IEEE80211_KEY_FLAG_PAIRWISE); u8 sta_id; int ret; + static const u8 __maybe_unused zero_addr[ETH_ALEN] = {0}; lockdep_assert_held(&mvm->mutex); @@ -1467,7 +1468,7 @@ int iwl_mvm_set_sta_key(struct iwl_mvm *mvm, end: IWL_DEBUG_WEP(mvm, "key: cipher=%x len=%d idx=%d sta=%pM ret=%d\n", keyconf->cipher, keyconf->keylen, keyconf->keyidx, - sta->addr, ret); + sta ? sta->addr : zero_addr, ret); return ret; } From 66337b7c67f6237720ba13f6d41b9d8dbcb59cda Mon Sep 17 00:00:00 2001 From: "Dreyfuss, Haim" Date: Thu, 4 Jun 2015 11:45:33 +0300 Subject: [PATCH 03/84] iwlwifi: pcie: Fix bug in NIC's PM registers access While cleaning up the access to these hw-dependent registers, a wrong condition was mistakenly added instead of one based on the product family type, giving 8000 family devices forbidden access to HW registers. Fix it.
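A note on PATCH 01 above: the bug is easiest to see with concrete numbers. Assuming the driver's usual antenna definitions, where ANT_A is already the bitmap BIT(0) and ANT_B is BIT(1) (the commit message itself states ANT_A is the bitmap 0x1), wrapping an already-bitmapped value in BIT() again shifts the selection to the next antenna. A minimal, self-contained sketch with illustrative values only, not driver code:

#include <stdio.h>

#define BIT(n)  (1U << (n))
#define ANT_A   BIT(0)  /* 0x1 - already a bitmap, not an index */
#define ANT_B   BIT(1)  /* 0x2 */

int main(void)
{
	unsigned int buggy = BIT(ANT_A); /* BIT(0x1) == 0x2, i.e. ANT_B */
	unsigned int fixed = ANT_A;      /* 0x1, the intended antenna  */

	printf("buggy mask: 0x%x, fixed mask: 0x%x\n", buggy, fixed);
	return 0;
}

With the fix, the non-shared antenna bitmap from the config is used directly, which is why single-antenna 3160 devices can connect again while BT is active.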
Fixes: 95411d0455cc ("iwlwifi: pcie: Control access to the NIC's PM registers via iwl_cfg") Signed-off-by: Dreyfuss, Haim Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/pcie/trans.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/pcie/trans.c b/drivers/net/wireless/iwlwifi/pcie/trans.c index 43ae658af6ec..608ba1e39a0f 100644 --- a/drivers/net/wireless/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/iwlwifi/pcie/trans.c @@ -182,7 +182,7 @@ static void iwl_trans_pcie_write_shr(struct iwl_trans *trans, u32 reg, u32 val) static void iwl_pcie_set_pwr(struct iwl_trans *trans, bool vaux) { - if (!trans->cfg->apmg_not_supported) + if (trans->cfg->apmg_not_supported) return; if (vaux && pci_pme_capable(to_pci_dev(trans->dev), PCI_D3cold)) From af3f2f740173f8b5c61dba46f900998c5984ccd9 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 4 Jun 2015 09:51:11 +0300 Subject: [PATCH 04/84] iwlwifi: pcie: don't panic if pcie transport alloc fails iwl_trans_pcie_alloc needs to return a non-zero value if it fails. Otherwise the iwl_drv_start will think that the allocation succeeded. Remove the duplication of err and ret variable and use ret which is the name we usually use in other places of the driver. Fixes: c278754a21e6 ("iwlwifi: mvm: support family 8000 B2/C steps") CC: [4.1] Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/pcie/trans.c | 44 +++++++++++------------ 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/pcie/trans.c b/drivers/net/wireless/iwlwifi/pcie/trans.c index 608ba1e39a0f..fe61d0aa5550 100644 --- a/drivers/net/wireless/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/iwlwifi/pcie/trans.c @@ -2459,7 +2459,7 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, struct iwl_trans_pcie *trans_pcie; struct iwl_trans *trans; u16 pci_cmd; - int err; + int ret; trans = iwl_trans_alloc(sizeof(struct iwl_trans_pcie), &pdev->dev, cfg, &trans_ops_pcie, 0); @@ -2474,8 +2474,8 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, spin_lock_init(&trans_pcie->ref_lock); init_waitqueue_head(&trans_pcie->ucode_write_waitq); - err = pci_enable_device(pdev); - if (err) + ret = pci_enable_device(pdev); + if (ret) goto out_no_pci; if (!cfg->base_params->pcie_l1_allowed) { @@ -2491,23 +2491,23 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, pci_set_master(pdev); - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(36)); - if (!err) - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(36)); - if (err) { - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); - if (!err) - err = pci_set_consistent_dma_mask(pdev, + ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(36)); + if (!ret) + ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(36)); + if (ret) { + ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (!ret) + ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); /* both attempts failed: */ - if (err) { + if (ret) { dev_err(&pdev->dev, "No suitable DMA available\n"); goto out_pci_disable_device; } } - err = pci_request_regions(pdev, DRV_NAME); - if (err) { + ret = pci_request_regions(pdev, DRV_NAME); + if (ret) { dev_err(&pdev->dev, "pci_request_regions failed\n"); goto out_pci_disable_device; } @@ -2515,7 +2515,7 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, trans_pcie->hw_base = pci_ioremap_bar(pdev, 0); if (!trans_pcie->hw_base) { dev_err(&pdev->dev, "pci_ioremap_bar failed\n"); - err = -ENODEV; + ret = -ENODEV; goto 
out_pci_release_regions; } @@ -2527,9 +2527,9 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, trans_pcie->pci_dev = pdev; iwl_disable_interrupts(trans); - err = pci_enable_msi(pdev); - if (err) { - dev_err(&pdev->dev, "pci_enable_msi failed(0X%x)\n", err); + ret = pci_enable_msi(pdev); + if (ret) { + dev_err(&pdev->dev, "pci_enable_msi failed(0X%x)\n", ret); /* enable rfkill interrupt: hw bug w/a */ pci_read_config_word(pdev, PCI_COMMAND, &pci_cmd); if (pci_cmd & PCI_COMMAND_INTX_DISABLE) { @@ -2547,7 +2547,6 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, */ if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000) { unsigned long flags; - int ret; trans->hw_rev = (trans->hw_rev & 0xfff0) | (CSR_HW_REV_STEP(trans->hw_rev << 2) << 2); @@ -2591,13 +2590,14 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, /* Initialize the wait queue for commands */ init_waitqueue_head(&trans_pcie->wait_command_queue); - if (iwl_pcie_alloc_ict(trans)) + ret = iwl_pcie_alloc_ict(trans); + if (ret) goto out_pci_disable_msi; - err = request_threaded_irq(pdev->irq, iwl_pcie_isr, + ret = request_threaded_irq(pdev->irq, iwl_pcie_isr, iwl_pcie_irq_handler, IRQF_SHARED, DRV_NAME, trans); - if (err) { + if (ret) { IWL_ERR(trans, "Error allocating IRQ %d\n", pdev->irq); goto out_free_ict; } @@ -2617,5 +2617,5 @@ out_pci_disable_device: pci_disable_device(pdev); out_no_pci: iwl_trans_free(trans); - return ERR_PTR(err); + return ERR_PTR(ret); } From 7865598ec24ab4162d976d0bd75a9b2a2f58cd64 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 21 Jun 2015 19:47:46 +0200 Subject: [PATCH 05/84] ath9k_hw: fix device ID check for AR956x Because of the missing return, the macVersion value was being overwritten with an invalid register read Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath9k/hw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c index 5e15e8e10ed3..a31a6804dc34 100644 --- a/drivers/net/wireless/ath/ath9k/hw.c +++ b/drivers/net/wireless/ath/ath9k/hw.c @@ -279,6 +279,7 @@ static void ath9k_hw_read_revisions(struct ath_hw *ah) return; case AR9300_DEVID_QCA956X: ah->hw_version.macVersion = AR_SREV_VERSION_9561; + return; } val = REG_READ(ah, AR_SREV) & AR_SREV_ID; From f9e5554cd8ca1d1212ec922755b397a20f737923 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 4 Jun 2015 11:09:47 +0300 Subject: [PATCH 06/84] iwlwifi: pcie: prepare the device before accessing it For 8000 series, we need to access the device to know what firmware to load. Before we do so, we need to prepare the device otherwise we might not be able to access the hardware. 
Fixes: c278754a21e6 ("iwlwifi: mvm: support family 8000 B2/C steps") CC: [4.1] Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/pcie/trans.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/wireless/iwlwifi/pcie/trans.c b/drivers/net/wireless/iwlwifi/pcie/trans.c index fe61d0aa5550..6203c4ad9bba 100644 --- a/drivers/net/wireless/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/iwlwifi/pcie/trans.c @@ -2551,6 +2551,12 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, trans->hw_rev = (trans->hw_rev & 0xfff0) | (CSR_HW_REV_STEP(trans->hw_rev << 2) << 2); + ret = iwl_pcie_prepare_card_hw(trans); + if (ret) { + IWL_WARN(trans, "Exit HW not ready\n"); + goto out_pci_disable_msi; + } + /* * in-order to recognize C step driver should read chip version * id located at the AUX bus MISC address space. From 00fd233ac491708d1f64b7176be73c84a18a54a2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 26 Jun 2015 16:04:01 +0200 Subject: [PATCH 07/84] iwlwifi: mvm: check time-event vif to avoid bad deletion The time event is initialized relatively late in interface (mvmvif) initialization, so it's possible to fail before that happens. As a consequence, the driver will crash if it ever tries to delete this time event in case initialization was unsuccessful. Avoid this by using the time event's vif pointer to indicate validity. The vif pointer is != NULL whenever the id is != TE_MAX, except for this special error case where the vif pointer will have the correct property (as the whole memory is cleared on allocation) whereas the id is 0, causing a crash in trying to delete the time event from the list. Signed-off-by: Johannes Berg Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/mvm/time-event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/time-event.c b/drivers/net/wireless/iwlwifi/mvm/time-event.c index d24b6a83e68c..e472729e5f14 100644 --- a/drivers/net/wireless/iwlwifi/mvm/time-event.c +++ b/drivers/net/wireless/iwlwifi/mvm/time-event.c @@ -86,7 +86,7 @@ void iwl_mvm_te_clear_data(struct iwl_mvm *mvm, { lockdep_assert_held(&mvm->time_event_lock); - if (te_data->id == TE_MAX) + if (!te_data->vif) return; list_del(&te_data->list); From b8eee757077e2aced778cfad8c1d989e2deec584 Mon Sep 17 00:00:00 2001 From: Oren Givon Date: Mon, 22 Jun 2015 13:44:05 +0300 Subject: [PATCH 08/84] iwlwifi: edit the 3165 series and 8000 series PCI IDs Add new 3165 devices support. Add one new 8000 series device support. Remove support for 0x0000, 0xC030 and 0xD030 sub-system IDs in the 8000 series. 
Signed-off-by: Oren Givon Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/pcie/drv.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/pcie/drv.c b/drivers/net/wireless/iwlwifi/pcie/drv.c index 2ed1e4d2774d..9f65c1cff1b1 100644 --- a/drivers/net/wireless/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/iwlwifi/pcie/drv.c @@ -368,12 +368,14 @@ static const struct pci_device_id iwl_hw_card_ids[] = { /* 3165 Series */ {IWL_PCI_DEVICE(0x3165, 0x4010, iwl3165_2ac_cfg)}, {IWL_PCI_DEVICE(0x3165, 0x4012, iwl3165_2ac_cfg)}, + {IWL_PCI_DEVICE(0x3166, 0x4212, iwl3165_2ac_cfg)}, {IWL_PCI_DEVICE(0x3165, 0x4410, iwl3165_2ac_cfg)}, {IWL_PCI_DEVICE(0x3165, 0x4510, iwl3165_2ac_cfg)}, {IWL_PCI_DEVICE(0x3165, 0x4110, iwl3165_2ac_cfg)}, {IWL_PCI_DEVICE(0x3166, 0x4310, iwl3165_2ac_cfg)}, {IWL_PCI_DEVICE(0x3166, 0x4210, iwl3165_2ac_cfg)}, {IWL_PCI_DEVICE(0x3165, 0x8010, iwl3165_2ac_cfg)}, + {IWL_PCI_DEVICE(0x3165, 0x8110, iwl3165_2ac_cfg)}, /* 7265 Series */ {IWL_PCI_DEVICE(0x095A, 0x5010, iwl7265_2ac_cfg)}, @@ -426,9 +428,8 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x24F4, 0x1130, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F4, 0x1030, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0xC010, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0xC110, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0xD010, iwl8260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x24F4, 0xC030, iwl8260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x24F4, 0xD030, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0xC050, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0xD050, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x8010, iwl8260_2ac_cfg)}, From 8465fe6ac5cc054a7969e78e56aea40cd7808540 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Sun, 21 Jun 2015 12:06:13 +0300 Subject: [PATCH 09/84] iwlwifi: mvm: Add preemptive flag to scheduled scan Add preemptive flag to scheduled scan command flags. Without this flag, all scan requests after scheduled scan was started will be delayed until scheduled scan stops. As a result, P2P_FIND will be blocked while scheduled scan is active. This flag was omitted during refactoring. Signed-off-by: Avraham Stern Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/mvm/fw-api-scan.h | 3 ++- drivers/net/wireless/iwlwifi/mvm/scan.c | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/fw-api-scan.h b/drivers/net/wireless/iwlwifi/mvm/fw-api-scan.h index 5e4cbdb44c60..737774a01c74 100644 --- a/drivers/net/wireless/iwlwifi/mvm/fw-api-scan.h +++ b/drivers/net/wireless/iwlwifi/mvm/fw-api-scan.h @@ -660,7 +660,8 @@ struct iwl_scan_config { * iwl_umac_scan_flags *@IWL_UMAC_SCAN_FLAG_PREEMPTIVE: scan process triggered by this scan request * can be preempted by other scan requests with higher priority. - * The low priority scan is aborted. + * The low priority scan will be resumed when the higher proirity scan is + * completed. *@IWL_UMAC_SCAN_FLAG_START_NOTIF: notification will be sent to the driver * when scan starts. 
*/ diff --git a/drivers/net/wireless/iwlwifi/mvm/scan.c b/drivers/net/wireless/iwlwifi/mvm/scan.c index 5de144968723..5000bfcded61 100644 --- a/drivers/net/wireless/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/iwlwifi/mvm/scan.c @@ -1109,6 +1109,9 @@ static int iwl_mvm_scan_umac(struct iwl_mvm *mvm, struct ieee80211_vif *vif, cmd->uid = cpu_to_le32(uid); cmd->general_flags = cpu_to_le32(iwl_mvm_scan_umac_flags(mvm, params)); + if (type == IWL_MVM_SCAN_SCHED) + cmd->flags = cpu_to_le32(IWL_UMAC_SCAN_FLAG_PREEMPTIVE); + if (iwl_mvm_scan_use_ebs(mvm, vif, n_iterations)) cmd->channel_flags = IWL_SCAN_CHANNEL_FLAG_EBS | IWL_SCAN_CHANNEL_FLAG_EBS_ACCURATE | From 255ba06533f09875973d6b400f9c5b88065938df Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sat, 11 Jul 2015 22:30:49 +0300 Subject: [PATCH 10/84] Revert "iwlwifi: pcie: New RBD allocation model" This reverts commit 5f17570354f91275b0a37a4da33d29a2ab57d32e. This patch introduced a high latency in buffer allocation under extreme load. This latency caused a firmwre crash. The same scenario works fine with this patch reverted. Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/iwl-fh.h | 6 + drivers/net/wireless/iwlwifi/pcie/internal.h | 51 +-- drivers/net/wireless/iwlwifi/pcie/rx.c | 414 ++++--------------- 3 files changed, 97 insertions(+), 374 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-fh.h b/drivers/net/wireless/iwlwifi/iwl-fh.h index d56064861a9c..d45dc021cda2 100644 --- a/drivers/net/wireless/iwlwifi/iwl-fh.h +++ b/drivers/net/wireless/iwlwifi/iwl-fh.h @@ -438,6 +438,12 @@ static inline unsigned int FH_MEM_CBBC_QUEUE(unsigned int chnl) #define RX_QUEUE_MASK 255 #define RX_QUEUE_SIZE_LOG 8 +/* + * RX related structures and functions + */ +#define RX_FREE_BUFFERS 64 +#define RX_LOW_WATERMARK 8 + /** * struct iwl_rb_status - reserve buffer status * host memory mapped FH registers diff --git a/drivers/net/wireless/iwlwifi/pcie/internal.h b/drivers/net/wireless/iwlwifi/pcie/internal.h index 31f72a61cc3f..376b84e54ad7 100644 --- a/drivers/net/wireless/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/iwlwifi/pcie/internal.h @@ -44,15 +44,6 @@ #include "iwl-io.h" #include "iwl-op-mode.h" -/* - * RX related structures and functions - */ -#define RX_NUM_QUEUES 1 -#define RX_POST_REQ_ALLOC 2 -#define RX_CLAIM_REQ_ALLOC 8 -#define RX_POOL_SIZE ((RX_CLAIM_REQ_ALLOC - RX_POST_REQ_ALLOC) * RX_NUM_QUEUES) -#define RX_LOW_WATERMARK 8 - struct iwl_host_cmd; /*This file includes the declaration that are internal to the @@ -86,29 +77,29 @@ struct isr_statistics { * struct iwl_rxq - Rx queue * @bd: driver's pointer to buffer of receive buffer descriptors (rbd) * @bd_dma: bus address of buffer of receive buffer descriptors (rbd) + * @pool: + * @queue: * @read: Shared index to newest available Rx buffer * @write: Shared index to oldest written Rx packet * @free_count: Number of pre-allocated buffers in rx_free - * @used_count: Number of RBDs handled to allocator to use for allocation * @write_actual: - * @rx_free: list of RBDs with allocated RB ready for use - * @rx_used: list of RBDs with no RB attached + * @rx_free: list of free SKBs for use + * @rx_used: List of Rx buffers with no SKB * @need_update: flag to indicate we need to update read/write index * @rb_stts: driver's pointer to receive buffer status * @rb_stts_dma: bus address of receive buffer status * @lock: - * @pool: initial pool of iwl_rx_mem_buffer for the queue - * @queue: actual rx queue * * NOTE: rx_free and rx_used are used as a FIFO for 
iwl_rx_mem_buffers */ struct iwl_rxq { __le32 *bd; dma_addr_t bd_dma; + struct iwl_rx_mem_buffer pool[RX_QUEUE_SIZE + RX_FREE_BUFFERS]; + struct iwl_rx_mem_buffer *queue[RX_QUEUE_SIZE]; u32 read; u32 write; u32 free_count; - u32 used_count; u32 write_actual; struct list_head rx_free; struct list_head rx_used; @@ -116,32 +107,6 @@ struct iwl_rxq { struct iwl_rb_status *rb_stts; dma_addr_t rb_stts_dma; spinlock_t lock; - struct iwl_rx_mem_buffer pool[RX_QUEUE_SIZE]; - struct iwl_rx_mem_buffer *queue[RX_QUEUE_SIZE]; -}; - -/** - * struct iwl_rb_allocator - Rx allocator - * @pool: initial pool of allocator - * @req_pending: number of requests the allcator had not processed yet - * @req_ready: number of requests honored and ready for claiming - * @rbd_allocated: RBDs with pages allocated and ready to be handled to - * the queue. This is a list of &struct iwl_rx_mem_buffer - * @rbd_empty: RBDs with no page attached for allocator use. This is a list - * of &struct iwl_rx_mem_buffer - * @lock: protects the rbd_allocated and rbd_empty lists - * @alloc_wq: work queue for background calls - * @rx_alloc: work struct for background calls - */ -struct iwl_rb_allocator { - struct iwl_rx_mem_buffer pool[RX_POOL_SIZE]; - atomic_t req_pending; - atomic_t req_ready; - struct list_head rbd_allocated; - struct list_head rbd_empty; - spinlock_t lock; - struct workqueue_struct *alloc_wq; - struct work_struct rx_alloc; }; struct iwl_dma_ptr { @@ -285,7 +250,7 @@ iwl_pcie_get_scratchbuf_dma(struct iwl_txq *txq, int idx) /** * struct iwl_trans_pcie - PCIe transport specific data * @rxq: all the RX queue data - * @rba: allocator for RX replenishing + * @rx_replenish: work that will be called when buffers need to be allocated * @drv - pointer to iwl_drv * @trans: pointer to the generic transport area * @scd_base_addr: scheduler sram base address in SRAM @@ -308,7 +273,7 @@ iwl_pcie_get_scratchbuf_dma(struct iwl_txq *txq, int idx) */ struct iwl_trans_pcie { struct iwl_rxq rxq; - struct iwl_rb_allocator rba; + struct work_struct rx_replenish; struct iwl_trans *trans; struct iwl_drv *drv; diff --git a/drivers/net/wireless/iwlwifi/pcie/rx.c b/drivers/net/wireless/iwlwifi/pcie/rx.c index a3fbaa0ef5e0..adad8d0fae7f 100644 --- a/drivers/net/wireless/iwlwifi/pcie/rx.c +++ b/drivers/net/wireless/iwlwifi/pcie/rx.c @@ -1,7 +1,7 @@ /****************************************************************************** * * Copyright(c) 2003 - 2014 Intel Corporation. All rights reserved. - * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH + * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH * * Portions of this file are derived from the ipw3945 project, as well * as portions of the ieee80211 subsystem header files. @@ -74,29 +74,16 @@ * resets the Rx queue buffers with new memory. * * The management in the driver is as follows: - * + A list of pre-allocated RBDs is stored in iwl->rxq->rx_free. - * When the interrupt handler is called, the request is processed. - * The page is either stolen - transferred to the upper layer - * or reused - added immediately to the iwl->rxq->rx_free list. - * + When the page is stolen - the driver updates the matching queue's used - * count, detaches the RBD and transfers it to the queue used list. - * When there are two used RBDs - they are transferred to the allocator empty - * list. Work is then scheduled for the allocator to start allocating - * eight buffers. 
- * When there are another 6 used RBDs - they are transferred to the allocator - * empty list and the driver tries to claim the pre-allocated buffers and - * add them to iwl->rxq->rx_free. If it fails - it continues to claim them - * until ready. - * When there are 8+ buffers in the free list - either from allocation or from - * 8 reused unstolen pages - restock is called to update the FW and indexes. - * + In order to make sure the allocator always has RBDs to use for allocation - * the allocator has initial pool in the size of num_queues*(8-2) - the - * maximum missing RBDs per allocation request (request posted with 2 - * empty RBDs, there is no guarantee when the other 6 RBDs are supplied). - * The queues supplies the recycle of the rest of the RBDs. + * + A list of pre-allocated SKBs is stored in iwl->rxq->rx_free. When + * iwl->rxq->free_count drops to or below RX_LOW_WATERMARK, work is scheduled + * to replenish the iwl->rxq->rx_free. + * + In iwl_pcie_rx_replenish (scheduled) if 'processed' != 'read' then the + * iwl->rxq is replenished and the READ INDEX is updated (updating the + * 'processed' and 'read' driver indexes as well) * + A received packet is processed and handed to the kernel network stack, * detached from the iwl->rxq. The driver 'processed' index is updated. - * + If there are no allocated buffers in iwl->rxq->rx_free, + * + The Host/Firmware iwl->rxq is replenished at irq thread time from the + * rx_free list. If there are no allocated buffers in iwl->rxq->rx_free, * the READ INDEX is not incremented and iwl->status(RX_STALLED) is set. * If there were enough free buffers and RX_STALLED is set it is cleared. * @@ -105,32 +92,18 @@ * * iwl_rxq_alloc() Allocates rx_free * iwl_pcie_rx_replenish() Replenishes rx_free list from rx_used, and calls - * iwl_pcie_rxq_restock. - * Used only during initialization. + * iwl_pcie_rxq_restock * iwl_pcie_rxq_restock() Moves available buffers from rx_free into Rx * queue, updates firmware pointers, and updates - * the WRITE index. - * iwl_pcie_rx_allocator() Background work for allocating pages. + * the WRITE index. If insufficient rx_free buffers + * are available, schedules iwl_pcie_rx_replenish * * -- enable interrupts -- * ISR - iwl_rx() Detach iwl_rx_mem_buffers from pool up to the * READ INDEX, detaching the SKB from the pool. * Moves the packet buffer from queue to rx_used. - * Posts and claims requests to the allocator. * Calls iwl_pcie_rxq_restock to refill any empty * slots. - * - * RBD life-cycle: - * - * Init: - * rxq.pool -> rxq.rx_used -> rxq.rx_free -> rxq.queue - * - * Regular Receive interrupt: - * Page Stolen: - * rxq.queue -> rxq.rx_used -> allocator.rbd_empty -> - * allocator.rbd_allocated -> rxq.rx_free -> rxq.queue - * Page not Stolen: - * rxq.queue -> rxq.rx_free -> rxq.queue * ... * */ @@ -267,6 +240,10 @@ static void iwl_pcie_rxq_restock(struct iwl_trans *trans) rxq->free_count--; } spin_unlock(&rxq->lock); + /* If the pre-allocated buffer pool is dropping low, schedule to + * refill it */ + if (rxq->free_count <= RX_LOW_WATERMARK) + schedule_work(&trans_pcie->rx_replenish); /* If we've added more space for the firmware to place data, tell it. * Increment device's write pointer in multiples of 8. */ @@ -277,44 +254,6 @@ static void iwl_pcie_rxq_restock(struct iwl_trans *trans) } } -/* - * iwl_pcie_rx_alloc_page - allocates and returns a page. 
- * - */ -static struct page *iwl_pcie_rx_alloc_page(struct iwl_trans *trans) -{ - struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - struct iwl_rxq *rxq = &trans_pcie->rxq; - struct page *page; - gfp_t gfp_mask = GFP_KERNEL; - - if (rxq->free_count > RX_LOW_WATERMARK) - gfp_mask |= __GFP_NOWARN; - - if (trans_pcie->rx_page_order > 0) - gfp_mask |= __GFP_COMP; - - /* Alloc a new receive buffer */ - page = alloc_pages(gfp_mask, trans_pcie->rx_page_order); - if (!page) { - if (net_ratelimit()) - IWL_DEBUG_INFO(trans, "alloc_pages failed, order: %d\n", - trans_pcie->rx_page_order); - /* Issue an error if the hardware has consumed more than half - * of its free buffer list and we don't have enough - * pre-allocated buffers. -` */ - if (rxq->free_count <= RX_LOW_WATERMARK && - iwl_rxq_space(rxq) > (RX_QUEUE_SIZE / 2) && - net_ratelimit()) - IWL_CRIT(trans, - "Failed to alloc_pages with GFP_KERNEL. Only %u free buffers remaining.\n", - rxq->free_count); - return NULL; - } - return page; -} - /* * iwl_pcie_rxq_alloc_rbs - allocate a page for each used RBD * @@ -324,12 +263,13 @@ static struct page *iwl_pcie_rx_alloc_page(struct iwl_trans *trans) * iwl_pcie_rxq_restock. The latter function will update the HW to use the newly * allocated buffers. */ -static void iwl_pcie_rxq_alloc_rbs(struct iwl_trans *trans) +static void iwl_pcie_rxq_alloc_rbs(struct iwl_trans *trans, gfp_t priority) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_rxq *rxq = &trans_pcie->rxq; struct iwl_rx_mem_buffer *rxb; struct page *page; + gfp_t gfp_mask = priority; while (1) { spin_lock(&rxq->lock); @@ -339,10 +279,32 @@ static void iwl_pcie_rxq_alloc_rbs(struct iwl_trans *trans) } spin_unlock(&rxq->lock); + if (rxq->free_count > RX_LOW_WATERMARK) + gfp_mask |= __GFP_NOWARN; + + if (trans_pcie->rx_page_order > 0) + gfp_mask |= __GFP_COMP; + /* Alloc a new receive buffer */ - page = iwl_pcie_rx_alloc_page(trans); - if (!page) + page = alloc_pages(gfp_mask, trans_pcie->rx_page_order); + if (!page) { + if (net_ratelimit()) + IWL_DEBUG_INFO(trans, "alloc_pages failed, " + "order: %d\n", + trans_pcie->rx_page_order); + + if ((rxq->free_count <= RX_LOW_WATERMARK) && + net_ratelimit()) + IWL_CRIT(trans, "Failed to alloc_pages with %s." + "Only %u free buffers remaining.\n", + priority == GFP_ATOMIC ? + "GFP_ATOMIC" : "GFP_KERNEL", + rxq->free_count); + /* We don't reschedule replenish work here -- we will + * call the restock method and if it still needs + * more buffers it will schedule replenish */ return; + } spin_lock(&rxq->lock); @@ -393,7 +355,7 @@ static void iwl_pcie_rxq_free_rbs(struct iwl_trans *trans) lockdep_assert_held(&rxq->lock); - for (i = 0; i < RX_QUEUE_SIZE; i++) { + for (i = 0; i < RX_FREE_BUFFERS + RX_QUEUE_SIZE; i++) { if (!rxq->pool[i].page) continue; dma_unmap_page(trans->dev, rxq->pool[i].page_dma, @@ -410,144 +372,32 @@ static void iwl_pcie_rxq_free_rbs(struct iwl_trans *trans) * When moving to rx_free an page is allocated for the slot. * * Also restock the Rx queue via iwl_pcie_rxq_restock. 
- * This is called only during initialization + * This is called as a scheduled work item (except for during initialization) */ -static void iwl_pcie_rx_replenish(struct iwl_trans *trans) +static void iwl_pcie_rx_replenish(struct iwl_trans *trans, gfp_t gfp) { - iwl_pcie_rxq_alloc_rbs(trans); + iwl_pcie_rxq_alloc_rbs(trans, gfp); iwl_pcie_rxq_restock(trans); } -/* - * iwl_pcie_rx_allocator - Allocates pages in the background for RX queues - * - * Allocates for each received request 8 pages - * Called as a scheduled work item. - */ -static void iwl_pcie_rx_allocator(struct iwl_trans *trans) +static void iwl_pcie_rx_replenish_work(struct work_struct *data) { - struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - struct iwl_rb_allocator *rba = &trans_pcie->rba; - - while (atomic_read(&rba->req_pending)) { - int i; - struct list_head local_empty; - struct list_head local_allocated; - - INIT_LIST_HEAD(&local_allocated); - spin_lock(&rba->lock); - /* swap out the entire rba->rbd_empty to a local list */ - list_replace_init(&rba->rbd_empty, &local_empty); - spin_unlock(&rba->lock); - - for (i = 0; i < RX_CLAIM_REQ_ALLOC;) { - struct iwl_rx_mem_buffer *rxb; - struct page *page; - - /* List should never be empty - each reused RBD is - * returned to the list, and initial pool covers any - * possible gap between the time the page is allocated - * to the time the RBD is added. - */ - BUG_ON(list_empty(&local_empty)); - /* Get the first rxb from the rbd list */ - rxb = list_first_entry(&local_empty, - struct iwl_rx_mem_buffer, list); - BUG_ON(rxb->page); - - /* Alloc a new receive buffer */ - page = iwl_pcie_rx_alloc_page(trans); - if (!page) - continue; - rxb->page = page; - - /* Get physical address of the RB */ - rxb->page_dma = dma_map_page(trans->dev, page, 0, - PAGE_SIZE << trans_pcie->rx_page_order, - DMA_FROM_DEVICE); - if (dma_mapping_error(trans->dev, rxb->page_dma)) { - rxb->page = NULL; - __free_pages(page, trans_pcie->rx_page_order); - continue; - } - /* dma address must be no more than 36 bits */ - BUG_ON(rxb->page_dma & ~DMA_BIT_MASK(36)); - /* and also 256 byte aligned! */ - BUG_ON(rxb->page_dma & DMA_BIT_MASK(8)); - - /* move the allocated entry to the out list */ - list_move(&rxb->list, &local_allocated); - i++; - } - - spin_lock(&rba->lock); - /* add the allocated rbds to the allocator allocated list */ - list_splice_tail(&local_allocated, &rba->rbd_allocated); - /* add the unused rbds back to the allocator empty list */ - list_splice_tail(&local_empty, &rba->rbd_empty); - spin_unlock(&rba->lock); - - atomic_dec(&rba->req_pending); - atomic_inc(&rba->req_ready); - } -} - -/* - * iwl_pcie_rx_allocator_get - Returns the pre-allocated pages -.* -.* Called by queue when the queue posted allocation request and - * has freed 8 RBDs in order to restock itself. 
- */ -static int iwl_pcie_rx_allocator_get(struct iwl_trans *trans, - struct iwl_rx_mem_buffer - *out[RX_CLAIM_REQ_ALLOC]) -{ - struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - struct iwl_rb_allocator *rba = &trans_pcie->rba; - int i; - - if (atomic_dec_return(&rba->req_ready) < 0) { - atomic_inc(&rba->req_ready); - IWL_DEBUG_RX(trans, - "Allocation request not ready, pending requests = %d\n", - atomic_read(&rba->req_pending)); - return -ENOMEM; - } - - spin_lock(&rba->lock); - for (i = 0; i < RX_CLAIM_REQ_ALLOC; i++) { - /* Get next free Rx buffer, remove it from free list */ - out[i] = list_first_entry(&rba->rbd_allocated, - struct iwl_rx_mem_buffer, list); - list_del(&out[i]->list); - } - spin_unlock(&rba->lock); - - return 0; -} - -static void iwl_pcie_rx_allocator_work(struct work_struct *data) -{ - struct iwl_rb_allocator *rba_p = - container_of(data, struct iwl_rb_allocator, rx_alloc); struct iwl_trans_pcie *trans_pcie = - container_of(rba_p, struct iwl_trans_pcie, rba); + container_of(data, struct iwl_trans_pcie, rx_replenish); - iwl_pcie_rx_allocator(trans_pcie->trans); + iwl_pcie_rx_replenish(trans_pcie->trans, GFP_KERNEL); } static int iwl_pcie_rx_alloc(struct iwl_trans *trans) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_rxq *rxq = &trans_pcie->rxq; - struct iwl_rb_allocator *rba = &trans_pcie->rba; struct device *dev = trans->dev; memset(&trans_pcie->rxq, 0, sizeof(trans_pcie->rxq)); spin_lock_init(&rxq->lock); - spin_lock_init(&rba->lock); if (WARN_ON(rxq->bd || rxq->rb_stts)) return -EINVAL; @@ -637,49 +487,15 @@ static void iwl_pcie_rx_init_rxb_lists(struct iwl_rxq *rxq) INIT_LIST_HEAD(&rxq->rx_free); INIT_LIST_HEAD(&rxq->rx_used); rxq->free_count = 0; - rxq->used_count = 0; - for (i = 0; i < RX_QUEUE_SIZE; i++) + for (i = 0; i < RX_FREE_BUFFERS + RX_QUEUE_SIZE; i++) list_add(&rxq->pool[i].list, &rxq->rx_used); } -static void iwl_pcie_rx_init_rba(struct iwl_rb_allocator *rba) -{ - int i; - - lockdep_assert_held(&rba->lock); - - INIT_LIST_HEAD(&rba->rbd_allocated); - INIT_LIST_HEAD(&rba->rbd_empty); - - for (i = 0; i < RX_POOL_SIZE; i++) - list_add(&rba->pool[i].list, &rba->rbd_empty); -} - -static void iwl_pcie_rx_free_rba(struct iwl_trans *trans) -{ - struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - struct iwl_rb_allocator *rba = &trans_pcie->rba; - int i; - - lockdep_assert_held(&rba->lock); - - for (i = 0; i < RX_POOL_SIZE; i++) { - if (!rba->pool[i].page) - continue; - dma_unmap_page(trans->dev, rba->pool[i].page_dma, - PAGE_SIZE << trans_pcie->rx_page_order, - DMA_FROM_DEVICE); - __free_pages(rba->pool[i].page, trans_pcie->rx_page_order); - rba->pool[i].page = NULL; - } -} - int iwl_pcie_rx_init(struct iwl_trans *trans) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_rxq *rxq = &trans_pcie->rxq; - struct iwl_rb_allocator *rba = &trans_pcie->rba; int i, err; if (!rxq->bd) { @@ -687,21 +503,11 @@ int iwl_pcie_rx_init(struct iwl_trans *trans) if (err) return err; } - if (!rba->alloc_wq) - rba->alloc_wq = alloc_workqueue("rb_allocator", - WQ_HIGHPRI | WQ_UNBOUND, 1); - INIT_WORK(&rba->rx_alloc, iwl_pcie_rx_allocator_work); - - spin_lock(&rba->lock); - atomic_set(&rba->req_pending, 0); - atomic_set(&rba->req_ready, 0); - /* free all first - we might be reconfigured for a different size */ - iwl_pcie_rx_free_rba(trans); - iwl_pcie_rx_init_rba(rba); - spin_unlock(&rba->lock); spin_lock(&rxq->lock); + INIT_WORK(&trans_pcie->rx_replenish, 
iwl_pcie_rx_replenish_work); + /* free all first - we might be reconfigured for a different size */ iwl_pcie_rxq_free_rbs(trans); iwl_pcie_rx_init_rxb_lists(rxq); @@ -716,7 +522,7 @@ int iwl_pcie_rx_init(struct iwl_trans *trans) memset(rxq->rb_stts, 0, sizeof(*rxq->rb_stts)); spin_unlock(&rxq->lock); - iwl_pcie_rx_replenish(trans); + iwl_pcie_rx_replenish(trans, GFP_KERNEL); iwl_pcie_rx_hw_init(trans, rxq); @@ -731,7 +537,6 @@ void iwl_pcie_rx_free(struct iwl_trans *trans) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_rxq *rxq = &trans_pcie->rxq; - struct iwl_rb_allocator *rba = &trans_pcie->rba; /*if rxq->bd is NULL, it means that nothing has been allocated, * exit now */ @@ -740,15 +545,7 @@ void iwl_pcie_rx_free(struct iwl_trans *trans) return; } - cancel_work_sync(&rba->rx_alloc); - if (rba->alloc_wq) { - destroy_workqueue(rba->alloc_wq); - rba->alloc_wq = NULL; - } - - spin_lock(&rba->lock); - iwl_pcie_rx_free_rba(trans); - spin_unlock(&rba->lock); + cancel_work_sync(&trans_pcie->rx_replenish); spin_lock(&rxq->lock); iwl_pcie_rxq_free_rbs(trans); @@ -769,43 +566,6 @@ void iwl_pcie_rx_free(struct iwl_trans *trans) rxq->rb_stts = NULL; } -/* - * iwl_pcie_rx_reuse_rbd - Recycle used RBDs - * - * Called when a RBD can be reused. The RBD is transferred to the allocator. - * When there are 2 empty RBDs - a request for allocation is posted - */ -static void iwl_pcie_rx_reuse_rbd(struct iwl_trans *trans, - struct iwl_rx_mem_buffer *rxb, - struct iwl_rxq *rxq) -{ - struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - struct iwl_rb_allocator *rba = &trans_pcie->rba; - - /* Count the used RBDs */ - rxq->used_count++; - - /* Move the RBD to the used list, will be moved to allocator in batches - * before claiming or posting a request*/ - list_add_tail(&rxb->list, &rxq->rx_used); - - /* If we have RX_POST_REQ_ALLOC new released rx buffers - - * issue a request for allocator. Modulo RX_CLAIM_REQ_ALLOC is - * used for the case we failed to claim RX_CLAIM_REQ_ALLOC, - * after but we still need to post another request. - */ - if ((rxq->used_count % RX_CLAIM_REQ_ALLOC) == RX_POST_REQ_ALLOC) { - /* Move the 2 RBDs to the allocator ownership. 
- Allocator has another 6 from pool for the request completion*/ - spin_lock(&rba->lock); - list_splice_tail_init(&rxq->rx_used, &rba->rbd_empty); - spin_unlock(&rba->lock); - - atomic_inc(&rba->req_pending); - queue_work(rba->alloc_wq, &rba->rx_alloc); - } -} - static void iwl_pcie_rx_handle_rb(struct iwl_trans *trans, struct iwl_rx_mem_buffer *rxb) { @@ -928,13 +688,13 @@ static void iwl_pcie_rx_handle_rb(struct iwl_trans *trans, */ __free_pages(rxb->page, trans_pcie->rx_page_order); rxb->page = NULL; - iwl_pcie_rx_reuse_rbd(trans, rxb, rxq); + list_add_tail(&rxb->list, &rxq->rx_used); } else { list_add_tail(&rxb->list, &rxq->rx_free); rxq->free_count++; } } else - iwl_pcie_rx_reuse_rbd(trans, rxb, rxq); + list_add_tail(&rxb->list, &rxq->rx_used); } /* @@ -944,7 +704,10 @@ static void iwl_pcie_rx_handle(struct iwl_trans *trans) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_rxq *rxq = &trans_pcie->rxq; - u32 r, i, j; + u32 r, i; + u8 fill_rx = 0; + u32 count = 8; + int total_empty; restart: spin_lock(&rxq->lock); @@ -957,6 +720,14 @@ restart: if (i == r) IWL_DEBUG_RX(trans, "HW = SW = %d\n", r); + /* calculate total frames need to be restock after handling RX */ + total_empty = r - rxq->write_actual; + if (total_empty < 0) + total_empty += RX_QUEUE_SIZE; + + if (total_empty > (RX_QUEUE_SIZE / 2)) + fill_rx = 1; + while (i != r) { struct iwl_rx_mem_buffer *rxb; @@ -968,48 +739,29 @@ restart: iwl_pcie_rx_handle_rb(trans, rxb); i = (i + 1) & RX_QUEUE_MASK; - - /* If we have RX_CLAIM_REQ_ALLOC released rx buffers - - * try to claim the pre-allocated buffers from the allocator */ - if (rxq->used_count >= RX_CLAIM_REQ_ALLOC) { - struct iwl_rb_allocator *rba = &trans_pcie->rba; - struct iwl_rx_mem_buffer *out[RX_CLAIM_REQ_ALLOC]; - - /* Add the remaining 6 empty RBDs for allocator use */ - spin_lock(&rba->lock); - list_splice_tail_init(&rxq->rx_used, &rba->rbd_empty); - spin_unlock(&rba->lock); - - /* If not ready - continue, will try to reclaim later. - * No need to reschedule work - allocator exits only on - * success */ - if (!iwl_pcie_rx_allocator_get(trans, out)) { - /* If success - then RX_CLAIM_REQ_ALLOC - * buffers were retrieved and should be added - * to free list */ - rxq->used_count -= RX_CLAIM_REQ_ALLOC; - for (j = 0; j < RX_CLAIM_REQ_ALLOC; j++) { - list_add_tail(&out[j]->list, - &rxq->rx_free); - rxq->free_count++; - } + /* If there are a lot of unused frames, + * restock the Rx queue so ucode wont assert. */ + if (fill_rx) { + count++; + if (count >= 8) { + rxq->read = i; + spin_unlock(&rxq->lock); + iwl_pcie_rx_replenish(trans, GFP_ATOMIC); + count = 0; + goto restart; } } - /* handle restock for two cases: - * - we just pulled buffers from the allocator - * - we have 8+ unstolen pages accumulated */ - if (rxq->free_count >= RX_CLAIM_REQ_ALLOC) { - rxq->read = i; - spin_unlock(&rxq->lock); - iwl_pcie_rxq_restock(trans); - goto restart; - } } /* Backtrack one entry */ rxq->read = i; spin_unlock(&rxq->lock); + if (fill_rx) + iwl_pcie_rx_replenish(trans, GFP_ATOMIC); + else + iwl_pcie_rxq_restock(trans); + if (trans_pcie->napi.poll) napi_gro_flush(&trans_pcie->napi, false); } From be88a1ada9b97bb016196b7f4a1fc2fe2f798529 Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Wed, 1 Jul 2015 17:28:34 +0300 Subject: [PATCH 11/84] iwlwifi: nvm: remove mac address byte swapping in 8000 family This fixes the byte order copying in the MAO (Mac Override Section) section from the PNVM, as the byte swapping is not required anymore in the 8000 family. 
Due to the byte swapping, the driver was reporting an incorrect MAC address. CC: [4.1] Signed-off-by: Liad Kaufman Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/iwl-nvm-parse.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c index 80fefe7d7b8c..3b8e85e51002 100644 --- a/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c @@ -540,13 +540,11 @@ static void iwl_set_hw_address_family_8000(struct device *dev, hw_addr = (const u8 *)(mac_override + MAC_ADDRESS_OVERRIDE_FAMILY_8000); - /* The byte order is little endian 16 bit, meaning 214365 */ - data->hw_addr[0] = hw_addr[1]; - data->hw_addr[1] = hw_addr[0]; - data->hw_addr[2] = hw_addr[3]; - data->hw_addr[3] = hw_addr[2]; - data->hw_addr[4] = hw_addr[5]; - data->hw_addr[5] = hw_addr[4]; + /* + * Store the MAC address from MAO section. + * No byte swapping is required in MAO section + */ + memcpy(data->hw_addr, hw_addr, ETH_ALEN); /* * Force the use of the OTP MAC address in case of reserved MAC From 7bee8b08c428b63aa4a3765bb907602e36355378 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 14 Jul 2015 16:25:30 -0400 Subject: [PATCH 12/84] Bluetooth: btbcm: allow btbcm_read_verbose_config to fail on Apple Commit 1c8ba6d013 moved around the setup code for Broadcom chips, and also added btbcm_read_verbose_config() to read extra information about the hardware. It's returning errors on some MacBooks: Bluetooth: hci0: BCM: Read verbose config info failed (-16) Which makes us error out of the setup function. Since this probe isn't critical to operate the chip, this patch just changes things to carry on when it fails. Signed-off-by: Chris Mason Signed-off-by: Marcel Holtmann Cc: stable@vger.kernel.org # v4.1 --- drivers/bluetooth/btbcm.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c index 1e1a4323a71f..9ceb8ac68fdc 100644 --- a/drivers/bluetooth/btbcm.c +++ b/drivers/bluetooth/btbcm.c @@ -472,12 +472,11 @@ int btbcm_setup_apple(struct hci_dev *hdev) /* Read Verbose Config Version Info */ skb = btbcm_read_verbose_config(hdev); - if (IS_ERR(skb)) - return PTR_ERR(skb); - - BT_INFO("%s: BCM: chip id %u build %4.4u", hdev->name, skb->data[1], - get_unaligned_le16(skb->data + 5)); - kfree_skb(skb); + if (!IS_ERR(skb)) { + BT_INFO("%s: BCM: chip id %u build %4.4u", hdev->name, skb->data[1], + get_unaligned_le16(skb->data + 5)); + kfree_skb(skb); + } set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks); From 50c2e4dd6749725338621fff456b26d3a592259f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sun, 12 Jul 2015 01:20:55 +0300 Subject: [PATCH 13/84] net/xen-netback: off by one in BUG_ON() condition The > should be >=. I also added spaces around the '-' operations so the code is a little more consistent and matches the condition better. Fixes: f53c3fe8dad7 ('xen-netback: Introduce TX grant mapping') Signed-off-by: Dan Carpenter Signed-off-by: David S. 
Miller --- drivers/net/xen-netback/netback.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 880d0d63e872..7d50711476fe 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1566,13 +1566,13 @@ static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue) smp_rmb(); while (dc != dp) { - BUG_ON(gop - queue->tx_unmap_ops > MAX_PENDING_REQS); + BUG_ON(gop - queue->tx_unmap_ops >= MAX_PENDING_REQS); pending_idx = queue->dealloc_ring[pending_index(dc++)]; - pending_idx_release[gop-queue->tx_unmap_ops] = + pending_idx_release[gop - queue->tx_unmap_ops] = pending_idx; - queue->pages_to_unmap[gop-queue->tx_unmap_ops] = + queue->pages_to_unmap[gop - queue->tx_unmap_ops] = queue->mmap_pages[pending_idx]; gnttab_set_unmap_op(gop, idx_to_kaddr(queue, pending_idx), From 6ae3673debfbf2cfa1972b6d00b974d0e10ad3c6 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Sat, 11 Jul 2015 21:03:07 +0200 Subject: [PATCH 14/84] can: at91_can: don't touch skb after netif_receive_skb()/netif_rx() There is no guarantee that the skb is in the same state after calling net_receive_skb() or netif_rx(). It might be freed or reused. Not really harmful as its a read access, except you turn on the proper debugging options which catch a use after free. Signed-off-by: Marc Kleine-Budde --- drivers/net/can/at91_can.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c index f4e40aa4d2a2..945c0955a967 100644 --- a/drivers/net/can/at91_can.c +++ b/drivers/net/can/at91_can.c @@ -577,10 +577,10 @@ static void at91_rx_overflow_err(struct net_device *dev) cf->can_id |= CAN_ERR_CRTL; cf->data[1] = CAN_ERR_CRTL_RX_OVERFLOW; - netif_receive_skb(skb); stats->rx_packets++; stats->rx_bytes += cf->can_dlc; + netif_receive_skb(skb); } /** @@ -642,10 +642,10 @@ static void at91_read_msg(struct net_device *dev, unsigned int mb) } at91_read_mb(dev, mb, cf); - netif_receive_skb(skb); stats->rx_packets++; stats->rx_bytes += cf->can_dlc; + netif_receive_skb(skb); can_led_event(dev, CAN_LED_EVENT_RX); } @@ -802,10 +802,10 @@ static int at91_poll_err(struct net_device *dev, int quota, u32 reg_sr) return 0; at91_poll_err_frame(dev, cf, reg_sr); - netif_receive_skb(skb); dev->stats.rx_packets++; dev->stats.rx_bytes += cf->can_dlc; + netif_receive_skb(skb); return 1; } @@ -1067,10 +1067,10 @@ static void at91_irq_err(struct net_device *dev) return; at91_irq_err_state(dev, cf, new_state); - netif_rx(skb); dev->stats.rx_packets++; dev->stats.rx_bytes += cf->can_dlc; + netif_rx(skb); priv->can.state = new_state; } From a18ec1b6c72eb87fe00e8e09c3a3f3cfbec3435b Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 8 May 2015 11:30:29 +0200 Subject: [PATCH 15/84] can: flexcan: don't touch skb after netif_receive_skb() There is no guarantee that the skb is in the same state after calling net_receive_skb() or netif_rx(). It might be freed or reused. Not really harmful as its a read access, except you turn on the proper debugging options which catch a use after free. 
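The same reordering appears in every patch of this series (PATCH 14 through 25): read whatever the statistics need out of the CAN frame first, then hand the skb to the stack and never touch it again. A minimal kernel-style sketch of the pattern, using a hypothetical helper name rather than code taken from any one of these drivers:

#include <linux/can.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* Hypothetical rx completion helper illustrating the ordering used by this
 * series: update the statistics from the CAN frame while the skb is still
 * owned by the driver, then pass it to netif_rx().  After netif_rx() the
 * skb (and the can_frame stored in it) may already be freed or reused.
 */
static void example_can_rx_done(struct net_device *dev, struct sk_buff *skb,
				struct can_frame *cf)
{
	struct net_device_stats *stats = &dev->stats;

	stats->rx_packets++;
	stats->rx_bytes += cf->can_dlc;	/* read cf before giving up the skb */
	netif_rx(skb);			/* do not dereference skb/cf after this */
}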
Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 6201c5a1a884..b1e8d729851c 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -577,10 +577,10 @@ static int flexcan_poll_bus_err(struct net_device *dev, u32 reg_esr) return 0; do_bus_err(dev, cf, reg_esr); - netif_receive_skb(skb); dev->stats.rx_packets++; dev->stats.rx_bytes += cf->can_dlc; + netif_receive_skb(skb); return 1; } @@ -622,10 +622,9 @@ static int flexcan_poll_state(struct net_device *dev, u32 reg_esr) if (unlikely(new_state == CAN_STATE_BUS_OFF)) can_bus_off(dev); - netif_receive_skb(skb); - dev->stats.rx_packets++; dev->stats.rx_bytes += cf->can_dlc; + netif_receive_skb(skb); return 1; } @@ -670,10 +669,10 @@ static int flexcan_read_frame(struct net_device *dev) } flexcan_read_fifo(dev, cf); - netif_receive_skb(skb); stats->rx_packets++; stats->rx_bytes += cf->can_dlc; + netif_receive_skb(skb); can_led_event(dev, CAN_LED_EVENT_RX); From 20926d79244de4e9c698ae429fdc1e7e5832b081 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Sat, 11 Jul 2015 21:16:08 +0200 Subject: [PATCH 16/84] can: bfin_can: don't touch skb after netif_rx() There is no guarantee that the skb is in the same state after calling net_receive_skb() or netif_rx(). It might be freed or reused. Not really harmful as its a read access, except you turn on the proper debugging options which catch a use after free. Cc: Aaron Wu Signed-off-by: Marc Kleine-Budde --- drivers/net/can/bfin_can.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/bfin_can.c b/drivers/net/can/bfin_can.c index 27ad312e7abf..57dadd52b428 100644 --- a/drivers/net/can/bfin_can.c +++ b/drivers/net/can/bfin_can.c @@ -424,10 +424,9 @@ static void bfin_can_rx(struct net_device *dev, u16 isrc) cf->data[6 - i] = (6 - i) < cf->can_dlc ? (val >> 8) : 0; } - netif_rx(skb); - stats->rx_packets++; stats->rx_bytes += cf->can_dlc; + netif_rx(skb); } static int bfin_can_err(struct net_device *dev, u16 isrc, u16 status) @@ -508,10 +507,9 @@ static int bfin_can_err(struct net_device *dev, u16 isrc, u16 status) priv->can.state = state; - netif_rx(skb); - stats->rx_packets++; stats->rx_bytes += cf->can_dlc; + netif_rx(skb); return 0; } From 83537b6fd6c3752bcb578d2bd46312d1e2c4a73a Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Sat, 11 Jul 2015 21:16:08 +0200 Subject: [PATCH 17/84] can: grcan: don't touch skb after netif_rx() There is no guarantee that the skb is in the same state after calling net_receive_skb() or netif_rx(). It might be freed or reused. Not really harmful as its a read access, except you turn on the proper debugging options which catch a use after free. 
Cc: Andreas Larsson Signed-off-by: Marc Kleine-Budde --- drivers/net/can/grcan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/grcan.c b/drivers/net/can/grcan.c index e3d7e22a4fa0..db9538d4b358 100644 --- a/drivers/net/can/grcan.c +++ b/drivers/net/can/grcan.c @@ -1216,11 +1216,12 @@ static int grcan_receive(struct net_device *dev, int budget) cf->data[i] = (u8)(slot[j] >> shift); } } - netif_receive_skb(skb); /* Update statistics and read pointer */ stats->rx_packets++; stats->rx_bytes += cf->can_dlc; + netif_receive_skb(skb); + rd = grcan_ring_add(rd, GRCAN_MSG_SIZE, dma->rx.size); } From a2e78cf7a3f562edca4230b188c8832b6214eccd Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Sat, 11 Jul 2015 21:16:08 +0200 Subject: [PATCH 18/84] can: slcan: don't touch skb after netif_rx_ni() There is no guarantee that the skb is in the same state after calling net_receive_skb() or netif_rx(). It might be freed or reused. Not really harmful as its a read access, except you turn on the proper debugging options which catch a use after free. Cc: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- drivers/net/can/slcan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c index a23a7af8eb9a..9a3f15cb7ef4 100644 --- a/drivers/net/can/slcan.c +++ b/drivers/net/can/slcan.c @@ -218,10 +218,10 @@ static void slc_bump(struct slcan *sl) memcpy(skb_put(skb, sizeof(struct can_frame)), &cf, sizeof(struct can_frame)); - netif_rx_ni(skb); sl->dev->stats.rx_packets++; sl->dev->stats.rx_bytes += cf.can_dlc; + netif_rx_ni(skb); } /* parse tty input stream */ From 05c4456538e9551b8ac47762b21127bf9cf6cc8e Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Sat, 11 Jul 2015 21:16:08 +0200 Subject: [PATCH 19/84] can: ti_hecc: don't touch skb after netif_rx() There is no guarantee that the skb is in the same state after calling net_receive_skb() or netif_rx(). It might be freed or reused. Not really harmful as its a read access, except you turn on the proper debugging options which catch a use after free. Cc: Anant Gole Signed-off-by: Marc Kleine-Budde --- drivers/net/can/ti_hecc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index e95a9e1a889f..cf345cbfe819 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -747,9 +747,9 @@ static int ti_hecc_error(struct net_device *ndev, int int_status, } } - netif_rx(skb); stats->rx_packets++; stats->rx_bytes += cf->can_dlc; + netif_rx(skb); return 0; } From ef934e89f5e68535c447789b74de42cf389e55de Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Sat, 11 Jul 2015 21:16:08 +0200 Subject: [PATCH 20/84] can: cc770: don't touch skb after netif_rx() There is no guarantee that the skb is in the same state after calling net_receive_skb() or netif_rx(). It might be freed or reused. Not really harmful as its a read access, except you turn on the proper debugging options which catch a use after free. 
Cc: Wolfgang Grandegger Signed-off-by: Marc Kleine-Budde --- drivers/net/can/cc770/cc770.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/cc770/cc770.c b/drivers/net/can/cc770/cc770.c index c11d44984036..70a8cbb29e75 100644 --- a/drivers/net/can/cc770/cc770.c +++ b/drivers/net/can/cc770/cc770.c @@ -504,10 +504,10 @@ static void cc770_rx(struct net_device *dev, unsigned int mo, u8 ctrl1) for (i = 0; i < cf->can_dlc; i++) cf->data[i] = cc770_read_reg(priv, msgobj[mo].data[i]); } - netif_rx(skb); stats->rx_packets++; stats->rx_bytes += cf->can_dlc; + netif_rx(skb); } static int cc770_err(struct net_device *dev, u8 status) @@ -584,10 +584,10 @@ static int cc770_err(struct net_device *dev, u8 status) } } - netif_rx(skb); stats->rx_packets++; stats->rx_bytes += cf->can_dlc; + netif_rx(skb); return 0; } From 889dd06e107d816fe63ff65e5dd3466949878439 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Sat, 11 Jul 2015 21:16:08 +0200 Subject: [PATCH 21/84] can: sja1000: don't touch skb after netif_rx() There is no guarantee that the skb is in the same state after calling net_receive_skb() or netif_rx(). It might be freed or reused. Not really harmful as its a read access, except you turn on the proper debugging options which catch a use after free. Cc: Wolfgang Grandegger Cc: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- drivers/net/can/sja1000/sja1000.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c index 32bd7f451aa4..7b92e911a616 100644 --- a/drivers/net/can/sja1000/sja1000.c +++ b/drivers/net/can/sja1000/sja1000.c @@ -377,10 +377,9 @@ static void sja1000_rx(struct net_device *dev) /* release receive buffer */ sja1000_write_cmdreg(priv, CMD_RRB); - netif_rx(skb); - stats->rx_packets++; stats->rx_bytes += cf->can_dlc; + netif_rx(skb); can_led_event(dev, CAN_LED_EVENT_RX); } @@ -484,10 +483,9 @@ static int sja1000_err(struct net_device *dev, uint8_t isrc, uint8_t status) can_bus_off(dev); } - netif_rx(skb); - stats->rx_packets++; stats->rx_bytes += cf->can_dlc; + netif_rx(skb); return 0; } From 296decb6931fca0bbc0bfb26865837d52e287599 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Sat, 11 Jul 2015 21:16:08 +0200 Subject: [PATCH 22/84] can: esd_usb2: don't touch skb after netif_rx() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no guarantee that the skb is in the same state after calling net_receive_skb() or netif_rx(). It might be freed or reused. Not really harmful as its a read access, except you turn on the proper debugging options which catch a use after free. 
Cc: Thomas Körper Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/esd_usb2.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c index 411c1af92c62..0e5a4493ba4f 100644 --- a/drivers/net/can/usb/esd_usb2.c +++ b/drivers/net/can/usb/esd_usb2.c @@ -301,13 +301,12 @@ static void esd_usb2_rx_event(struct esd_usb2_net_priv *priv, cf->data[7] = rxerr; } - netif_rx(skb); - priv->bec.txerr = txerr; priv->bec.rxerr = rxerr; stats->rx_packets++; stats->rx_bytes += cf->can_dlc; + netif_rx(skb); } } @@ -347,10 +346,9 @@ static void esd_usb2_rx_can_msg(struct esd_usb2_net_priv *priv, cf->data[i] = msg->msg.rx.data[i]; } - netif_rx(skb); - stats->rx_packets++; stats->rx_bytes += cf->can_dlc; + netif_rx(skb); } return; From 43c021e8d6844c4b0f5cc9eda10d232e7b87a456 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Sat, 11 Jul 2015 21:16:08 +0200 Subject: [PATCH 23/84] can: ems_usb: don't touch skb after netif_rx() There is no guarantee that the skb is in the same state after calling net_receive_skb() or netif_rx(). It might be freed or reused. Not really harmful as its a read access, except you turn on the proper debugging options which catch a use after free. Cc: Gerhard Uttenthaler Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/ems_usb.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index 866bac0ae7e9..2d390384ef3b 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -324,10 +324,9 @@ static void ems_usb_rx_can_msg(struct ems_usb *dev, struct ems_cpc_msg *msg) cf->data[i] = msg->msg.can_msg.msg[i]; } - netif_rx(skb); - stats->rx_packets++; stats->rx_bytes += cf->can_dlc; + netif_rx(skb); } static void ems_usb_rx_err(struct ems_usb *dev, struct ems_cpc_msg *msg) @@ -400,10 +399,9 @@ static void ems_usb_rx_err(struct ems_usb *dev, struct ems_cpc_msg *msg) stats->rx_errors++; } - netif_rx(skb); - stats->rx_packets++; stats->rx_bytes += cf->can_dlc; + netif_rx(skb); } /* From 9b721a4cefcbdedadbe72b5ec405046c139cf8ad Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Sat, 11 Jul 2015 21:16:08 +0200 Subject: [PATCH 24/84] can: usb_8dev: don't touch skb after netif_rx() There is no guarantee that the skb is in the same state after calling net_receive_skb() or netif_rx(). It might be freed or reused. Not really harmful as its a read access, except you turn on the proper debugging options which catch a use after free. 
Cc: Bernd Krumboeck Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/usb_8dev.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c index dd52c7a4c80d..de95b1ccba3e 100644 --- a/drivers/net/can/usb/usb_8dev.c +++ b/drivers/net/can/usb/usb_8dev.c @@ -461,10 +461,9 @@ static void usb_8dev_rx_err_msg(struct usb_8dev_priv *priv, priv->bec.txerr = txerr; priv->bec.rxerr = rxerr; - netif_rx(skb); - stats->rx_packets++; stats->rx_bytes += cf->can_dlc; + netif_rx(skb); } /* Read data and status frames */ @@ -494,10 +493,9 @@ static void usb_8dev_rx_can_msg(struct usb_8dev_priv *priv, else memcpy(cf->data, msg->data, cf->can_dlc); - netif_rx(skb); - stats->rx_packets++; stats->rx_bytes += cf->can_dlc; + netif_rx(skb); can_led_event(priv->netdev, CAN_LED_EVENT_RX); } else { From 1c0ee046957648106b415df79038e4e62b144c19 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Sat, 11 Jul 2015 21:16:08 +0200 Subject: [PATCH 25/84] can: pcan_usb: don't touch skb after netif_rx() There is no guarantee that the skb is in the same state after calling netif_receive_skb() or netif_rx(). It might be freed or reused. This is not really harmful, as it is only a read access, unless you turn on the proper debugging options, which catch a use after free. Cc: Stephane Grosjean Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/peak_usb/pcan_usb.c | 7 +++---- drivers/net/can/usb/peak_usb/pcan_usb_pro.c | 4 ++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c index 72427f21edff..6b94007ae052 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb.c @@ -526,9 +526,9 @@ static int pcan_usb_decode_error(struct pcan_usb_msg_context *mc, u8 n, hwts->hwtstamp = timeval_to_ktime(tv); } - netif_rx(skb); mc->netdev->stats.rx_packets++; mc->netdev->stats.rx_bytes += cf->can_dlc; + netif_rx(skb); return 0; } @@ -659,12 +659,11 @@ static int pcan_usb_decode_data(struct pcan_usb_msg_context *mc, u8 status_len) hwts = skb_hwtstamps(skb); hwts->hwtstamp = timeval_to_ktime(tv); - /* push the skb */ - netif_rx(skb); - /* update statistics */ mc->netdev->stats.rx_packets++; mc->netdev->stats.rx_bytes += cf->can_dlc; + /* push the skb */ + netif_rx(skb); return 0; diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_pro.c b/drivers/net/can/usb/peak_usb/pcan_usb_pro.c index dec51717635e..7d61b3279798 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_pro.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_pro.c @@ -553,9 +553,9 @@ static int pcan_usb_pro_handle_canmsg(struct pcan_usb_pro_interface *usb_if, hwts = skb_hwtstamps(skb); hwts->hwtstamp = timeval_to_ktime(tv); - netif_rx(skb); netdev->stats.rx_packets++; netdev->stats.rx_bytes += can_frame->can_dlc; + netif_rx(skb); return 0; } @@ -670,9 +670,9 @@ static int pcan_usb_pro_handle_error(struct pcan_usb_pro_interface *usb_if, peak_usb_get_ts_tv(&usb_if->time_ref, le32_to_cpu(er->ts32), &tv); hwts = skb_hwtstamps(skb); hwts->hwtstamp = timeval_to_ktime(tv); - netif_rx(skb); netdev->stats.rx_packets++; netdev->stats.rx_bytes += can_frame->can_dlc; + netif_rx(skb); return 0; } From 035d210f928ce083435b4fd351a26d126c02c927 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 13 Jul 2015 00:06:02 +0200 Subject: [PATCH 26/84] rtnetlink: reject non-IFLA_VF_PORT attributes inside IFLA_VF_PORTS Similarly as in commit 4f7d2cdfdde7 ("rtnetlink: verify IFLA_VF_INFO attributes before
passing them to driver"), we have a double nesting of netlink attributes, i.e. IFLA_VF_PORTS only contains IFLA_VF_PORT that is nested itself. While IFLA_VF_PORTS is a verified attribute from ifla_policy[], we only check if the IFLA_VF_PORTS container has IFLA_VF_PORT attributes and then pass the attribute's content itself via nla_parse_nested(). It would be more correct to reject inner types other than IFLA_VF_PORT instead of continuing parsing and also similarly as in commit 4f7d2cdfdde7, to check for a minimum of NLA_HDRLEN. Signed-off-by: Daniel Borkmann Cc: Roopa Prabhu Cc: Scott Feldman Cc: Jason Gunthorpe Acked-by: Roopa Prabhu Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9e433d58d265..dc004b1e1f85 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1804,10 +1804,13 @@ static int do_setlink(const struct sk_buff *skb, goto errout; nla_for_each_nested(attr, tb[IFLA_VF_PORTS], rem) { - if (nla_type(attr) != IFLA_VF_PORT) - continue; - err = nla_parse_nested(port, IFLA_PORT_MAX, - attr, ifla_port_policy); + if (nla_type(attr) != IFLA_VF_PORT || + nla_len(attr) < NLA_HDRLEN) { + err = -EINVAL; + goto errout; + } + err = nla_parse_nested(port, IFLA_PORT_MAX, attr, + ifla_port_policy); if (err < 0) goto errout; if (!port[IFLA_PORT_VF]) { From 738ac1ebb96d02e0d23bc320302a6ea94c612dec Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 13 Jul 2015 16:04:13 +0800 Subject: [PATCH 27/84] net: Clone skb before setting peeked flag Shared skbs must not be modified and this is crucial for broadcast and/or multicast paths where we use it as an optimisation to avoid unnecessary cloning. The function skb_recv_datagram breaks this rule by setting peeked without cloning the skb first. This causes funky races which leads to double-free. This patch fixes this by cloning the skb and replacing the skb in the list when setting skb->peeked. Fixes: a59322be07c9 ("[UDP]: Only increment counter on first peek/recv") Reported-by: Konstantin Khlebnikov Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/datagram.c | 41 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 38 insertions(+), 3 deletions(-) diff --git a/net/core/datagram.c b/net/core/datagram.c index b80fb91bb3f7..4e9a3f690b7e 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -131,6 +131,35 @@ out_noerr: goto out; } +static int skb_set_peeked(struct sk_buff *skb) +{ + struct sk_buff *nskb; + + if (skb->peeked) + return 0; + + /* We have to unshare an skb before modifying it. */ + if (!skb_shared(skb)) + goto done; + + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) + return -ENOMEM; + + skb->prev->next = nskb; + skb->next->prev = nskb; + nskb->prev = skb->prev; + nskb->next = skb->next; + + consume_skb(skb); + skb = nskb; + +done: + skb->peeked = 1; + + return 0; +} + /** * __skb_recv_datagram - Receive a datagram skbuff * @sk: socket @@ -165,7 +194,9 @@ out_noerr: struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, int *peeked, int *off, int *err) { + struct sk_buff_head *queue = &sk->sk_receive_queue; struct sk_buff *skb, *last; + unsigned long cpu_flags; long timeo; /* * Caller is allowed not to check sk->sk_err before skb_recv_datagram() @@ -184,8 +215,6 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, * Look at current nfs client by the way... * However, this function was correct in any case. 
8) */ - unsigned long cpu_flags; - struct sk_buff_head *queue = &sk->sk_receive_queue; int _off = *off; last = (struct sk_buff *)queue; @@ -199,7 +228,11 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, _off -= skb->len; continue; } - skb->peeked = 1; + + error = skb_set_peeked(skb); + if (error) + goto unlock_err; + atomic_inc(&skb->users); } else __skb_unlink(skb, queue); @@ -223,6 +256,8 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, return NULL; +unlock_err: + spin_unlock_irqrestore(&queue->lock, cpu_flags); no_packet: *err = error; return NULL; From da278622bf04f8ddb14519a2b8214e108ef26101 Mon Sep 17 00:00:00 2001 From: Richard Stearn Date: Mon, 13 Jul 2015 11:38:24 +0200 Subject: [PATCH 28/84] NET: AX.25: Stop heartbeat timer on disconnect. This may result in a kernel panic. The bug has always existed but somehow we've run out of luck now and it bites. Signed-off-by: Richard Stearn Cc: stable@vger.kernel.org # all branches Signed-off-by: Ralf Baechle Signed-off-by: David S. Miller --- net/ax25/ax25_subr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c index 1997538a5d23..3b78e8473a01 100644 --- a/net/ax25/ax25_subr.c +++ b/net/ax25/ax25_subr.c @@ -264,6 +264,7 @@ void ax25_disconnect(ax25_cb *ax25, int reason) { ax25_clear_queues(ax25); + ax25_stop_heartbeat(ax25); ax25_stop_t1timer(ax25); ax25_stop_t2timer(ax25); ax25_stop_t3timer(ax25); From c9805b9986ed88b7df1b14ea7d538d83f2149cf5 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 13 Jul 2015 08:13:52 -0300 Subject: [PATCH 29/84] Revert "net: fec: Ensure clocks are enabled while using mdio bus" This reverts commit 6c3e921b18edca290099adfddde8a50236bf2d80. commit 6c3e921b18ed ("net: fec: Ensure clocks are enabled while using mdio bus") prevents the kernel to boot on mx6 boards, so let's revert it. Reported-by: Tyler Baker Signed-off-by: Fabio Estevam Signed-off-by: David S. 
Miller --- drivers/net/ethernet/freescale/fec_main.c | 88 ++++------------------- 1 file changed, 13 insertions(+), 75 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 42e20e5385ac..1f89c59b4353 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include @@ -78,7 +77,6 @@ static void fec_enet_itr_coal_init(struct net_device *ndev); #define FEC_ENET_RAEM_V 0x8 #define FEC_ENET_RAFL_V 0x8 #define FEC_ENET_OPD_V 0xFFF0 -#define FEC_MDIO_PM_TIMEOUT 100 /* ms */ static struct platform_device_id fec_devtype[] = { { @@ -1769,13 +1767,7 @@ static void fec_enet_adjust_link(struct net_device *ndev) static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum) { struct fec_enet_private *fep = bus->priv; - struct device *dev = &fep->pdev->dev; unsigned long time_left; - int ret = 0; - - ret = pm_runtime_get_sync(dev); - if (IS_ERR_VALUE(ret)) - return ret; fep->mii_timeout = 0; init_completion(&fep->mdio_done); @@ -1791,30 +1783,18 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum) if (time_left == 0) { fep->mii_timeout = 1; netdev_err(fep->netdev, "MDIO read timeout\n"); - ret = -ETIMEDOUT; - goto out; + return -ETIMEDOUT; } - ret = FEC_MMFR_DATA(readl(fep->hwp + FEC_MII_DATA)); - -out: - pm_runtime_mark_last_busy(dev); - pm_runtime_put_autosuspend(dev); - - return ret; + /* return value */ + return FEC_MMFR_DATA(readl(fep->hwp + FEC_MII_DATA)); } static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum, u16 value) { struct fec_enet_private *fep = bus->priv; - struct device *dev = &fep->pdev->dev; unsigned long time_left; - int ret = 0; - - ret = pm_runtime_get_sync(dev); - if (IS_ERR_VALUE(ret)) - return ret; fep->mii_timeout = 0; init_completion(&fep->mdio_done); @@ -1831,13 +1811,10 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum, if (time_left == 0) { fep->mii_timeout = 1; netdev_err(fep->netdev, "MDIO write timeout\n"); - ret = -ETIMEDOUT; + return -ETIMEDOUT; } - pm_runtime_mark_last_busy(dev); - pm_runtime_put_autosuspend(dev); - - return ret; + return 0; } static int fec_enet_clk_enable(struct net_device *ndev, bool enable) @@ -1849,6 +1826,9 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable) ret = clk_prepare_enable(fep->clk_ahb); if (ret) return ret; + ret = clk_prepare_enable(fep->clk_ipg); + if (ret) + goto failed_clk_ipg; if (fep->clk_enet_out) { ret = clk_prepare_enable(fep->clk_enet_out); if (ret) @@ -1872,6 +1852,7 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable) } } else { clk_disable_unprepare(fep->clk_ahb); + clk_disable_unprepare(fep->clk_ipg); if (fep->clk_enet_out) clk_disable_unprepare(fep->clk_enet_out); if (fep->clk_ptp) { @@ -1893,6 +1874,8 @@ failed_clk_ptp: if (fep->clk_enet_out) clk_disable_unprepare(fep->clk_enet_out); failed_clk_enet_out: + clk_disable_unprepare(fep->clk_ipg); +failed_clk_ipg: clk_disable_unprepare(fep->clk_ahb); return ret; @@ -2864,14 +2847,10 @@ fec_enet_open(struct net_device *ndev) struct fec_enet_private *fep = netdev_priv(ndev); int ret; - ret = pm_runtime_get_sync(&fep->pdev->dev); - if (IS_ERR_VALUE(ret)) - return ret; - pinctrl_pm_select_default_state(&fep->pdev->dev); ret = fec_enet_clk_enable(ndev, true); if (ret) - goto clk_enable; + return ret; /* I should reset the ring buffers here, but I don't yet know * a simple way 
to do that. @@ -2902,9 +2881,6 @@ err_enet_mii_probe: fec_enet_free_buffers(ndev); err_enet_alloc: fec_enet_clk_enable(ndev, false); -clk_enable: - pm_runtime_mark_last_busy(&fep->pdev->dev); - pm_runtime_put_autosuspend(&fep->pdev->dev); pinctrl_pm_select_sleep_state(&fep->pdev->dev); return ret; } @@ -2927,9 +2903,6 @@ fec_enet_close(struct net_device *ndev) fec_enet_clk_enable(ndev, false); pinctrl_pm_select_sleep_state(&fep->pdev->dev); - pm_runtime_mark_last_busy(&fep->pdev->dev); - pm_runtime_put_autosuspend(&fep->pdev->dev); - fec_enet_free_buffers(ndev); return 0; @@ -3415,10 +3388,6 @@ fec_probe(struct platform_device *pdev) if (ret) goto failed_clk; - ret = clk_prepare_enable(fep->clk_ipg); - if (ret) - goto failed_clk_ipg; - fep->reg_phy = devm_regulator_get(&pdev->dev, "phy"); if (!IS_ERR(fep->reg_phy)) { ret = regulator_enable(fep->reg_phy); @@ -3465,8 +3434,6 @@ fec_probe(struct platform_device *pdev) netif_carrier_off(ndev); fec_enet_clk_enable(ndev, false); pinctrl_pm_select_sleep_state(&pdev->dev); - pm_runtime_set_active(&pdev->dev); - pm_runtime_enable(&pdev->dev); ret = register_netdev(ndev); if (ret) @@ -3480,12 +3447,6 @@ fec_probe(struct platform_device *pdev) fep->rx_copybreak = COPYBREAK_DEFAULT; INIT_WORK(&fep->tx_timeout_work, fec_enet_timeout_work); - - pm_runtime_set_autosuspend_delay(&pdev->dev, FEC_MDIO_PM_TIMEOUT); - pm_runtime_use_autosuspend(&pdev->dev); - pm_runtime_mark_last_busy(&pdev->dev); - pm_runtime_put_autosuspend(&pdev->dev); - return 0; failed_register: @@ -3496,8 +3457,6 @@ failed_init: if (fep->reg_phy) regulator_disable(fep->reg_phy); failed_regulator: - clk_disable_unprepare(fep->clk_ipg); -failed_clk_ipg: fec_enet_clk_enable(ndev, false); failed_clk: failed_phy: @@ -3609,28 +3568,7 @@ failed_clk: return ret; } -static int __maybe_unused fec_runtime_suspend(struct device *dev) -{ - struct net_device *ndev = dev_get_drvdata(dev); - struct fec_enet_private *fep = netdev_priv(ndev); - - clk_disable_unprepare(fep->clk_ipg); - - return 0; -} - -static int __maybe_unused fec_runtime_resume(struct device *dev) -{ - struct net_device *ndev = dev_get_drvdata(dev); - struct fec_enet_private *fep = netdev_priv(ndev); - - return clk_prepare_enable(fep->clk_ipg); -} - -static const struct dev_pm_ops fec_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(fec_suspend, fec_resume) - SET_RUNTIME_PM_OPS(fec_runtime_suspend, fec_runtime_resume, NULL) -}; +static SIMPLE_DEV_PM_OPS(fec_pm_ops, fec_suspend, fec_resume); static struct platform_driver fec_driver = { .driver = { From 89c22d8c3b278212eef6a8cc66b570bc840a6f5a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 13 Jul 2015 20:01:42 +0800 Subject: [PATCH 30/84] net: Fix skb csum races when peeking When we calculate the checksum on the recv path, we store the result in the skb as an optimisation in case we need the checksum again down the line. This is in fact bogus for the MSG_PEEK case as this is done without any locking. So multiple threads can peek and then store the result to the same skb, potentially resulting in bogus skb states. This patch fixes this by only storing the result if the skb is not shared. This preserves the optimisations for the few cases where it can be done safely due to locking or other reasons, e.g., SIOCINQ. Signed-off-by: Herbert Xu Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/datagram.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/net/core/datagram.c b/net/core/datagram.c index 4e9a3f690b7e..4967262b2707 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -657,7 +657,8 @@ __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len) !skb->csum_complete_sw) netdev_rx_csum_fault(skb->dev); } - skb->csum_valid = !sum; + if (!skb_shared(skb)) + skb->csum_valid = !sum; return sum; } EXPORT_SYMBOL(__skb_checksum_complete_head); @@ -677,11 +678,13 @@ __sum16 __skb_checksum_complete(struct sk_buff *skb) netdev_rx_csum_fault(skb->dev); } - /* Save full packet checksum */ - skb->csum = csum; - skb->ip_summed = CHECKSUM_COMPLETE; - skb->csum_complete_sw = 1; - skb->csum_valid = !sum; + if (!skb_shared(skb)) { + /* Save full packet checksum */ + skb->csum = csum; + skb->ip_summed = CHECKSUM_COMPLETE; + skb->csum_complete_sw = 1; + skb->csum_valid = !sum; + } return sum; } From bc8c20acaea154efc558f5f4122ed65d396f6156 Mon Sep 17 00:00:00 2001 From: Satish Ashok Date: Mon, 13 Jul 2015 05:28:37 -0700 Subject: [PATCH 31/84] bridge: multicast: treat igmpv3 report with INCLUDE and no sources as a leave A report with INCLUDE/Change_to_include and empty source list should be treated as a leave, specified by RFC 3376, section 3.1: "If the requested filter mode is INCLUDE *and* the requested source list is empty, then the entry corresponding to the requested interface and multicast address is deleted if present. If no such entry is present, the request is ignored." Signed-off-by: Satish Ashok Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 742a6c27d7a2..79db489cdade 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -39,6 +39,16 @@ static void br_multicast_start_querier(struct net_bridge *br, struct bridge_mcast_own_query *query); static void br_multicast_add_router(struct net_bridge *br, struct net_bridge_port *port); +static void br_ip4_multicast_leave_group(struct net_bridge *br, + struct net_bridge_port *port, + __be32 group, + __u16 vid); +#if IS_ENABLED(CONFIG_IPV6) +static void br_ip6_multicast_leave_group(struct net_bridge *br, + struct net_bridge_port *port, + const struct in6_addr *group, + __u16 vid); +#endif unsigned int br_mdb_rehash_seq; static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b) @@ -1010,9 +1020,15 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, continue; } - err = br_ip4_multicast_add_group(br, port, group, vid); - if (err) - break; + if ((type == IGMPV3_CHANGE_TO_INCLUDE || + type == IGMPV3_MODE_IS_INCLUDE) && + ntohs(grec->grec_nsrcs) == 0) { + br_ip4_multicast_leave_group(br, port, group, vid); + } else { + err = br_ip4_multicast_add_group(br, port, group, vid); + if (err) + break; + } } return err; @@ -1071,10 +1087,17 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, continue; } - err = br_ip6_multicast_add_group(br, port, &grec->grec_mca, - vid); - if (err) - break; + if ((grec->grec_type == MLD2_CHANGE_TO_INCLUDE || + grec->grec_type == MLD2_MODE_IS_INCLUDE) && + ntohs(*nsrcs) == 0) { + br_ip6_multicast_leave_group(br, port, &grec->grec_mca, + vid); + } else { + err = br_ip6_multicast_add_group(br, port, + &grec->grec_mca, vid); + if (!err) + break; + } } return err; From 
5ebc784625ea68a9570d1f70557e7932988cd1b4 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 13 Jul 2015 06:36:19 -0700 Subject: [PATCH 32/84] bridge: mdb: fix double add notification Since the mdb add/del code was introduced there have been 2 br_mdb_notify calls when doing br_mdb_add() resulting in 2 notifications on each add. Example: Command: bridge mdb add dev br0 port eth1 grp 239.0.0.1 permanent Before patch: root@debian:~# bridge monitor all [MDB]dev br0 port eth1 grp 239.0.0.1 permanent [MDB]dev br0 port eth1 grp 239.0.0.1 permanent After patch: root@debian:~# bridge monitor all [MDB]dev br0 port eth1 grp 239.0.0.1 permanent Signed-off-by: Nikolay Aleksandrov Fixes: cfd567543590 ("bridge: add support of adding and deleting mdb entries") Signed-off-by: David S. Miller --- net/bridge/br_mdb.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index c11cf2611db0..1198a3dbad95 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -351,7 +351,6 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, if (state == MDB_TEMPORARY) mod_timer(&p->timer, now + br->multicast_membership_interval); - br_mdb_notify(br->dev, port, group, RTM_NEWMDB); return 0; } From f82b681a511f4d61069e9586a9cf97bdef371ef3 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Mon, 13 Jul 2015 12:10:20 -0700 Subject: [PATCH 33/84] tcp: don't use F-RTO on non-recurring timeouts Currently F-RTO may repeatedly send new data packets on non-recurring timeouts in CA_Loss mode. This is a bug because F-RTO (RFC5682) should only be used on either new recovery or recurring timeouts. This exacerbates the recovery progress during frequent timeout & repair, because we prioritize sending new data packets instead of repairing the holes when the bandwidth is already scarce. Fix it by correcting the test of a new recovery episode. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 684f095d196e..728f5b3d3c64 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1917,14 +1917,13 @@ void tcp_enter_loss(struct sock *sk) const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - bool new_recovery = false; + bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery; bool is_reneg; /* is receiver reneging on SACKs? */ /* Reduce ssthresh if it has not yet been made inside this window. */ if (icsk->icsk_ca_state <= TCP_CA_Disorder || !after(tp->high_seq, tp->snd_una) || (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) { - new_recovery = true; tp->prior_ssthresh = tcp_current_ssthresh(sk); tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); tcp_ca_event(sk, CA_EVENT_LOSS); From 052cbda41fdc243a8d40cce7ab3a6327b4b2887e Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 13 Jul 2015 12:30:07 -0700 Subject: [PATCH 34/84] fq_codel: fix a use-after-free Fixes: 25331d6ce42b ("net: sched: implement qstat helper routines") Cc: John Fastabend Signed-off-by: Cong Wang Signed-off-by: Cong Wang Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/sched/sch_fq_codel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index d75993f89fac..06e7c845e24d 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -155,10 +155,10 @@ static unsigned int fq_codel_drop(struct Qdisc *sch) skb = dequeue_head(flow); len = qdisc_pkt_len(skb); q->backlogs[idx] -= len; - kfree_skb(skb); sch->q.qlen--; qdisc_qstats_drop(sch); qdisc_qstats_backlog_dec(sch, skb); + kfree_skb(skb); flow->dropped++; return idx; } From fd98e9419d8d622a4de91f76b306af6aa627aa9c Mon Sep 17 00:00:00 2001 From: Tilman Schmidt Date: Tue, 14 Jul 2015 00:37:13 +0200 Subject: [PATCH 35/84] isdn/gigaset: reset tty->receive_room when attaching ser_gigaset Commit 79901317ce80 ("n_tty: Don't flush buffer when closing ldisc"), first merged in kernel release 3.10, caused the following regression in the Gigaset M101 driver: Before that commit, when closing the N_TTY line discipline in preparation to switching to N_GIGASET_M101, receive_room would be reset to a non-zero value by the call to n_tty_flush_buffer() in n_tty's close method. With the removal of that call, receive_room might be left at zero, blocking data reception on the serial line. The present patch fixes that regression by setting receive_room to an appropriate value in the ldisc open method. Fixes: 79901317ce80 ("n_tty: Don't flush buffer when closing ldisc") Signed-off-by: Tilman Schmidt Signed-off-by: David S. Miller --- drivers/isdn/gigaset/ser-gigaset.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/isdn/gigaset/ser-gigaset.c b/drivers/isdn/gigaset/ser-gigaset.c index 8c91fd5eb6fd..3ac9c4194814 100644 --- a/drivers/isdn/gigaset/ser-gigaset.c +++ b/drivers/isdn/gigaset/ser-gigaset.c @@ -524,9 +524,18 @@ gigaset_tty_open(struct tty_struct *tty) cs->hw.ser->tty = tty; atomic_set(&cs->hw.ser->refcnt, 1); init_completion(&cs->hw.ser->dead_cmp); - tty->disc_data = cs; + /* Set the amount of data we're willing to receive per call + * from the hardware driver to half of the input buffer size + * to leave some reserve. + * Note: We don't do flow control towards the hardware driver. + * If more data is received than will fit into the input buffer, + * it will be dropped and an error will be logged. This should + * never happen as the device is slow and the buffer size ample. + */ + tty->receive_room = RBUFSIZE/2; + /* OK.. Initialization of the datastructures and the HW is done.. Now * startup system and notify the LL that we are ready to run */ From 62c13bad39211387afd4354e8e127a535d13e70d Mon Sep 17 00:00:00 2001 From: Tilman Schmidt Date: Tue, 14 Jul 2015 00:37:13 +0200 Subject: [PATCH 36/84] isdn/gigaset: drop unused ldisc methods The line discipline read and write methods are optional so the dummy methods in ser_gigaset are unnecessary and can be removed. Signed-off-by: Tilman Schmidt Signed-off-by: David S. Miller --- drivers/isdn/gigaset/ser-gigaset.c | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/drivers/isdn/gigaset/ser-gigaset.c b/drivers/isdn/gigaset/ser-gigaset.c index 3ac9c4194814..375be509e95f 100644 --- a/drivers/isdn/gigaset/ser-gigaset.c +++ b/drivers/isdn/gigaset/ser-gigaset.c @@ -606,28 +606,6 @@ static int gigaset_tty_hangup(struct tty_struct *tty) return 0; } -/* - * Read on the tty. - * Unused, received data goes only to the Gigaset driver. 
- */ -static ssize_t -gigaset_tty_read(struct tty_struct *tty, struct file *file, - unsigned char __user *buf, size_t count) -{ - return -EAGAIN; -} - -/* - * Write on the tty. - * Unused, transmit data comes only from the Gigaset driver. - */ -static ssize_t -gigaset_tty_write(struct tty_struct *tty, struct file *file, - const unsigned char *buf, size_t count) -{ - return -EAGAIN; -} - /* * Ioctl on the tty. * Called in process context only. @@ -761,8 +739,6 @@ static struct tty_ldisc_ops gigaset_ldisc = { .open = gigaset_tty_open, .close = gigaset_tty_close, .hangup = gigaset_tty_hangup, - .read = gigaset_tty_read, - .write = gigaset_tty_write, .ioctl = gigaset_tty_ioctl, .receive_buf = gigaset_tty_receive, .write_wakeup = gigaset_tty_wakeup, From 03645a11a570d52e70631838cb786eb4253eb463 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 14 Jul 2015 08:10:22 +0200 Subject: [PATCH 37/84] ipv6: lock socket in ip6_datagram_connect() ip6_datagram_connect() is doing a lot of socket changes without socket being locked. This looks wrong, at least for udp_lib_rehash() which could corrupt lists because of concurrent udp_sk(sk)->udp_portaddr_hash accesses. Signed-off-by: Eric Dumazet Acked-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/ip.h | 1 + net/ipv4/datagram.c | 16 ++++++++++++---- net/ipv6/datagram.c | 20 +++++++++++++++----- 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index 0750a186ea63..d5fe9f2ab699 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -161,6 +161,7 @@ static inline __u8 get_rtconn_flags(struct ipcm_cookie* ipc, struct sock* sk) } /* datagram.c */ +int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); void ip4_datagram_release_cb(struct sock *sk); diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 90c0e8386116..574fad9cca05 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -20,7 +20,7 @@ #include #include -int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct inet_sock *inet = inet_sk(sk); struct sockaddr_in *usin = (struct sockaddr_in *) uaddr; @@ -39,8 +39,6 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) sk_dst_reset(sk); - lock_sock(sk); - oif = sk->sk_bound_dev_if; saddr = inet->inet_saddr; if (ipv4_is_multicast(usin->sin_addr.s_addr)) { @@ -82,9 +80,19 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) sk_dst_set(sk, &rt->dst); err = 0; out: - release_sock(sk); return err; } +EXPORT_SYMBOL(__ip4_datagram_connect); + +int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +{ + int res; + + lock_sock(sk); + res = __ip4_datagram_connect(sk, uaddr, addr_len); + release_sock(sk); + return res; +} EXPORT_SYMBOL(ip4_datagram_connect); /* Because UDP xmit path can manipulate sk_dst_cache without holding diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 62d908e64eeb..b10a88986a98 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -40,7 +40,7 @@ static bool ipv6_mapped_addr_any(const struct in6_addr *a) return ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0); } -int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +static int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct 
sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; struct inet_sock *inet = inet_sk(sk); @@ -56,7 +56,7 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (usin->sin6_family == AF_INET) { if (__ipv6_only_sock(sk)) return -EAFNOSUPPORT; - err = ip4_datagram_connect(sk, uaddr, addr_len); + err = __ip4_datagram_connect(sk, uaddr, addr_len); goto ipv4_connected; } @@ -98,9 +98,9 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) sin.sin_addr.s_addr = daddr->s6_addr32[3]; sin.sin_port = usin->sin6_port; - err = ip4_datagram_connect(sk, - (struct sockaddr *) &sin, - sizeof(sin)); + err = __ip4_datagram_connect(sk, + (struct sockaddr *) &sin, + sizeof(sin)); ipv4_connected: if (err) @@ -204,6 +204,16 @@ out: fl6_sock_release(flowlabel); return err; } + +int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +{ + int res; + + lock_sock(sk); + res = __ip6_datagram_connect(sk, uaddr, addr_len); + release_sock(sk); + return res; +} EXPORT_SYMBOL_GPL(ip6_datagram_connect); int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *uaddr, From 515866f8185b92fb18a782408c53839f003c7669 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 14 Jul 2015 16:35:50 +0300 Subject: [PATCH 38/84] ipvlan: remove counters of ipv4 and ipv6 addresses They are unused after commit f631c44bbe15 ("ipvlan: Always set broadcast bit in multicast filter"). Signed-off-by: Konstantin Khlebnikov Signed-off-by: David S. Miller --- drivers/net/ipvlan/ipvlan.h | 2 -- drivers/net/ipvlan/ipvlan_main.c | 33 ++++++++++++-------------------- 2 files changed, 12 insertions(+), 23 deletions(-) diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index 953a97492fab..68e2549c28c6 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h @@ -67,8 +67,6 @@ struct ipvl_dev { struct ipvl_port *port; struct net_device *phy_dev; struct list_head addrs; - int ipv4cnt; - int ipv6cnt; struct ipvl_pcpu_stats __percpu *pcpu_stats; DECLARE_BITMAP(mac_filters, IPVLAN_MAC_FILTER_SIZE); netdev_features_t sfeatures; diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 1acc283160d9..048ecf0c76fb 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -153,10 +153,9 @@ static int ipvlan_open(struct net_device *dev) else dev->flags &= ~IFF_NOARP; - if (ipvlan->ipv6cnt > 0 || ipvlan->ipv4cnt > 0) { - list_for_each_entry(addr, &ipvlan->addrs, anode) - ipvlan_ht_addr_add(ipvlan, addr); - } + list_for_each_entry(addr, &ipvlan->addrs, anode) + ipvlan_ht_addr_add(ipvlan, addr); + return dev_uc_add(phy_dev, phy_dev->dev_addr); } @@ -171,10 +170,9 @@ static int ipvlan_stop(struct net_device *dev) dev_uc_del(phy_dev, phy_dev->dev_addr); - if (ipvlan->ipv6cnt > 0 || ipvlan->ipv4cnt > 0) { - list_for_each_entry(addr, &ipvlan->addrs, anode) - ipvlan_ht_addr_del(addr, !dev->dismantle); - } + list_for_each_entry(addr, &ipvlan->addrs, anode) + ipvlan_ht_addr_del(addr, !dev->dismantle); + return 0; } @@ -471,8 +469,6 @@ static int ipvlan_link_new(struct net *src_net, struct net_device *dev, ipvlan->port = port; ipvlan->sfeatures = IPVLAN_FEATURES; INIT_LIST_HEAD(&ipvlan->addrs); - ipvlan->ipv4cnt = 0; - ipvlan->ipv6cnt = 0; /* TODO Probably put random address here to be presented to the * world but keep using the physical-dev address for the outgoing @@ -508,12 +504,11 @@ static void ipvlan_link_delete(struct net_device *dev, struct list_head *head) struct ipvl_dev 
*ipvlan = netdev_priv(dev); struct ipvl_addr *addr, *next; - if (ipvlan->ipv6cnt > 0 || ipvlan->ipv4cnt > 0) { - list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) { - ipvlan_ht_addr_del(addr, !dev->dismantle); - list_del(&addr->anode); - } + list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) { + ipvlan_ht_addr_del(addr, !dev->dismantle); + list_del(&addr->anode); } + list_del_rcu(&ipvlan->pnode); unregister_netdevice_queue(dev, head); netdev_upper_dev_unlink(ipvlan->phy_dev, dev); @@ -627,7 +622,7 @@ static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr) memcpy(&addr->ip6addr, ip6_addr, sizeof(struct in6_addr)); addr->atype = IPVL_IPV6; list_add_tail(&addr->anode, &ipvlan->addrs); - ipvlan->ipv6cnt++; + /* If the interface is not up, the address will be added to the hash * list by ipvlan_open. */ @@ -647,8 +642,6 @@ static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr) ipvlan_ht_addr_del(addr, true); list_del(&addr->anode); - ipvlan->ipv6cnt--; - WARN_ON(ipvlan->ipv6cnt < 0); kfree_rcu(addr, rcu); return; @@ -699,7 +692,7 @@ static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr) memcpy(&addr->ip4addr, ip4_addr, sizeof(struct in_addr)); addr->atype = IPVL_IPV4; list_add_tail(&addr->anode, &ipvlan->addrs); - ipvlan->ipv4cnt++; + /* If the interface is not up, the address will be added to the hash * list by ipvlan_open. */ @@ -719,8 +712,6 @@ static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr) ipvlan_ht_addr_del(addr, true); list_del(&addr->anode); - ipvlan->ipv4cnt--; - WARN_ON(ipvlan->ipv4cnt < 0); kfree_rcu(addr, rcu); return; From 6a725497318545aae246232ed05a8df9cffb0a02 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 14 Jul 2015 16:35:51 +0300 Subject: [PATCH 39/84] ipvlan: plug memory leak in ipvlan_link_delete Add missing kfree_rcu(addr, rcu); Signed-off-by: Konstantin Khlebnikov Signed-off-by: David S. Miller --- drivers/net/ipvlan/ipvlan_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 048ecf0c76fb..7d81e37c3f76 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -507,6 +507,7 @@ static void ipvlan_link_delete(struct net_device *dev, struct list_head *head) list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) { ipvlan_ht_addr_del(addr, !dev->dismantle); list_del(&addr->anode); + kfree_rcu(addr, rcu); } list_del_rcu(&ipvlan->pnode); From 6640e673c6f3dbaace085ca2686a8a343dc23a71 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 14 Jul 2015 16:35:53 +0300 Subject: [PATCH 40/84] ipvlan: unhash addresses without synchronize_rcu All structures used in traffic forwarding are rcu-protected: ipvl_addr, ipvl_dev and ipvl_port. Thus we can unhash addresses without synchronization. We'll anyway hash it back into the same bucket: in worst case lockless lookup will scan hash once again. Signed-off-by: Konstantin Khlebnikov Signed-off-by: David S. 
Miller --- drivers/net/ipvlan/ipvlan.h | 2 +- drivers/net/ipvlan/ipvlan_core.c | 4 +--- drivers/net/ipvlan/ipvlan_main.c | 8 ++++---- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index 68e2549c28c6..40f9d7e4a0ea 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h @@ -122,5 +122,5 @@ struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan, bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6); struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port, const void *iaddr, bool is_v6); -void ipvlan_ht_addr_del(struct ipvl_addr *addr, bool sync); +void ipvlan_ht_addr_del(struct ipvl_addr *addr); #endif /* __IPVLAN_H */ diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 8afbedad620d..8f8628a0adba 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -85,11 +85,9 @@ void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr) hlist_add_head_rcu(&addr->hlnode, &port->hlhead[hash]); } -void ipvlan_ht_addr_del(struct ipvl_addr *addr, bool sync) +void ipvlan_ht_addr_del(struct ipvl_addr *addr) { hlist_del_init_rcu(&addr->hlnode); - if (sync) - synchronize_rcu(); } struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan, diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 7d81e37c3f76..e995bc501ee6 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -171,7 +171,7 @@ static int ipvlan_stop(struct net_device *dev) dev_uc_del(phy_dev, phy_dev->dev_addr); list_for_each_entry(addr, &ipvlan->addrs, anode) - ipvlan_ht_addr_del(addr, !dev->dismantle); + ipvlan_ht_addr_del(addr); return 0; } @@ -505,7 +505,7 @@ static void ipvlan_link_delete(struct net_device *dev, struct list_head *head) struct ipvl_addr *addr, *next; list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) { - ipvlan_ht_addr_del(addr, !dev->dismantle); + ipvlan_ht_addr_del(addr); list_del(&addr->anode); kfree_rcu(addr, rcu); } @@ -641,7 +641,7 @@ static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr) if (!addr) return; - ipvlan_ht_addr_del(addr, true); + ipvlan_ht_addr_del(addr); list_del(&addr->anode); kfree_rcu(addr, rcu); @@ -711,7 +711,7 @@ static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr) if (!addr) return; - ipvlan_ht_addr_del(addr, true); + ipvlan_ht_addr_del(addr); list_del(&addr->anode); kfree_rcu(addr, rcu); From 0fba37a3af03a7e74bf9e75473729adb98da49c3 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 14 Jul 2015 16:35:54 +0300 Subject: [PATCH 41/84] ipvlan: use rcu_deference_bh() in ipvlan_queue_xmit() In tx path rcu_read_lock_bh() is held, so we need rcu_deference_bh(). This fixes the following warning: =============================== [ INFO: suspicious RCU usage. ] 4.1.0-rc1+ #1007 Not tainted ------------------------------- drivers/net/ipvlan/ipvlan.h:106 suspicious rcu_dereference_check() usage! 
other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 0 1 lock held by dhclient/1076: #0: (rcu_read_lock_bh){......}, at: [] rcu_lock_acquire+0x0/0x26 stack backtrace: CPU: 2 PID: 1076 Comm: dhclient Not tainted 4.1.0-rc1+ #1007 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 0000000000000001 ffff8800d381bac8 ffffffff81a4154f 000000003c1a3c19 ffff8800d4d0a690 ffff8800d381baf8 ffffffff810b849f ffff880117d41148 ffff880117d40000 ffff880117d40068 0000000000000156 ffff8800d381bb18 Call Trace: [] dump_stack+0x4c/0x65 [] lockdep_rcu_suspicious+0x107/0x110 [] ipvlan_port_get_rcu+0x47/0x4e [] ipvlan_queue_xmit+0x35/0x450 [] ? rcu_read_unlock+0x3e/0x5f [] ? local_clock+0x19/0x22 [] ? __lock_is_held+0x39/0x52 [] ipvlan_start_xmit+0x1b/0x44 [] dev_hard_start_xmit+0x2ae/0x467 [] __dev_queue_xmit+0x50a/0x60c [] dev_queue_xmit_sk+0x13/0x15 [] dev_queue_xmit+0x10/0x12 [] packet_sendmsg+0xb6b/0xbdf [] ? mark_lock+0x2e/0x226 [] ? sched_clock_cpu+0x9e/0xb7 [] sock_sendmsg_nosec+0x12/0x1d [] sock_sendmsg+0x29/0x2e [] sock_write_iter+0x70/0x91 [] __vfs_write+0x7e/0xa7 [] vfs_write+0x92/0xe8 [] SyS_write+0x47/0x7e [] system_call_fastpath+0x12/0x6f Fixes: 2ad7bf363841 ("ipvlan: Initial check-in of the IPVLAN driver.") Cc: Mahesh Bandewar Signed-off-by: Cong Wang Acked-by: Mahesh Bandewar Acked-by: Konstantin Khlebnikov Signed-off-by: David S. Miller --- drivers/net/ipvlan/ipvlan.h | 5 +++++ drivers/net/ipvlan/ipvlan_core.c | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index 40f9d7e4a0ea..9542b7bac61a 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h @@ -104,6 +104,11 @@ static inline struct ipvl_port *ipvlan_port_get_rcu(const struct net_device *d) return rcu_dereference(d->rx_handler_data); } +static inline struct ipvl_port *ipvlan_port_get_rcu_bh(const struct net_device *d) +{ + return rcu_dereference_bh(d->rx_handler_data); +} + static inline struct ipvl_port *ipvlan_port_get_rtnl(const struct net_device *d) { return rtnl_dereference(d->rx_handler_data); diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 8f8628a0adba..207f62e8de9a 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -529,7 +529,7 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev) int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev) { struct ipvl_dev *ipvlan = netdev_priv(dev); - struct ipvl_port *port = ipvlan_port_get_rcu(ipvlan->phy_dev); + struct ipvl_port *port = ipvlan_port_get_rcu_bh(ipvlan->phy_dev); if (!port) goto out; From 23a5a49c83dd8a7201a42e96d24238bde3547c11 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 14 Jul 2015 16:35:55 +0300 Subject: [PATCH 42/84] ipvlan: ignore addresses from ipv6 autoconfiguration Inet6addr notifier is atomic and runs in bh context without RTNL when ipv6 receives router advertisement packet and performs autoconfiguration. Proper fix still in discussion. Let's at least plug the bug. v1: http://lkml.kernel.org/r/20150514135618.14062.1969.stgit@buzz v2: http://lkml.kernel.org/r/20150703125840.24121.91556.stgit@buzz Signed-off-by: Konstantin Khlebnikov Signed-off-by: David S. 
Miller --- drivers/net/ipvlan/ipvlan_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index e995bc501ee6..20b58bdecf75 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -655,6 +655,10 @@ static int ipvlan_addr6_event(struct notifier_block *unused, struct net_device *dev = (struct net_device *)if6->idev->dev; struct ipvl_dev *ipvlan = netdev_priv(dev); + /* FIXME IPv6 autoconf calls us from bh without RTNL */ + if (in_softirq()) + return NOTIFY_DONE; + if (!netif_is_ipvlan(dev)) return NOTIFY_DONE; From e8d092aafd9e68c04d7b468e95ff7a617998a796 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 14 Jul 2015 11:21:57 -0700 Subject: [PATCH 43/84] net_sched: fix a use-after-free in sfq Fixes: 25331d6ce42b ("net: sched: implement qstat helper routines") Cc: John Fastabend Signed-off-by: Cong Wang Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_sfq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 7d1492663360..52f75a5473e1 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -306,10 +306,10 @@ drop: len = qdisc_pkt_len(skb); slot->backlog -= len; sfq_dec(q, x); - kfree_skb(skb); sch->q.qlen--; qdisc_qstats_drop(sch); qdisc_qstats_backlog_dec(sch, skb); + kfree_skb(skb); return len; } From c0afd9ce4d6a646fb6433536f95a418bb348fab1 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 14 Jul 2015 11:21:58 -0700 Subject: [PATCH 44/84] fq_codel: fix return value of fq_codel_drop() The ->drop() is supposed to return the number of bytes it dropped, however fq_codel_drop() returns the index of the flow where it drops a packet from. Fix this by introducing a helper to wrap fq_codel_drop(). Cc: Eric Dumazet Signed-off-by: Cong Wang Signed-off-by: Cong Wang Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_fq_codel.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 06e7c845e24d..21ca33c9f036 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -163,6 +163,15 @@ static unsigned int fq_codel_drop(struct Qdisc *sch) return idx; } +static unsigned int fq_codel_qdisc_drop(struct Qdisc *sch) +{ + unsigned int prev_backlog; + + prev_backlog = sch->qstats.backlog; + fq_codel_drop(sch); + return prev_backlog - sch->qstats.backlog; +} + static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct fq_codel_sched_data *q = qdisc_priv(sch); @@ -604,7 +613,7 @@ static struct Qdisc_ops fq_codel_qdisc_ops __read_mostly = { .enqueue = fq_codel_enqueue, .dequeue = fq_codel_dequeue, .peek = qdisc_peek_dequeued, - .drop = fq_codel_drop, + .drop = fq_codel_qdisc_drop, .init = fq_codel_init, .reset = fq_codel_reset, .destroy = fq_codel_destroy, From ddf06c1e569a64a44c4c750ae45b2604f19e45f0 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 14 Jul 2015 12:15:19 -0700 Subject: [PATCH 45/84] tc: act_bpf: fix memory leak prog->bpf_ops is populated when act_bpf is used with classic BPF and prog->bpf_name is optionally used with extended BPF. Fix memory leak when act_bpf is released. Fixes: d23b8ad8ab23 ("tc: add BPF based action") Fixes: a8cb5f556b56 ("act_bpf: add initial eBPF support for actions") Acked-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- net/sched/act_bpf.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 1d56903fd4c7..1df78289e248 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -339,6 +339,9 @@ static void tcf_bpf_cleanup(struct tc_action *act, int bind) bpf_prog_put(prog->filter); else bpf_prog_destroy(prog->filter); + + kfree(prog->bpf_ops); + kfree(prog->bpf_name); } static struct tc_action_ops act_bpf_ops __read_mostly = { From 25b401c1816ae64bcc5dcb1d39ab41812522a0ce Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Mon, 18 May 2015 18:33:27 +0200 Subject: [PATCH 46/84] can: mcp251x: fix resume when device is down If a valid power regulator or a dummy regulator is used (which happens to be the case when no regulator is specified), restart_work is queued no matter whether the device was running or not at suspend time. Since work queues get initialized in the ndo_open callback, resuming leads to a NULL pointer exception. Reverse exactly the steps executed at suspend time: - Enable the power regulator in any case - Enable the transceiver regulator if the device was running, even in case we have a power regulator - Queue restart_work only in case the device was running Fixes: bf66f3736a94 ("can: mcp251x: Move to threaded interrupts instead of workqueues.") Signed-off-by: Stefan Agner Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251x.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c index c1a95a34d62e..3b2f34e6ba9b 100644 --- a/drivers/net/can/spi/mcp251x.c +++ b/drivers/net/can/spi/mcp251x.c @@ -1222,17 +1222,16 @@ static int __maybe_unused mcp251x_can_resume(struct device *dev) struct spi_device *spi = to_spi_device(dev); struct mcp251x_priv *priv = spi_get_drvdata(spi); - if (priv->after_suspend & AFTER_SUSPEND_POWER) { + if (priv->after_suspend & AFTER_SUSPEND_POWER) mcp251x_power_enable(priv->power, 1); + + if (priv->after_suspend & AFTER_SUSPEND_UP) { + mcp251x_power_enable(priv->transceiver, 1); queue_work(priv->wq, &priv->restart_work); } else { - if (priv->after_suspend & AFTER_SUSPEND_UP) { - mcp251x_power_enable(priv->transceiver, 1); - queue_work(priv->wq, &priv->restart_work); - } else { - priv->after_suspend = 0; - } + priv->after_suspend = 0; } + priv->force_quit = 0; enable_irq(spi->irq); return 0; From 69da3f2ac528642acbd06ed14564ac1b9a918394 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Mon, 18 May 2015 18:33:28 +0200 Subject: [PATCH 47/84] can: mcp251x: get regulators optionally The regulators power and transceiver are optional. If those are not present, the pointer (or error pointer) is correctly handled by the driver, hence we can use devm_regulator_get_optional safely, which avoids regulators getting created. 
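For context on why this matters: devm_regulator_get() substitutes a dummy regulator when no supply is described, whereas devm_regulator_get_optional() returns an error pointer (typically -ENODEV), so every consumer of the pointer has to tolerate that. A minimal sketch of the guard pattern, using an invented helper name rather than the driver's actual code:

#include <linux/err.h>
#include <linux/regulator/consumer.h>

/* Sketch: an optional supply may be an ERR_PTR, so guard every use. */
static int example_power_enable(struct regulator *reg, int enable)
{
	if (IS_ERR_OR_NULL(reg))	/* supply not provided: nothing to do */
		return 0;

	return enable ? regulator_enable(reg) : regulator_disable(reg);
}

This is also why the probe change below only treats -EPROBE_DEFER as fatal and otherwise keeps the (possibly error) pointer around for later checks.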
Signed-off-by: Stefan Agner Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251x.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c index 3b2f34e6ba9b..b7e83c212023 100644 --- a/drivers/net/can/spi/mcp251x.c +++ b/drivers/net/can/spi/mcp251x.c @@ -1086,8 +1086,8 @@ static int mcp251x_can_probe(struct spi_device *spi) if (ret) goto out_clk; - priv->power = devm_regulator_get(&spi->dev, "vdd"); - priv->transceiver = devm_regulator_get(&spi->dev, "xceiver"); + priv->power = devm_regulator_get_optional(&spi->dev, "vdd"); + priv->transceiver = devm_regulator_get_optional(&spi->dev, "xceiver"); if ((PTR_ERR(priv->power) == -EPROBE_DEFER) || (PTR_ERR(priv->transceiver) == -EPROBE_DEFER)) { ret = -EPROBE_DEFER; From e2370f07cf703c0920c99bc24de3d152262738f0 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Wed, 15 Jul 2015 00:56:52 +0300 Subject: [PATCH 48/84] ravb: do not invalidate cache for RX buffer twice First, dma_sync_single_for_cpu() shouldn't have been called in the first place (it's a streaming DMA API), dma_unmap_single() should have been called instead. Second, dma_unmap_single() call after handing the buffer to napi_gro_receive() makes little sense. Moreover desc->dptr might not be valid at this point. Signed-off-by: Sergei Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/ravb_main.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index fd9745714d90..0e5fde321630 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -543,10 +543,9 @@ static bool ravb_rx(struct net_device *ndev, int *quota, int q) skb = priv->rx_skb[q][entry]; priv->rx_skb[q][entry] = NULL; - dma_sync_single_for_cpu(&ndev->dev, - le32_to_cpu(desc->dptr), - ALIGN(PKT_BUF_SZ, 16), - DMA_FROM_DEVICE); + dma_unmap_single(&ndev->dev, le32_to_cpu(desc->dptr), + ALIGN(PKT_BUF_SZ, 16), + DMA_FROM_DEVICE); get_ts &= (q == RAVB_NC) ? RAVB_RXTSTAMP_TYPE_V2_L2_EVENT : ~RAVB_RXTSTAMP_TYPE_V2_L2_EVENT; @@ -584,9 +583,6 @@ static bool ravb_rx(struct net_device *ndev, int *quota, int q) if (!skb) break; /* Better luck next round. 
*/ ravb_set_buffer_align(skb); - dma_unmap_single(&ndev->dev, le32_to_cpu(desc->dptr), - ALIGN(PKT_BUF_SZ, 16), - DMA_FROM_DEVICE); dma_addr = dma_map_single(&ndev->dev, skb->data, le16_to_cpu(desc->ds_cc), DMA_FROM_DEVICE); From 4479004e6409087d1b4986881dc98c6c15dffb28 Mon Sep 17 00:00:00 2001 From: Tom Hughes Date: Mon, 29 Jun 2015 19:41:49 +0100 Subject: [PATCH 49/84] mac80211: clear subdir_stations when removing debugfs If we don't do this, and we then fail to recreate the debugfs directory during a mode change, then we will fail later trying to add stations to this now bogus directory: BUG: unable to handle kernel NULL pointer dereference at 0000006c IP: [] mutex_lock+0x12/0x30 Call Trace: [] start_creating+0x44/0xc0 [] debugfs_create_dir+0x13/0xf0 [] ieee80211_sta_debugfs_add+0x6e/0x490 [mac80211] Cc: stable@kernel.org Signed-off-by: Tom Hughes Signed-off-by: Johannes Berg --- net/mac80211/debugfs_netdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 29236e832e44..c09c0131bfa2 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -723,6 +723,7 @@ void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata) debugfs_remove_recursive(sdata->vif.debugfs_dir); sdata->vif.debugfs_dir = NULL; + sdata->debugfs.subdir_stations = NULL; } void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata) From e9de01907e3d1957591113daa3857c0bf01067ef Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Thu, 2 Jul 2015 09:59:56 +0200 Subject: [PATCH 50/84] mac80211: don't clear all tx flags when requeing When acting as AP and a PS-Poll frame is received associated station is marked as one in a Service Period. This state is kept until Tx status for released frame is reported. While a station is in Service Period PS-Poll frames are ignored. However if PS-Poll was received during A-MPDU teardown it was possible to have the to-be released frame re-queued back to pending queue. In such case the frame was stripped of 2 important flags: (a) IEEE80211_TX_CTL_NO_PS_BUFFER (b) IEEE80211_TX_STATUS_EOSP Stripping of (a) led to the frame that was to be released to be queued back to ps_tx_buf queue. If station remained to use only PS-Poll frames the re-queued frame (and new ones) was never actually transmitted because mac80211 would ignore subsequent PS-Poll frames due to station being in Service Period. There was nothing left to clear the Service Period bit (no xmit -> no tx status -> no SP end), i.e. the AP would have the station stuck in Service Period. Beacon TIM would repeatedly prompt station to poll for frames but it would get none. Once (a) is not stripped (b) becomes important because it's the main condition to clear the Service Period bit of the station when Tx status for the released frame is reported back. This problem was observed with ath9k acting as P2P GO in some testing scenarios but isn't limited to it. AP operation with mac80211 based Tx A-MPDU control combined with clients using PS-Poll frames is subject to this race. 
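The fix below keeps IEEE80211_TX_CTL_NO_PS_BUFFER and IEEE80211_TX_STATUS_EOSP set while still clearing the rest of the temporary flags. Because the expression leans on C operator precedence (~ binds tighter than |, and the whole right-hand side is evaluated before the &=), here is a tiny stand-alone illustration with made-up flag values, not mac80211's real definitions:

#include <assert.h>
#include <stdio.h>

/* Made-up flag values for illustration only (not mac80211's). */
#define KEEP_A    0x01u	/* stands in for IEEE80211_TX_CTL_NO_PS_BUFFER */
#define KEEP_B    0x02u	/* stands in for IEEE80211_TX_STATUS_EOSP */
#define OTHER     0x04u	/* some other temporary flag */
#define TEMPORARY (KEEP_A | KEEP_B | OTHER)

int main(void)
{
	unsigned int flags = TEMPORARY | 0x100u;	/* 0x100: a non-temporary flag */

	/* "~TEMPORARY | KEEP_A | KEEP_B" is a mask with every bit set except
	 * the temporary flags other than the two that must survive the requeue.
	 */
	flags &= ~TEMPORARY | KEEP_A | KEEP_B;

	assert(flags == (KEEP_A | KEEP_B | 0x100u));
	printf("flags = 0x%x\n", flags);	/* prints flags = 0x103 */
	return 0;
}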
Signed-off-by: Michal Kazior Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 8410bb3bf5e8..b8233505bf9f 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1117,7 +1117,9 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx, queued = true; info->control.vif = &tx->sdata->vif; info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; - info->flags &= ~IEEE80211_TX_TEMPORARY_FLAGS; + info->flags &= ~IEEE80211_TX_TEMPORARY_FLAGS | + IEEE80211_TX_CTL_NO_PS_BUFFER | + IEEE80211_TX_STATUS_EOSP; __skb_queue_tail(&tid_tx->pending, skb); if (skb_queue_len(&tid_tx->pending) > STA_MAX_TX_BUFFER) purge_skb = __skb_dequeue(&tid_tx->pending); From 541b6ed7cee1ec7c8f525f51a0ff097776111aeb Mon Sep 17 00:00:00 2001 From: Chaitanya T K Date: Wed, 10 Jun 2015 19:12:31 +0530 Subject: [PATCH 51/84] mac80211: wowlan: enable powersave if suspend while ps-polling If for any reason we're in the middle of PS-polling or awake after TX due to dynamic powersave while going to suspend, go back to save power. This might cause a response frame to get lost, but since we can't really wait for it while going to suspend that's still better than not enabling powersave which would cause higher power usage during (and possibly even after) suspend. Note that this really only affects the very few drivers that use the powersave implementation in mac80211. Signed-off-by: Chaitanya T K [rewrite misleading commit log] Signed-off-by: Johannes Berg --- net/mac80211/pm.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 06b60980c62c..b676b9fa707b 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -76,6 +76,22 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) if (sdata->vif.type != NL80211_IFTYPE_STATION) continue; ieee80211_mgd_quiesce(sdata); + /* If suspended during TX in progress, and wowlan + * is enabled (connection will be active) there + * can be a race where the driver is put out + * of power-save due to TX and during suspend + * dynamic_ps_timer is cancelled and TX packet + * is flushed, leaving the driver in ACTIVE even + * after resuming until dynamic_ps_timer puts + * driver back in DOZE. + */ + if (sdata->u.mgd.associated && + sdata->u.mgd.powersave && + !(local->hw.conf.flags & IEEE80211_CONF_PS)) { + local->hw.conf.flags |= IEEE80211_CONF_PS; + ieee80211_hw_config(local, + IEEE80211_CONF_CHANGE_PS); + } } err = drv_suspend(local, wowlan); From d8d9008cfb919db48d95f96b05e81f84b3774318 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 8 Jul 2015 15:41:50 +0300 Subject: [PATCH 52/84] mac80211: shut down interfaces before destroying interface list If the hardware is unregistered while interfaces are up, mac80211 will unregister all interfaces, which in turns causes mac80211 to be called again to remove them all from the driver and eventually shut down the hardware. During this shutdown, however, it's currently already unsafe to iterate the list of interfaces atomically, as the list is manipulated in an unsafe manner. This puts an undue burden on the driver - it must stop all its activities before calling ieee80211_unregister_hw(), while in the normal stop path it can do all cleanup in the stop method. If, for example, it's using the iteration during RX for some reason, it would have to stop RX before unregistering to avoid crashes. 
Fix this problem by closing all interfaces before unregistering them. This will cause the driver stop to have completed before we manipulate the interface list, and after the driver is stopped *and* has called ieee80211_unregister_hw() it really musn't be iterating any more as the memory will be freed as well. Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index ed1edac14372..553ac6dd4867 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1863,10 +1863,6 @@ void ieee80211_sdata_stop(struct ieee80211_sub_if_data *sdata) ieee80211_teardown_sdata(sdata); } -/* - * Remove all interfaces, may only be called at hardware unregistration - * time because it doesn't do RCU-safe list removals. - */ void ieee80211_remove_interfaces(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata, *tmp; @@ -1875,14 +1871,21 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) ASSERT_RTNL(); - /* - * Close all AP_VLAN interfaces first, as otherwise they - * might be closed while the AP interface they belong to - * is closed, causing unregister_netdevice_many() to crash. + /* Before destroying the interfaces, make sure they're all stopped so + * that the hardware is stopped. Otherwise, the driver might still be + * iterating the interfaces during the shutdown, e.g. from a worker + * or from RX processing or similar, and if it does so (using atomic + * iteration) while we're manipulating the list, the iteration will + * crash. + * + * After this, the hardware should be stopped and the driver should + * have stopped all of its activities, so that we can do RCU-unaware + * manipulations of the interface list below. */ - list_for_each_entry(sdata, &local->interfaces, list) - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) - dev_close(sdata->dev); + cfg80211_shutdown_all_interfaces(local->hw.wiphy); + + WARN(local->open_count, "%s: open count remains %d\n", + wiphy_name(local->hw.wiphy), local->open_count); mutex_lock(&local->iflist_mtx); list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) { From 042ab5fc7a80b934032fcc673a125feb36645b33 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Thu, 9 Jul 2015 15:35:15 +0200 Subject: [PATCH 53/84] wireless: regulatory: reduce log level of CRDA related messages With a basic Linux userspace, the messages "Calling CRDA to update world regulatory domain" appears 10 times after boot every second or so, followed by a final "Exceeded CRDA call max attempts. Not calling CRDA". For those of us not having the corresponding userspace parts, having those messages repeatedly displayed at boot time is a bit annoying, so this commit reduces their log level to pr_debug(). Signed-off-by: Thomas Petazzoni Signed-off-by: Johannes Berg --- net/wireless/reg.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index d359e0610198..29134c81e73c 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -544,15 +544,15 @@ static int call_crda(const char *alpha2) reg_regdb_query(alpha2); if (reg_crda_timeouts > REG_MAX_CRDA_TIMEOUTS) { - pr_info("Exceeded CRDA call max attempts. Not calling CRDA\n"); + pr_debug("Exceeded CRDA call max attempts. 
Not calling CRDA\n"); return -EINVAL; } if (!is_world_regdom((char *) alpha2)) - pr_info("Calling CRDA for country: %c%c\n", + pr_debug("Calling CRDA for country: %c%c\n", alpha2[0], alpha2[1]); else - pr_info("Calling CRDA to update world regulatory domain\n"); + pr_debug("Calling CRDA to update world regulatory domain\n"); return kobject_uevent_env(®_pdev->dev.kobj, KOBJ_CHANGE, env); } From 2ea752cd2ce066c5d8c1807b5310ef329885cecb Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Tue, 14 Jul 2015 08:31:55 -0400 Subject: [PATCH 54/84] mac80211: correct aid location in peering frames According to 802.11-2012 8.5.16.3.2 AID comes directly after the capability bytes in mesh peering confirm frames. The existing code, however, was adding a 2 byte offset to this location, resulting in garbage data going out over the air. Remove the offset to fix it. Signed-off-by: Bob Copeland Signed-off-by: Johannes Berg --- net/mac80211/mesh_plink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 5438d13e2f00..f17127e754c9 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -306,7 +306,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, if (action == WLAN_SP_MESH_PEERING_CONFIRM) { /* AID */ pos = skb_put(skb, 2); - put_unaligned_le16(plid, pos + 2); + put_unaligned_le16(plid, pos); } if (ieee80211_add_srates_ie(sdata, skb, true, band) || ieee80211_add_ext_srates_ie(sdata, skb, true, band) || From b3e7de873df77c1fa0bc2cfaf3eaff757b80e773 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Tue, 14 Jul 2015 08:31:56 -0400 Subject: [PATCH 55/84] mac80211: add missing length check for confirm frames Although mesh_rx_plink_frame() already checks that frames have enough bytes for the action code plus another two bytes for capability/reason code, it doesn't take into account that confirm frames also have an additional two-byte aid. As a result, a corrupt frame could cause a subsequent subtraction to wrap around to ill effect. Add another check for this case. Signed-off-by: Bob Copeland Signed-off-by: Johannes Berg --- net/mac80211/mesh_plink.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index f17127e754c9..3b59099413fb 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -1122,6 +1122,9 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, WLAN_SP_MESH_PEERING_CONFIRM) { baseaddr += 4; baselen += 4; + + if (baselen > len) + return; } ieee802_11_parse_elems(baseaddr, len - baselen, true, &elems); mesh_process_plink_frame(sdata, mgmt, &elems); From 923b352f19d9ea971ae2536eab55f5fc9e95fedf Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Wed, 8 Jul 2015 15:41:44 +0300 Subject: [PATCH 56/84] cfg80211: use RTNL locked reg_can_beacon for IR-relaxation The RTNL is required to check for IR-relaxation conditions that allow more channels to beacon. Export an RTNL locked version of reg_can_beacon and use it where possible in AP/STA interface type flows, where IR-relaxation may be applicable. 
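A minimal caller sketch for the RTNL-locked variant added by this patch (the surrounding function is hypothetical; the helper's signature comes from the include/net/cfg80211.h hunk below):

#include <linux/rtnetlink.h>
#include <net/cfg80211.h>

/* Hypothetical caller deciding whether an AP may start beaconing. */
static bool may_start_ap(struct wiphy *wiphy, struct cfg80211_chan_def *chandef)
{
        bool ok;

        rtnl_lock();
        /* The _relax variant requires the RTNL and may additionally allow
         * beaconing on NO_IR channels when IR-relaxation conditions apply.
         */
        ok = cfg80211_reg_can_beacon_relax(wiphy, chandef, NL80211_IFTYPE_AP);
        rtnl_unlock();

        return ok;
}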
Fixes: 06f207fc5418 ("cfg80211: change GO_CONCURRENT to IR_CONCURRENT for STA") Signed-off-by: Arik Nemtsov Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 17 ++++++++++++++++ net/mac80211/tdls.c | 6 +++--- net/wireless/chan.c | 45 +++++++++++++++++++++++++++++++----------- net/wireless/nl80211.c | 14 +++++++------ net/wireless/reg.c | 2 +- net/wireless/trace.h | 11 +++++++---- 6 files changed, 70 insertions(+), 25 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index a741678f24a2..883fe1e7c5a1 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4868,6 +4868,23 @@ bool cfg80211_reg_can_beacon(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, enum nl80211_iftype iftype); +/** + * cfg80211_reg_can_beacon_relax - check if beaconing is allowed with relaxation + * @wiphy: the wiphy + * @chandef: the channel definition + * @iftype: interface type + * + * Return: %true if there is no secondary channel or the secondary channel(s) + * can be used for beaconing (i.e. is not a radar channel etc.). This version + * also checks if IR-relaxation conditions apply, to allow beaconing under + * more permissive conditions. + * + * Requires the RTNL to be held. + */ +bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + enum nl80211_iftype iftype); + /* * cfg80211_ch_switch_notify - update wdev channel and notify userspace * @dev: the device which switched channels diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index ad31b2dab4f5..8db6e2994bbc 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -60,6 +60,7 @@ ieee80211_tdls_add_subband(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *ch; struct cfg80211_chan_def chandef; int i, subband_start; + struct wiphy *wiphy = sdata->local->hw.wiphy; for (i = start; i <= end; i += spacing) { if (!ch_cnt) @@ -70,9 +71,8 @@ ieee80211_tdls_add_subband(struct ieee80211_sub_if_data *sdata, /* we will be active on the channel */ cfg80211_chandef_create(&chandef, ch, NL80211_CHAN_NO_HT); - if (cfg80211_reg_can_beacon(sdata->local->hw.wiphy, - &chandef, - sdata->wdev.iftype)) { + if (cfg80211_reg_can_beacon_relax(wiphy, &chandef, + sdata->wdev.iftype)) { ch_cnt++; /* * check if the next channel is also part of diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 915b328b9ac5..59cabc9bce69 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -797,23 +797,18 @@ static bool cfg80211_ir_permissive_chan(struct wiphy *wiphy, return false; } -bool cfg80211_reg_can_beacon(struct wiphy *wiphy, - struct cfg80211_chan_def *chandef, - enum nl80211_iftype iftype) +static bool _cfg80211_reg_can_beacon(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + enum nl80211_iftype iftype, + bool check_no_ir) { bool res; u32 prohibited_flags = IEEE80211_CHAN_DISABLED | IEEE80211_CHAN_RADAR; - trace_cfg80211_reg_can_beacon(wiphy, chandef, iftype); + trace_cfg80211_reg_can_beacon(wiphy, chandef, iftype, check_no_ir); - /* - * Under certain conditions suggested by some regulatory bodies a - * GO/STA can IR on channels marked with IEEE80211_NO_IR. Set this flag - * only if such relaxations are not enabled and the conditions are not - * met. 
- */ - if (!cfg80211_ir_permissive_chan(wiphy, iftype, chandef->chan)) + if (check_no_ir) prohibited_flags |= IEEE80211_CHAN_NO_IR; if (cfg80211_chandef_dfs_required(wiphy, chandef, iftype) > 0 && @@ -827,8 +822,36 @@ bool cfg80211_reg_can_beacon(struct wiphy *wiphy, trace_cfg80211_return_bool(res); return res; } + +bool cfg80211_reg_can_beacon(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + enum nl80211_iftype iftype) +{ + return _cfg80211_reg_can_beacon(wiphy, chandef, iftype, true); +} EXPORT_SYMBOL(cfg80211_reg_can_beacon); +bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + enum nl80211_iftype iftype) +{ + bool check_no_ir; + + ASSERT_RTNL(); + + /* + * Under certain conditions suggested by some regulatory bodies a + * GO/STA can IR on channels marked with IEEE80211_NO_IR. Set this flag + * only if such relaxations are not enabled and the conditions are not + * met. + */ + check_no_ir = !cfg80211_ir_permissive_chan(wiphy, iftype, + chandef->chan); + + return _cfg80211_reg_can_beacon(wiphy, chandef, iftype, check_no_ir); +} +EXPORT_SYMBOL(cfg80211_reg_can_beacon_relax); + int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev, struct cfg80211_chan_def *chandef) { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c264effd00a6..76b41578a838 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2003,7 +2003,8 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, switch (iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: - if (!cfg80211_reg_can_beacon(&rdev->wiphy, &chandef, iftype)) { + if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, &chandef, + iftype)) { result = -EINVAL; break; } @@ -3403,8 +3404,8 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) } else if (!nl80211_get_ap_channel(rdev, ¶ms)) return -EINVAL; - if (!cfg80211_reg_can_beacon(&rdev->wiphy, ¶ms.chandef, - wdev->iftype)) + if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, ¶ms.chandef, + wdev->iftype)) return -EINVAL; if (info->attrs[NL80211_ATTR_ACL_POLICY]) { @@ -6492,8 +6493,8 @@ skip_beacons: if (err) return err; - if (!cfg80211_reg_can_beacon(&rdev->wiphy, ¶ms.chandef, - wdev->iftype)) + if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, ¶ms.chandef, + wdev->iftype)) return -EINVAL; err = cfg80211_chandef_dfs_required(wdev->wiphy, @@ -10170,7 +10171,8 @@ static int nl80211_tdls_channel_switch(struct sk_buff *skb, return -EINVAL; /* we will be active on the TDLS link */ - if (!cfg80211_reg_can_beacon(&rdev->wiphy, &chandef, wdev->iftype)) + if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, &chandef, + wdev->iftype)) return -EINVAL; /* don't allow switching to DFS channels */ diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 29134c81e73c..aa2d75482017 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1589,7 +1589,7 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_ADHOC: - return cfg80211_reg_can_beacon(wiphy, &chandef, iftype); + return cfg80211_reg_can_beacon_relax(wiphy, &chandef, iftype); case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: return cfg80211_chandef_usable(wiphy, &chandef, diff --git a/net/wireless/trace.h b/net/wireless/trace.h index af3617c9879e..a808279a432a 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2358,20 +2358,23 @@ TRACE_EVENT(cfg80211_cqm_rssi_notify, 
TRACE_EVENT(cfg80211_reg_can_beacon, TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, - enum nl80211_iftype iftype), - TP_ARGS(wiphy, chandef, iftype), + enum nl80211_iftype iftype, bool check_no_ir), + TP_ARGS(wiphy, chandef, iftype, check_no_ir), TP_STRUCT__entry( WIPHY_ENTRY CHAN_DEF_ENTRY __field(enum nl80211_iftype, iftype) + __field(bool, check_no_ir) ), TP_fast_assign( WIPHY_ASSIGN; CHAN_DEF_ASSIGN(chandef); __entry->iftype = iftype; + __entry->check_no_ir = check_no_ir; ), - TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT ", iftype=%d", - WIPHY_PR_ARG, CHAN_DEF_PR_ARG, __entry->iftype) + TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT ", iftype=%d check_no_ir=%s", + WIPHY_PR_ARG, CHAN_DEF_PR_ARG, __entry->iftype, + BOOL_TO_STR(__entry->check_no_ir)) ); TRACE_EVENT(cfg80211_chandef_dfs_required, From 75993300d008f418ee2569a632185fc1d7d50674 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 15 Jul 2015 15:26:19 +0300 Subject: [PATCH 57/84] virtio_net: don't require ANY_LAYOUT with VERSION_1 ANY_LAYOUT is a compatibility feature. It's implied for VERSION_1 devices, and non-transitional devices might not offer it. Change code to behave accordingly. Signed-off-by: Michael S. Tsirkin Reviewed-by: Paolo Bonzini Reviewed-by: Stefan Hajnoczi Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 63c7810e1545..7fbca37a1adf 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1828,7 +1828,8 @@ static int virtnet_probe(struct virtio_device *vdev) else vi->hdr_len = sizeof(struct virtio_net_hdr); - if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT)) + if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) || + virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) vi->any_header_sg = true; if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) From 40a7166044a6fce3dbcaa8b5c89845980c218c98 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Wed, 15 Jul 2015 10:07:07 -0400 Subject: [PATCH 58/84] net: dsa: mv88e6xxx: fix fid_mask when leaving bridge The mv88e6xxx_priv_state structure contains an fid_mask, where 1 means the FID is free to use, 0 means the FID is in use. This patch fixes the bit clear in mv88e6xxx_leave_bridge() when assigning a new FID to a port. Example scenario: I have 7 ports, port 5 is CPU, port 6 is unused (no PHY). After setting the ports 0, 1 and 2 in bridge br0, and ports 3 and 4 in bridge br1, I have the following fid_mask: 0b111110010110 (0xf96). Indeed, br0 uses FID 0, and br1 uses FID 3. After setting nomaster for port 0, I get the wrong fid_mask: 0b10 (0x2). With this patch we correctly get 0b111110010100 (0xf94), meaning port 0 uses FID 1, br0 uses FID 0, and br1 uses FID 3. Signed-off-by: Vivien Didelot Reviewed-by: Guenter Roeck Signed-off-by: David S. 
Miller --- drivers/net/dsa/mv88e6xxx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index fd8547c2b79d..561342466076 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -1163,7 +1163,7 @@ int mv88e6xxx_leave_bridge(struct dsa_switch *ds, int port, u32 br_port_mask) newfid = __ffs(ps->fid_mask); ps->fid[port] = newfid; - ps->fid_mask &= (1 << newfid); + ps->fid_mask &= ~(1 << newfid); ps->bridge_mask[fid] &= ~(1 << port); ps->bridge_mask[newfid] = 1 << port; From 06f6d1094aa0992432b1e2a0920b0ee86ccd83bf Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 15 Jul 2015 21:52:51 +0200 Subject: [PATCH 59/84] bonding: fix destruction of bond with devices different from arphrd_ether When the bonding is being unloaded and the netdevice notifier is unregistered it executes NETDEV_UNREGISTER for each device which should remove the bond's proc entry but if the device enslaved is not of ARPHRD_ETHER type and is in front of the bonding, it may execute bond_release_and_destroy() first which would release the last slave and destroy the bond device leaving the proc entry and thus we will get the following error (with dynamic debug on for bond_netdev_event to see the events order): [ 908.963051] eql: event: 9 [ 908.963052] eql: IFF_SLAVE [ 908.963054] eql: event: 2 [ 908.963056] eql: IFF_SLAVE [ 908.963058] eql: event: 6 [ 908.963059] eql: IFF_SLAVE [ 908.963110] bond0: Releasing active interface eql [ 908.976168] bond0: Destroying bond bond0 [ 908.976266] bond0 (unregistering): Released all slaves [ 908.984097] ------------[ cut here ]------------ [ 908.984107] WARNING: CPU: 0 PID: 1787 at fs/proc/generic.c:575 remove_proc_entry+0x112/0x160() [ 908.984110] remove_proc_entry: removing non-empty directory 'net/bonding', leaking at least 'bond0' [ 908.984111] Modules linked in: bonding(-) eql(O) 9p nfsd auth_rpcgss oid_registry nfs_acl nfs lockd grace fscache sunrpc crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel ppdev qxl drm_kms_helper snd_hda_codec_generic aesni_intel ttm aes_x86_64 glue_helper pcspkr lrw gf128mul ablk_helper cryptd snd_hda_intel virtio_console snd_hda_codec psmouse serio_raw snd_hwdep snd_hda_core 9pnet_virtio 9pnet evdev joydev drm virtio_balloon snd_pcm snd_timer snd soundcore i2c_piix4 i2c_core pvpanic acpi_cpufreq parport_pc parport processor thermal_sys button autofs4 ext4 crc16 mbcache jbd2 hid_generic usbhid hid sg sr_mod cdrom ata_generic virtio_blk virtio_net floppy ata_piix e1000 libata ehci_pci virtio_pci scsi_mod uhci_hcd ehci_hcd virtio_ring virtio usbcore usb_common [last unloaded: bonding] [ 908.984168] CPU: 0 PID: 1787 Comm: rmmod Tainted: G W O 4.2.0-rc2+ #8 [ 908.984170] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 908.984172] 0000000000000000 ffffffff81732d41 ffffffff81525b34 ffff8800358dfda8 [ 908.984175] ffffffff8106c521 ffff88003595af78 ffff88003595af40 ffff88003e3a4280 [ 908.984178] ffffffffa058d040 0000000000000000 ffffffff8106c59a ffffffff8172ebd0 [ 908.984181] Call Trace: [ 908.984188] [] ? dump_stack+0x40/0x50 [ 908.984193] [] ? warn_slowpath_common+0x81/0xb0 [ 908.984196] [] ? warn_slowpath_fmt+0x4a/0x50 [ 908.984199] [] ? remove_proc_entry+0x112/0x160 [ 908.984205] [] ? bond_destroy_proc_dir+0x26/0x30 [bonding] [ 908.984208] [] ? bond_net_exit+0x8e/0xa0 [bonding] [ 908.984217] [] ? ops_exit_list.isra.4+0x37/0x70 [ 908.984225] [] ? unregister_pernet_operations+0x8d/0xd0 [ 908.984228] [] ? 
unregister_pernet_subsys+0x1d/0x30 [ 908.984232] [] ? bonding_exit+0x23/0xdba [bonding] [ 908.984236] [] ? SyS_delete_module+0x18a/0x250 [ 908.984241] [] ? task_work_run+0x89/0xc0 [ 908.984244] [] ? entry_SYSCALL_64_fastpath+0x16/0x75 [ 908.984247] ---[ end trace 7c006ed4abbef24b ]--- Thus remove the proc entry manually if bond_release_and_destroy() is used. Because of the checks in bond_remove_proc_entry() it's not a problem for a bond device to change namespaces (the bug fixed by the Fixes commit) but since commit f9399814927ad ("bonding: Don't allow bond devices to change network namespaces.") that can't happen anyway. Reported-by: Carol Soto Signed-off-by: Nikolay Aleksandrov Fixes: a64d49c3dd50 ("bonding: Manage /proc/net/bonding/ entries from the netdev events") Tested-by: Carol L Soto Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 317a49480475..ec1404ec4d2f 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1916,6 +1916,7 @@ static int bond_release_and_destroy(struct net_device *bond_dev, bond_dev->priv_flags |= IFF_DISABLE_NETPOLL; netdev_info(bond_dev, "Destroying bond %s\n", bond_dev->name); + bond_remove_proc_entry(bond); unregister_netdevice(bond_dev); } return ret; From 7d5cd2ce5292b45e555de776cb9e72975a07460d Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 15 Jul 2015 22:57:01 +0200 Subject: [PATCH 60/84] bonding: correctly handle bonding type change on enslave failure If the bond is enslaving a device with different type it will be setup by it, but if after being setup the enslave fails the bond doesn't switch back its type and also keeps pointers to foreign structures that can be long gone. Thus revert back any type changes if the enslave failed and the bond had to change its type. Example: Before patch: $ echo lo > bond0/bonding/slaves -bash: echo: write error: Cannot assign requested address $ ip l sh bond0 20: bond0: mtu 1500 qdisc noop state DOWN mode DEFAULT group default link/loopback 16:54:78:34:bd:41 brd 00:00:00:00:00:00 $ echo +eth1 > bond0/bonding/slaves $ ip l sh bond0 20: bond0: mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000 link/ether 52:54:00:3f:47:69 brd ff:ff:ff:ff:ff:ff (notice the MASTER flag is gone) After patch: $ echo lo > bond0/bonding/slaves -bash: echo: write error: Cannot assign requested address $ ip l sh bond0 21: bond0: mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000 link/ether 6e:66:94:f6:07:fc brd ff:ff:ff:ff:ff:ff $ echo +eth1 > bond0/bonding/slaves $ ip l sh bond0 21: bond0: mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000 link/ether 52:54:00:3f:47:69 brd ff:ff:ff:ff:ff:ff Signed-off-by: Nikolay Aleksandrov Fixes: e36b9d16c6a6 ("bonding: clean muticast addresses when device changes type") Signed-off-by: David S. 
Miller --- drivers/net/bonding/bond_main.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index ec1404ec4d2f..1d26d6700c1d 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1725,9 +1725,16 @@ err_free: err_undo_flags: /* Enslave of first slave has failed and we need to fix master's mac */ - if (!bond_has_slaves(bond) && - ether_addr_equal_64bits(bond_dev->dev_addr, slave_dev->dev_addr)) - eth_hw_addr_random(bond_dev); + if (!bond_has_slaves(bond)) { + if (ether_addr_equal_64bits(bond_dev->dev_addr, + slave_dev->dev_addr)) + eth_hw_addr_random(bond_dev); + if (bond_dev->type != ARPHRD_ETHER) { + ether_setup(bond_dev); + bond_dev->flags |= IFF_MASTER; + bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING; + } + } return res; } From b8c6cd1d316f3b01ae578d8e29179f6396c0eaa2 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 15 Jul 2015 16:09:32 -0700 Subject: [PATCH 61/84] net: dsa: bcm_sf2: do not use indirect reads and writes for 7445E0 7445E0 contains an ECO which disconnected the internal SF2 pseudo-PHY which was known to conflict with the external pseudo-PHY of BCM53125 switches. This motivated the need to utilize the internal SF2 MDIO controller via indirect register reads/writes to control external Broadcom switches due to this address conflict (both responded at address 30d). For 7445E0, the internal pseudo-PHY of the SF2 switch got disconnected, and as a consequence this prevents the internal SF2 MDIO bus controller from reading data (reads back everything as 0) since the MDI line is tied low. Fix this by making the indirect register reads and writes conditional to 7445D0, on 7445E0 we can utilize the SWITCH_MDIO controller (backed by mdio-unimac and not the DSA created slave MII bus). We utilize of_machine_is_compatible() here since this is the only way for use to differentiate between these two chips in a way that does not violate layers or becomes (too) vendor-specific. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 972982f8bea7..079897b3a955 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -696,9 +696,20 @@ static int bcm_sf2_sw_setup(struct dsa_switch *ds) } /* Include the pseudo-PHY address and the broadcast PHY address to - * divert reads towards our workaround + * divert reads towards our workaround. This is only required for + * 7445D0, since 7445E0 disconnects the internal switch pseudo-PHY such + * that we can use the regular SWITCH_MDIO master controller instead. + * + * By default, DSA initializes ds->phys_mii_mask to ds->phys_port_mask + * to have a 1:1 mapping between Port address and PHY address in order + * to utilize the slave_mii_bus instance to read from Port PHYs. This is + * not what we want here, so we initialize phys_mii_mask 0 to always + * utilize the "master" MDIO bus backed by the "mdio-unimac" driver. 
*/ - ds->phys_mii_mask |= ((1 << BRCM_PSEUDO_PHY_ADDR) | (1 << 0)); + if (of_machine_is_compatible("brcm,bcm7445d0")) + ds->phys_mii_mask |= ((1 << BRCM_PSEUDO_PHY_ADDR) | (1 << 0)); + else + ds->phys_mii_mask = 0; rev = reg_readl(priv, REG_SWITCH_REVISION); priv->hw_params.top_rev = (rev >> SWITCH_TOP_REV_SHIFT) & From fdbf5b097bbd9693a86c0b8bfdd071a9a2117cfc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 20 Jul 2015 17:55:38 +0800 Subject: [PATCH 62/84] Revert "sit: Add gro callbacks to sit_offload" This patch reverts 19424e052fb44da2f00d1a868cbb51f3e9f4bbb5 ("sit: Add gro callbacks to sit_offload") because it generates packets that cannot be handled even by our own GSO. Reported-by: Wolfgang Walter Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/ip6_offload.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index e893cd18612f..08b62047c67f 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -292,8 +292,6 @@ static struct packet_offload ipv6_packet_offload __read_mostly = { static const struct net_offload sit_offload = { .callbacks = { .gso_segment = ipv6_gso_segment, - .gro_receive = ipv6_gro_receive, - .gro_complete = ipv6_gro_complete, }, }; From a951bc1e6ba58f11df5ed5ddc41311e10f5fd20b Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Thu, 16 Jul 2015 16:30:02 +0800 Subject: [PATCH 63/84] bonding: correct the MAC address for "follow" fail_over_mac policy The "follow" fail_over_mac policy is useful for multiport devices that either become confused or incur a performance penalty when multiple ports are programmed with the same MAC address, but the same MAC address still may happened by this steps for this policy: 1) echo +eth0 > /sys/class/net/bond0/bonding/slaves bond0 has the same mac address with eth0, it is MAC1. 2) echo +eth1 > /sys/class/net/bond0/bonding/slaves eth1 is backup, eth1 has MAC2. 3) ifconfig eth0 down eth1 became active slave, bond will swap MAC for eth0 and eth1, so eth1 has MAC1, and eth0 has MAC2. 4) ifconfig eth1 down there is no active slave, and eth1 still has MAC1, eth2 has MAC2. 5) ifconfig eth0 up the eth0 became active slave again, the bond set eth0 to MAC1. Something wrong here, then if you set eth1 up, the eth0 and eth1 will have the same MAC address, it will break this policy for ACTIVE_BACKUP mode. This patch will fix this problem by finding the old active slave and swap them MAC address before change active slave. Signed-off-by: Ding Tianhong Tested-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- drivers/net/bonding/bond_main.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 1d26d6700c1d..e1ccefce9a9d 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -625,6 +625,23 @@ static void bond_set_dev_addr(struct net_device *bond_dev, call_netdevice_notifiers(NETDEV_CHANGEADDR, bond_dev); } +static struct slave *bond_get_old_active(struct bonding *bond, + struct slave *new_active) +{ + struct slave *slave; + struct list_head *iter; + + bond_for_each_slave(bond, slave, iter) { + if (slave == new_active) + continue; + + if (ether_addr_equal(bond->dev->dev_addr, slave->dev->dev_addr)) + return slave; + } + + return NULL; +} + /* bond_do_fail_over_mac * * Perform special MAC address swapping for fail_over_mac settings @@ -652,6 +669,9 @@ static void bond_do_fail_over_mac(struct bonding *bond, if (!new_active) return; + if (!old_active) + old_active = bond_get_old_active(bond, new_active); + if (old_active) { ether_addr_copy(tmp_mac, new_active->dev->dev_addr); ether_addr_copy(saddr.sa_data, From 194ac06e39238685abc0eeb436efa82e6571b90f Mon Sep 17 00:00:00 2001 From: "Karicheri, Muralidharan" Date: Thu, 16 Jul 2015 15:32:14 -0400 Subject: [PATCH 64/84] net: netcp: fix improper initialization in netcp_ndo_open() The keystone qmss will raise interrupt when packet arrive at the receive queue. Only control available to avoid interrupt from happening is to keep the free descriptor queue (FDQ) empty in the receive side. So the filling of descriptors into the FDQ has to happen after request_irq() call is made as part of knav_queue_enable_notify(). So move the function netcp_rxpool_refill() after this call. Signed-off-by: Murali Karicheri Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/netcp_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index 5ec4ed3f6c8d..ec8ed30196f3 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -1617,11 +1617,11 @@ static int netcp_ndo_open(struct net_device *ndev) } mutex_unlock(&netcp_modules_lock); - netcp_rxpool_refill(netcp); napi_enable(&netcp->rx_napi); napi_enable(&netcp->tx_napi); knav_queue_enable_notify(netcp->tx_compl_q); knav_queue_enable_notify(netcp->rx_queue); + netcp_rxpool_refill(netcp); netif_tx_wake_all_queues(ndev); dev_dbg(netcp->ndev_dev, "netcp device %s opened\n", ndev->name); return 0; From 06613e38f1f2b098d46e9549756c0d5c040f2ef8 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Fri, 17 Jul 2015 00:28:38 +0300 Subject: [PATCH 65/84] ravb: fix race updating TCCR The TCCR.TSRQn bit may get clearead after TCCR gets read, so that TCCR write would get skipped. We don't need to check this bit before setting. Signed-off-by: Sergei Shtylyov Signed-off-by: David S. 
Miller --- drivers/net/ethernet/renesas/ravb_main.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 0e5fde321630..d08c250e843e 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1275,7 +1275,6 @@ static netdev_tx_t ravb_start_xmit(struct sk_buff *skb, struct net_device *ndev) u32 dma_addr; void *buffer; u32 entry; - u32 tccr; spin_lock_irqsave(&priv->lock, flags); if (priv->cur_tx[q] - priv->dirty_tx[q] >= priv->num_tx_ring[q]) { @@ -1324,9 +1323,7 @@ static netdev_tx_t ravb_start_xmit(struct sk_buff *skb, struct net_device *ndev) dma_wmb(); desc->die_dt = DT_FSINGLE; - tccr = ravb_read(ndev, TCCR); - if (!(tccr & (TCCR_TSRQ0 << q))) - ravb_write(ndev, tccr | (TCCR_TSRQ0 << q), TCCR); + ravb_write(ndev, ravb_read(ndev, TCCR) | (TCCR_TSRQ0 << q), TCCR); priv->cur_tx[q]++; if (priv->cur_tx[q] - priv->dirty_tx[q] >= priv->num_tx_ring[q] && From e3426ca7bc2957ee072f61360c2b81b4adb629ad Mon Sep 17 00:00:00 2001 From: Reinhard Speyerer Date: Thu, 16 Jul 2015 23:28:14 +0200 Subject: [PATCH 66/84] qmi_wwan: add the second QMI/network interface for Sierra Wireless MC7305/MC7355 Sierra Wireless MC7305/MC7355 with USB ID 1199:9041 also provide a second QMI/network interface like the MC73xx with USB ID 1199:68c0 on USB interface #10 when used in the appropriate USB configuration. Add the corresponding QMI_FIXED_INTF entry to the qmi_wwan driver. Please note that the second QMI/network interface is not working for early MC73xx firmware versions like 01.08.x as the device does not respond to QMI messages on the second /dev/cdc-wdm port. Signed-off-by: Reinhard Speyerer Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index f603f362504b..9d43460ce3c7 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -757,6 +757,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x1199, 0x901c, 8)}, /* Sierra Wireless EM7700 */ {QMI_FIXED_INTF(0x1199, 0x901f, 8)}, /* Sierra Wireless EM7355 */ {QMI_FIXED_INTF(0x1199, 0x9041, 8)}, /* Sierra Wireless MC7305/MC7355 */ + {QMI_FIXED_INTF(0x1199, 0x9041, 10)}, /* Sierra Wireless MC7305/MC7355 */ {QMI_FIXED_INTF(0x1199, 0x9051, 8)}, /* Netgear AirCard 340U */ {QMI_FIXED_INTF(0x1199, 0x9053, 8)}, /* Sierra Wireless Modem */ {QMI_FIXED_INTF(0x1199, 0x9054, 8)}, /* Sierra Wireless Modem */ From b8a23e8d8e31abeda2f6cfa36a772414b2a86ffc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 17 Jul 2015 10:19:23 +0200 Subject: [PATCH 67/84] caif: fix leaks and race in caif_queue_rcv_skb() 1) If sk_filter() is applied, skb was leaked (not freed) 2) Testing SOCK_DEAD twice is racy : packet could be freed while already queued. 3) Remove obsolete comment about caching skb->len Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/caif/caif_socket.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 3cc71b9f5517..cc858919108e 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -121,12 +121,13 @@ static void caif_flow_ctrl(struct sock *sk, int mode) * Copied from sock.c:sock_queue_rcv_skb(), but changed so packets are * not dropped, but CAIF is sending flow off instead. 
*/ -static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +static void caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int err; unsigned long flags; struct sk_buff_head *list = &sk->sk_receive_queue; struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); + bool queued = false; if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= (unsigned int)sk->sk_rcvbuf && rx_flow_is_on(cf_sk)) { @@ -139,7 +140,8 @@ static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) err = sk_filter(sk, skb); if (err) - return err; + goto out; + if (!sk_rmem_schedule(sk, skb, skb->truesize) && rx_flow_is_on(cf_sk)) { set_rx_flow_off(cf_sk); net_dbg_ratelimited("sending flow OFF due to rmem_schedule\n"); @@ -147,21 +149,16 @@ static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) } skb->dev = NULL; skb_set_owner_r(skb, sk); - /* Cache the SKB length before we tack it onto the receive - * queue. Once it is added it no longer belongs to us and - * may be freed by other threads of control pulling packets - * from the queue. - */ spin_lock_irqsave(&list->lock, flags); - if (!sock_flag(sk, SOCK_DEAD)) + queued = !sock_flag(sk, SOCK_DEAD); + if (queued) __skb_queue_tail(list, skb); spin_unlock_irqrestore(&list->lock, flags); - - if (!sock_flag(sk, SOCK_DEAD)) +out: + if (queued) sk->sk_data_ready(sk); else kfree_skb(skb); - return 0; } /* Packet Receive Callback function called from CAIF Stack */ From 8bf4ada2e21378816b28205427ee6b0e1ca4c5f1 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Fri, 17 Jul 2015 14:01:11 +0300 Subject: [PATCH 68/84] net: ratelimit warnings about dst entry refcount underflow or overflow Kernel generates a lot of warnings when dst entry reference counter overflows and becomes negative. That bug was seen several times at machines with outdated 3.10.y kernels. Most like it's already fixed in upstream. Anyway that flood completely kills machine and makes further debugging impossible. Signed-off-by: Konstantin Khlebnikov Signed-off-by: David S. Miller --- net/core/dst.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/core/dst.c b/net/core/dst.c index e956ce6d1378..002144bea935 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -284,7 +284,9 @@ void dst_release(struct dst_entry *dst) int newrefcnt; newrefcnt = atomic_dec_return(&dst->__refcnt); - WARN_ON(newrefcnt < 0); + if (unlikely(newrefcnt < 0)) + net_warn_ratelimited("%s: dst:%p refcnt:%d\n", + __func__, dst, newrefcnt); if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) call_rcu(&dst->rcu_head, dst_destroy_rcu); } From e0536cd910d5bc98300d7830f1d9317df9ab8afa Mon Sep 17 00:00:00 2001 From: Shaohui Xie Date: Fri, 17 Jul 2015 18:07:19 +0800 Subject: [PATCH 69/84] net/mdio: fix mdio_bus_match for c45 PHY We store c45 PHY's id information in c45_ids, so it should be used to check the matching between PHY driver and PHY device for c45 PHY. Signed-off-by: Shaohui Xie Signed-off-by: David S. 
Miller --- drivers/net/phy/mdio_bus.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 095ef3fe369a..46a14cbb0215 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -421,6 +421,8 @@ static int mdio_bus_match(struct device *dev, struct device_driver *drv) { struct phy_device *phydev = to_phy_device(dev); struct phy_driver *phydrv = to_phy_driver(drv); + const int num_ids = ARRAY_SIZE(phydev->c45_ids.device_ids); + int i; if (of_driver_match_device(dev, drv)) return 1; @@ -428,8 +430,21 @@ static int mdio_bus_match(struct device *dev, struct device_driver *drv) if (phydrv->match_phy_device) return phydrv->match_phy_device(phydev); - return (phydrv->phy_id & phydrv->phy_id_mask) == - (phydev->phy_id & phydrv->phy_id_mask); + if (phydev->is_c45) { + for (i = 1; i < num_ids; i++) { + if (!(phydev->c45_ids.devices_in_package & (1 << i))) + continue; + + if ((phydrv->phy_id & phydrv->phy_id_mask) == + (phydev->c45_ids.device_ids[i] & + phydrv->phy_id_mask)) + return 1; + } + return 0; + } else { + return (phydrv->phy_id & phydrv->phy_id_mask) == + (phydev->phy_id & phydrv->phy_id_mask); + } } #ifdef CONFIG_PM From f6bfc46da6292b630ba389592123f0dd02066172 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 17 Jul 2015 22:38:43 +0200 Subject: [PATCH 70/84] sched: cls_bpf: fix panic on filter replace The following test case causes a NULL pointer dereference in cls_bpf: FOO="1,6 0 0 4294967295," tc filter add dev foo parent 1: bpf bytecode "$FOO" flowid 1:1 action ok tc filter replace dev foo parent 1: pref 49152 handle 0x1 \ bpf bytecode "$FOO" flowid 1:1 action drop The problem is that commit 1f947bf151e9 ("net: sched: rcu'ify cls_bpf") accidentally swapped the arguments of list_replace_rcu(), the old element needs to be the first argument and the new element the second. Fixes: 1f947bf151e9 ("net: sched: rcu'ify cls_bpf") Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/sched/cls_bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index c79ecfd36e0f..e5168f8b9640 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -378,7 +378,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, goto errout; if (oldprog) { - list_replace_rcu(&prog->link, &oldprog->link); + list_replace_rcu(&oldprog->link, &prog->link); tcf_unbind_filter(tp, &oldprog->res); call_rcu(&oldprog->rcu, __cls_bpf_delete_prog); } else { From ff3532f2655b79058fec035ca54fced69a483084 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 17 Jul 2015 22:38:44 +0200 Subject: [PATCH 71/84] sched: cls_flower: fix panic on filter replace The following test case causes a NULL pointer dereference in cls_flower: tc filter add dev foo parent 1: flower eth_type ipv4 action ok flowid 1:1 tc filter replace dev foo parent 1: pref 49152 handle 0x1 \ flower eth_type ipv6 action ok flowid 1:1 The problem is that commit 77b9900ef53a ("tc: introduce Flower classifier") accidentally swapped the arguments of list_replace_rcu(), the old element needs to be the first argument and the new element the second. Fixes: 77b9900ef53a ("tc: introduce Flower classifier") Signed-off-by: Daniel Borkmann Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- net/sched/cls_flower.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 9d37ccd95062..2f3d03f99487 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -499,7 +499,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, *arg = (unsigned long) fnew; if (fold) { - list_replace_rcu(&fnew->list, &fold->list); + list_replace_rcu(&fold->list, &fnew->list); tcf_unbind_filter(tp, &fold->res); call_rcu(&fold->rcu, fl_destroy_filter); } else { From 32b2f4b196b37695fdb42b31afcbc15399d6ef91 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 17 Jul 2015 22:38:45 +0200 Subject: [PATCH 72/84] sched: cls_flow: fix panic on filter replace The following test case causes a NULL pointer dereference in cls_flow: tc filter add dev foo parent 1: handle 0x1 flow hash keys dst action ok tc filter replace dev foo parent 1: pref 49152 handle 0x1 \ flow hash keys mark action drop To be more precise, actually two different panics are fixed, the first occurs because tcf_exts_init() is not called on the newly allocated filter when we do a replace. And the second panic uncovered after that happens since the arguments of list_replace_rcu() are swapped, the old element needs to be the first argument and the new element the second. Fixes: 70da9f0bf999 ("net: sched: cls_flow use RCU") Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Signed-off-by: David S. Miller --- net/sched/cls_flow.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 76bc3a20ffdb..bb2a0f529c1f 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -425,6 +425,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, if (!fnew) goto err2; + tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE); + fold = (struct flow_filter *)*arg; if (fold) { err = -EINVAL; @@ -486,7 +488,6 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, fnew->mask = ~0U; fnew->tp = tp; get_random_bytes(&fnew->hashrnd, 4); - tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE); } fnew->perturb_timer.function = flow_perturbation; @@ -526,7 +527,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, if (*arg == 0) list_add_tail_rcu(&fnew->list, &head->filters); else - list_replace_rcu(&fnew->list, &fold->list); + list_replace_rcu(&fold->list, &fnew->list); *arg = (unsigned long)fnew; From a7a6268590bdc6760df52570a1df41654e3096ba Mon Sep 17 00:00:00 2001 From: Joachim Eastwood Date: Fri, 17 Jul 2015 23:48:17 +0200 Subject: [PATCH 73/84] stmmac: fix setting of driver data in stmmac_dvr_probe Commit 803f8fc46274b ("stmmac: move driver data setting into stmmac_dvr_probe") mistakenly set priv and not priv->dev as driver data. This meant that the remove, resume and suspend callbacks that fetched and tried to use this data would most likely explode. Fix the issue by using the correct variable. Fixes: 803f8fc46274b ("stmmac: move driver data setting into stmmac_dvr_probe") Signed-off-by: Joachim Eastwood Signed-off-by: David S. 
Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 50f7a7a26821..864b476f7fd5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2843,7 +2843,7 @@ int stmmac_dvr_probe(struct device *device, if (res->mac) memcpy(priv->dev->dev_addr, res->mac, ETH_ALEN); - dev_set_drvdata(device, priv); + dev_set_drvdata(device, priv->dev); /* Verify driver arguments */ stmmac_verify_args(); From a84e32894191cfcbffa54180d78d7d4654d56c20 Mon Sep 17 00:00:00 2001 From: Simon Guinot Date: Sun, 19 Jul 2015 13:00:53 +0200 Subject: [PATCH 74/84] net: mvneta: fix refilling for Rx DMA buffers With the actual code, if a memory allocation error happens while refilling a Rx descriptor, then the original Rx buffer is both passed to the networking stack (in a SKB) and let in the Rx ring. This leads to various kernel oops and crashes. As a fix, this patch moves Rx descriptor refilling ahead of building SKB with the associated Rx buffer. In case of a memory allocation failure, data is dropped and the original DMA buffer is put back into the Rx ring. Signed-off-by: Simon Guinot Fixes: c5aff18204da ("net: mvneta: driver for Marvell Armada 370/XP network unit") Cc: # v3.8+ Tested-by: Yoann Sculo Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 370e20ed224c..62e48bc0cb23 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -1462,7 +1462,7 @@ static int mvneta_rx(struct mvneta_port *pp, int rx_todo, struct mvneta_rx_queue *rxq) { struct net_device *dev = pp->dev; - int rx_done, rx_filled; + int rx_done; u32 rcvd_pkts = 0; u32 rcvd_bytes = 0; @@ -1473,7 +1473,6 @@ static int mvneta_rx(struct mvneta_port *pp, int rx_todo, rx_todo = rx_done; rx_done = 0; - rx_filled = 0; /* Fairness NAPI loop */ while (rx_done < rx_todo) { @@ -1484,7 +1483,6 @@ static int mvneta_rx(struct mvneta_port *pp, int rx_todo, int rx_bytes, err; rx_done++; - rx_filled++; rx_status = rx_desc->status; rx_bytes = rx_desc->data_size - (ETH_FCS_LEN + MVNETA_MH_SIZE); data = (unsigned char *)rx_desc->buf_cookie; @@ -1524,6 +1522,14 @@ static int mvneta_rx(struct mvneta_port *pp, int rx_todo, continue; } + /* Refill processing */ + err = mvneta_rx_refill(pp, rx_desc); + if (err) { + netdev_err(dev, "Linux processing - Can't refill\n"); + rxq->missed++; + goto err_drop_frame; + } + skb = build_skb(data, pp->frag_size > PAGE_SIZE ? 
0 : pp->frag_size); if (!skb) goto err_drop_frame; @@ -1543,14 +1549,6 @@ static int mvneta_rx(struct mvneta_port *pp, int rx_todo, mvneta_rx_csum(pp, rx_status, skb); napi_gro_receive(&pp->napi, skb); - - /* Refill processing */ - err = mvneta_rx_refill(pp, rx_desc); - if (err) { - netdev_err(dev, "Linux processing - Can't refill\n"); - rxq->missed++; - rx_filled--; - } } if (rcvd_pkts) { @@ -1563,7 +1561,7 @@ static int mvneta_rx(struct mvneta_port *pp, int rx_todo, } /* Update rxq management counters */ - mvneta_rxq_desc_num_update(pp, rxq, rx_done, rx_filled); + mvneta_rxq_desc_num_update(pp, rxq, rx_done, rx_done); return rx_done; } From 0848f6428ba3a2e42db124d41ac6f548655735bf Mon Sep 17 00:00:00 2001 From: Edward Hyunkoo Jee Date: Tue, 21 Jul 2015 09:43:59 +0200 Subject: [PATCH 75/84] inet: frags: fix defragmented packet's IP header for af_packet When ip_frag_queue() computes positions, it assumes that the passed sk_buff does not contain L2 headers. However, when PACKET_FANOUT_FLAG_DEFRAG is used, IP reassembly functions can be called on outgoing packets that contain L2 headers. Also, IPv4 checksum is not corrected after reassembly. Fixes: 7736d33f4262 ("packet: Add pre-defragmentation support for ipv4 fanouts.") Signed-off-by: Edward Hyunkoo Jee Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Cc: Jerry Chu Signed-off-by: David S. Miller --- net/ipv4/ip_fragment.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index a50dc6d408d1..31f71b15cfba 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -351,7 +351,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) ihl = ip_hdrlen(skb); /* Determine the position of this fragment. */ - end = offset + skb->len - ihl; + end = offset + skb->len - skb_network_offset(skb) - ihl; err = -EINVAL; /* Is this the final fragment? */ @@ -381,7 +381,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) goto err; err = -ENOMEM; - if (!pskb_pull(skb, ihl)) + if (!pskb_pull(skb, skb_network_offset(skb) + ihl)) goto err; err = pskb_trim_rcsum(skb, end - offset); @@ -641,6 +641,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, iph->frag_off = 0; } + ip_send_check(iph); + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); qp->q.fragments = NULL; qp->q.fragments_tail = NULL; From 1e353cddcf81fbb79dd637cab9a22c837e94c205 Mon Sep 17 00:00:00 2001 From: Mugunthan V N Date: Tue, 21 Jul 2015 16:00:42 +0530 Subject: [PATCH 76/84] drivers: net: cpsw: remove tx event processing in rx napi poll With commit c03abd84634d ("net: ethernet: cpsw: don't requests IRQs we don't use") common isr and napi are separated into separate tx isr and rx isr/napi, but still in rx napi tx events are handled. So removing the tx event handling in rx napi. Signed-off-by: Mugunthan V N Signed-off-by: David S. 
Miller --- drivers/net/ethernet/ti/cpsw.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index f335bf119ab5..d155bf2573cd 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -793,9 +793,7 @@ static irqreturn_t cpsw_rx_interrupt(int irq, void *dev_id) static int cpsw_poll(struct napi_struct *napi, int budget) { struct cpsw_priv *priv = napi_to_priv(napi); - int num_tx, num_rx; - - num_tx = cpdma_chan_process(priv->txch, 128); + int num_rx; num_rx = cpdma_chan_process(priv->rxch, budget); if (num_rx < budget) { @@ -810,9 +808,8 @@ static int cpsw_poll(struct napi_struct *napi, int budget) } } - if (num_rx || num_tx) - cpsw_dbg(priv, intr, "poll %d rx, %d tx pkts\n", - num_rx, num_tx); + if (num_rx) + cpsw_dbg(priv, intr, "poll %d rx pkts\n", num_rx); return num_rx; } From 89e478a2aa58af2548b7f316e4d5b6bcc9eade5b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 Jul 2015 07:02:00 +0200 Subject: [PATCH 77/84] tcp: suppress a division by zero warning Andrew Morton reported following warning on one ARM build with gcc-4.4 : net/ipv4/inet_hashtables.c: In function 'inet_ehash_locks_alloc': net/ipv4/inet_hashtables.c:617: warning: division by zero Even guarded with a test on sizeof(spinlock_t), compiler does not like current construct on a !CONFIG_SMP build. Remove the warning by using a temporary variable. Fixes: 095dc8e0c368 ("tcp: fix/cleanup inet_ehash_locks_alloc()") Reported-by: Andrew Morton Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inet_hashtables.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 5f9b063bbe8a..0cb9165421d4 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -624,22 +624,21 @@ EXPORT_SYMBOL_GPL(inet_hashinfo_init); int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo) { + unsigned int locksz = sizeof(spinlock_t); unsigned int i, nblocks = 1; - if (sizeof(spinlock_t) != 0) { + if (locksz != 0) { /* allocate 2 cache lines or at least one spinlock per cpu */ - nblocks = max_t(unsigned int, - 2 * L1_CACHE_BYTES / sizeof(spinlock_t), - 1); + nblocks = max(2U * L1_CACHE_BYTES / locksz, 1U); nblocks = roundup_pow_of_two(nblocks * num_possible_cpus()); /* no more locks than number of hash buckets */ nblocks = min(nblocks, hashinfo->ehash_mask + 1); - hashinfo->ehash_locks = kmalloc_array(nblocks, sizeof(spinlock_t), + hashinfo->ehash_locks = kmalloc_array(nblocks, locksz, GFP_KERNEL | __GFP_NOWARN); if (!hashinfo->ehash_locks) - hashinfo->ehash_locks = vmalloc(nblocks * sizeof(spinlock_t)); + hashinfo->ehash_locks = vmalloc(nblocks * locksz); if (!hashinfo->ehash_locks) return -ENOMEM; From 7aed35cb65348fc8b9ce0c2394ff675e5fc750da Mon Sep 17 00:00:00 2001 From: Nicolas Schichan Date: Tue, 21 Jul 2015 14:14:12 +0200 Subject: [PATCH 78/84] ARM: net: fix condition for load_order > 0 when translating load instructions. To check whether the load should take the fast path or not, the code would check that (r_skb_hlen - load_order) is greater than the offset of the access using an "Unsigned higher or same" condition. For halfword accesses and an skb length of 1 at offset 0, that test is valid, as we end up comparing 0xffffffff(-1) and 0, so the fast path is taken and the filter allows the load to wrongly succeed. A similar issue exists for word loads at offset 0 and an skb length of less than 4. 
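The wrap-around can be reproduced with plain C arithmetic (stand-alone demo, not the JIT code; HS and GE mirror the ARM "unsigned higher or same" and "signed greater than or equal" conditions):

#include <stdio.h>

int main(void)
{
        unsigned int skb_hlen = 1;     /* header length of 1 byte */
        unsigned int off = 0;          /* load at offset 0 */
        unsigned int load_order = 1;   /* halfword load: 1 << 1 == 2 bytes */

        unsigned int limit = skb_hlen - (1u << load_order); /* wraps to 0xffffffff */

        /* HS ("unsigned higher or same"): 0xffffffff >= 0, fast path wrongly taken. */
        int fast_hs = limit >= off;

        /* GE ("signed greater than or equal"): -1 >= 0 is false, slow path taken. */
        int fast_ge = (int)limit >= (int)off;

        printf("HS fast path: %d, GE fast path: %d\n", fast_hs, fast_ge); /* 1, 0 */
        return 0;
}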
Fix that by using the condition "Signed greater than or equal" condition for the fast path code for load orders greater than 0. Signed-off-by: Nicolas Schichan Signed-off-by: David S. Miller --- arch/arm/net/bpf_jit_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 4550d247e308..21f5ace156fd 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -547,7 +547,7 @@ load_common: emit(ARM_SUB_I(r_scratch, r_skb_hl, 1 << load_order), ctx); emit(ARM_CMP_R(r_scratch, r_off), ctx); - condt = ARM_COND_HS; + condt = ARM_COND_GE; } else { emit(ARM_CMP_R(r_skb_hl, r_off), ctx); condt = ARM_COND_HI; From 6d715e301e950e3314d590bdbabf0c26e4fed94b Mon Sep 17 00:00:00 2001 From: Nicolas Schichan Date: Tue, 21 Jul 2015 14:14:13 +0200 Subject: [PATCH 79/84] ARM: net: handle negative offsets in BPF JIT. Previously, the JIT would reject negative offsets known during code generation and mishandle negative offsets provided at runtime. Fix that by calling bpf_internal_load_pointer_neg_helper() appropriately in the jit_get_skb_{b,h,w} slow path helpers and by forcing the execution flow to the slow path helpers when the offset is negative. Signed-off-by: Nicolas Schichan Signed-off-by: David S. Miller --- arch/arm/net/bpf_jit_32.c | 47 +++++++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 21f5ace156fd..d9b25242f743 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -74,32 +74,52 @@ struct jit_ctx { int bpf_jit_enable __read_mostly; -static u64 jit_get_skb_b(struct sk_buff *skb, unsigned offset) +static inline int call_neg_helper(struct sk_buff *skb, int offset, void *ret, + unsigned int size) +{ + void *ptr = bpf_internal_load_pointer_neg_helper(skb, offset, size); + + if (!ptr) + return -EFAULT; + memcpy(ret, ptr, size); + return 0; +} + +static u64 jit_get_skb_b(struct sk_buff *skb, int offset) { u8 ret; int err; - err = skb_copy_bits(skb, offset, &ret, 1); + if (offset < 0) + err = call_neg_helper(skb, offset, &ret, 1); + else + err = skb_copy_bits(skb, offset, &ret, 1); return (u64)err << 32 | ret; } -static u64 jit_get_skb_h(struct sk_buff *skb, unsigned offset) +static u64 jit_get_skb_h(struct sk_buff *skb, int offset) { u16 ret; int err; - err = skb_copy_bits(skb, offset, &ret, 2); + if (offset < 0) + err = call_neg_helper(skb, offset, &ret, 2); + else + err = skb_copy_bits(skb, offset, &ret, 2); return (u64)err << 32 | ntohs(ret); } -static u64 jit_get_skb_w(struct sk_buff *skb, unsigned offset) +static u64 jit_get_skb_w(struct sk_buff *skb, int offset) { u32 ret; int err; - err = skb_copy_bits(skb, offset, &ret, 4); + if (offset < 0) + err = call_neg_helper(skb, offset, &ret, 4); + else + err = skb_copy_bits(skb, offset, &ret, 4); return (u64)err << 32 | ntohl(ret); } @@ -536,9 +556,6 @@ static int build_body(struct jit_ctx *ctx) case BPF_LD | BPF_B | BPF_ABS: load_order = 0; load: - /* the interpreter will deal with the negative K */ - if ((int)k < 0) - return -ENOTSUPP; emit_mov_i(r_off, k, ctx); load_common: ctx->seen |= SEEN_DATA | SEEN_CALL; @@ -553,6 +570,18 @@ load_common: condt = ARM_COND_HI; } + /* + * test for negative offset, only if we are + * currently scheduled to take the fast + * path. this will update the flags so that + * the slowpath instruction are ignored if the + * offset is negative. 
+ * + * for load_order == 0 the HI condition will + * make loads at offset 0 take the slow path too. + */ + _emit(condt, ARM_CMP_I(r_off, 0), ctx); + _emit(condt, ARM_ADD_R(r_scratch, r_off, r_skb_data), ctx); From c18fe54b3f8d7daa6a43270f82676d6a563582a4 Mon Sep 17 00:00:00 2001 From: Nicolas Schichan Date: Tue, 21 Jul 2015 14:14:14 +0200 Subject: [PATCH 80/84] ARM: net: fix vlan access instructions in ARM JIT. This makes BPF_ANC | SKF_AD_VLAN_TAG and BPF_ANC | SKF_AD_VLAN_TAG_PRESENT have the same behaviour as the in-kernel VM and makes the test_bpf LD_VLAN_TAG and LD_VLAN_TAG_PRESENT tests pass. Signed-off-by: Nicolas Schichan Signed-off-by: David S. Miller --- arch/arm/net/bpf_jit_32.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index d9b25242f743..c011e2296cb1 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -889,9 +889,11 @@ b_epilogue: off = offsetof(struct sk_buff, vlan_tci); emit(ARM_LDRH_I(r_A, r_skb, off), ctx); if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) - OP_IMM3(ARM_AND, r_A, r_A, VLAN_VID_MASK, ctx); - else - OP_IMM3(ARM_AND, r_A, r_A, VLAN_TAG_PRESENT, ctx); + OP_IMM3(ARM_AND, r_A, r_A, ~VLAN_TAG_PRESENT, ctx); + else { + OP_IMM3(ARM_LSR, r_A, r_A, 12, ctx); + OP_IMM3(ARM_AND, r_A, r_A, 0x1, ctx); + } break; case BPF_ANC | SKF_AD_QUEUE: ctx->seen |= SEEN_SKB; From 0470eb99b4721586ccac954faac3fa4472da0845 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 21 Jul 2015 16:33:50 +0200 Subject: [PATCH 81/84] netlink: don't hold mutex in rcu callback when releasing mmapd ring Kirill A. Shutemov says: This simple test-case triggers a few locking asserts in the kernel: int main(int argc, char **argv) { unsigned int block_size = 16 * 4096; struct nl_mmap_req req = { .nm_block_size = block_size, .nm_block_nr = 64, .nm_frame_size = 16384, .nm_frame_nr = 64 * block_size / 16384, }; unsigned int ring_size; int fd; fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); if (setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &req, sizeof(req)) < 0) exit(1); if (setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, &req, sizeof(req)) < 0) exit(1); ring_size = req.nm_block_nr * req.nm_block_size; mmap(NULL, 2 * ring_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); return 0; } +++ exited with 0 +++ BUG: sleeping function called from invalid context at /home/kas/git/public/linux-mm/kernel/locking/mutex.c:616 in_atomic(): 1, irqs_disabled(): 0, pid: 1, name: init 3 locks held by init/1: #0: (reboot_mutex){+.+...}, at: [] SyS_reboot+0xa9/0x220 #1: ((reboot_notifier_list).rwsem){.+.+..}, at: [] __blocking_notifier_call_chain+0x39/0x70 #2: (rcu_callback){......}, at: [] rcu_do_batch.isra.49+0x160/0x10c0 Preemption disabled at:[] __delay+0xf/0x20 CPU: 1 PID: 1 Comm: init Not tainted 4.1.0-00009-gbddf4c4818e0 #253 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS Debian-1.8.2-1 04/01/2014 ffff88017b3d8000 ffff88027bc03c38 ffffffff81929ceb 0000000000000102 0000000000000000 ffff88027bc03c68 ffffffff81085a9d 0000000000000002 ffffffff81ca2a20 0000000000000268 0000000000000000 ffff88027bc03c98 Call Trace: [] dump_stack+0x4f/0x7b [] ___might_sleep+0x16d/0x270 [] __might_sleep+0x4d/0x90 [] mutex_lock_nested+0x2f/0x430 [] ? _raw_spin_unlock_irqrestore+0x5d/0x80 [] ? __this_cpu_preempt_check+0x13/0x20 [] netlink_set_ring+0x1ed/0x350 [] ? netlink_undo_bind+0x70/0x70 [] netlink_sock_destruct+0x80/0x150 [] __sk_free+0x1d/0x160 [] sk_free+0x19/0x20 [..] Cong Wang says: We can't hold mutex lock in a rcu callback, [..]
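To illustrate Cong Wang's point, here is a minimal hypothetical sketch (not code from this patch; struct foo, cfg_mutex and foo_free_rcu are invented names): an RCU callback runs in softirq context, so it may take spinlocks, but it must never take a mutex, which can sleep and trips the splat shown above.

#include <linux/kernel.h>
#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

/* Hypothetical object released via call_rcu(). */
struct foo {
	spinlock_t lock;
	struct mutex cfg_mutex;
	struct rcu_head rcu;
};

/* Runs in atomic (softirq) context once the grace period ends. */
static void foo_free_rcu(struct rcu_head *head)
{
	struct foo *f = container_of(head, struct foo, rcu);

	spin_lock_bh(&f->lock);		/* fine: spinlocks never sleep */
	/* mutex_lock(&f->cfg_mutex); would be a bug here: mutexes may
	 * sleep, which is exactly what the report above complains about
	 */
	spin_unlock_bh(&f->lock);
	kfree(f);
}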
Thomas Graf says: The socket should be dead at this point. It might be simpler to add a netlink_release_ring() function which doesn't require locking at all. Reported-by: "Kirill A. Shutemov" Diagnosed-by: Cong Wang Suggested-by: Thomas Graf Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 79 ++++++++++++++++++++++++---------------- 1 file changed, 47 insertions(+), 32 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 9a0ae7172f92..d8e2e3918ce2 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -357,25 +357,52 @@ err1: return NULL; } + +static void +__netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, bool tx_ring, void **pg_vec, + unsigned int order) +{ + struct netlink_sock *nlk = nlk_sk(sk); + struct sk_buff_head *queue; + struct netlink_ring *ring; + + queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue; + ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring; + + spin_lock_bh(&queue->lock); + + ring->frame_max = req->nm_frame_nr - 1; + ring->head = 0; + ring->frame_size = req->nm_frame_size; + ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE; + + swap(ring->pg_vec_len, req->nm_block_nr); + swap(ring->pg_vec_order, order); + swap(ring->pg_vec, pg_vec); + + __skb_queue_purge(queue); + spin_unlock_bh(&queue->lock); + + WARN_ON(atomic_read(&nlk->mapped)); + + if (pg_vec) + free_pg_vec(pg_vec, order, req->nm_block_nr); +} + static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, - bool closing, bool tx_ring) + bool tx_ring) { struct netlink_sock *nlk = nlk_sk(sk); struct netlink_ring *ring; - struct sk_buff_head *queue; void **pg_vec = NULL; unsigned int order = 0; - int err; ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring; - queue = tx_ring ? 
&sk->sk_write_queue : &sk->sk_receive_queue; - if (!closing) { - if (atomic_read(&nlk->mapped)) - return -EBUSY; - if (atomic_read(&ring->pending)) - return -EBUSY; - } + if (atomic_read(&nlk->mapped)) + return -EBUSY; + if (atomic_read(&ring->pending)) + return -EBUSY; if (req->nm_block_nr) { if (ring->pg_vec != NULL) @@ -407,31 +434,19 @@ static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, return -EINVAL; } - err = -EBUSY; mutex_lock(&nlk->pg_vec_lock); - if (closing || atomic_read(&nlk->mapped) == 0) { - err = 0; - spin_lock_bh(&queue->lock); - - ring->frame_max = req->nm_frame_nr - 1; - ring->head = 0; - ring->frame_size = req->nm_frame_size; - ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE; - - swap(ring->pg_vec_len, req->nm_block_nr); - swap(ring->pg_vec_order, order); - swap(ring->pg_vec, pg_vec); - - __skb_queue_purge(queue); - spin_unlock_bh(&queue->lock); - - WARN_ON(atomic_read(&nlk->mapped)); + if (atomic_read(&nlk->mapped) == 0) { + __netlink_set_ring(sk, req, tx_ring, pg_vec, order); + mutex_unlock(&nlk->pg_vec_lock); + return 0; } + mutex_unlock(&nlk->pg_vec_lock); if (pg_vec) free_pg_vec(pg_vec, order, req->nm_block_nr); - return err; + + return -EBUSY; } static void netlink_mm_open(struct vm_area_struct *vma) @@ -900,10 +915,10 @@ static void netlink_sock_destruct(struct sock *sk) memset(&req, 0, sizeof(req)); if (nlk->rx_ring.pg_vec) - netlink_set_ring(sk, &req, true, false); + __netlink_set_ring(sk, &req, false, NULL, 0); memset(&req, 0, sizeof(req)); if (nlk->tx_ring.pg_vec) - netlink_set_ring(sk, &req, true, true); + __netlink_set_ring(sk, &req, true, NULL, 0); } #endif /* CONFIG_NETLINK_MMAP */ @@ -2223,7 +2238,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, return -EINVAL; if (copy_from_user(&req, optval, sizeof(req))) return -EFAULT; - err = netlink_set_ring(sk, &req, false, + err = netlink_set_ring(sk, &req, optname == NETLINK_TX_RING); break; } From bac541e46319c2dc6ff9fd5ad6df59cd38686bdb Mon Sep 17 00:00:00 2001 From: Chris J Arges Date: Tue, 21 Jul 2015 12:36:33 -0500 Subject: [PATCH 82/84] openvswitch: allocate nr_node_ids flow_stats instead of num_possible_nodes Some architectures like POWER can have a NUMA node_possible_map that contains sparse entries. This causes memory corruption with openvswitch since it allocates flow_cache with a multiple of num_possible_nodes() and assumes the node variable returned by for_each_node will index into flow->stats[node]. Use nr_node_ids to allocate a maximal sparse array instead of num_possible_nodes(). The crash was noticed after 3af229f2 was applied as it changed the node_possible_map to match node_online_map on boot. Fixes: 3af229f2071f5b5cb31664be6109561fbe19c861 Signed-off-by: Chris J Arges Acked-by: Pravin B Shelar Acked-by: Nishanth Aravamudan Signed-off-by: David S. 
Miller --- net/openvswitch/flow_table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 4613df8c8290..65523948fb95 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -752,7 +752,7 @@ int ovs_flow_init(void) BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long)); flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow) - + (num_possible_nodes() + + (nr_node_ids * sizeof(struct flow_stats *)), 0, 0, NULL); if (flow_cache == NULL) From a46fa260f6f5e8f80a725b28e4aee5a04d1bd79e Mon Sep 17 00:00:00 2001 From: Dan Murphy Date: Tue, 21 Jul 2015 12:06:45 -0500 Subject: [PATCH 83/84] net: phy: dp83867: Fix warning check for setting the internal delay MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix warning: logical ‘or’ of collectively exhaustive tests is always true Change the internal delay check from an 'or' condition to an 'and' condition. Reported-by: David Binderman Signed-off-by: Dan Murphy Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/dp83867.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index c7a12e2e07b7..8a3bf5469892 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -164,7 +164,7 @@ static int dp83867_config_init(struct phy_device *phydev) return ret; } - if ((phydev->interface >= PHY_INTERFACE_MODE_RGMII_ID) || + if ((phydev->interface >= PHY_INTERFACE_MODE_RGMII_ID) && (phydev->interface <= PHY_INTERFACE_MODE_RGMII_RXID)) { val = phy_read_mmd_indirect(phydev, DP83867_RGMIICTL, DP83867_DEVADDR, phydev->addr); From d8b48911fd249bc1a3431a9515619403c96d6af3 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Wed, 22 Jul 2015 01:31:59 +0300 Subject: [PATCH 84/84] ravb: fix ring memory allocation The driver is written as if it can adapt to a low memory situation, allocating fewer RX skbs and TX aligned buffers than the respective RX/TX ring sizes. In reality, though, the driver would malfunction in this case. Stop being overly smart and just fail in such a situation -- this is achieved by moving the memory allocation from ravb_ring_format() to ravb_ring_init(). We leave dma_map_single() calls in place but make their failure non-fatal by marking the corresponding RX descriptors with zero data size, which should prevent DMA to an invalid address. Signed-off-by: Sergei Shtylyov Signed-off-by: David S.
Miller --- drivers/net/ethernet/renesas/ravb_main.c | 59 ++++++++++++++---------- 1 file changed, 34 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index d08c250e843e..78849dd4ef8e 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -228,9 +228,7 @@ static void ravb_ring_format(struct net_device *ndev, int q) struct ravb_desc *desc = NULL; int rx_ring_size = sizeof(*rx_desc) * priv->num_rx_ring[q]; int tx_ring_size = sizeof(*tx_desc) * priv->num_tx_ring[q]; - struct sk_buff *skb; dma_addr_t dma_addr; - void *buffer; int i; priv->cur_rx[q] = 0; @@ -241,41 +239,28 @@ static void ravb_ring_format(struct net_device *ndev, int q) memset(priv->rx_ring[q], 0, rx_ring_size); /* Build RX ring buffer */ for (i = 0; i < priv->num_rx_ring[q]; i++) { - priv->rx_skb[q][i] = NULL; - skb = netdev_alloc_skb(ndev, PKT_BUF_SZ + RAVB_ALIGN - 1); - if (!skb) - break; - ravb_set_buffer_align(skb); /* RX descriptor */ rx_desc = &priv->rx_ring[q][i]; /* The size of the buffer should be on 16-byte boundary. */ rx_desc->ds_cc = cpu_to_le16(ALIGN(PKT_BUF_SZ, 16)); - dma_addr = dma_map_single(&ndev->dev, skb->data, + dma_addr = dma_map_single(&ndev->dev, priv->rx_skb[q][i]->data, ALIGN(PKT_BUF_SZ, 16), DMA_FROM_DEVICE); - if (dma_mapping_error(&ndev->dev, dma_addr)) { - dev_kfree_skb(skb); - break; - } - priv->rx_skb[q][i] = skb; + /* We just set the data size to 0 for a failed mapping which + * should prevent DMA from happening... + */ + if (dma_mapping_error(&ndev->dev, dma_addr)) + rx_desc->ds_cc = cpu_to_le16(0); rx_desc->dptr = cpu_to_le32(dma_addr); rx_desc->die_dt = DT_FEMPTY; } rx_desc = &priv->rx_ring[q][i]; rx_desc->dptr = cpu_to_le32((u32)priv->rx_desc_dma[q]); rx_desc->die_dt = DT_LINKFIX; /* type */ - priv->dirty_rx[q] = (u32)(i - priv->num_rx_ring[q]); memset(priv->tx_ring[q], 0, tx_ring_size); /* Build TX ring buffer */ for (i = 0; i < priv->num_tx_ring[q]; i++) { - priv->tx_skb[q][i] = NULL; - priv->tx_buffers[q][i] = NULL; - buffer = kmalloc(PKT_BUF_SZ + RAVB_ALIGN - 1, GFP_KERNEL); - if (!buffer) - break; - /* Aligned TX buffer */ - priv->tx_buffers[q][i] = buffer; tx_desc = &priv->tx_ring[q][i]; tx_desc->die_dt = DT_EEMPTY; } @@ -298,7 +283,10 @@ static void ravb_ring_format(struct net_device *ndev, int q) static int ravb_ring_init(struct net_device *ndev, int q) { struct ravb_private *priv = netdev_priv(ndev); + struct sk_buff *skb; int ring_size; + void *buffer; + int i; /* Allocate RX and TX skb rings */ priv->rx_skb[q] = kcalloc(priv->num_rx_ring[q], @@ -308,12 +296,28 @@ static int ravb_ring_init(struct net_device *ndev, int q) if (!priv->rx_skb[q] || !priv->tx_skb[q]) goto error; + for (i = 0; i < priv->num_rx_ring[q]; i++) { + skb = netdev_alloc_skb(ndev, PKT_BUF_SZ + RAVB_ALIGN - 1); + if (!skb) + goto error; + ravb_set_buffer_align(skb); + priv->rx_skb[q][i] = skb; + } + /* Allocate rings for the aligned buffers */ priv->tx_buffers[q] = kcalloc(priv->num_tx_ring[q], sizeof(*priv->tx_buffers[q]), GFP_KERNEL); if (!priv->tx_buffers[q]) goto error; + for (i = 0; i < priv->num_tx_ring[q]; i++) { + buffer = kmalloc(PKT_BUF_SZ + RAVB_ALIGN - 1, GFP_KERNEL); + if (!buffer) + goto error; + /* Aligned TX buffer */ + priv->tx_buffers[q][i] = buffer; + } + /* Allocate all RX descriptors. 
*/ ring_size = sizeof(struct ravb_ex_rx_desc) * (priv->num_rx_ring[q] + 1); priv->rx_ring[q] = dma_alloc_coherent(NULL, ring_size, @@ -524,6 +528,10 @@ static bool ravb_rx(struct net_device *ndev, int *quota, int q) if (--boguscnt < 0) break; + /* We use 0-byte descriptors to mark the DMA mapping errors */ + if (!pkt_len) + continue; + if (desc_status & MSC_MC) stats->multicast++; @@ -587,10 +595,11 @@ static bool ravb_rx(struct net_device *ndev, int *quota, int q) le16_to_cpu(desc->ds_cc), DMA_FROM_DEVICE); skb_checksum_none_assert(skb); - if (dma_mapping_error(&ndev->dev, dma_addr)) { - dev_kfree_skb_any(skb); - break; - } + /* We just set the data size to 0 for a failed mapping + * which should prevent DMA from happening... + */ + if (dma_mapping_error(&ndev->dev, dma_addr)) + desc->ds_cc = cpu_to_le16(0); desc->dptr = cpu_to_le32(dma_addr); priv->rx_skb[q][entry] = skb; }
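To show the two conventions this last patch relies on in isolation, here is a small stand-alone userspace C sketch (illustrative only; the names and sizes are invented and this is not the ravb driver code): allocate every buffer up front in the init step and fail hard if anything is missing, then let the processing loop skip any slot whose length was set to zero.

#include <stdlib.h>

#define RING_SIZE 8
#define BUF_SIZE  2048

struct desc {
	void   *buf;
	size_t  len;	/* 0 marks a slot that must not be touched */
};

/* Allocate everything up front; on any failure undo and report an error. */
static int ring_init(struct desc *ring)
{
	int i;

	for (i = 0; i < RING_SIZE; i++) {
		ring[i].buf = malloc(BUF_SIZE);
		if (!ring[i].buf) {
			while (i--)
				free(ring[i].buf);
			return -1;
		}
		ring[i].len = BUF_SIZE;
	}
	return 0;
}

/* Consumer side: a zero-length slot is simply skipped. */
static void ring_poll(const struct desc *ring)
{
	int i;

	for (i = 0; i < RING_SIZE; i++) {
		if (!ring[i].len)
			continue;
		/* process ring[i].buf here */
	}
}

int main(void)
{
	struct desc ring[RING_SIZE];

	if (ring_init(ring))
		return EXIT_FAILURE;
	ring_poll(ring);
	return EXIT_SUCCESS;
}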