From 69146e7bfc38139a134c79a4ee6607c881891786 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Fri, 26 Jun 2015 09:32:58 +0100 Subject: [PATCH 1/6] iommu/arm-smmu: Fix the index calculation of strtab The element size of cfg->strtab is just one DWORD, so we should use a multiply operation instead of a shift when calculating the level 1 index. Signed-off-by: Zhen Lei Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 8e9ec81ce4bb..606852f18808 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1064,7 +1064,7 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid) return 0; size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3); - strtab = &cfg->strtab[sid >> STRTAB_SPLIT << STRTAB_L1_DESC_DWORDS]; + strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS]; desc->span = STRTAB_SPLIT + 1; desc->l2ptr = dma_zalloc_coherent(smmu->dev, size, &desc->l2ptr_dma, From d2e88e7c081efb2c5a9e1adb2a065d373167af4b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 30 Jun 2015 10:02:28 +0100 Subject: [PATCH 2/6] iommu/arm-smmu: Fix LOG2SIZE setting for 2-level stream tables STRTAB_BASE_CFG.LOG2SIZE should be set to log2(entries), where entries is the *total* number of entries in the stream table, not just the first level. This patch fixes the register setting, which was previously being set to the size of the l1 thanks to a multi-use "size" variable. Reported-by: Zhen Lei Tested-by: Zhen Lei Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 606852f18808..6b1ae4e09616 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -2020,21 +2020,23 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu) { void *strtab; u64 reg; - u32 size; + u32 size, l1size; int ret; struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; /* Calculate the L1 size, capped to the SIDSIZE */ size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3); size = min(size, smmu->sid_bits - STRTAB_SPLIT); - if (size + STRTAB_SPLIT < smmu->sid_bits) + cfg->num_l1_ents = 1 << size; + + size += STRTAB_SPLIT; + if (size < smmu->sid_bits) dev_warn(smmu->dev, "2-level strtab only covers %u/%u bits of SID\n", - size + STRTAB_SPLIT, smmu->sid_bits); + size, smmu->sid_bits); - cfg->num_l1_ents = 1 << size; - size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3); - strtab = dma_zalloc_coherent(smmu->dev, size, &cfg->strtab_dma, + l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3); + strtab = dma_zalloc_coherent(smmu->dev, l1size, &cfg->strtab_dma, GFP_KERNEL); if (!strtab) { dev_err(smmu->dev, @@ -2055,8 +2057,7 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu) ret = arm_smmu_init_l1_strtab(smmu); if (ret) dma_free_coherent(smmu->dev, - cfg->num_l1_ents * - (STRTAB_L1_DESC_DWORDS << 3), + l1size, strtab, cfg->strtab_dma); return ret; From 5d58c6207c300340151931ad9c2cdea2d1685dc4 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Fri, 26 Jun 2015 09:32:59 +0100 Subject: [PATCH 3/6] iommu/arm-smmu: Fix the values of ARM64_TCR_{I,O}RGN0_SHIFT The arm64 CPU architecture defines TCR[8:11] as holding the inner and outer memory attributes for TTBR0. This patch fixes the ARM SMMUv3 driver to pack these bits into the context descriptor, rather than picking up the TTBR1 attributes as it currently does. Signed-off-by: Zhen Lei Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 6b1ae4e09616..98e987a3ed3a 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -269,10 +269,10 @@ #define ARM64_TCR_TG0_SHIFT 14 #define ARM64_TCR_TG0_MASK 0x3UL #define CTXDESC_CD_0_TCR_IRGN0_SHIFT 8 -#define ARM64_TCR_IRGN0_SHIFT 24 +#define ARM64_TCR_IRGN0_SHIFT 8 #define ARM64_TCR_IRGN0_MASK 0x3UL #define CTXDESC_CD_0_TCR_ORGN0_SHIFT 10 -#define ARM64_TCR_ORGN0_SHIFT 26 +#define ARM64_TCR_ORGN0_SHIFT 10 #define ARM64_TCR_ORGN0_MASK 0x3UL #define CTXDESC_CD_0_TCR_SH0_SHIFT 12 #define ARM64_TCR_SH0_SHIFT 12 From e2f4c2330f08ba73d9a3c919a3d6ca33dce7d2c2 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Tue, 7 Jul 2015 04:30:17 +0100 Subject: [PATCH 4/6] iommu/arm-smmu: Enlarge STRTAB_L1_SZ_SHIFT to support larger sidsize Because we will choose the minimum value between STRTAB_L1_SZ_SHIFT and IDR1.SIDSIZE, so enlarge STRTAB_L1_SZ_SHIFT will not impact the platforms whose IDR1.SIDSIZE is smaller than old STRTAB_L1_SZ_SHIFT value. Signed-off-by: Zhen Lei Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 98e987a3ed3a..29cba3280af7 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -199,9 +199,10 @@ * Stream table. * * Linear: Enough to cover 1 << IDR1.SIDSIZE entries - * 2lvl: 8k L1 entries, 256 lazy entries per table (each table covers a PCI bus) + * 2lvl: 128k L1 entries, + * 256 lazy entries per table (each table covers a PCI bus) */ -#define STRTAB_L1_SZ_SHIFT 16 +#define STRTAB_L1_SZ_SHIFT 20 #define STRTAB_SPLIT 8 #define STRTAB_L1_DESC_DWORDS 1 From 5e92946c39ca6abc65e34775a93cc1d1a819c0e3 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Tue, 7 Jul 2015 04:30:18 +0100 Subject: [PATCH 5/6] iommu/arm-smmu: Skip the execution of CMD_PREFETCH_CONFIG Hisilicon SMMUv3 devices treat CMD_PREFETCH_CONFIG as a illegal command, execute it will trigger GERROR interrupt. Although the gerror code manage to turn the prefetch into a SYNC, and the system can continue to run normally, but it's ugly to print error information. Signed-off-by: Zhen Lei [will: extended binding documentation] Signed-off-by: Will Deacon --- .../devicetree/bindings/iommu/arm,smmu-v3.txt | 3 ++ drivers/iommu/arm-smmu-v3.c | 32 ++++++++++++++++++- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt index c03eec116872..3443e0f838df 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt +++ b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt @@ -35,3 +35,6 @@ the PCIe specification. NOTE: this only applies to the SMMU itself, not masters connected upstream of the SMMU. + +- hisilicon,broken-prefetch-cmd + : Avoid sending CMD_PREFETCH_* commands to the SMMU. diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 29cba3280af7..da902baaa794 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -543,6 +543,9 @@ struct arm_smmu_device { #define ARM_SMMU_FEAT_HYP (1 << 12) u32 features; +#define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0) + u32 options; + struct arm_smmu_cmdq cmdq; struct arm_smmu_evtq evtq; struct arm_smmu_priq priq; @@ -603,11 +606,35 @@ struct arm_smmu_domain { static DEFINE_SPINLOCK(arm_smmu_devices_lock); static LIST_HEAD(arm_smmu_devices); +struct arm_smmu_option_prop { + u32 opt; + const char *prop; +}; + +static struct arm_smmu_option_prop arm_smmu_options[] = { + { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" }, + { 0, NULL}, +}; + static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) { return container_of(dom, struct arm_smmu_domain, domain); } +static void parse_driver_options(struct arm_smmu_device *smmu) +{ + int i = 0; + + do { + if (of_property_read_bool(smmu->dev->of_node, + arm_smmu_options[i].prop)) { + smmu->options |= arm_smmu_options[i].opt; + dev_notice(smmu->dev, "option %s\n", + arm_smmu_options[i].prop); + } + } while (arm_smmu_options[++i].opt); +} + /* Low-level queue manipulation functions */ static bool queue_full(struct arm_smmu_queue *q) { @@ -1037,7 +1064,8 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, arm_smmu_sync_ste_for_sid(smmu, sid); /* It's likely that we'll want to use the new STE soon */ - arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd); + if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH)) + arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd); } static void arm_smmu_init_bypass_stes(u64 *strtab, unsigned int nent) @@ -2575,6 +2603,8 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev) if (irq > 0) smmu->gerr_irq = irq; + parse_driver_options(smmu); + /* Probe the h/w */ ret = arm_smmu_device_probe(smmu); if (ret) From 46ebb7af7b93792de65e124e1ab8b89a108a41f2 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 14 Jul 2015 14:48:53 -0600 Subject: [PATCH 6/6] iommu/vt-d: Fix VM domain ID leak This continues the attempt to fix commit fb170fb4c548 ("iommu/vt-d: Introduce helper functions to make code symmetric for readability"). The previous attempt in commit 71684406905f ("iommu/vt-d: Detach domain *only* from attached iommus") overlooked the fact that dmar_domain.iommu_bmp gets cleared for VM domains when devices are detached: intel_iommu_detach_device domain_remove_one_dev_info domain_detach_iommu The domain is detached from the iommu, but the iommu is still attached to the domain, for whatever reason. Thus when we get to domain_exit(), we can't rely on iommu_bmp for VM domains to find the active iommus, we must check them all. Without that, the corresponding bit in intel_iommu.domain_ids doesn't get cleared and repeated VM domain creation and destruction will run out of domain IDs. Meanwhile we still can't call iommu_detach_domain() on arbitrary non-VM domains or we risk clearing in-use domain IDs, as 71684406905f attempted to address. It's tempting to modify iommu_detach_domain() to test the domain iommu_bmp, but the call ordering from domain_remove_one_dev_info() prevents it being able to work as fb170fb4c548 seems to have intended. Caching of unused VM domains on the iommu object seems to be the root of the problem, but this code is far too fragile for that kind of rework to be proposed for stable, so we simply revert this chunk to its state prior to fb170fb4c548. Fixes: fb170fb4c548 ("iommu/vt-d: Introduce helper functions to make code symmetric for readability") Fixes: 71684406905f ("iommu/vt-d: Detach domain *only* from attached iommus") Signed-off-by: Alex Williamson Cc: Jiang Liu Cc: stable@vger.kernel.org # v3.17+ Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index a98a7b27aca1..0649b94f5958 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1830,8 +1830,9 @@ static int domain_init(struct dmar_domain *domain, int guest_width) static void domain_exit(struct dmar_domain *domain) { + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; struct page *freelist = NULL; - int i; /* Domain 0 is reserved, so dont process it */ if (!domain) @@ -1851,8 +1852,10 @@ static void domain_exit(struct dmar_domain *domain) /* clear attached or cached domains */ rcu_read_lock(); - for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) - iommu_detach_domain(domain, g_iommus[i]); + for_each_active_iommu(iommu, drhd) + if (domain_type_is_vm(domain) || + test_bit(iommu->seq_id, domain->iommu_bmp)) + iommu_detach_domain(domain, iommu); rcu_read_unlock(); dma_free_pagelist(freelist);