IOMMU Updates for Linux v6.13:

Including:
 
 	- Core Updates:
 	  - Convert call-sites using iommu_domain_alloc() to more specific
 	    versions and remove function.
 	  - Introduce iommu_paging_domain_alloc_flags().
 	  - Extend support for allocating PASID-capable domains to more
 	    drivers.
 	  - Remove iommu_present().
 	  - Some smaller improvements.
 
 	- New IOMMU driver for RISC-V.
 
 	- Intel VT-d Updates:
 	  - Add domain_alloc_paging support.
 	  - Enable user space IOPFs in non-PASID and non-svm cases.
 	  - Small code refactoring and cleanups.
 	  - Add domain replacement support for pasid.
 
 	- AMD-Vi Updates:
 	  - Adapt to iommu_paging_domain_alloc_flags() interface and alloc V2
 	    page-tables by default.
 	  - Replace custom domain ID allocator with IDA allocator.
 	  - Add ops->release_domain() support.
 	  - Other improvements to device attach and domain allocation code
 	    paths.
 
 	- ARM-SMMU Updates:
 	  - SMMUv2:
 	    - Return -EPROBE_DEFER for client devices probing before their SMMU.
 	    - Devicetree binding updates for Qualcomm MMU-500 implementations.
 	  - SMMUv3:
 	    - Minor fixes and cleanup for NVIDIA's virtual command queue driver.
 	  - IO-PGTable:
 	    - Fix indexing of concatenated PGDs and extend selftest coverage.
 	    - Remove unused block-splitting support.
 
 	- S390 IOMMU:
 	  - Implement support for blocking domain.
 
 	- Mediatek IOMMU:
 	  - Enable 35-bit physical address support for mt8186.
 
 	- OMAP IOMMU driver:
 	  - Adapt to recent IOMMU core changes and unbreak driver.
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEr9jSbILcajRFYWYyK/BELZcBGuMFAmdAPOoACgkQK/BELZcB
 GuOs1w/+PoLbOYUjmJiOfpI6YNSEfF2tE4z2al/YYIBcNoAmTTRauuhv6+S0gVRy
 NTfSucw7OuLlbE9vGsdY02UL1PK58NGfUF8Z2rZSf+RRgLACc47cjZWh0vzDlNbP
 4LTdqJXmIWiYcmDtY7LmHtwTSiB900YFZwZOHmTSfNyJt8UC4tBPRh8k2YD3vuxc
 QZlxSihEf+F+vm8GtW40Ia9BiG3YhCYAcHq6Y4dKxI0JWN+7oRiPN8CF+z/vcdjV
 VpCDBcbHjvqqpXJvddQHA0SrGDBMHz1AXYhRXnfe7Ogh6SbaSWDSsdaIS27DsOzC
 L6fxW3+sNmfEOO1RmJoizkHzAtkLWCLNjBvjOb1hUCpwLcKf5nhgE3wOQSwzqumn
 KbxpoQpHFJutikDBGRsKJCsNqS8ZNWd4Z8rHhTnq2ctuYUFvurkcwX4WXOSRpsoA
 iJ+x1ezk9FxObHj/B+1nIAwKoeaLyFEwJe7Etom/E2m/2mq2oQOrq1bvfIGCms5h
 mqLYJ9L9MDanhEiOshHooy6ROPD842XmWILfq3HUi9JcrB/BvILPRsESQnNAn3Zl
 8ImbR5VijGGDy50KBE8I9abRwDTIn9c2JJVDSh3tAz1aicGnRLcIeqNeuJ4IEQZf
 IQb7qcZQge17ie/Pwr24GlwrKG7DhOg5NXvl3DiVUum2NFGjuBc=
 =V9hb
 -----END PGP SIGNATURE-----

Merge tag 'iommu-updates-v6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux

Pull iommu updates from Joerg Roedel:
 "Core Updates:
   - Convert call-sites using iommu_domain_alloc() to more specific
     versions and remove function
   - Introduce iommu_paging_domain_alloc_flags()
   - Extend support for allocating PASID-capable domains to more drivers
   - Remove iommu_present()
   - Some smaller improvements

  New IOMMU driver for RISC-V

  Intel VT-d Updates:
   - Add domain_alloc_paging support
   - Enable user space IOPFs in non-PASID and non-svm cases
   - Small code refactoring and cleanups
   - Add domain replacement support for pasid

  AMD-Vi Updates:
   - Adapt to iommu_paging_domain_alloc_flags() interface and alloc V2
     page-tables by default
   - Replace custom domain ID allocator with IDA allocator
   - Add ops->release_domain() support
   - Other improvements to device attach and domain allocation code
     paths

  ARM-SMMU Updates:
   - SMMUv2:
      - Return -EPROBE_DEFER for client devices probing before their
        SMMU
      - Devicetree binding updates for Qualcomm MMU-500 implementations
   - SMMUv3:
      - Minor fixes and cleanup for NVIDIA's virtual command queue
        driver
   - IO-PGTable:
      - Fix indexing of concatenated PGDs and extend selftest coverage
      - Remove unused block-splitting support

  S390 IOMMU:
   - Implement support for blocking domain

  Mediatek IOMMU:
   - Enable 35-bit physical address support for mt8186

  OMAP IOMMU driver:
   - Adapt to recent IOMMU core changes and unbreak driver"
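
The first core item above removes iommu_domain_alloc() in favour of
device-aware allocators. A minimal sketch of the conversion pattern, with
the caller's struct device assumed to be in "dev" and the flag name taken
from the driver hunks further down (illustrative fragment, not lifted from
any specific driver):

    struct iommu_domain *domain;

    /* Previously: domain = iommu_domain_alloc(dev->bus); (NULL on failure) */

    /* v6.13 style: allocate a paging domain for one specific device */
    domain = iommu_paging_domain_alloc(dev);
    if (IS_ERR(domain))
            return PTR_ERR(domain);

    /*
     * Callers that need additional capabilities, e.g. a PASID-capable
     * domain, use the new flags variant instead:
     *
     *      domain = iommu_paging_domain_alloc_flags(dev, IOMMU_HWPT_ALLOC_PASID);
     */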

* tag 'iommu-updates-v6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux: (92 commits)
  iommu/tegra241-cmdqv: Fix alignment failure at max_n_shift
  iommu: Make set_dev_pasid op support domain replacement
  iommu/arm-smmu-v3: Make set_dev_pasid() op support replace
  iommu/vt-d: Add set_dev_pasid callback for nested domain
  iommu/vt-d: Make identity_domain_set_dev_pasid() to handle domain replacement
  iommu/vt-d: Make intel_svm_set_dev_pasid() support domain replacement
  iommu/vt-d: Limit intel_iommu_set_dev_pasid() for paging domain
  iommu/vt-d: Make intel_iommu_set_dev_pasid() to handle domain replacement
  iommu/vt-d: Add iommu_domain_did() to get did
  iommu/vt-d: Consolidate the struct dev_pasid_info add/remove
  iommu/vt-d: Add pasid replace helpers
  iommu/vt-d: Refactor the pasid setup helpers
  iommu/vt-d: Add a helper to flush cache for updating present pasid entry
  iommu: Pass old domain to set_dev_pasid op
  iommu/iova: Fix typo 'adderss'
  iommu: Add a kdoc to iommu_unmap()
  iommu/io-pgtable-arm-v7s: Remove split on unmap behavior
  iommu/io-pgtable-arm: Remove split on unmap behavior
  iommu/vt-d: Drain PRQs when domain removed from RID
  iommu/vt-d: Drop pasid requirement for prq initialization
  ...
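
Several entries above thread the previously-attached domain through the
set_dev_pasid() path, so a PASID can be moved from one domain to another
without an intermediate detach. A minimal sketch of the updated callback
shape, matching the extra "old" argument visible in the driver hunks below
(the function name and body are illustrative, not a real driver's):

    static int example_set_dev_pasid(struct iommu_domain *domain,
                                     struct device *dev, ioasid_t pasid,
                                     struct iommu_domain *old)
    {
            /* old is NULL on first attach, non-NULL on replacement */
            if (old)
                    return -EOPNOTSUPP;     /* replacement not supported here */

            /* ... program the PASID table entry for this domain ... */
            return 0;
    }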
Linus Torvalds 2024-11-22 19:55:10 -08:00
commit ceba6f6f33
51 changed files with 4636 additions and 1650 deletions

View File

@ -36,10 +36,12 @@ properties:
items:
- enum:
- qcom,qcm2290-smmu-500
- qcom,qcs615-smmu-500
- qcom,qcs8300-smmu-500
- qcom,qdu1000-smmu-500
- qcom,sa8255p-smmu-500
- qcom,sa8775p-smmu-500
- qcom,sar2130p-smmu-500
- qcom,sc7180-smmu-500
- qcom,sc7280-smmu-500
- qcom,sc8180x-smmu-500
@ -88,6 +90,7 @@ properties:
- qcom,qcm2290-smmu-500
- qcom,sa8255p-smmu-500
- qcom,sa8775p-smmu-500
- qcom,sar2130p-smmu-500
- qcom,sc7280-smmu-500
- qcom,sc8180x-smmu-500
- qcom,sc8280xp-smmu-500
@ -524,6 +527,7 @@ allOf:
compatible:
items:
- enum:
- qcom,sar2130p-smmu-500
- qcom,sm8550-smmu-500
- qcom,sm8650-smmu-500
- qcom,x1e80100-smmu-500
@ -555,6 +559,7 @@ allOf:
- cavium,smmu-v2
- marvell,ap806-smmu-500
- nvidia,smmu-500
- qcom,qcs615-smmu-500
- qcom,qcs8300-smmu-500
- qcom,qdu1000-smmu-500
- qcom,sa8255p-smmu-500

View File

@ -0,0 +1,147 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/iommu/riscv,iommu.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: RISC-V IOMMU Architecture Implementation
maintainers:
- Tomasz Jeznach <tjeznach@rivosinc.com>
description: |
The RISC-V IOMMU provides memory address translation and isolation for
input and output devices, supporting per-device translation context,
shared process address spaces including the ATS and PRI components of
the PCIe specification, two stage address translation and MSI remapping.
It supports identical translation table format to the RISC-V address
translation tables with page level access and protection attributes.
Hardware uses in-memory command and fault reporting queues with wired
interrupt or MSI notifications.
Visit https://github.com/riscv-non-isa/riscv-iommu for more details.
For information on assigning RISC-V IOMMU to its peripheral devices,
see generic IOMMU bindings.
properties:
# For PCIe IOMMU hardware compatible property should contain the vendor
# and device ID according to the PCI Bus Binding specification.
# Since PCI provides built-in identification methods, compatible is not
# actually required. For non-PCIe hardware implementations 'riscv,iommu'
# should be specified along with 'reg' property providing MMIO location.
compatible:
oneOf:
- items:
- enum:
- qemu,riscv-iommu
- const: riscv,iommu
- items:
- enum:
- pci1efd,edf1
- const: riscv,pci-iommu
reg:
maxItems: 1
description:
For non-PCI devices this represents base address and size of for the
IOMMU memory mapped registers interface.
For PCI IOMMU hardware implementation this should represent an address
of the IOMMU, as defined in the PCI Bus Binding reference.
'#iommu-cells':
const: 1
description:
The single cell describes the requester id emitted by a master to the
IOMMU.
interrupts:
minItems: 1
maxItems: 4
description:
Wired interrupt vectors available for RISC-V IOMMU to notify the
RISC-V HARTS. The cause to interrupt vector is software defined
using IVEC IOMMU register.
msi-parent: true
power-domains:
maxItems: 1
required:
- compatible
- reg
- '#iommu-cells'
additionalProperties: false
examples:
- |+
/* Example 1 (IOMMU device with wired interrupts) */
#include <dt-bindings/interrupt-controller/irq.h>
iommu1: iommu@1bccd000 {
compatible = "qemu,riscv-iommu", "riscv,iommu";
reg = <0x1bccd000 0x1000>;
interrupt-parent = <&aplic_smode>;
interrupts = <32 IRQ_TYPE_LEVEL_HIGH>,
<33 IRQ_TYPE_LEVEL_HIGH>,
<34 IRQ_TYPE_LEVEL_HIGH>,
<35 IRQ_TYPE_LEVEL_HIGH>;
#iommu-cells = <1>;
};
/* Device with two IOMMU device IDs, 0 and 7 */
master1 {
iommus = <&iommu1 0>, <&iommu1 7>;
};
- |+
/* Example 2 (IOMMU device with shared wired interrupt) */
#include <dt-bindings/interrupt-controller/irq.h>
iommu2: iommu@1bccd000 {
compatible = "qemu,riscv-iommu", "riscv,iommu";
reg = <0x1bccd000 0x1000>;
interrupt-parent = <&aplic_smode>;
interrupts = <32 IRQ_TYPE_LEVEL_HIGH>;
#iommu-cells = <1>;
};
- |+
/* Example 3 (IOMMU device with MSIs) */
iommu3: iommu@1bcdd000 {
compatible = "qemu,riscv-iommu", "riscv,iommu";
reg = <0x1bccd000 0x1000>;
msi-parent = <&imsics_smode>;
#iommu-cells = <1>;
};
- |+
/* Example 4 (IOMMU PCIe device with MSIs) */
bus {
#address-cells = <2>;
#size-cells = <2>;
pcie@30000000 {
device_type = "pci";
#address-cells = <3>;
#size-cells = <2>;
reg = <0x0 0x30000000 0x0 0x1000000>;
ranges = <0x02000000 0x0 0x41000000 0x0 0x41000000 0x0 0x0f000000>;
/*
* The IOMMU manages all functions in this PCI domain except
* itself. Omit BDF 00:01.0.
*/
iommu-map = <0x0 &iommu0 0x0 0x8>,
<0x9 &iommu0 0x9 0xfff7>;
/* The IOMMU programming interface uses slot 00:01.0 */
iommu0: iommu@1,0 {
compatible = "pci1efd,edf1", "riscv,pci-iommu";
reg = <0x800 0 0 0 0>;
#iommu-cells = <1>;
};
};
};

View File

@ -19992,6 +19992,15 @@ F: arch/riscv/
N: riscv
K: riscv
RISC-V IOMMU
M: Tomasz Jeznach <tjeznach@rivosinc.com>
L: iommu@lists.linux.dev
L: linux-riscv@lists.infradead.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux.git
F: Documentation/devicetree/bindings/iommu/riscv,iommu.yaml
F: drivers/iommu/riscv/
RISC-V MICROCHIP FPGA SUPPORT
M: Conor Dooley <conor.dooley@microchip.com>
M: Daire McNamara <daire.mcnamara@microchip.com>

View File

@ -96,7 +96,6 @@ struct zpci_bar_struct {
u8 size; /* order 2 exponent */
};
struct s390_domain;
struct kvm_zdev;
#define ZPCI_FUNCTIONS_PER_BUS 256
@ -186,9 +185,10 @@ struct zpci_dev {
struct dentry *debugfs_dev;
/* IOMMU and passthrough */
struct s390_domain *s390_domain; /* s390 IOMMU domain data */
struct iommu_domain *s390_domain; /* attached IOMMU domain */
struct kvm_zdev *kzdev;
struct mutex kzdev_lock;
spinlock_t dom_lock; /* protect s390_domain change */
};
static inline bool zdev_enabled(struct zpci_dev *zdev)

View File

@ -161,6 +161,7 @@ int zpci_fmb_enable_device(struct zpci_dev *zdev)
u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_SET_MEASURE);
struct zpci_iommu_ctrs *ctrs;
struct zpci_fib fib = {0};
unsigned long flags;
u8 cc, status;
if (zdev->fmb || sizeof(*zdev->fmb) < zdev->fmb_length)
@ -172,6 +173,7 @@ int zpci_fmb_enable_device(struct zpci_dev *zdev)
WARN_ON((u64) zdev->fmb & 0xf);
/* reset software counters */
spin_lock_irqsave(&zdev->dom_lock, flags);
ctrs = zpci_get_iommu_ctrs(zdev);
if (ctrs) {
atomic64_set(&ctrs->mapped_pages, 0);
@ -180,6 +182,7 @@ int zpci_fmb_enable_device(struct zpci_dev *zdev)
atomic64_set(&ctrs->sync_map_rpcits, 0);
atomic64_set(&ctrs->sync_rpcits, 0);
}
spin_unlock_irqrestore(&zdev->dom_lock, flags);
fib.fmb_addr = virt_to_phys(zdev->fmb);

View File

@ -71,17 +71,23 @@ static void pci_fmb_show(struct seq_file *m, char *name[], int length,
static void pci_sw_counter_show(struct seq_file *m)
{
struct zpci_iommu_ctrs *ctrs = zpci_get_iommu_ctrs(m->private);
struct zpci_dev *zdev = m->private;
struct zpci_iommu_ctrs *ctrs;
atomic64_t *counter;
unsigned long flags;
int i;
spin_lock_irqsave(&zdev->dom_lock, flags);
ctrs = zpci_get_iommu_ctrs(m->private);
if (!ctrs)
return;
goto unlock;
counter = &ctrs->mapped_pages;
for (i = 0; i < ARRAY_SIZE(pci_sw_names); i++, counter++)
seq_printf(m, "%26s:\t%llu\n", pci_sw_names[i],
atomic64_read(counter));
unlock:
spin_unlock_irqrestore(&zdev->dom_lock, flags);
}
static int pci_perf_show(struct seq_file *m, void *v)

View File

@ -195,6 +195,7 @@ config MSM_IOMMU
source "drivers/iommu/amd/Kconfig"
source "drivers/iommu/intel/Kconfig"
source "drivers/iommu/iommufd/Kconfig"
source "drivers/iommu/riscv/Kconfig"
config IRQ_REMAP
bool "Support for Interrupt Remapping"

View File

@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
obj-y += amd/ intel/ arm/ iommufd/
obj-y += amd/ intel/ arm/ iommufd/ riscv/
obj-$(CONFIG_IOMMU_API) += iommu.o
obj-$(CONFIG_IOMMU_API) += iommu-traces.o
obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o

View File

@ -46,13 +46,15 @@ extern int amd_iommu_gpt_level;
extern unsigned long amd_iommu_pgsize_bitmap;
/* Protection domain ops */
void amd_iommu_init_identity_domain(void);
struct protection_domain *protection_domain_alloc(unsigned int type, int nid);
void protection_domain_free(struct protection_domain *domain);
struct iommu_domain *amd_iommu_domain_alloc_sva(struct device *dev,
struct mm_struct *mm);
void amd_iommu_domain_free(struct iommu_domain *dom);
int iommu_sva_set_dev_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid);
struct device *dev, ioasid_t pasid,
struct iommu_domain *old);
void amd_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
struct iommu_domain *domain);
@ -118,9 +120,14 @@ static inline bool check_feature2(u64 mask)
return (amd_iommu_efr2 & mask);
}
static inline bool amd_iommu_v2_pgtbl_supported(void)
{
return (check_feature(FEATURE_GIOSUP) && check_feature(FEATURE_GT));
}
static inline bool amd_iommu_gt_ppr_supported(void)
{
return (check_feature(FEATURE_GT) &&
return (amd_iommu_v2_pgtbl_supported() &&
check_feature(FEATURE_PPR) &&
check_feature(FEATURE_EPHSUP));
}

View File

@ -565,6 +565,12 @@ struct pdom_dev_data {
struct list_head list;
};
/* Keeps track of the IOMMUs attached to protection domain */
struct pdom_iommu_info {
struct amd_iommu *iommu; /* IOMMUs attach to protection domain */
u32 refcnt; /* Count of attached dev/pasid per domain/IOMMU */
};
/*
* This structure contains generic data for IOMMU protection domains
* independent of their use.
@ -578,8 +584,7 @@ struct protection_domain {
u16 id; /* the domain id written to the device table */
enum protection_domain_mode pd_mode; /* Track page table type */
bool dirty_tracking; /* dirty tracking is enabled in the domain */
unsigned dev_cnt; /* devices assigned to this domain */
unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */
struct xarray iommu_array; /* per-IOMMU reference count */
struct mmu_notifier mn; /* mmu notifier for the SVA domain */
struct list_head dev_data_list; /* List of pdom_dev_data */
@ -831,7 +836,7 @@ struct devid_map {
*/
struct iommu_dev_data {
/*Protect against attach/detach races */
spinlock_t lock;
struct mutex mutex;
struct list_head list; /* For domain->dev_list */
struct llist_node dev_data_list; /* For global dev_data_list */
@ -872,12 +877,6 @@ extern struct list_head amd_iommu_pci_seg_list;
*/
extern struct list_head amd_iommu_list;
/*
* Array with pointers to each IOMMU struct
* The indices are referenced in the protection domains
*/
extern struct amd_iommu *amd_iommus[MAX_IOMMUS];
/*
* Structure defining one entry in the device table
*/
@ -912,14 +911,14 @@ struct unity_map_entry {
/* size of the dma_ops aperture as power of 2 */
extern unsigned amd_iommu_aperture_order;
/* allocation bitmap for domain ids */
extern unsigned long *amd_iommu_pd_alloc_bitmap;
extern bool amd_iommu_force_isolation;
/* Max levels of glxval supported */
extern int amd_iommu_max_glx_val;
/* IDA to track protection domain IDs */
extern struct ida pdom_ids;
/* Global EFR and EFR2 registers */
extern u64 amd_iommu_efr;
extern u64 amd_iommu_efr2;

View File

@ -177,9 +177,6 @@ LIST_HEAD(amd_iommu_pci_seg_list); /* list of all PCI segments */
LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the
system */
/* Array to assign indices to IOMMUs*/
struct amd_iommu *amd_iommus[MAX_IOMMUS];
/* Number of IOMMUs present in the system */
static int amd_iommus_present;
@ -194,12 +191,6 @@ bool amd_iommu_force_isolation __read_mostly;
unsigned long amd_iommu_pgsize_bitmap __ro_after_init = AMD_IOMMU_PGSIZES;
/*
* AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap
* to know which ones are already in use.
*/
unsigned long *amd_iommu_pd_alloc_bitmap;
enum iommu_init_state {
IOMMU_START_STATE,
IOMMU_IVRS_DETECTED,
@ -1082,7 +1073,12 @@ static bool __copy_device_table(struct amd_iommu *iommu)
if (dte_v && dom_id) {
pci_seg->old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0];
pci_seg->old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1];
__set_bit(dom_id, amd_iommu_pd_alloc_bitmap);
/* Reserve the Domain IDs used by previous kernel */
if (ida_alloc_range(&pdom_ids, dom_id, dom_id, GFP_ATOMIC) != dom_id) {
pr_err("Failed to reserve domain ID 0x%x\n", dom_id);
memunmap(old_devtb);
return false;
}
/* If gcr3 table existed, mask it out */
if (old_devtb[devid].data[0] & DTE_FLAG_GV) {
tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B;
@ -1744,9 +1740,6 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h,
return -ENOSYS;
}
/* Index is fine - add IOMMU to the array */
amd_iommus[iommu->index] = iommu;
/*
* Copy data from ACPI table entry to the iommu struct
*/
@ -2070,14 +2063,6 @@ static int __init iommu_init_pci(struct amd_iommu *iommu)
init_iommu_perf_ctr(iommu);
if (amd_iommu_pgtable == AMD_IOMMU_V2) {
if (!check_feature(FEATURE_GIOSUP) ||
!check_feature(FEATURE_GT)) {
pr_warn("Cannot enable v2 page table for DMA-API. Fallback to v1.\n");
amd_iommu_pgtable = AMD_IOMMU_V1;
}
}
if (is_rd890_iommu(iommu->dev)) {
int i, j;
@ -2172,6 +2157,9 @@ static int __init amd_iommu_init_pci(void)
struct amd_iommu_pci_seg *pci_seg;
int ret;
/* Init global identity domain before registering IOMMU */
amd_iommu_init_identity_domain();
for_each_iommu(iommu) {
ret = iommu_init_pci(iommu);
if (ret) {
@ -2882,11 +2870,6 @@ static void enable_iommus_vapic(void)
#endif
}
static void enable_iommus(void)
{
early_enable_iommus();
}
static void disable_iommus(void)
{
struct amd_iommu *iommu;
@ -2913,7 +2896,8 @@ static void amd_iommu_resume(void)
iommu_apply_resume_quirks(iommu);
/* re-load the hardware */
enable_iommus();
for_each_iommu(iommu)
early_enable_iommu(iommu);
amd_iommu_enable_interrupts();
}
@ -2994,9 +2978,7 @@ static bool __init check_ioapic_information(void)
static void __init free_dma_resources(void)
{
iommu_free_pages(amd_iommu_pd_alloc_bitmap,
get_order(MAX_DOMAIN_ID / 8));
amd_iommu_pd_alloc_bitmap = NULL;
ida_destroy(&pdom_ids);
free_unity_maps();
}
@ -3064,20 +3046,6 @@ static int __init early_amd_iommu_init(void)
amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base);
DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type);
/* Device table - directly used by all IOMMUs */
ret = -ENOMEM;
amd_iommu_pd_alloc_bitmap = iommu_alloc_pages(GFP_KERNEL,
get_order(MAX_DOMAIN_ID / 8));
if (amd_iommu_pd_alloc_bitmap == NULL)
goto out;
/*
* never allocate domain 0 because its used as the non-allocated and
* error value placeholder
*/
__set_bit(0, amd_iommu_pd_alloc_bitmap);
/*
* now the data structures are allocated and basically initialized
* start the real acpi table scan
@ -3091,6 +3059,13 @@ static int __init early_amd_iommu_init(void)
FIELD_GET(FEATURE_GATS, amd_iommu_efr) == GUEST_PGTABLE_5_LEVEL)
amd_iommu_gpt_level = PAGE_MODE_5_LEVEL;
if (amd_iommu_pgtable == AMD_IOMMU_V2) {
if (!amd_iommu_v2_pgtbl_supported()) {
pr_warn("Cannot enable v2 page table for DMA-API. Fallback to v1.\n");
amd_iommu_pgtable = AMD_IOMMU_V1;
}
}
/* Disable any previously enabled IOMMUs */
if (!is_kdump_kernel() || amd_iommu_disabled)
disable_iommus();

View File

@ -118,6 +118,7 @@ static void free_sub_pt(u64 *root, int mode, struct list_head *freelist)
*/
static bool increase_address_space(struct amd_io_pgtable *pgtable,
unsigned long address,
unsigned int page_size_level,
gfp_t gfp)
{
struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg;
@ -133,7 +134,8 @@ static bool increase_address_space(struct amd_io_pgtable *pgtable,
spin_lock_irqsave(&domain->lock, flags);
if (address <= PM_LEVEL_SIZE(pgtable->mode))
if (address <= PM_LEVEL_SIZE(pgtable->mode) &&
pgtable->mode - 1 >= page_size_level)
goto out;
ret = false;
@ -163,18 +165,21 @@ static u64 *alloc_pte(struct amd_io_pgtable *pgtable,
gfp_t gfp,
bool *updated)
{
unsigned long last_addr = address + (page_size - 1);
struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg;
int level, end_lvl;
u64 *pte, *page;
BUG_ON(!is_power_of_2(page_size));
while (address > PM_LEVEL_SIZE(pgtable->mode)) {
while (last_addr > PM_LEVEL_SIZE(pgtable->mode) ||
pgtable->mode - 1 < PAGE_SIZE_LEVEL(page_size)) {
/*
* Return an error if there is no memory to update the
* page-table.
*/
if (!increase_address_space(pgtable, address, gfp))
if (!increase_address_space(pgtable, last_addr,
PAGE_SIZE_LEVEL(page_size), gfp))
return NULL;
}

View File

@ -268,8 +268,11 @@ static int iommu_v2_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
out:
if (updated) {
struct protection_domain *pdom = io_pgtable_ops_to_domain(ops);
unsigned long flags;
spin_lock_irqsave(&pdom->lock, flags);
amd_iommu_domain_flush_pages(pdom, o_iova, size);
spin_unlock_irqrestore(&pdom->lock, flags);
}
if (mapped)

View File

@ -18,6 +18,7 @@
#include <linux/scatterlist.h>
#include <linux/dma-map-ops.h>
#include <linux/dma-direct.h>
#include <linux/idr.h>
#include <linux/iommu-helper.h>
#include <linux/delay.h>
#include <linux/amd-iommu.h>
@ -52,8 +53,6 @@
#define HT_RANGE_START (0xfd00000000ULL)
#define HT_RANGE_END (0xffffffffffULL)
static DEFINE_SPINLOCK(pd_bitmap_lock);
LIST_HEAD(ioapic_map);
LIST_HEAD(hpet_map);
LIST_HEAD(acpihid_map);
@ -70,9 +69,16 @@ struct iommu_cmd {
u32 data[4];
};
/*
* AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap
* to know which ones are already in use.
*/
DEFINE_IDA(pdom_ids);
struct kmem_cache *amd_iommu_irq_cache;
static void detach_device(struct device *dev);
static int amd_iommu_attach_device(struct iommu_domain *dom,
struct device *dev);
static void set_dte_entry(struct amd_iommu *iommu,
struct iommu_dev_data *dev_data);
@ -202,7 +208,7 @@ static struct iommu_dev_data *alloc_dev_data(struct amd_iommu *iommu, u16 devid)
if (!dev_data)
return NULL;
spin_lock_init(&dev_data->lock);
mutex_init(&dev_data->mutex);
dev_data->devid = devid;
ratelimit_default_init(&dev_data->rs);
@ -555,22 +561,6 @@ static void iommu_ignore_device(struct amd_iommu *iommu, struct device *dev)
setup_aliases(iommu, dev);
}
static void amd_iommu_uninit_device(struct device *dev)
{
struct iommu_dev_data *dev_data;
dev_data = dev_iommu_priv_get(dev);
if (!dev_data)
return;
if (dev_data->domain)
detach_device(dev);
/*
* We keep dev_data around for unplugged devices and reuse it when the
* device is re-plugged - not doing so would introduce a ton of races.
*/
}
/****************************************************************************
*
@ -1230,7 +1220,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
if (!iommu->need_sync)
return 0;
data = atomic64_add_return(1, &iommu->cmd_sem_val);
data = atomic64_inc_return(&iommu->cmd_sem_val);
build_completion_wait(&cmd, iommu, data);
raw_spin_lock_irqsave(&iommu->lock, flags);
@ -1249,18 +1239,17 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
static void domain_flush_complete(struct protection_domain *domain)
{
int i;
struct pdom_iommu_info *pdom_iommu_info;
unsigned long i;
for (i = 0; i < amd_iommu_get_num_iommus(); ++i) {
if (domain && !domain->dev_iommu[i])
continue;
lockdep_assert_held(&domain->lock);
/*
* Devices of this domain are behind this IOMMU
* We need to wait for completion of all commands.
*/
iommu_completion_wait(amd_iommus[i]);
}
/*
* Devices of this domain are behind this IOMMU
* We need to wait for completion of all commands.
*/
xa_for_each(&domain->iommu_array, i, pdom_iommu_info)
iommu_completion_wait(pdom_iommu_info->iommu);
}
static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid)
@ -1442,21 +1431,22 @@ static int domain_flush_pages_v2(struct protection_domain *pdom,
static int domain_flush_pages_v1(struct protection_domain *pdom,
u64 address, size_t size)
{
struct pdom_iommu_info *pdom_iommu_info;
struct iommu_cmd cmd;
int ret = 0, i;
int ret = 0;
unsigned long i;
lockdep_assert_held(&pdom->lock);
build_inv_iommu_pages(&cmd, address, size,
pdom->id, IOMMU_NO_PASID, false);
for (i = 0; i < amd_iommu_get_num_iommus(); ++i) {
if (!pdom->dev_iommu[i])
continue;
xa_for_each(&pdom->iommu_array, i, pdom_iommu_info) {
/*
* Devices of this domain are behind this IOMMU
* We need a TLB flush
*/
ret |= iommu_queue_command(amd_iommus[i], &cmd);
ret |= iommu_queue_command(pdom_iommu_info->iommu, &cmd);
}
return ret;
@ -1495,6 +1485,8 @@ static void __domain_flush_pages(struct protection_domain *domain,
void amd_iommu_domain_flush_pages(struct protection_domain *domain,
u64 address, size_t size)
{
lockdep_assert_held(&domain->lock);
if (likely(!amd_iommu_np_cache)) {
__domain_flush_pages(domain, address, size);
@ -1640,31 +1632,14 @@ int amd_iommu_complete_ppr(struct device *dev, u32 pasid, int status, int tag)
*
****************************************************************************/
static u16 domain_id_alloc(void)
static int pdom_id_alloc(void)
{
unsigned long flags;
int id;
spin_lock_irqsave(&pd_bitmap_lock, flags);
id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID);
BUG_ON(id == 0);
if (id > 0 && id < MAX_DOMAIN_ID)
__set_bit(id, amd_iommu_pd_alloc_bitmap);
else
id = 0;
spin_unlock_irqrestore(&pd_bitmap_lock, flags);
return id;
return ida_alloc_range(&pdom_ids, 1, MAX_DOMAIN_ID - 1, GFP_ATOMIC);
}
static void domain_id_free(int id)
static void pdom_id_free(int id)
{
unsigned long flags;
spin_lock_irqsave(&pd_bitmap_lock, flags);
if (id > 0 && id < MAX_DOMAIN_ID)
__clear_bit(id, amd_iommu_pd_alloc_bitmap);
spin_unlock_irqrestore(&pd_bitmap_lock, flags);
ida_free(&pdom_ids, id);
}
static void free_gcr3_tbl_level1(u64 *tbl)
@ -1709,7 +1684,7 @@ static void free_gcr3_table(struct gcr3_tbl_info *gcr3_info)
gcr3_info->glx = 0;
/* Free per device domain ID */
domain_id_free(gcr3_info->domid);
pdom_id_free(gcr3_info->domid);
iommu_free_page(gcr3_info->gcr3_tbl);
gcr3_info->gcr3_tbl = NULL;
@ -1736,6 +1711,7 @@ static int setup_gcr3_table(struct gcr3_tbl_info *gcr3_info,
{
int levels = get_gcr3_levels(pasids);
int nid = iommu ? dev_to_node(&iommu->dev->dev) : NUMA_NO_NODE;
int domid;
if (levels > amd_iommu_max_glx_val)
return -EINVAL;
@ -1744,11 +1720,14 @@ static int setup_gcr3_table(struct gcr3_tbl_info *gcr3_info,
return -EBUSY;
/* Allocate per device domain ID */
gcr3_info->domid = domain_id_alloc();
domid = pdom_id_alloc();
if (domid <= 0)
return -ENOSPC;
gcr3_info->domid = domid;
gcr3_info->gcr3_tbl = iommu_alloc_page_node(nid, GFP_ATOMIC);
if (gcr3_info->gcr3_tbl == NULL) {
domain_id_free(gcr3_info->domid);
pdom_id_free(domid);
return -ENOMEM;
}
@ -2019,57 +1998,69 @@ static void destroy_gcr3_table(struct iommu_dev_data *dev_data,
free_gcr3_table(gcr3_info);
}
static int do_attach(struct iommu_dev_data *dev_data,
struct protection_domain *domain)
static int pdom_attach_iommu(struct amd_iommu *iommu,
struct protection_domain *pdom)
{
struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
struct io_pgtable_cfg *cfg = &domain->iop.pgtbl.cfg;
struct pdom_iommu_info *pdom_iommu_info, *curr;
struct io_pgtable_cfg *cfg = &pdom->iop.pgtbl.cfg;
unsigned long flags;
int ret = 0;
/* Update data structures */
dev_data->domain = domain;
list_add(&dev_data->list, &domain->dev_list);
spin_lock_irqsave(&pdom->lock, flags);
pdom_iommu_info = xa_load(&pdom->iommu_array, iommu->index);
if (pdom_iommu_info) {
pdom_iommu_info->refcnt++;
goto out_unlock;
}
pdom_iommu_info = kzalloc(sizeof(*pdom_iommu_info), GFP_ATOMIC);
if (!pdom_iommu_info) {
ret = -ENOMEM;
goto out_unlock;
}
pdom_iommu_info->iommu = iommu;
pdom_iommu_info->refcnt = 1;
curr = xa_cmpxchg(&pdom->iommu_array, iommu->index,
NULL, pdom_iommu_info, GFP_ATOMIC);
if (curr) {
kfree(pdom_iommu_info);
ret = -ENOSPC;
goto out_unlock;
}
/* Update NUMA Node ID */
if (cfg->amd.nid == NUMA_NO_NODE)
cfg->amd.nid = dev_to_node(dev_data->dev);
/* Do reference counting */
domain->dev_iommu[iommu->index] += 1;
domain->dev_cnt += 1;
/* Setup GCR3 table */
if (pdom_is_sva_capable(domain)) {
ret = init_gcr3_table(dev_data, domain);
if (ret)
return ret;
}
cfg->amd.nid = dev_to_node(&iommu->dev->dev);
out_unlock:
spin_unlock_irqrestore(&pdom->lock, flags);
return ret;
}
static void do_detach(struct iommu_dev_data *dev_data)
static void pdom_detach_iommu(struct amd_iommu *iommu,
struct protection_domain *pdom)
{
struct protection_domain *domain = dev_data->domain;
struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
struct pdom_iommu_info *pdom_iommu_info;
unsigned long flags;
/* Clear DTE and flush the entry */
dev_update_dte(dev_data, false);
spin_lock_irqsave(&pdom->lock, flags);
/* Flush IOTLB and wait for the flushes to finish */
amd_iommu_domain_flush_all(domain);
pdom_iommu_info = xa_load(&pdom->iommu_array, iommu->index);
if (!pdom_iommu_info) {
spin_unlock_irqrestore(&pdom->lock, flags);
return;
}
/* Clear GCR3 table */
if (pdom_is_sva_capable(domain))
destroy_gcr3_table(dev_data, domain);
pdom_iommu_info->refcnt--;
if (pdom_iommu_info->refcnt == 0) {
xa_erase(&pdom->iommu_array, iommu->index);
kfree(pdom_iommu_info);
}
/* Update data structures */
dev_data->domain = NULL;
list_del(&dev_data->list);
/* decrease reference counters - needs to happen after the flushes */
domain->dev_iommu[iommu->index] -= 1;
domain->dev_cnt -= 1;
spin_unlock_irqrestore(&pdom->lock, flags);
}
/*
@ -2079,27 +2070,56 @@ static void do_detach(struct iommu_dev_data *dev_data)
static int attach_device(struct device *dev,
struct protection_domain *domain)
{
struct iommu_dev_data *dev_data;
unsigned long flags;
struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
struct pci_dev *pdev;
int ret = 0;
spin_lock_irqsave(&domain->lock, flags);
dev_data = dev_iommu_priv_get(dev);
spin_lock(&dev_data->lock);
mutex_lock(&dev_data->mutex);
if (dev_data->domain != NULL) {
ret = -EBUSY;
goto out;
}
ret = do_attach(dev_data, domain);
/* Do reference counting */
ret = pdom_attach_iommu(iommu, domain);
if (ret)
goto out;
/* Setup GCR3 table */
if (pdom_is_sva_capable(domain)) {
ret = init_gcr3_table(dev_data, domain);
if (ret) {
pdom_detach_iommu(iommu, domain);
goto out;
}
}
pdev = dev_is_pci(dev_data->dev) ? to_pci_dev(dev_data->dev) : NULL;
if (pdev && pdom_is_sva_capable(domain)) {
pdev_enable_caps(pdev);
/*
* Device can continue to function even if IOPF
* enablement failed. Hence in error path just
* disable device PRI support.
*/
if (amd_iommu_iopf_add_device(iommu, dev_data))
pdev_disable_cap_pri(pdev);
} else if (pdev) {
pdev_enable_cap_ats(pdev);
}
/* Update data structures */
dev_data->domain = domain;
list_add(&dev_data->list, &domain->dev_list);
/* Update device table */
dev_update_dte(dev_data, true);
out:
spin_unlock(&dev_data->lock);
spin_unlock_irqrestore(&domain->lock, flags);
mutex_unlock(&dev_data->mutex);
return ret;
}
@ -2110,14 +2130,11 @@ static int attach_device(struct device *dev,
static void detach_device(struct device *dev)
{
struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
struct protection_domain *domain = dev_data->domain;
struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
struct protection_domain *domain = dev_data->domain;
unsigned long flags;
bool ppr = dev_data->ppr;
spin_lock_irqsave(&domain->lock, flags);
spin_lock(&dev_data->lock);
mutex_lock(&dev_data->mutex);
/*
* First check if the device is still attached. It might already
@ -2128,27 +2145,36 @@ static void detach_device(struct device *dev)
if (WARN_ON(!dev_data->domain))
goto out;
if (ppr) {
iopf_queue_flush_dev(dev);
/* Updated here so that it gets reflected in DTE */
dev_data->ppr = false;
}
do_detach(dev_data);
out:
spin_unlock(&dev_data->lock);
spin_unlock_irqrestore(&domain->lock, flags);
/* Remove IOPF handler */
if (ppr)
if (dev_data->ppr) {
iopf_queue_flush_dev(dev);
amd_iommu_iopf_remove_device(iommu, dev_data);
}
if (dev_is_pci(dev))
pdev_disable_caps(to_pci_dev(dev));
/* Clear DTE and flush the entry */
dev_update_dte(dev_data, false);
/* Flush IOTLB and wait for the flushes to finish */
spin_lock_irqsave(&domain->lock, flags);
amd_iommu_domain_flush_all(domain);
spin_unlock_irqrestore(&domain->lock, flags);
/* Clear GCR3 table */
if (pdom_is_sva_capable(domain))
destroy_gcr3_table(dev_data, domain);
/* Update data structures */
dev_data->domain = NULL;
list_del(&dev_data->list);
/* decrease reference counters - needs to happen after the flushes */
pdom_detach_iommu(iommu, domain);
out:
mutex_unlock(&dev_data->mutex);
}
static struct iommu_device *amd_iommu_probe_device(struct device *dev)
@ -2205,17 +2231,14 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev)
static void amd_iommu_release_device(struct device *dev)
{
struct amd_iommu *iommu;
struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
if (!check_device(dev))
return;
WARN_ON(dev_data->domain);
iommu = rlookup_amd_iommu(dev);
if (!iommu)
return;
amd_iommu_uninit_device(dev);
iommu_completion_wait(iommu);
/*
* We keep dev_data around for unplugged devices and reuse it when the
* device is re-plugged - not doing so would introduce a ton of races.
*/
}
static struct iommu_group *amd_iommu_device_group(struct device *dev)
@ -2236,70 +2259,53 @@ static struct iommu_group *amd_iommu_device_group(struct device *dev)
*
*****************************************************************************/
static void cleanup_domain(struct protection_domain *domain)
{
struct iommu_dev_data *entry;
lockdep_assert_held(&domain->lock);
if (!domain->dev_cnt)
return;
while (!list_empty(&domain->dev_list)) {
entry = list_first_entry(&domain->dev_list,
struct iommu_dev_data, list);
BUG_ON(!entry->domain);
do_detach(entry);
}
WARN_ON(domain->dev_cnt != 0);
}
void protection_domain_free(struct protection_domain *domain)
{
WARN_ON(!list_empty(&domain->dev_list));
if (domain->domain.type & __IOMMU_DOMAIN_PAGING)
free_io_pgtable_ops(&domain->iop.pgtbl.ops);
domain_id_free(domain->id);
pdom_id_free(domain->id);
kfree(domain);
}
static void protection_domain_init(struct protection_domain *domain, int nid)
{
spin_lock_init(&domain->lock);
INIT_LIST_HEAD(&domain->dev_list);
INIT_LIST_HEAD(&domain->dev_data_list);
xa_init(&domain->iommu_array);
domain->iop.pgtbl.cfg.amd.nid = nid;
}
struct protection_domain *protection_domain_alloc(unsigned int type, int nid)
{
struct io_pgtable_ops *pgtbl_ops;
struct protection_domain *domain;
int pgtable;
int domid;
domain = kzalloc(sizeof(*domain), GFP_KERNEL);
if (!domain)
return NULL;
domain->id = domain_id_alloc();
if (!domain->id)
goto err_free;
spin_lock_init(&domain->lock);
INIT_LIST_HEAD(&domain->dev_list);
INIT_LIST_HEAD(&domain->dev_data_list);
domain->iop.pgtbl.cfg.amd.nid = nid;
switch (type) {
/* No need to allocate io pgtable ops in passthrough mode */
case IOMMU_DOMAIN_IDENTITY:
case IOMMU_DOMAIN_SVA:
return domain;
case IOMMU_DOMAIN_DMA:
pgtable = amd_iommu_pgtable;
break;
/*
* Force IOMMU v1 page table when allocating
* domain for pass-through devices.
*/
case IOMMU_DOMAIN_UNMANAGED:
pgtable = AMD_IOMMU_V1;
break;
default:
goto err_id;
domid = pdom_id_alloc();
if (domid <= 0) {
kfree(domain);
return NULL;
}
domain->id = domid;
protection_domain_init(domain, nid);
return domain;
}
static int pdom_setup_pgtable(struct protection_domain *domain,
unsigned int type, int pgtable)
{
struct io_pgtable_ops *pgtbl_ops;
/* No need to allocate io pgtable ops in passthrough mode */
if (!(type & __IOMMU_DOMAIN_PAGING))
return 0;
switch (pgtable) {
case AMD_IOMMU_V1:
@ -2309,25 +2315,20 @@ struct protection_domain *protection_domain_alloc(unsigned int type, int nid)
domain->pd_mode = PD_MODE_V2;
break;
default:
goto err_id;
return -EINVAL;
}
pgtbl_ops =
alloc_io_pgtable_ops(pgtable, &domain->iop.pgtbl.cfg, domain);
if (!pgtbl_ops)
goto err_id;
return -ENOMEM;
return domain;
err_id:
domain_id_free(domain->id);
err_free:
kfree(domain);
return NULL;
return 0;
}
static inline u64 dma_max_address(void)
static inline u64 dma_max_address(int pgtable)
{
if (amd_iommu_pgtable == AMD_IOMMU_V1)
if (pgtable == AMD_IOMMU_V1)
return ~0ULL;
/* V2 with 4/5 level page table */
@ -2340,11 +2341,13 @@ static bool amd_iommu_hd_support(struct amd_iommu *iommu)
}
static struct iommu_domain *do_iommu_domain_alloc(unsigned int type,
struct device *dev, u32 flags)
struct device *dev,
u32 flags, int pgtable)
{
bool dirty_tracking = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
struct protection_domain *domain;
struct amd_iommu *iommu = NULL;
int ret;
if (dev)
iommu = get_amd_iommu_from_dev(dev);
@ -2356,16 +2359,20 @@ static struct iommu_domain *do_iommu_domain_alloc(unsigned int type,
if (amd_iommu_snp_en && (type == IOMMU_DOMAIN_IDENTITY))
return ERR_PTR(-EINVAL);
if (dirty_tracking && !amd_iommu_hd_support(iommu))
return ERR_PTR(-EOPNOTSUPP);
domain = protection_domain_alloc(type,
dev ? dev_to_node(dev) : NUMA_NO_NODE);
if (!domain)
return ERR_PTR(-ENOMEM);
ret = pdom_setup_pgtable(domain, type, pgtable);
if (ret) {
pdom_id_free(domain->id);
kfree(domain);
return ERR_PTR(ret);
}
domain->domain.geometry.aperture_start = 0;
domain->domain.geometry.aperture_end = dma_max_address();
domain->domain.geometry.aperture_end = dma_max_address(pgtable);
domain->domain.geometry.force_aperture = true;
domain->domain.pgsize_bitmap = domain->iop.pgtbl.cfg.pgsize_bitmap;
@ -2383,8 +2390,16 @@ static struct iommu_domain *do_iommu_domain_alloc(unsigned int type,
static struct iommu_domain *amd_iommu_domain_alloc(unsigned int type)
{
struct iommu_domain *domain;
int pgtable = amd_iommu_pgtable;
domain = do_iommu_domain_alloc(type, NULL, 0);
/*
* Force IOMMU v1 page table when allocating
* domain for pass-through devices.
*/
if (type == IOMMU_DOMAIN_UNMANAGED)
pgtable = AMD_IOMMU_V1;
domain = do_iommu_domain_alloc(type, NULL, 0, pgtable);
if (IS_ERR(domain))
return NULL;
@ -2398,25 +2413,41 @@ amd_iommu_domain_alloc_user(struct device *dev, u32 flags,
{
unsigned int type = IOMMU_DOMAIN_UNMANAGED;
struct amd_iommu *iommu = NULL;
const u32 supported_flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
IOMMU_HWPT_ALLOC_PASID;
if ((flags & ~IOMMU_HWPT_ALLOC_DIRTY_TRACKING) || parent || user_data)
if (dev)
iommu = get_amd_iommu_from_dev(dev);
if ((flags & ~supported_flags) || parent || user_data)
return ERR_PTR(-EOPNOTSUPP);
return do_iommu_domain_alloc(type, dev, flags);
/* Allocate domain with v2 page table if IOMMU supports PASID. */
if (flags & IOMMU_HWPT_ALLOC_PASID) {
if (!amd_iommu_pasid_supported())
return ERR_PTR(-EOPNOTSUPP);
return do_iommu_domain_alloc(type, dev, flags, AMD_IOMMU_V2);
}
/* Allocate domain with v1 page table for dirty tracking */
if (flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING) {
if (iommu && amd_iommu_hd_support(iommu)) {
return do_iommu_domain_alloc(type, dev,
flags, AMD_IOMMU_V1);
}
return ERR_PTR(-EOPNOTSUPP);
}
/* If nothing specific is required use the kernel commandline default */
return do_iommu_domain_alloc(type, dev, 0, amd_iommu_pgtable);
}
void amd_iommu_domain_free(struct iommu_domain *dom)
{
struct protection_domain *domain;
unsigned long flags;
domain = to_pdomain(dom);
spin_lock_irqsave(&domain->lock, flags);
cleanup_domain(domain);
spin_unlock_irqrestore(&domain->lock, flags);
struct protection_domain *domain = to_pdomain(dom);
protection_domain_free(domain);
}
@ -2430,9 +2461,9 @@ static int blocked_domain_attach_device(struct iommu_domain *domain,
detach_device(dev);
/* Clear DTE and flush the entry */
spin_lock(&dev_data->lock);
mutex_lock(&dev_data->mutex);
dev_update_dte(dev_data, false);
spin_unlock(&dev_data->lock);
mutex_unlock(&dev_data->mutex);
return 0;
}
@ -2444,13 +2475,39 @@ static struct iommu_domain blocked_domain = {
}
};
static struct protection_domain identity_domain;
static const struct iommu_domain_ops identity_domain_ops = {
.attach_dev = amd_iommu_attach_device,
};
void amd_iommu_init_identity_domain(void)
{
struct iommu_domain *domain = &identity_domain.domain;
domain->type = IOMMU_DOMAIN_IDENTITY;
domain->ops = &identity_domain_ops;
domain->owner = &amd_iommu_ops;
identity_domain.id = pdom_id_alloc();
protection_domain_init(&identity_domain, NUMA_NO_NODE);
}
/* Same as blocked domain except it supports only ops->attach_dev() */
static struct iommu_domain release_domain = {
.type = IOMMU_DOMAIN_BLOCKED,
.ops = &(const struct iommu_domain_ops) {
.attach_dev = blocked_domain_attach_device,
}
};
static int amd_iommu_attach_device(struct iommu_domain *dom,
struct device *dev)
{
struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
struct protection_domain *domain = to_pdomain(dom);
struct amd_iommu *iommu = get_amd_iommu_from_dev(dev);
struct pci_dev *pdev;
int ret;
/*
@ -2483,24 +2540,6 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
}
#endif
pdev = dev_is_pci(dev_data->dev) ? to_pci_dev(dev_data->dev) : NULL;
if (pdev && pdom_is_sva_capable(domain)) {
pdev_enable_caps(pdev);
/*
* Device can continue to function even if IOPF
* enablement failed. Hence in error path just
* disable device PRI support.
*/
if (amd_iommu_iopf_add_device(iommu, dev_data))
pdev_disable_cap_pri(pdev);
} else if (pdev) {
pdev_enable_cap_ats(pdev);
}
/* Update device table */
dev_update_dte(dev_data, true);
return ret;
}
@ -2842,6 +2881,8 @@ static int amd_iommu_dev_disable_feature(struct device *dev,
const struct iommu_ops amd_iommu_ops = {
.capable = amd_iommu_capable,
.blocked_domain = &blocked_domain,
.release_domain = &release_domain,
.identity_domain = &identity_domain.domain,
.domain_alloc = amd_iommu_domain_alloc,
.domain_alloc_user = amd_iommu_domain_alloc_user,
.domain_alloc_sva = amd_iommu_domain_alloc_sva,
@ -2890,7 +2931,7 @@ static void iommu_flush_irt_and_complete(struct amd_iommu *iommu, u16 devid)
return;
build_inv_irt(&cmd, devid);
data = atomic64_add_return(1, &iommu->cmd_sem_val);
data = atomic64_inc_return(&iommu->cmd_sem_val);
build_completion_wait(&cmd2, iommu, data);
raw_spin_lock_irqsave(&iommu->lock, flags);
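
The hunks above also replace the driver's bitmap-plus-spinlock domain ID
allocator with an IDA, which handles its own locking. A standalone sketch
of the same allocation pattern (identifiers are illustrative, not the
driver's own; callers must treat a negative return as allocation failure):

    #include <linux/idr.h>

    static DEFINE_IDA(example_dom_ids);

    /* IDs come from [1, 0xffff]; 0 stays reserved as the "no domain" value */
    static int example_dom_id_alloc(void)
    {
            return ida_alloc_range(&example_dom_ids, 1, 0xffff, GFP_ATOMIC);
    }

    static void example_dom_id_free(int id)
    {
            ida_free(&example_dom_ids, id);
    }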

View File

@ -100,7 +100,8 @@ static const struct mmu_notifier_ops sva_mn = {
};
int iommu_sva_set_dev_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid)
struct device *dev, ioasid_t pasid,
struct iommu_domain *old)
{
struct pdom_dev_data *pdom_dev_data;
struct protection_domain *sva_pdom = to_pdomain(domain);
@ -108,6 +109,9 @@ int iommu_sva_set_dev_pasid(struct iommu_domain *domain,
unsigned long flags;
int ret = -EINVAL;
if (old)
return -EOPNOTSUPP;
/* PASID zero is used for requests from the I/O device without PASID */
if (!is_pasid_valid(dev_data, pasid))
return ret;

View File

@ -332,7 +332,8 @@ void arm_smmu_sva_notifier_synchronize(void)
}
static int arm_smmu_sva_set_dev_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t id)
struct device *dev, ioasid_t id,
struct iommu_domain *old)
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
@ -348,7 +349,7 @@ static int arm_smmu_sva_set_dev_pasid(struct iommu_domain *domain,
* get reassigned
*/
arm_smmu_make_sva_cd(&target, master, domain->mm, smmu_domain->cd.asid);
ret = arm_smmu_set_pasid(master, smmu_domain, id, &target);
ret = arm_smmu_set_pasid(master, smmu_domain, id, &target, old);
mmput(domain->mm);
return ret;

View File

@ -2911,7 +2911,8 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
}
static int arm_smmu_s1_set_dev_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t id)
struct device *dev, ioasid_t id,
struct iommu_domain *old)
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
@ -2937,7 +2938,7 @@ static int arm_smmu_s1_set_dev_pasid(struct iommu_domain *domain,
*/
arm_smmu_make_s1_cd(&target_cd, master, smmu_domain);
return arm_smmu_set_pasid(master, to_smmu_domain(domain), id,
&target_cd);
&target_cd, old);
}
static void arm_smmu_update_ste(struct arm_smmu_master *master,
@ -2967,16 +2968,13 @@ static void arm_smmu_update_ste(struct arm_smmu_master *master,
int arm_smmu_set_pasid(struct arm_smmu_master *master,
struct arm_smmu_domain *smmu_domain, ioasid_t pasid,
struct arm_smmu_cd *cd)
struct arm_smmu_cd *cd, struct iommu_domain *old)
{
struct iommu_domain *sid_domain = iommu_get_domain_for_dev(master->dev);
struct arm_smmu_attach_state state = {
.master = master,
/*
* For now the core code prevents calling this when a domain is
* already attached, no need to set old_domain.
*/
.ssid = pasid,
.old_domain = old,
};
struct arm_smmu_cd *cdptr;
int ret;
@ -3140,6 +3138,7 @@ arm_smmu_domain_alloc_user(struct device *dev, u32 flags,
{
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
const u32 PAGING_FLAGS = IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
IOMMU_HWPT_ALLOC_PASID |
IOMMU_HWPT_ALLOC_NEST_PARENT;
struct arm_smmu_domain *smmu_domain;
int ret;
@ -3149,6 +3148,9 @@ arm_smmu_domain_alloc_user(struct device *dev, u32 flags,
if (parent || user_data)
return ERR_PTR(-EOPNOTSUPP);
if (flags & IOMMU_HWPT_ALLOC_PASID)
return arm_smmu_domain_alloc_paging(dev);
smmu_domain = arm_smmu_domain_alloc();
if (IS_ERR(smmu_domain))
return ERR_CAST(smmu_domain);

View File

@ -911,7 +911,7 @@ void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid,
int arm_smmu_set_pasid(struct arm_smmu_master *master,
struct arm_smmu_domain *smmu_domain, ioasid_t pasid,
struct arm_smmu_cd *cd);
struct arm_smmu_cd *cd, struct iommu_domain *old);
void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid);
void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,

View File

@ -509,7 +509,8 @@ static int tegra241_vcmdq_alloc_smmu_cmdq(struct tegra241_vcmdq *vcmdq)
snprintf(name, 16, "vcmdq%u", vcmdq->idx);
q->llq.max_n_shift = VCMDQ_LOG2SIZE_MAX;
/* Queue size, capped to ensure natural alignment */
q->llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT, VCMDQ_LOG2SIZE_MAX);
/* Use the common helper to init the VCMDQ, and then... */
ret = arm_smmu_init_one_queue(smmu, q, vcmdq->page0,
@ -800,7 +801,9 @@ static int tegra241_cmdqv_init_structures(struct arm_smmu_device *smmu)
return 0;
}
struct dentry *cmdqv_debugfs_dir;
#ifdef CONFIG_IOMMU_DEBUGFS
static struct dentry *cmdqv_debugfs_dir;
#endif
static struct arm_smmu_device *
__tegra241_cmdqv_probe(struct arm_smmu_device *smmu, struct resource *res,

View File

@ -1437,6 +1437,17 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev)
goto out_free;
} else {
smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
/*
* Defer probe if the relevant SMMU instance hasn't finished
* probing yet. This is a fragile hack and we'd ideally
* avoid this race in the core code. Until that's ironed
* out, however, this is the most pragmatic option on the
* table.
*/
if (!smmu)
return ERR_PTR(dev_err_probe(dev, -EPROBE_DEFER,
"smmu dev has not bound yet\n"));
}
ret = -EINVAL;

View File

@ -14,6 +14,7 @@ config INTEL_IOMMU
depends on PCI_MSI && ACPI && X86
select IOMMU_API
select IOMMU_IOVA
select IOMMU_IOPF
select IOMMUFD_DRIVER if IOMMUFD
select NEED_DMA_MAP_STATE
select DMAR_TABLE
@ -50,7 +51,6 @@ config INTEL_IOMMU_SVM
depends on X86_64
select MMU_NOTIFIER
select IOMMU_SVA
select IOMMU_IOPF
help
Shared Virtual Memory (SVM) provides a facility for devices
to access DMA resources through process address space by

View File

@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_DMAR_TABLE) += dmar.o
obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o nested.o cache.o
obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o nested.o cache.o prq.o
obj-$(CONFIG_DMAR_TABLE) += trace.o cap_audit.o
obj-$(CONFIG_DMAR_PERF) += perf.o
obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o

View File

@ -1060,7 +1060,7 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
err = iommu->seq_id;
goto error;
}
sprintf(iommu->name, "dmar%d", iommu->seq_id);
snprintf(iommu->name, sizeof(iommu->name), "dmar%d", iommu->seq_id);
err = map_iommu(iommu, drhd);
if (err) {
@ -1895,19 +1895,6 @@ void dmar_msi_write(int irq, struct msi_msg *msg)
raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
void dmar_msi_read(int irq, struct msi_msg *msg)
{
struct intel_iommu *iommu = irq_get_handler_data(irq);
int reg = dmar_msi_reg(iommu, irq);
unsigned long flag;
raw_spin_lock_irqsave(&iommu->register_lock, flag);
msg->data = readl(iommu->reg + reg + 4);
msg->address_lo = readl(iommu->reg + reg + 8);
msg->address_hi = readl(iommu->reg + reg + 12);
raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
u8 fault_reason, u32 pasid, u16 source_id,
unsigned long long addr)

View File

@ -352,89 +352,6 @@ static bool iommu_paging_structure_coherency(struct intel_iommu *iommu)
ecap_smpwc(iommu->ecap) : ecap_coherent(iommu->ecap);
}
static void domain_update_iommu_coherency(struct dmar_domain *domain)
{
struct iommu_domain_info *info;
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu;
bool found = false;
unsigned long i;
domain->iommu_coherency = true;
xa_for_each(&domain->iommu_array, i, info) {
found = true;
if (!iommu_paging_structure_coherency(info->iommu)) {
domain->iommu_coherency = false;
break;
}
}
if (found)
return;
/* No hardware attached; use lowest common denominator */
rcu_read_lock();
for_each_active_iommu(iommu, drhd) {
if (!iommu_paging_structure_coherency(iommu)) {
domain->iommu_coherency = false;
break;
}
}
rcu_read_unlock();
}
static int domain_update_iommu_superpage(struct dmar_domain *domain,
struct intel_iommu *skip)
{
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu;
int mask = 0x3;
if (!intel_iommu_superpage)
return 0;
/* set iommu_superpage to the smallest common denominator */
rcu_read_lock();
for_each_active_iommu(iommu, drhd) {
if (iommu != skip) {
if (domain && domain->use_first_level) {
if (!cap_fl1gp_support(iommu->cap))
mask = 0x1;
} else {
mask &= cap_super_page_val(iommu->cap);
}
if (!mask)
break;
}
}
rcu_read_unlock();
return fls(mask);
}
static int domain_update_device_node(struct dmar_domain *domain)
{
struct device_domain_info *info;
int nid = NUMA_NO_NODE;
unsigned long flags;
spin_lock_irqsave(&domain->lock, flags);
list_for_each_entry(info, &domain->devices, link) {
/*
* There could possibly be multiple device numa nodes as devices
* within the same domain may sit behind different IOMMUs. There
* isn't perfect answer in such situation, so we select first
* come first served policy.
*/
nid = dev_to_node(info->dev);
if (nid != NUMA_NO_NODE)
break;
}
spin_unlock_irqrestore(&domain->lock, flags);
return nid;
}
/* Return the super pagesize bitmap if supported. */
static unsigned long domain_super_pgsize_bitmap(struct dmar_domain *domain)
{
@ -452,34 +369,6 @@ static unsigned long domain_super_pgsize_bitmap(struct dmar_domain *domain)
return bitmap;
}
/* Some capabilities may be different across iommus */
void domain_update_iommu_cap(struct dmar_domain *domain)
{
domain_update_iommu_coherency(domain);
domain->iommu_superpage = domain_update_iommu_superpage(domain, NULL);
/*
* If RHSA is missing, we should default to the device numa domain
* as fall back.
*/
if (domain->nid == NUMA_NO_NODE)
domain->nid = domain_update_device_node(domain);
/*
* First-level translation restricts the input-address to a
* canonical address (i.e., address bits 63:N have the same
* value as address bit [N-1], where N is 48-bits with 4-level
* paging and 57-bits with 5-level paging). Hence, skip bit
* [N-1].
*/
if (domain->use_first_level)
domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw - 1);
else
domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw);
domain->domain.pgsize_bitmap |= domain_super_pgsize_bitmap(domain);
}
struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
u8 devfn, int alloc)
{
@ -707,14 +596,15 @@ static void pgtable_walk(struct intel_iommu *iommu, unsigned long pfn,
while (1) {
offset = pfn_level_offset(pfn, level);
pte = &parent[offset];
if (!pte || (dma_pte_superpage(pte) || !dma_pte_present(pte))) {
pr_info("PTE not present at level %d\n", level);
break;
}
pr_info("pte level: %d, pte value: 0x%016llx\n", level, pte->val);
if (level == 1)
if (!dma_pte_present(pte)) {
pr_info("page table not present at level %d\n", level - 1);
break;
}
if (level == 1 || dma_pte_superpage(pte))
break;
parent = phys_to_virt(dma_pte_addr(pte));
@ -737,11 +627,11 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
pr_info("Dump %s table entries for IOVA 0x%llx\n", iommu->name, addr);
/* root entry dump */
rt_entry = &iommu->root_entry[bus];
if (!rt_entry) {
pr_info("root table entry is not present\n");
if (!iommu->root_entry) {
pr_info("root table is not present\n");
return;
}
rt_entry = &iommu->root_entry[bus];
if (sm_supported(iommu))
pr_info("scalable mode root entry: hi 0x%016llx, low 0x%016llx\n",
@ -752,7 +642,7 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
/* context entry dump */
ctx_entry = iommu_context_addr(iommu, bus, devfn, 0);
if (!ctx_entry) {
pr_info("context table entry is not present\n");
pr_info("context table is not present\n");
return;
}
@ -761,17 +651,23 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
/* legacy mode does not require PASID entries */
if (!sm_supported(iommu)) {
if (!context_present(ctx_entry)) {
pr_info("legacy mode page table is not present\n");
return;
}
level = agaw_to_level(ctx_entry->hi & 7);
pgtable = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
goto pgtable_walk;
}
/* get the pointer to pasid directory entry */
dir = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
if (!dir) {
pr_info("pasid directory entry is not present\n");
if (!context_present(ctx_entry)) {
pr_info("pasid directory table is not present\n");
return;
}
/* get the pointer to pasid directory entry */
dir = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
/* For request-without-pasid, get the pasid from context entry */
if (intel_iommu_sm && pasid == IOMMU_PASID_INVALID)
pasid = IOMMU_NO_PASID;
@ -783,7 +679,7 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
/* get the pointer to the pasid table entry */
entries = get_pasid_table_from_pde(pde);
if (!entries) {
pr_info("pasid table entry is not present\n");
pr_info("pasid table is not present\n");
return;
}
index = pasid & PASID_PTE_MASK;
@ -791,6 +687,11 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
for (i = 0; i < ARRAY_SIZE(pte->val); i++)
pr_info("pasid table entry[%d]: 0x%016llx\n", i, pte->val[i]);
if (!pasid_pte_is_present(pte)) {
pr_info("scalable mode page table is not present\n");
return;
}
if (pasid_pte_get_pgtt(pte) == PASID_ENTRY_PGTT_FL_ONLY) {
level = pte->val[2] & BIT_ULL(2) ? 5 : 4;
pgtable = phys_to_virt(pte->val[2] & VTD_PAGE_MASK);
@ -1428,51 +1329,25 @@ static void free_dmar_iommu(struct intel_iommu *iommu)
/* free context mapping */
free_context_table(iommu);
#ifdef CONFIG_INTEL_IOMMU_SVM
if (pasid_supported(iommu)) {
if (ecap_prs(iommu->ecap))
intel_svm_finish_prq(iommu);
}
#endif
if (ecap_prs(iommu->ecap))
intel_iommu_finish_prq(iommu);
}
/*
* Check and return whether first level is used by default for
* DMA translation.
*/
static bool first_level_by_default(unsigned int type)
static bool first_level_by_default(struct intel_iommu *iommu)
{
/* Only SL is available in legacy mode */
if (!scalable_mode_support())
if (!sm_supported(iommu))
return false;
/* Only level (either FL or SL) is available, just use it */
if (intel_cap_flts_sanity() ^ intel_cap_slts_sanity())
return intel_cap_flts_sanity();
if (ecap_flts(iommu->ecap) ^ ecap_slts(iommu->ecap))
return ecap_flts(iommu->ecap);
/* Both levels are available, decide it based on domain type */
return type != IOMMU_DOMAIN_UNMANAGED;
}
static struct dmar_domain *alloc_domain(unsigned int type)
{
struct dmar_domain *domain;
domain = kzalloc(sizeof(*domain), GFP_KERNEL);
if (!domain)
return NULL;
domain->nid = NUMA_NO_NODE;
if (first_level_by_default(type))
domain->use_first_level = true;
INIT_LIST_HEAD(&domain->devices);
INIT_LIST_HEAD(&domain->dev_pasids);
INIT_LIST_HEAD(&domain->cache_tags);
spin_lock_init(&domain->lock);
spin_lock_init(&domain->cache_lock);
xa_init(&domain->iommu_array);
return domain;
return true;
}
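/*
 * How the reworked helper reads: legacy (non-scalable) mode only has
 * second-level translation; if exactly one of ecap_flts/ecap_slts is
 * advertised, that stage is the only possible choice; and when both
 * are available the driver now always prefers first level, instead of
 * keying the decision off the domain type as the old code did.
 */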
int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
@@ -1514,7 +1389,6 @@ int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
ret = xa_err(curr) ? : -EBUSY;
goto err_clear;
}
domain_update_iommu_cap(domain);
spin_unlock(&iommu->lock);
return 0;
@@ -1540,26 +1414,11 @@ void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
clear_bit(info->did, iommu->domain_ids);
xa_erase(&domain->iommu_array, iommu->seq_id);
domain->nid = NUMA_NO_NODE;
domain_update_iommu_cap(domain);
kfree(info);
}
spin_unlock(&iommu->lock);
}
static int guestwidth_to_adjustwidth(int gaw)
{
int agaw;
int r = (gaw - 12) % 9;
if (r == 0)
agaw = gaw;
else
agaw = gaw + 9 - r;
if (agaw > 64)
agaw = 64;
return agaw;
}
static void domain_exit(struct dmar_domain *domain)
{
if (domain->pgd) {
@@ -1601,7 +1460,7 @@ static void copied_context_tear_down(struct intel_iommu *iommu,
if (did_old < cap_ndoms(iommu->cap)) {
iommu->flush.flush_context(iommu, did_old,
(((u16)bus) << 8) | devfn,
PCI_DEVID(bus, devfn),
DMA_CCMD_MASK_NOBIT,
DMA_CCMD_DEVICE_INVL);
iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
@@ -1622,7 +1481,7 @@ static void context_present_cache_flush(struct intel_iommu *iommu, u16 did,
{
if (cap_caching_mode(iommu->cap)) {
iommu->flush.flush_context(iommu, 0,
(((u16)bus) << 8) | devfn,
PCI_DEVID(bus, devfn),
DMA_CCMD_MASK_NOBIT,
DMA_CCMD_DEVICE_INVL);
iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
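/*
 * The open-coded "(bus << 8) | devfn" source-id construction in the two
 * flush paths above is replaced with PCI_DEVID() from <linux/pci.h>,
 * which packs the same 16-bit requester ID: (((u16)(bus)) << 8) | devfn.
 */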
@@ -1641,7 +1500,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
int translation = CONTEXT_TT_MULTI_LEVEL;
struct dma_pte *pgd = domain->pgd;
struct context_entry *context;
int agaw, ret;
int ret;
pr_debug("Set context mapping for %02x:%02x.%d\n",
bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
@@ -1658,27 +1517,15 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
copied_context_tear_down(iommu, context, bus, devfn);
context_clear_entry(context);
context_set_domain_id(context, did);
/*
* Skip top levels of page tables for iommu which has
* less agaw than default. Unnecessary for PT mode.
*/
for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
ret = -ENOMEM;
pgd = phys_to_virt(dma_pte_addr(pgd));
if (!dma_pte_present(pgd))
goto out_unlock;
}
if (info && info->ats_supported)
translation = CONTEXT_TT_DEV_IOTLB;
else
translation = CONTEXT_TT_MULTI_LEVEL;
context_set_address_root(context, virt_to_phys(pgd));
context_set_address_width(context, agaw);
context_set_address_width(context, domain->agaw);
context_set_translation_type(context, translation);
context_set_fault_enable(context);
context_set_present(context);
@@ -1905,26 +1752,52 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8
intel_context_flush_present(info, context, did, true);
}
int __domain_setup_first_level(struct intel_iommu *iommu,
struct device *dev, ioasid_t pasid,
u16 did, pgd_t *pgd, int flags,
struct iommu_domain *old)
{
if (!old)
return intel_pasid_setup_first_level(iommu, dev, pgd,
pasid, did, flags);
return intel_pasid_replace_first_level(iommu, dev, pgd, pasid, did,
iommu_domain_did(old, iommu),
flags);
}
static int domain_setup_second_level(struct intel_iommu *iommu,
struct dmar_domain *domain,
struct device *dev, ioasid_t pasid,
struct iommu_domain *old)
{
if (!old)
return intel_pasid_setup_second_level(iommu, domain,
dev, pasid);
return intel_pasid_replace_second_level(iommu, domain, dev,
iommu_domain_did(old, iommu),
pasid);
}
static int domain_setup_passthrough(struct intel_iommu *iommu,
struct device *dev, ioasid_t pasid,
struct iommu_domain *old)
{
if (!old)
return intel_pasid_setup_pass_through(iommu, dev, pasid);
return intel_pasid_replace_pass_through(iommu, dev,
iommu_domain_did(old, iommu),
pasid);
}
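/*
 * Common shape of the three helpers above: a NULL @old means a fresh
 * attach, so the plain intel_pasid_setup_*() path is taken; a non-NULL
 * @old means a live PASID entry is being replaced, so the
 * intel_pasid_replace_*() variant runs with the outgoing domain's ID
 * (iommu_domain_did(old, iommu)) for the required invalidations.
 */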
static int domain_setup_first_level(struct intel_iommu *iommu,
struct dmar_domain *domain,
struct device *dev,
u32 pasid)
u32 pasid, struct iommu_domain *old)
{
struct dma_pte *pgd = domain->pgd;
int agaw, level;
int flags = 0;
int level, flags = 0;
/*
* Skip top levels of page tables for iommu which has
* less agaw than default. Unnecessary for PT mode.
*/
for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
pgd = phys_to_virt(dma_pte_addr(pgd));
if (!dma_pte_present(pgd))
return -ENOMEM;
}
level = agaw_to_level(agaw);
level = agaw_to_level(domain->agaw);
if (level != 4 && level != 5)
return -EINVAL;
@@ -1934,15 +1807,9 @@ static int domain_setup_first_level(struct intel_iommu *iommu,
if (domain->force_snooping)
flags |= PASID_FLAG_PAGE_SNOOP;
return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid,
domain_id_iommu(domain, iommu),
flags);
}
static bool dev_is_real_dma_subdevice(struct device *dev)
{
return dev && dev_is_pci(dev) &&
pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev);
return __domain_setup_first_level(iommu, dev, pasid,
domain_id_iommu(domain, iommu),
(pgd_t *)pgd, flags, old);
}
static int dmar_domain_attach_device(struct dmar_domain *domain,
@@ -1968,9 +1835,11 @@ static int dmar_domain_attach_device(struct dmar_domain *domain,
if (!sm_supported(iommu))
ret = domain_context_mapping(domain, dev);
else if (domain->use_first_level)
ret = domain_setup_first_level(iommu, domain, dev, IOMMU_NO_PASID);
ret = domain_setup_first_level(iommu, domain, dev,
IOMMU_NO_PASID, NULL);
else
ret = intel_pasid_setup_second_level(iommu, domain, dev, IOMMU_NO_PASID);
ret = domain_setup_second_level(iommu, domain, dev,
IOMMU_NO_PASID, NULL);
if (ret)
goto out_block_translation;
@@ -2354,19 +2223,18 @@ static int __init init_dmars(void)
iommu_flush_write_buffer(iommu);
#ifdef CONFIG_INTEL_IOMMU_SVM
if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
if (ecap_prs(iommu->ecap)) {
/*
* Call dmar_alloc_hwirq() with dmar_global_lock held,
* could cause possible lock race condition.
*/
up_write(&dmar_global_lock);
ret = intel_svm_enable_prq(iommu);
ret = intel_iommu_enable_prq(iommu);
down_write(&dmar_global_lock);
if (ret)
goto free_iommu;
}
#endif
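/*
 * The PRQ setup in init_dmars() is no longer wrapped in
 * CONFIG_INTEL_IOMMU_SVM or gated on pasid_supported(): any IOMMU that
 * advertises ecap_prs gets intel_iommu_enable_prq(), which now lives in
 * the new prq.c rather than in svm.c.
 */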
ret = dmar_set_interrupt(iommu);
if (ret)
goto free_iommu;
@@ -2746,20 +2614,13 @@ int dmar_parse_one_satc(struct acpi_dmar_header *hdr, void *arg)
static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
{
int sp, ret;
struct intel_iommu *iommu = dmaru->iommu;
int ret;
ret = intel_cap_audit(CAP_AUDIT_HOTPLUG_DMAR, iommu);
if (ret)
goto out;
sp = domain_update_iommu_superpage(NULL, iommu) - 1;
if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
pr_warn("%s: Doesn't support large page.\n",
iommu->name);
return -ENXIO;
}
/*
* Disable translation if already enabled prior to OS handover.
*/
@@ -2786,13 +2647,12 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
intel_iommu_init_qi(iommu);
iommu_flush_write_buffer(iommu);
#ifdef CONFIG_INTEL_IOMMU_SVM
if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
ret = intel_svm_enable_prq(iommu);
if (ecap_prs(iommu->ecap)) {
ret = intel_iommu_enable_prq(iommu);
if (ret)
goto disable_iommu;
}
#endif
ret = dmar_set_interrupt(iommu);
if (ret)
goto disable_iommu;
@@ -3288,7 +3148,7 @@ int __init intel_iommu_init(void)
* the virtual and physical IOMMU page-tables.
*/
if (cap_caching_mode(iommu->cap) &&
!first_level_by_default(IOMMU_DOMAIN_DMA)) {
!first_level_by_default(iommu)) {
pr_info_once("IOMMU batching disallowed due to virtualization\n");
iommu_set_dma_strict();
}
@@ -3381,27 +3241,6 @@ void device_block_translation(struct device *dev)
info->domain = NULL;
}
static int md_domain_init(struct dmar_domain *domain, int guest_width)
{
int adjust_width;
/* calculate AGAW */
domain->gaw = guest_width;
adjust_width = guestwidth_to_adjustwidth(guest_width);
domain->agaw = width_to_agaw(adjust_width);
domain->iommu_coherency = false;
domain->iommu_superpage = 0;
domain->max_addr = 0;
/* always allocate the top pgd */
domain->pgd = iommu_alloc_page_node(domain->nid, GFP_ATOMIC);
if (!domain->pgd)
return -ENOMEM;
domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
return 0;
}
static int blocking_domain_attach_dev(struct iommu_domain *domain,
struct device *dev)
{
@@ -3488,39 +3327,6 @@ static struct dmar_domain *paging_domain_alloc(struct device *dev, bool first_st
return domain;
}
static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
{
struct dmar_domain *dmar_domain;
struct iommu_domain *domain;
switch (type) {
case IOMMU_DOMAIN_DMA:
case IOMMU_DOMAIN_UNMANAGED:
dmar_domain = alloc_domain(type);
if (!dmar_domain) {
pr_err("Can't allocate dmar_domain\n");
return NULL;
}
if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
pr_err("Domain initialization failed\n");
domain_exit(dmar_domain);
return NULL;
}
domain = &dmar_domain->domain;
domain->geometry.aperture_start = 0;
domain->geometry.aperture_end =
__DOMAIN_MAX_ADDR(dmar_domain->gaw);
domain->geometry.force_aperture = true;
return domain;
default:
return NULL;
}
return NULL;
}
static struct iommu_domain *
intel_iommu_domain_alloc_user(struct device *dev, u32 flags,
struct iommu_domain *parent,
@@ -3532,6 +3338,7 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags,
struct intel_iommu *iommu = info->iommu;
struct dmar_domain *dmar_domain;
struct iommu_domain *domain;
bool first_stage;
/* Must be NESTING domain */
if (parent) {
@@ -3541,15 +3348,28 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags,
}
if (flags &
(~(IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING)))
(~(IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING
| IOMMU_HWPT_FAULT_ID_VALID)))
return ERR_PTR(-EOPNOTSUPP);
if (nested_parent && !nested_supported(iommu))
return ERR_PTR(-EOPNOTSUPP);
if (user_data || (dirty_tracking && !ssads_supported(iommu)))
return ERR_PTR(-EOPNOTSUPP);
/* Do not use first stage for user domain translation. */
dmar_domain = paging_domain_alloc(dev, false);
/*
* Always allocate the guest compatible page table unless
* IOMMU_HWPT_ALLOC_NEST_PARENT or IOMMU_HWPT_ALLOC_DIRTY_TRACKING
* is specified.
*/
if (nested_parent || dirty_tracking) {
if (!sm_supported(iommu) || !ecap_slts(iommu->ecap))
return ERR_PTR(-EOPNOTSUPP);
first_stage = false;
} else {
first_stage = first_level_by_default(iommu);
}
dmar_domain = paging_domain_alloc(dev, first_stage);
if (IS_ERR(dmar_domain))
return ERR_CAST(dmar_domain);
domain = &dmar_domain->domain;
@@ -3583,42 +3403,41 @@ static void intel_iommu_domain_free(struct iommu_domain *domain)
domain_exit(dmar_domain);
}
int prepare_domain_attach_device(struct iommu_domain *domain,
struct device *dev)
int paging_domain_compatible(struct iommu_domain *domain, struct device *dev)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
struct intel_iommu *iommu = info->iommu;
int addr_width;
if (WARN_ON_ONCE(!(domain->type & __IOMMU_DOMAIN_PAGING)))
return -EPERM;
if (dmar_domain->force_snooping && !ecap_sc_support(iommu->ecap))
return -EINVAL;
if (domain->dirty_ops && !ssads_supported(iommu))
return -EINVAL;
if (dmar_domain->iommu_coherency !=
iommu_paging_structure_coherency(iommu))
return -EINVAL;
if (dmar_domain->iommu_superpage !=
iommu_superpage_capability(iommu, dmar_domain->use_first_level))
return -EINVAL;
if (dmar_domain->use_first_level &&
(!sm_supported(iommu) || !ecap_flts(iommu->ecap)))
return -EINVAL;
/* check if this iommu agaw is sufficient for max mapped address */
addr_width = agaw_to_width(iommu->agaw);
if (addr_width > cap_mgaw(iommu->cap))
addr_width = cap_mgaw(iommu->cap);
if (dmar_domain->max_addr > (1LL << addr_width))
if (dmar_domain->gaw > addr_width || dmar_domain->agaw > iommu->agaw)
return -EINVAL;
dmar_domain->gaw = addr_width;
/*
* Knock out extra levels of page tables if necessary
*/
while (iommu->agaw < dmar_domain->agaw) {
struct dma_pte *pte;
pte = dmar_domain->pgd;
if (dma_pte_present(pte)) {
dmar_domain->pgd = phys_to_virt(dma_pte_addr(pte));
iommu_free_page(pte);
}
dmar_domain->agaw--;
}
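/*
 * Behavioural note: the old prepare_domain_attach_device() adapted the
 * domain to a weaker IOMMU by shrinking gaw and knocking out top page
 * table levels; paging_domain_compatible() instead only verifies that
 * the domain as allocated (coherency, superpage size, first-level
 * support, gaw/agaw) already fits this IOMMU and fails the attach
 * otherwise.
 */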
if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) &&
context_copied(iommu, info->bus, info->devfn))
@@ -3634,7 +3453,7 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
device_block_translation(dev);
ret = prepare_domain_attach_device(domain, dev);
ret = paging_domain_compatible(domain, dev);
if (ret)
return ret;
@@ -4252,8 +4071,8 @@ static int intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
return 0;
}
static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
struct iommu_domain *domain)
void domain_remove_dev_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct dev_pasid_info *curr, *dev_pasid = NULL;
@@ -4261,10 +4080,12 @@ static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
struct dmar_domain *dmar_domain;
unsigned long flags;
if (domain->type == IOMMU_DOMAIN_IDENTITY) {
intel_pasid_tear_down_entry(iommu, dev, pasid, false);
if (!domain)
return;
/* Identity domain has no meta data for pasid. */
if (domain->type == IOMMU_DOMAIN_IDENTITY)
return;
}
dmar_domain = to_dmar_domain(domain);
spin_lock_irqsave(&dmar_domain->lock, flags);
@@ -4282,12 +4103,20 @@ static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
domain_detach_iommu(dmar_domain, iommu);
intel_iommu_debugfs_remove_dev_pasid(dev_pasid);
kfree(dev_pasid);
intel_pasid_tear_down_entry(iommu, dev, pasid, false);
intel_drain_pasid_prq(dev, pasid);
}
static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid)
static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
struct iommu_domain *domain)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
intel_pasid_tear_down_entry(info->iommu, dev, pasid, false);
domain_remove_dev_pasid(domain, dev, pasid);
}
struct dev_pasid_info *
domain_add_dev_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
@@ -4296,6 +4125,45 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
unsigned long flags;
int ret;
dev_pasid = kzalloc(sizeof(*dev_pasid), GFP_KERNEL);
if (!dev_pasid)
return ERR_PTR(-ENOMEM);
ret = domain_attach_iommu(dmar_domain, iommu);
if (ret)
goto out_free;
ret = cache_tag_assign_domain(dmar_domain, dev, pasid);
if (ret)
goto out_detach_iommu;
dev_pasid->dev = dev;
dev_pasid->pasid = pasid;
spin_lock_irqsave(&dmar_domain->lock, flags);
list_add(&dev_pasid->link_domain, &dmar_domain->dev_pasids);
spin_unlock_irqrestore(&dmar_domain->lock, flags);
return dev_pasid;
out_detach_iommu:
domain_detach_iommu(dmar_domain, iommu);
out_free:
kfree(dev_pasid);
return ERR_PTR(ret);
}
static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid,
struct iommu_domain *old)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
struct intel_iommu *iommu = info->iommu;
struct dev_pasid_info *dev_pasid;
int ret;
if (WARN_ON_ONCE(!(domain->type & __IOMMU_DOMAIN_PAGING)))
return -EINVAL;
if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev))
return -EOPNOTSUPP;
@@ -4305,47 +4173,31 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
if (context_copied(iommu, info->bus, info->devfn))
return -EBUSY;
ret = prepare_domain_attach_device(domain, dev);
ret = paging_domain_compatible(domain, dev);
if (ret)
return ret;
dev_pasid = kzalloc(sizeof(*dev_pasid), GFP_KERNEL);
if (!dev_pasid)
return -ENOMEM;
ret = domain_attach_iommu(dmar_domain, iommu);
if (ret)
goto out_free;
ret = cache_tag_assign_domain(dmar_domain, dev, pasid);
if (ret)
goto out_detach_iommu;
dev_pasid = domain_add_dev_pasid(domain, dev, pasid);
if (IS_ERR(dev_pasid))
return PTR_ERR(dev_pasid);
if (dmar_domain->use_first_level)
ret = domain_setup_first_level(iommu, dmar_domain,
dev, pasid);
dev, pasid, old);
else
ret = intel_pasid_setup_second_level(iommu, dmar_domain,
dev, pasid);
ret = domain_setup_second_level(iommu, dmar_domain,
dev, pasid, old);
if (ret)
goto out_unassign_tag;
goto out_remove_dev_pasid;
dev_pasid->dev = dev;
dev_pasid->pasid = pasid;
spin_lock_irqsave(&dmar_domain->lock, flags);
list_add(&dev_pasid->link_domain, &dmar_domain->dev_pasids);
spin_unlock_irqrestore(&dmar_domain->lock, flags);
domain_remove_dev_pasid(old, dev, pasid);
if (domain->type & __IOMMU_DOMAIN_PAGING)
intel_iommu_debugfs_create_dev_pasid(dev_pasid);
intel_iommu_debugfs_create_dev_pasid(dev_pasid);
return 0;
out_unassign_tag:
cache_tag_unassign_domain(dmar_domain, dev, pasid);
out_detach_iommu:
domain_detach_iommu(dmar_domain, iommu);
out_free:
kfree(dev_pasid);
out_remove_dev_pasid:
domain_remove_dev_pasid(domain, dev, pasid);
return ret;
}
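/*
 * intel_iommu_set_dev_pasid() now takes the old domain and delegates
 * the bookkeeping to domain_add_dev_pasid()/domain_remove_dev_pasid(),
 * so a failed replacement unwinds through a single error label instead
 * of the previous open-coded cache-tag unassign / iommu detach / kfree
 * sequence.
 */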
@@ -4573,15 +4425,22 @@ static int identity_domain_attach_dev(struct iommu_domain *domain, struct device
}
static int identity_domain_set_dev_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid)
struct device *dev, ioasid_t pasid,
struct iommu_domain *old)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu = info->iommu;
int ret;
if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev))
return -EOPNOTSUPP;
return intel_pasid_setup_pass_through(iommu, dev, pasid);
ret = domain_setup_passthrough(iommu, dev, pasid, old);
if (ret)
return ret;
domain_remove_dev_pasid(old, dev, pasid);
return 0;
}
static struct iommu_domain identity_domain = {
@@ -4592,15 +4451,30 @@ static struct iommu_domain identity_domain = {
},
};
static struct iommu_domain *intel_iommu_domain_alloc_paging(struct device *dev)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu = info->iommu;
struct dmar_domain *dmar_domain;
bool first_stage;
first_stage = first_level_by_default(iommu);
dmar_domain = paging_domain_alloc(dev, first_stage);
if (IS_ERR(dmar_domain))
return ERR_CAST(dmar_domain);
return &dmar_domain->domain;
}
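/*
 * domain_alloc_paging() replaces the old domain_alloc callback below:
 * the paging domain is sized for the probing device's IOMMU and the
 * first- vs second-stage choice comes from first_level_by_default().
 */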
const struct iommu_ops intel_iommu_ops = {
.blocked_domain = &blocking_domain,
.release_domain = &blocking_domain,
.identity_domain = &identity_domain,
.capable = intel_iommu_capable,
.hw_info = intel_iommu_hw_info,
.domain_alloc = intel_iommu_domain_alloc,
.domain_alloc_user = intel_iommu_domain_alloc_user,
.domain_alloc_sva = intel_svm_domain_alloc,
.domain_alloc_paging = intel_iommu_domain_alloc_paging,
.probe_device = intel_iommu_probe_device,
.release_device = intel_iommu_release_device,
.get_resv_regions = intel_iommu_get_resv_regions,
@@ -4611,9 +4485,7 @@ const struct iommu_ops intel_iommu_ops = {
.def_domain_type = device_def_domain_type,
.remove_dev_pasid = intel_iommu_remove_dev_pasid,
.pgsize_bitmap = SZ_4K,
#ifdef CONFIG_INTEL_IOMMU_SVM
.page_response = intel_svm_page_response,
#endif
.page_response = intel_iommu_page_response,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = intel_iommu_attach_device,
.set_dev_pasid = intel_iommu_set_dev_pasid,

drivers/iommu/intel/iommu.h
@@ -22,6 +22,7 @@
#include <linux/bitfield.h>
#include <linux/xarray.h>
#include <linux/perf_event.h>
#include <linux/pci.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>
@@ -653,8 +654,6 @@ struct dmar_domain {
struct {
/* parent page table which the user domain is nested on */
struct dmar_domain *s2_domain;
/* user page table pointer (in GPA) */
unsigned long s1_pgtbl;
/* page table attributes */
struct iommu_hwpt_vtd_s1 s1_cfg;
/* link to parent domain siblings */
@@ -720,7 +719,7 @@ struct intel_iommu {
int msagaw; /* max sagaw of this iommu */
unsigned int irq, pr_irq, perf_irq;
u16 segment; /* PCI segment# */
unsigned char name[13]; /* Device Name */
unsigned char name[16]; /* Device Name */
#ifdef CONFIG_INTEL_IOMMU
unsigned long *domain_ids; /* bitmap of domains */
@@ -730,12 +729,10 @@ struct intel_iommu {
struct iommu_flush flush;
#endif
#ifdef CONFIG_INTEL_IOMMU_SVM
struct page_req_dsc *prq;
unsigned char prq_name[16]; /* Name for PRQ interrupt */
unsigned long prq_seq_number;
struct completion prq_complete;
#endif
struct iopf_queue *iopf_queue;
unsigned char iopfq_name[16];
/* Synchronization between fault report and iommu device release. */
@@ -810,6 +807,13 @@ static inline struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
return container_of(dom, struct dmar_domain, domain);
}
/*
* Domain ID reserved for pasid entries programmed for first-level
* only and pass-through transfer modes.
*/
#define FLPT_DEFAULT_DID 1
#define NUM_RESERVED_DID 2
/* Retrieve the domain ID which has allocated to the domain */
static inline u16
domain_id_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
@@ -820,6 +824,21 @@ domain_id_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
return info->did;
}
static inline u16
iommu_domain_did(struct iommu_domain *domain, struct intel_iommu *iommu)
{
if (domain->type == IOMMU_DOMAIN_SVA ||
domain->type == IOMMU_DOMAIN_IDENTITY)
return FLPT_DEFAULT_DID;
return domain_id_iommu(to_dmar_domain(domain), iommu);
}
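/*
 * iommu_domain_did() gives the replace paths one way to look up the DID
 * that was programmed for the outgoing domain: SVA and identity
 * attachments always use the reserved FLPT_DEFAULT_DID, everything else
 * uses the per-IOMMU ID returned by domain_id_iommu().
 */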
static inline bool dev_is_real_dma_subdevice(struct device *dev)
{
return dev && dev_is_pci(dev) &&
pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev);
}
/*
* 0: readable
* 1: writable
@@ -1230,9 +1249,18 @@ void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu);
void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu);
void device_block_translation(struct device *dev);
int prepare_domain_attach_device(struct iommu_domain *domain,
struct device *dev);
void domain_update_iommu_cap(struct dmar_domain *domain);
int paging_domain_compatible(struct iommu_domain *domain, struct device *dev);
struct dev_pasid_info *
domain_add_dev_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid);
void domain_remove_dev_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid);
int __domain_setup_first_level(struct intel_iommu *iommu,
struct device *dev, ioasid_t pasid,
u16 did, pgd_t *pgd, int flags,
struct iommu_domain *old);
int dmar_ir_support(void);
@@ -1278,18 +1306,18 @@ void intel_context_flush_present(struct device_domain_info *info,
struct context_entry *context,
u16 did, bool affect_domains);
int intel_iommu_enable_prq(struct intel_iommu *iommu);
int intel_iommu_finish_prq(struct intel_iommu *iommu);
void intel_iommu_page_response(struct device *dev, struct iopf_fault *evt,
struct iommu_page_response *msg);
void intel_iommu_drain_pasid_prq(struct device *dev, u32 pasid);
#ifdef CONFIG_INTEL_IOMMU_SVM
void intel_svm_check(struct intel_iommu *iommu);
int intel_svm_enable_prq(struct intel_iommu *iommu);
int intel_svm_finish_prq(struct intel_iommu *iommu);
void intel_svm_page_response(struct device *dev, struct iopf_fault *evt,
struct iommu_page_response *msg);
struct iommu_domain *intel_svm_domain_alloc(struct device *dev,
struct mm_struct *mm);
void intel_drain_pasid_prq(struct device *dev, u32 pasid);
#else
static inline void intel_svm_check(struct intel_iommu *iommu) {}
static inline void intel_drain_pasid_prq(struct device *dev, u32 pasid) {}
static inline struct iommu_domain *intel_svm_domain_alloc(struct device *dev,
struct mm_struct *mm)
{

drivers/iommu/intel/irq_remapping.c
@@ -312,7 +312,7 @@ static int set_ioapic_sid(struct irte *irte, int apic)
for (i = 0; i < MAX_IO_APICS; i++) {
if (ir_ioapic[i].iommu && ir_ioapic[i].id == apic) {
sid = (ir_ioapic[i].bus << 8) | ir_ioapic[i].devfn;
sid = PCI_DEVID(ir_ioapic[i].bus, ir_ioapic[i].devfn);
break;
}
}
@@ -337,7 +337,7 @@ static int set_hpet_sid(struct irte *irte, u8 id)
for (i = 0; i < MAX_HPET_TBS; i++) {
if (ir_hpet[i].iommu && ir_hpet[i].id == id) {
sid = (ir_hpet[i].bus << 8) | ir_hpet[i].devfn;
sid = PCI_DEVID(ir_hpet[i].bus, ir_hpet[i].devfn);
break;
}
}

drivers/iommu/intel/nested.c
@@ -40,7 +40,7 @@ static int intel_nested_attach_dev(struct iommu_domain *domain,
* The s2_domain will be used in nested translation, hence needs
* to ensure the s2_domain is compatible with this IOMMU.
*/
ret = prepare_domain_attach_device(&dmar_domain->s2_domain->domain, dev);
ret = paging_domain_compatible(&dmar_domain->s2_domain->domain, dev);
if (ret) {
dev_err_ratelimited(dev, "s2 domain is not compatible\n");
return ret;
@@ -130,8 +130,58 @@ static int intel_nested_cache_invalidate_user(struct iommu_domain *domain,
return ret;
}
static int domain_setup_nested(struct intel_iommu *iommu,
struct dmar_domain *domain,
struct device *dev, ioasid_t pasid,
struct iommu_domain *old)
{
if (!old)
return intel_pasid_setup_nested(iommu, dev, pasid, domain);
return intel_pasid_replace_nested(iommu, dev, pasid,
iommu_domain_did(old, iommu),
domain);
}
static int intel_nested_set_dev_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid,
struct iommu_domain *old)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
struct intel_iommu *iommu = info->iommu;
struct dev_pasid_info *dev_pasid;
int ret;
if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev))
return -EOPNOTSUPP;
if (context_copied(iommu, info->bus, info->devfn))
return -EBUSY;
ret = paging_domain_compatible(&dmar_domain->s2_domain->domain, dev);
if (ret)
return ret;
dev_pasid = domain_add_dev_pasid(domain, dev, pasid);
if (IS_ERR(dev_pasid))
return PTR_ERR(dev_pasid);
ret = domain_setup_nested(iommu, dmar_domain, dev, pasid, old);
if (ret)
goto out_remove_dev_pasid;
domain_remove_dev_pasid(old, dev, pasid);
return 0;
out_remove_dev_pasid:
domain_remove_dev_pasid(domain, dev, pasid);
return ret;
}
static const struct iommu_domain_ops intel_nested_domain_ops = {
.attach_dev = intel_nested_attach_dev,
.set_dev_pasid = intel_nested_set_dev_pasid,
.free = intel_nested_domain_free,
.cache_invalidate_user = intel_nested_cache_invalidate_user,
};
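/*
 * With set_dev_pasid wired up here, a nested domain can be installed on
 * a PASID directly, and domain_setup_nested() follows the same
 * setup-vs-replace split as the first/second-level helpers in iommu.c.
 */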
@@ -162,7 +212,6 @@ struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *parent,
domain->use_first_level = true;
domain->s2_domain = s2_domain;
domain->s1_pgtbl = vtd.pgtbl_addr;
domain->s1_cfg = vtd;
domain->domain.ops = &intel_nested_domain_ops;
domain->domain.type = IOMMU_DOMAIN_NESTED;

drivers/iommu/intel/pasid.c
@@ -220,7 +220,7 @@ devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
if (pci_dev_is_disconnected(to_pci_dev(dev)))
return;
sid = info->bus << 8 | info->devfn;
sid = PCI_DEVID(info->bus, info->devfn);
qdep = info->ats_qdep;
pfsid = info->pfsid;
@@ -265,6 +265,7 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
devtlb_invalidation_with_pasid(iommu, dev, pasid);
intel_iommu_drain_pasid_prq(dev, pasid);
}
/*
@@ -286,10 +287,69 @@ static void pasid_flush_caches(struct intel_iommu *iommu,
}
}
/*
* This function is supposed to be used after caller updates the fields
* except for the SSADE and P bit of a pasid table entry. It does the
* below:
* - Flush cacheline if needed
* - Flush the caches per Table 28 Guidance to Software for Invalidations
* of VT-d spec 5.0.
*/
static void intel_pasid_flush_present(struct intel_iommu *iommu,
struct device *dev,
u32 pasid, u16 did,
struct pasid_entry *pte)
{
if (!ecap_coherent(iommu->ecap))
clflush_cache_range(pte, sizeof(*pte));
/*
* VT-d spec 5.0 table28 states guides for cache invalidation:
*
* - PASID-selective-within-Domain PASID-cache invalidation
* - PASID-selective PASID-based IOTLB invalidation
* - If (pasid is RID_PASID)
* - Global Device-TLB invalidation to affected functions
* Else
* - PASID-based Device-TLB invalidation (with S=1 and
* Addr[63:12]=0x7FFFFFFF_FFFFF) to affected functions
*/
pasid_cache_invalidation_with_pasid(iommu, did, pasid);
qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
devtlb_invalidation_with_pasid(iommu, dev, pasid);
}
/*
* Set up the scalable mode pasid table entry for first only
* translation type.
*/
static void pasid_pte_config_first_level(struct intel_iommu *iommu,
struct pasid_entry *pte,
pgd_t *pgd, u16 did, int flags)
{
lockdep_assert_held(&iommu->lock);
pasid_clear_entry(pte);
/* Setup the first level page table pointer: */
pasid_set_flptr(pte, (u64)__pa(pgd));
if (flags & PASID_FLAG_FL5LP)
pasid_set_flpm(pte, 1);
if (flags & PASID_FLAG_PAGE_SNOOP)
pasid_set_pgsnp(pte);
pasid_set_domain_id(pte, did);
pasid_set_address_width(pte, iommu->agaw);
pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
/* Setup Present and PASID Granular Transfer Type: */
pasid_set_translation_type(pte, PASID_ENTRY_PGTT_FL_ONLY);
pasid_set_present(pte);
}
int intel_pasid_setup_first_level(struct intel_iommu *iommu,
struct device *dev, pgd_t *pgd,
u32 pasid, u16 did, int flags)
@@ -320,24 +380,8 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu,
return -EBUSY;
}
pasid_clear_entry(pte);
pasid_pte_config_first_level(iommu, pte, pgd, did, flags);
/* Setup the first level page table pointer: */
pasid_set_flptr(pte, (u64)__pa(pgd));
if (flags & PASID_FLAG_FL5LP)
pasid_set_flpm(pte, 1);
if (flags & PASID_FLAG_PAGE_SNOOP)
pasid_set_pgsnp(pte);
pasid_set_domain_id(pte, did);
pasid_set_address_width(pte, iommu->agaw);
pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
/* Setup Present and PASID Granular Transfer Type: */
pasid_set_translation_type(pte, PASID_ENTRY_PGTT_FL_ONLY);
pasid_set_present(pte);
spin_unlock(&iommu->lock);
pasid_flush_caches(iommu, pte, pasid, did);
@@ -345,28 +389,73 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu,
return 0;
}
/*
* Skip top levels of page tables for iommu which has less agaw
* than default. Unnecessary for PT mode.
*/
static int iommu_skip_agaw(struct dmar_domain *domain,
struct intel_iommu *iommu,
struct dma_pte **pgd)
int intel_pasid_replace_first_level(struct intel_iommu *iommu,
struct device *dev, pgd_t *pgd,
u32 pasid, u16 did, u16 old_did,
int flags)
{
int agaw;
struct pasid_entry *pte, new_pte;
for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
*pgd = phys_to_virt(dma_pte_addr(*pgd));
if (!dma_pte_present(*pgd))
return -EINVAL;
if (!ecap_flts(iommu->ecap)) {
pr_err("No first level translation support on %s\n",
iommu->name);
return -EINVAL;
}
return agaw;
if ((flags & PASID_FLAG_FL5LP) && !cap_fl5lp_support(iommu->cap)) {
pr_err("No 5-level paging support for first-level on %s\n",
iommu->name);
return -EINVAL;
}
pasid_pte_config_first_level(iommu, &new_pte, pgd, did, flags);
spin_lock(&iommu->lock);
pte = intel_pasid_get_entry(dev, pasid);
if (!pte) {
spin_unlock(&iommu->lock);
return -ENODEV;
}
if (!pasid_pte_is_present(pte)) {
spin_unlock(&iommu->lock);
return -EINVAL;
}
WARN_ON(old_did != pasid_get_domain_id(pte));
*pte = new_pte;
spin_unlock(&iommu->lock);
intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
intel_iommu_drain_pasid_prq(dev, pasid);
return 0;
}
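/*
 * Pattern shared by the replace paths: the pasid_pte_config_*() helper
 * only fills in a pasid_entry, the setup functions still program the
 * live entry under iommu->lock, while the replace functions build
 * new_pte on the stack, copy it over the present entry, and then flush
 * via intel_pasid_flush_present() plus a PRQ drain for the old DID.
 */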
/*
* Set up the scalable mode pasid entry for second only translation type.
*/
static void pasid_pte_config_second_level(struct intel_iommu *iommu,
struct pasid_entry *pte,
u64 pgd_val, int agaw, u16 did,
bool dirty_tracking)
{
lockdep_assert_held(&iommu->lock);
pasid_clear_entry(pte);
pasid_set_domain_id(pte, did);
pasid_set_slptr(pte, pgd_val);
pasid_set_address_width(pte, agaw);
pasid_set_translation_type(pte, PASID_ENTRY_PGTT_SL_ONLY);
pasid_set_fault_enable(pte);
pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
if (dirty_tracking)
pasid_set_ssade(pte);
pasid_set_present(pte);
}
int intel_pasid_setup_second_level(struct intel_iommu *iommu,
struct dmar_domain *domain,
struct device *dev, u32 pasid)
@@ -374,7 +463,6 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
struct pasid_entry *pte;
struct dma_pte *pgd;
u64 pgd_val;
int agaw;
u16 did;
/*
@@ -388,12 +476,6 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
}
pgd = domain->pgd;
agaw = iommu_skip_agaw(domain, iommu, &pgd);
if (agaw < 0) {
dev_err(dev, "Invalid domain page table\n");
return -EINVAL;
}
pgd_val = virt_to_phys(pgd);
did = domain_id_iommu(domain, iommu);
@@ -409,17 +491,8 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
return -EBUSY;
}
pasid_clear_entry(pte);
pasid_set_domain_id(pte, did);
pasid_set_slptr(pte, pgd_val);
pasid_set_address_width(pte, agaw);
pasid_set_translation_type(pte, PASID_ENTRY_PGTT_SL_ONLY);
pasid_set_fault_enable(pte);
pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
if (domain->dirty_tracking)
pasid_set_ssade(pte);
pasid_set_present(pte);
pasid_pte_config_second_level(iommu, pte, pgd_val, domain->agaw,
did, domain->dirty_tracking);
spin_unlock(&iommu->lock);
pasid_flush_caches(iommu, pte, pasid, did);
@@ -427,6 +500,57 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
return 0;
}
int intel_pasid_replace_second_level(struct intel_iommu *iommu,
struct dmar_domain *domain,
struct device *dev, u16 old_did,
u32 pasid)
{
struct pasid_entry *pte, new_pte;
struct dma_pte *pgd;
u64 pgd_val;
u16 did;
/*
* If hardware advertises no support for second level
* translation, return directly.
*/
if (!ecap_slts(iommu->ecap)) {
pr_err("No second level translation support on %s\n",
iommu->name);
return -EINVAL;
}
pgd = domain->pgd;
pgd_val = virt_to_phys(pgd);
did = domain_id_iommu(domain, iommu);
pasid_pte_config_second_level(iommu, &new_pte, pgd_val,
domain->agaw, did,
domain->dirty_tracking);
spin_lock(&iommu->lock);
pte = intel_pasid_get_entry(dev, pasid);
if (!pte) {
spin_unlock(&iommu->lock);
return -ENODEV;
}
if (!pasid_pte_is_present(pte)) {
spin_unlock(&iommu->lock);
return -EINVAL;
}
WARN_ON(old_did != pasid_get_domain_id(pte));
*pte = new_pte;
spin_unlock(&iommu->lock);
intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
intel_iommu_drain_pasid_prq(dev, pasid);
return 0;
}
/*
* Set up dirty tracking on a second only or nested translation type.
*/
@@ -499,6 +623,20 @@ int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu,
/*
* Set up the scalable mode pasid entry for passthrough translation type.
*/
static void pasid_pte_config_pass_through(struct intel_iommu *iommu,
struct pasid_entry *pte, u16 did)
{
lockdep_assert_held(&iommu->lock);
pasid_clear_entry(pte);
pasid_set_domain_id(pte, did);
pasid_set_address_width(pte, iommu->agaw);
pasid_set_translation_type(pte, PASID_ENTRY_PGTT_PT);
pasid_set_fault_enable(pte);
pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
pasid_set_present(pte);
}
int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
struct device *dev, u32 pasid)
{
@@ -517,13 +655,7 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
return -EBUSY;
}
pasid_clear_entry(pte);
pasid_set_domain_id(pte, did);
pasid_set_address_width(pte, iommu->agaw);
pasid_set_translation_type(pte, PASID_ENTRY_PGTT_PT);
pasid_set_fault_enable(pte);
pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
pasid_set_present(pte);
pasid_pte_config_pass_through(iommu, pte, did);
spin_unlock(&iommu->lock);
pasid_flush_caches(iommu, pte, pasid, did);
@@ -531,6 +663,38 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
return 0;
}
int intel_pasid_replace_pass_through(struct intel_iommu *iommu,
struct device *dev, u16 old_did,
u32 pasid)
{
struct pasid_entry *pte, new_pte;
u16 did = FLPT_DEFAULT_DID;
pasid_pte_config_pass_through(iommu, &new_pte, did);
spin_lock(&iommu->lock);
pte = intel_pasid_get_entry(dev, pasid);
if (!pte) {
spin_unlock(&iommu->lock);
return -ENODEV;
}
if (!pasid_pte_is_present(pte)) {
spin_unlock(&iommu->lock);
return -EINVAL;
}
WARN_ON(old_did != pasid_get_domain_id(pte));
*pte = new_pte;
spin_unlock(&iommu->lock);
intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
intel_iommu_drain_pasid_prq(dev, pasid);
return 0;
}
/*
* Set the page snoop control for a pasid entry which has been set up.
*/
@@ -551,24 +715,47 @@ void intel_pasid_setup_page_snoop_control(struct intel_iommu *iommu,
did = pasid_get_domain_id(pte);
spin_unlock(&iommu->lock);
if (!ecap_coherent(iommu->ecap))
clflush_cache_range(pte, sizeof(*pte));
intel_pasid_flush_present(iommu, dev, pasid, did, pte);
}
/*
* VT-d spec 3.4 table23 states guides for cache invalidation:
*
* - PASID-selective-within-Domain PASID-cache invalidation
* - PASID-selective PASID-based IOTLB invalidation
* - If (pasid is RID_PASID)
* - Global Device-TLB invalidation to affected functions
* Else
* - PASID-based Device-TLB invalidation (with S=1 and
* Addr[63:12]=0x7FFFFFFF_FFFFF) to affected functions
*/
pasid_cache_invalidation_with_pasid(iommu, did, pasid);
qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
static void pasid_pte_config_nestd(struct intel_iommu *iommu,
struct pasid_entry *pte,
struct iommu_hwpt_vtd_s1 *s1_cfg,
struct dmar_domain *s2_domain,
u16 did)
{
struct dma_pte *pgd = s2_domain->pgd;
devtlb_invalidation_with_pasid(iommu, dev, pasid);
lockdep_assert_held(&iommu->lock);
pasid_clear_entry(pte);
if (s1_cfg->addr_width == ADDR_WIDTH_5LEVEL)
pasid_set_flpm(pte, 1);
pasid_set_flptr(pte, s1_cfg->pgtbl_addr);
if (s1_cfg->flags & IOMMU_VTD_S1_SRE) {
pasid_set_sre(pte);
if (s1_cfg->flags & IOMMU_VTD_S1_WPE)
pasid_set_wpe(pte);
}
if (s1_cfg->flags & IOMMU_VTD_S1_EAFE)
pasid_set_eafe(pte);
if (s2_domain->force_snooping)
pasid_set_pgsnp(pte);
pasid_set_slptr(pte, virt_to_phys(pgd));
pasid_set_fault_enable(pte);
pasid_set_domain_id(pte, did);
pasid_set_address_width(pte, s2_domain->agaw);
pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
if (s2_domain->dirty_tracking)
pasid_set_ssade(pte);
pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED);
pasid_set_present(pte);
}
/**
@@ -586,10 +773,8 @@ int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
u32 pasid, struct dmar_domain *domain)
{
struct iommu_hwpt_vtd_s1 *s1_cfg = &domain->s1_cfg;
pgd_t *s1_gpgd = (pgd_t *)(uintptr_t)domain->s1_pgtbl;
struct dmar_domain *s2_domain = domain->s2_domain;
u16 did = domain_id_iommu(domain, iommu);
struct dma_pte *pgd = s2_domain->pgd;
struct pasid_entry *pte;
/* Address width should match the address width supported by hardware */
@@ -632,34 +817,7 @@ int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
return -EBUSY;
}
pasid_clear_entry(pte);
if (s1_cfg->addr_width == ADDR_WIDTH_5LEVEL)
pasid_set_flpm(pte, 1);
pasid_set_flptr(pte, (uintptr_t)s1_gpgd);
if (s1_cfg->flags & IOMMU_VTD_S1_SRE) {
pasid_set_sre(pte);
if (s1_cfg->flags & IOMMU_VTD_S1_WPE)
pasid_set_wpe(pte);
}
if (s1_cfg->flags & IOMMU_VTD_S1_EAFE)
pasid_set_eafe(pte);
if (s2_domain->force_snooping)
pasid_set_pgsnp(pte);
pasid_set_slptr(pte, virt_to_phys(pgd));
pasid_set_fault_enable(pte);
pasid_set_domain_id(pte, did);
pasid_set_address_width(pte, s2_domain->agaw);
pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
if (s2_domain->dirty_tracking)
pasid_set_ssade(pte);
pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED);
pasid_set_present(pte);
pasid_pte_config_nestd(iommu, pte, s1_cfg, s2_domain, did);
spin_unlock(&iommu->lock);
pasid_flush_caches(iommu, pte, pasid, did);
@@ -667,6 +825,69 @@ int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
return 0;
}
int intel_pasid_replace_nested(struct intel_iommu *iommu,
struct device *dev, u32 pasid,
u16 old_did, struct dmar_domain *domain)
{
struct iommu_hwpt_vtd_s1 *s1_cfg = &domain->s1_cfg;
struct dmar_domain *s2_domain = domain->s2_domain;
u16 did = domain_id_iommu(domain, iommu);
struct pasid_entry *pte, new_pte;
/* Address width should match the address width supported by hardware */
switch (s1_cfg->addr_width) {
case ADDR_WIDTH_4LEVEL:
break;
case ADDR_WIDTH_5LEVEL:
if (!cap_fl5lp_support(iommu->cap)) {
dev_err_ratelimited(dev,
"5-level paging not supported\n");
return -EINVAL;
}
break;
default:
dev_err_ratelimited(dev, "Invalid stage-1 address width %d\n",
s1_cfg->addr_width);
return -EINVAL;
}
if ((s1_cfg->flags & IOMMU_VTD_S1_SRE) && !ecap_srs(iommu->ecap)) {
pr_err_ratelimited("No supervisor request support on %s\n",
iommu->name);
return -EINVAL;
}
if ((s1_cfg->flags & IOMMU_VTD_S1_EAFE) && !ecap_eafs(iommu->ecap)) {
pr_err_ratelimited("No extended access flag support on %s\n",
iommu->name);
return -EINVAL;
}
pasid_pte_config_nestd(iommu, &new_pte, s1_cfg, s2_domain, did);
spin_lock(&iommu->lock);
pte = intel_pasid_get_entry(dev, pasid);
if (!pte) {
spin_unlock(&iommu->lock);
return -ENODEV;
}
if (!pasid_pte_is_present(pte)) {
spin_unlock(&iommu->lock);
return -EINVAL;
}
WARN_ON(old_did != pasid_get_domain_id(pte));
*pte = new_pte;
spin_unlock(&iommu->lock);
intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
intel_iommu_drain_pasid_prq(dev, pasid);
return 0;
}
/*
* Interfaces to setup or teardown a pasid table to the scalable-mode
* context table entry:

drivers/iommu/intel/pasid.h
@@ -22,13 +22,6 @@
#define is_pasid_enabled(entry) (((entry)->lo >> 3) & 0x1)
#define get_pasid_dir_size(entry) (1 << ((((entry)->lo >> 9) & 0x7) + 7))
/*
* Domain ID reserved for pasid entries programmed for first-level
* only and pass-through transfer modes.
*/
#define FLPT_DEFAULT_DID 1
#define NUM_RESERVED_DID 2
#define PASID_FLAG_NESTED BIT(1)
#define PASID_FLAG_PAGE_SNOOP BIT(2)
@@ -303,6 +296,21 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
struct device *dev, u32 pasid);
int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
u32 pasid, struct dmar_domain *domain);
int intel_pasid_replace_first_level(struct intel_iommu *iommu,
struct device *dev, pgd_t *pgd,
u32 pasid, u16 did, u16 old_did,
int flags);
int intel_pasid_replace_second_level(struct intel_iommu *iommu,
struct dmar_domain *domain,
struct device *dev, u16 old_did,
u32 pasid);
int intel_pasid_replace_pass_through(struct intel_iommu *iommu,
struct device *dev, u16 old_did,
u32 pasid);
int intel_pasid_replace_nested(struct intel_iommu *iommu,
struct device *dev, u32 pasid,
u16 old_did, struct dmar_domain *domain);
void intel_pasid_tear_down_entry(struct intel_iommu *iommu,
struct device *dev, u32 pasid,
bool fault_ignore);

drivers/iommu/intel/prq.c (new file, 396 lines)
@@ -0,0 +1,396 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2015 Intel Corporation
*
* Originally split from drivers/iommu/intel/svm.c
*/
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include "iommu.h"
#include "pasid.h"
#include "../iommu-pages.h"
#include "trace.h"
/* Page request queue descriptor */
struct page_req_dsc {
union {
struct {
u64 type:8;
u64 pasid_present:1;
u64 rsvd:7;
u64 rid:16;
u64 pasid:20;
u64 exe_req:1;
u64 pm_req:1;
u64 rsvd2:10;
};
u64 qw_0;
};
union {
struct {
u64 rd_req:1;
u64 wr_req:1;
u64 lpig:1;
u64 prg_index:9;
u64 addr:52;
};
u64 qw_1;
};
u64 qw_2;
u64 qw_3;
};
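/*
 * page_req_dsc mirrors the four-quadword page request descriptor layout
 * from the VT-d specification; qw_2/qw_3 are not decoded here and are
 * only passed through to the prq_report tracepoint.
 */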
/**
* intel_iommu_drain_pasid_prq - Drain page requests and responses for a pasid
* @dev: target device
* @pasid: pasid for draining
*
* Drain all pending page requests and responses related to @pasid in both
* software and hardware. This is supposed to be called after the device
* driver has stopped DMA, the pasid entry has been cleared, and both IOTLB
* and DevTLB have been invalidated.
*
* It waits until all pending page requests for @pasid in the page fault
* queue are completed by the prq handling thread. Then follow the steps
* described in VT-d spec CH7.10 to drain all page requests and page
* responses pending in the hardware.
*/
void intel_iommu_drain_pasid_prq(struct device *dev, u32 pasid)
{
struct device_domain_info *info;
struct dmar_domain *domain;
struct intel_iommu *iommu;
struct qi_desc desc[3];
int head, tail;
u16 sid, did;
info = dev_iommu_priv_get(dev);
if (!info->pri_enabled)
return;
iommu = info->iommu;
domain = info->domain;
sid = PCI_DEVID(info->bus, info->devfn);
did = domain ? domain_id_iommu(domain, iommu) : FLPT_DEFAULT_DID;
/*
* Check and wait until all pending page requests in the queue are
* handled by the prq handling thread.
*/
prq_retry:
reinit_completion(&iommu->prq_complete);
tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
while (head != tail) {
struct page_req_dsc *req;
req = &iommu->prq[head / sizeof(*req)];
if (!req->pasid_present || req->pasid != pasid) {
head = (head + sizeof(*req)) & PRQ_RING_MASK;
continue;
}
wait_for_completion(&iommu->prq_complete);
goto prq_retry;
}
iopf_queue_flush_dev(dev);
/*
* Perform steps described in VT-d spec CH7.10 to drain page
* requests and responses in hardware.
*/
memset(desc, 0, sizeof(desc));
desc[0].qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
QI_IWD_FENCE |
QI_IWD_TYPE;
if (pasid == IOMMU_NO_PASID) {
qi_desc_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH, &desc[1]);
qi_desc_dev_iotlb(sid, info->pfsid, info->ats_qdep, 0,
MAX_AGAW_PFN_WIDTH, &desc[2]);
} else {
qi_desc_piotlb(did, pasid, 0, -1, 0, &desc[1]);
qi_desc_dev_iotlb_pasid(sid, info->pfsid, pasid, info->ats_qdep,
0, MAX_AGAW_PFN_WIDTH, &desc[2]);
}
qi_retry:
reinit_completion(&iommu->prq_complete);
qi_submit_sync(iommu, desc, 3, QI_OPT_WAIT_DRAIN);
if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
wait_for_completion(&iommu->prq_complete);
goto qi_retry;
}
}
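/*
 * The drain submits three descriptors as one batch: a fenced
 * invalidation-wait, an IOTLB (or PASID-based IOTLB) flush and a
 * device-TLB flush, and keeps retrying with QI_OPT_WAIT_DRAIN while the
 * hardware still reports a page request overflow (DMA_PRS_PRO).
 */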
static bool is_canonical_address(u64 addr)
{
int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
long saddr = (long)addr;
return (((saddr << shift) >> shift) == saddr);
}
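/*
 * Worked example, assuming x86-64 4-level paging where
 * __VIRTUAL_MASK_SHIFT is 47: shift is 16, and the shift pair
 * sign-extends bit 47 into bits 63:48.  0x00007fffffffffff and
 * 0xffff800000000000 survive the round trip and are canonical, while
 * 0x0000800000000000 turns into 0xffff800000000000 and is rejected.
 */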
static void handle_bad_prq_event(struct intel_iommu *iommu,
struct page_req_dsc *req, int result)
{
struct qi_desc desc = { };
pr_err("%s: Invalid page request: %08llx %08llx\n",
iommu->name, ((unsigned long long *)req)[0],
((unsigned long long *)req)[1]);
if (!req->lpig)
return;
desc.qw0 = QI_PGRP_PASID(req->pasid) |
QI_PGRP_DID(req->rid) |
QI_PGRP_PASID_P(req->pasid_present) |
QI_PGRP_RESP_CODE(result) |
QI_PGRP_RESP_TYPE;
desc.qw1 = QI_PGRP_IDX(req->prg_index) |
QI_PGRP_LPIG(req->lpig);
qi_submit_sync(iommu, &desc, 1, 0);
}
static int prq_to_iommu_prot(struct page_req_dsc *req)
{
int prot = 0;
if (req->rd_req)
prot |= IOMMU_FAULT_PERM_READ;
if (req->wr_req)
prot |= IOMMU_FAULT_PERM_WRITE;
if (req->exe_req)
prot |= IOMMU_FAULT_PERM_EXEC;
if (req->pm_req)
prot |= IOMMU_FAULT_PERM_PRIV;
return prot;
}
static void intel_prq_report(struct intel_iommu *iommu, struct device *dev,
struct page_req_dsc *desc)
{
struct iopf_fault event = { };
/* Fill in event data for device specific processing */
event.fault.type = IOMMU_FAULT_PAGE_REQ;
event.fault.prm.addr = (u64)desc->addr << VTD_PAGE_SHIFT;
event.fault.prm.pasid = desc->pasid;
event.fault.prm.grpid = desc->prg_index;
event.fault.prm.perm = prq_to_iommu_prot(desc);
if (desc->lpig)
event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
if (desc->pasid_present) {
event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
}
iommu_report_device_fault(dev, &event);
}
static irqreturn_t prq_event_thread(int irq, void *d)
{
struct intel_iommu *iommu = d;
struct page_req_dsc *req;
int head, tail, handled;
struct device *dev;
u64 address;
/*
* Clear PPR bit before reading head/tail registers, to ensure that
* we get a new interrupt if needed.
*/
writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);
tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
handled = (head != tail);
while (head != tail) {
req = &iommu->prq[head / sizeof(*req)];
address = (u64)req->addr << VTD_PAGE_SHIFT;
if (unlikely(!is_canonical_address(address))) {
pr_err("IOMMU: %s: Address is not canonical\n",
iommu->name);
bad_req:
handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
goto prq_advance;
}
if (unlikely(req->pm_req && (req->rd_req | req->wr_req))) {
pr_err("IOMMU: %s: Page request in Privilege Mode\n",
iommu->name);
goto bad_req;
}
if (unlikely(req->exe_req && req->rd_req)) {
pr_err("IOMMU: %s: Execution request not supported\n",
iommu->name);
goto bad_req;
}
/* Drop Stop Marker message. No need for a response. */
if (unlikely(req->lpig && !req->rd_req && !req->wr_req))
goto prq_advance;
/*
* If prq is to be handled outside iommu driver via receiver of
* the fault notifiers, we skip the page response here.
*/
mutex_lock(&iommu->iopf_lock);
dev = device_rbtree_find(iommu, req->rid);
if (!dev) {
mutex_unlock(&iommu->iopf_lock);
goto bad_req;
}
intel_prq_report(iommu, dev, req);
trace_prq_report(iommu, dev, req->qw_0, req->qw_1,
req->qw_2, req->qw_3,
iommu->prq_seq_number++);
mutex_unlock(&iommu->iopf_lock);
prq_advance:
head = (head + sizeof(*req)) & PRQ_RING_MASK;
}
dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);
/*
* Clear the page request overflow bit and wake up all threads that
* are waiting for the completion of this handling.
*/
if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
pr_info_ratelimited("IOMMU: %s: PRQ overflow detected\n",
iommu->name);
head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
if (head == tail) {
iopf_queue_discard_partial(iommu->iopf_queue);
writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG);
pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared",
iommu->name);
}
}
if (!completion_done(&iommu->prq_complete))
complete(&iommu->prq_complete);
return IRQ_RETVAL(handled);
}
int intel_iommu_enable_prq(struct intel_iommu *iommu)
{
struct iopf_queue *iopfq;
int irq, ret;
iommu->prq = iommu_alloc_pages_node(iommu->node, GFP_KERNEL, PRQ_ORDER);
if (!iommu->prq) {
pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
iommu->name);
return -ENOMEM;
}
irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PRQ + iommu->seq_id, iommu->node, iommu);
if (irq <= 0) {
pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
iommu->name);
ret = -EINVAL;
goto free_prq;
}
iommu->pr_irq = irq;
snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name),
"dmar%d-iopfq", iommu->seq_id);
iopfq = iopf_queue_alloc(iommu->iopfq_name);
if (!iopfq) {
pr_err("IOMMU: %s: Failed to allocate iopf queue\n", iommu->name);
ret = -ENOMEM;
goto free_hwirq;
}
iommu->iopf_queue = iopfq;
snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);
ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
iommu->prq_name, iommu);
if (ret) {
pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
iommu->name);
goto free_iopfq;
}
dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);
init_completion(&iommu->prq_complete);
return 0;
free_iopfq:
iopf_queue_free(iommu->iopf_queue);
iommu->iopf_queue = NULL;
free_hwirq:
dmar_free_hwirq(irq);
iommu->pr_irq = 0;
free_prq:
iommu_free_pages(iommu->prq, PRQ_ORDER);
iommu->prq = NULL;
return ret;
}
int intel_iommu_finish_prq(struct intel_iommu *iommu)
{
dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL);
if (iommu->pr_irq) {
free_irq(iommu->pr_irq, iommu);
dmar_free_hwirq(iommu->pr_irq);
iommu->pr_irq = 0;
}
if (iommu->iopf_queue) {
iopf_queue_free(iommu->iopf_queue);
iommu->iopf_queue = NULL;
}
iommu_free_pages(iommu->prq, PRQ_ORDER);
iommu->prq = NULL;
return 0;
}
void intel_iommu_page_response(struct device *dev, struct iopf_fault *evt,
struct iommu_page_response *msg)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu = info->iommu;
u8 bus = info->bus, devfn = info->devfn;
struct iommu_fault_page_request *prm;
struct qi_desc desc;
bool pasid_present;
bool last_page;
u16 sid;
prm = &evt->fault.prm;
sid = PCI_DEVID(bus, devfn);
pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) |
QI_PGRP_PASID_P(pasid_present) |
QI_PGRP_RESP_CODE(msg->code) |
QI_PGRP_RESP_TYPE;
desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
desc.qw2 = 0;
desc.qw3 = 0;
qi_submit_sync(iommu, &desc, 1, 0);
}

drivers/iommu/intel/svm.c
@@ -25,92 +25,6 @@
#include "../iommu-pages.h"
#include "trace.h"
static irqreturn_t prq_event_thread(int irq, void *d);
int intel_svm_enable_prq(struct intel_iommu *iommu)
{
struct iopf_queue *iopfq;
int irq, ret;
iommu->prq = iommu_alloc_pages_node(iommu->node, GFP_KERNEL, PRQ_ORDER);
if (!iommu->prq) {
pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
iommu->name);
return -ENOMEM;
}
irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PRQ + iommu->seq_id, iommu->node, iommu);
if (irq <= 0) {
pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
iommu->name);
ret = -EINVAL;
goto free_prq;
}
iommu->pr_irq = irq;
snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name),
"dmar%d-iopfq", iommu->seq_id);
iopfq = iopf_queue_alloc(iommu->iopfq_name);
if (!iopfq) {
pr_err("IOMMU: %s: Failed to allocate iopf queue\n", iommu->name);
ret = -ENOMEM;
goto free_hwirq;
}
iommu->iopf_queue = iopfq;
snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);
ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
iommu->prq_name, iommu);
if (ret) {
pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
iommu->name);
goto free_iopfq;
}
dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);
init_completion(&iommu->prq_complete);
return 0;
free_iopfq:
iopf_queue_free(iommu->iopf_queue);
iommu->iopf_queue = NULL;
free_hwirq:
dmar_free_hwirq(irq);
iommu->pr_irq = 0;
free_prq:
iommu_free_pages(iommu->prq, PRQ_ORDER);
iommu->prq = NULL;
return ret;
}
int intel_svm_finish_prq(struct intel_iommu *iommu)
{
dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL);
if (iommu->pr_irq) {
free_irq(iommu->pr_irq, iommu);
dmar_free_hwirq(iommu->pr_irq);
iommu->pr_irq = 0;
}
if (iommu->iopf_queue) {
iopf_queue_free(iommu->iopf_queue);
iommu->iopf_queue = NULL;
}
iommu_free_pages(iommu->prq, PRQ_ORDER);
iommu->prq = NULL;
return 0;
}
void intel_svm_check(struct intel_iommu *iommu)
{
if (!pasid_supported(iommu))
@@ -197,360 +111,37 @@ static const struct mmu_notifier_ops intel_mmuops = {
};
static int intel_svm_set_dev_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid)
struct device *dev, ioasid_t pasid,
struct iommu_domain *old)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
struct intel_iommu *iommu = info->iommu;
struct mm_struct *mm = domain->mm;
struct dev_pasid_info *dev_pasid;
unsigned long sflags;
unsigned long flags;
int ret = 0;
dev_pasid = kzalloc(sizeof(*dev_pasid), GFP_KERNEL);
if (!dev_pasid)
return -ENOMEM;
dev_pasid->dev = dev;
dev_pasid->pasid = pasid;
ret = cache_tag_assign_domain(to_dmar_domain(domain), dev, pasid);
if (ret)
goto free_dev_pasid;
dev_pasid = domain_add_dev_pasid(domain, dev, pasid);
if (IS_ERR(dev_pasid))
return PTR_ERR(dev_pasid);
/* Setup the pasid table: */
sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, pasid,
FLPT_DEFAULT_DID, sflags);
ret = __domain_setup_first_level(iommu, dev, pasid,
FLPT_DEFAULT_DID, mm->pgd,
sflags, old);
if (ret)
goto unassign_tag;
goto out_remove_dev_pasid;
spin_lock_irqsave(&dmar_domain->lock, flags);
list_add(&dev_pasid->link_domain, &dmar_domain->dev_pasids);
spin_unlock_irqrestore(&dmar_domain->lock, flags);
domain_remove_dev_pasid(old, dev, pasid);
return 0;
unassign_tag:
cache_tag_unassign_domain(to_dmar_domain(domain), dev, pasid);
free_dev_pasid:
kfree(dev_pasid);
out_remove_dev_pasid:
domain_remove_dev_pasid(domain, dev, pasid);
return ret;
}
/* Page request queue descriptor */
struct page_req_dsc {
union {
struct {
u64 type:8;
u64 pasid_present:1;
u64 rsvd:7;
u64 rid:16;
u64 pasid:20;
u64 exe_req:1;
u64 pm_req:1;
u64 rsvd2:10;
};
u64 qw_0;
};
union {
struct {
u64 rd_req:1;
u64 wr_req:1;
u64 lpig:1;
u64 prg_index:9;
u64 addr:52;
};
u64 qw_1;
};
u64 qw_2;
u64 qw_3;
};
static bool is_canonical_address(u64 addr)
{
int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
long saddr = (long) addr;
return (((saddr << shift) >> shift) == saddr);
}
/**
* intel_drain_pasid_prq - Drain page requests and responses for a pasid
* @dev: target device
* @pasid: pasid for draining
*
* Drain all pending page requests and responses related to @pasid in both
* software and hardware. This is supposed to be called after the device
* driver has stopped DMA, the pasid entry has been cleared, and both IOTLB
* and DevTLB have been invalidated.
*
* It waits until all pending page requests for @pasid in the page fault
* queue are completed by the prq handling thread. Then follow the steps
* described in VT-d spec CH7.10 to drain all page requests and page
* responses pending in the hardware.
*/
void intel_drain_pasid_prq(struct device *dev, u32 pasid)
{
struct device_domain_info *info;
struct dmar_domain *domain;
struct intel_iommu *iommu;
struct qi_desc desc[3];
struct pci_dev *pdev;
int head, tail;
u16 sid, did;
int qdep;
info = dev_iommu_priv_get(dev);
if (WARN_ON(!info || !dev_is_pci(dev)))
return;
if (!info->pri_enabled)
return;
iommu = info->iommu;
domain = info->domain;
pdev = to_pci_dev(dev);
sid = PCI_DEVID(info->bus, info->devfn);
did = domain ? domain_id_iommu(domain, iommu) : FLPT_DEFAULT_DID;
qdep = pci_ats_queue_depth(pdev);
/*
* Check and wait until all pending page requests in the queue are
* handled by the prq handling thread.
*/
prq_retry:
reinit_completion(&iommu->prq_complete);
tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
while (head != tail) {
struct page_req_dsc *req;
req = &iommu->prq[head / sizeof(*req)];
if (!req->pasid_present || req->pasid != pasid) {
head = (head + sizeof(*req)) & PRQ_RING_MASK;
continue;
}
wait_for_completion(&iommu->prq_complete);
goto prq_retry;
}
iopf_queue_flush_dev(dev);
/*
* Perform steps described in VT-d spec CH7.10 to drain page
* requests and responses in hardware.
*/
memset(desc, 0, sizeof(desc));
desc[0].qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
QI_IWD_FENCE |
QI_IWD_TYPE;
desc[1].qw0 = QI_EIOTLB_PASID(pasid) |
QI_EIOTLB_DID(did) |
QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
QI_EIOTLB_TYPE;
desc[2].qw0 = QI_DEV_EIOTLB_PASID(pasid) |
QI_DEV_EIOTLB_SID(sid) |
QI_DEV_EIOTLB_QDEP(qdep) |
QI_DEIOTLB_TYPE |
QI_DEV_IOTLB_PFSID(info->pfsid);
qi_retry:
reinit_completion(&iommu->prq_complete);
qi_submit_sync(iommu, desc, 3, QI_OPT_WAIT_DRAIN);
if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
wait_for_completion(&iommu->prq_complete);
goto qi_retry;
}
}
static int prq_to_iommu_prot(struct page_req_dsc *req)
{
int prot = 0;
if (req->rd_req)
prot |= IOMMU_FAULT_PERM_READ;
if (req->wr_req)
prot |= IOMMU_FAULT_PERM_WRITE;
if (req->exe_req)
prot |= IOMMU_FAULT_PERM_EXEC;
if (req->pm_req)
prot |= IOMMU_FAULT_PERM_PRIV;
return prot;
}
static void intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev,
struct page_req_dsc *desc)
{
struct iopf_fault event = { };
/* Fill in event data for device specific processing */
event.fault.type = IOMMU_FAULT_PAGE_REQ;
event.fault.prm.addr = (u64)desc->addr << VTD_PAGE_SHIFT;
event.fault.prm.pasid = desc->pasid;
event.fault.prm.grpid = desc->prg_index;
event.fault.prm.perm = prq_to_iommu_prot(desc);
if (desc->lpig)
event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
if (desc->pasid_present) {
event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
}
iommu_report_device_fault(dev, &event);
}
static void handle_bad_prq_event(struct intel_iommu *iommu,
struct page_req_dsc *req, int result)
{
struct qi_desc desc = { };
pr_err("%s: Invalid page request: %08llx %08llx\n",
iommu->name, ((unsigned long long *)req)[0],
((unsigned long long *)req)[1]);
if (!req->lpig)
return;
desc.qw0 = QI_PGRP_PASID(req->pasid) |
QI_PGRP_DID(req->rid) |
QI_PGRP_PASID_P(req->pasid_present) |
QI_PGRP_RESP_CODE(result) |
QI_PGRP_RESP_TYPE;
desc.qw1 = QI_PGRP_IDX(req->prg_index) |
QI_PGRP_LPIG(req->lpig);
qi_submit_sync(iommu, &desc, 1, 0);
}
static irqreturn_t prq_event_thread(int irq, void *d)
{
struct intel_iommu *iommu = d;
struct page_req_dsc *req;
int head, tail, handled;
struct device *dev;
u64 address;
/*
* Clear PPR bit before reading head/tail registers, to ensure that
* we get a new interrupt if needed.
*/
writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);
tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
handled = (head != tail);
while (head != tail) {
req = &iommu->prq[head / sizeof(*req)];
address = (u64)req->addr << VTD_PAGE_SHIFT;
if (unlikely(!req->pasid_present)) {
pr_err("IOMMU: %s: Page request without PASID\n",
iommu->name);
bad_req:
handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
goto prq_advance;
}
if (unlikely(!is_canonical_address(address))) {
pr_err("IOMMU: %s: Address is not canonical\n",
iommu->name);
goto bad_req;
}
if (unlikely(req->pm_req && (req->rd_req | req->wr_req))) {
pr_err("IOMMU: %s: Page request in Privilege Mode\n",
iommu->name);
goto bad_req;
}
if (unlikely(req->exe_req && req->rd_req)) {
pr_err("IOMMU: %s: Execution request not supported\n",
iommu->name);
goto bad_req;
}
/* Drop Stop Marker message. No need for a response. */
if (unlikely(req->lpig && !req->rd_req && !req->wr_req))
goto prq_advance;
/*
* If prq is to be handled outside iommu driver via receiver of
* the fault notifiers, we skip the page response here.
*/
mutex_lock(&iommu->iopf_lock);
dev = device_rbtree_find(iommu, req->rid);
if (!dev) {
mutex_unlock(&iommu->iopf_lock);
goto bad_req;
}
intel_svm_prq_report(iommu, dev, req);
trace_prq_report(iommu, dev, req->qw_0, req->qw_1,
req->qw_2, req->qw_3,
iommu->prq_seq_number++);
mutex_unlock(&iommu->iopf_lock);
prq_advance:
head = (head + sizeof(*req)) & PRQ_RING_MASK;
}
dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);
/*
* Clear the page request overflow bit and wake up all threads that
* are waiting for the completion of this handling.
*/
if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
pr_info_ratelimited("IOMMU: %s: PRQ overflow detected\n",
iommu->name);
head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
if (head == tail) {
iopf_queue_discard_partial(iommu->iopf_queue);
writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG);
pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared",
iommu->name);
}
}
if (!completion_done(&iommu->prq_complete))
complete(&iommu->prq_complete);
return IRQ_RETVAL(handled);
}
void intel_svm_page_response(struct device *dev, struct iopf_fault *evt,
struct iommu_page_response *msg)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu = info->iommu;
u8 bus = info->bus, devfn = info->devfn;
struct iommu_fault_page_request *prm;
struct qi_desc desc;
bool pasid_present;
bool last_page;
u16 sid;
prm = &evt->fault.prm;
sid = PCI_DEVID(bus, devfn);
pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) |
QI_PGRP_PASID_P(pasid_present) |
QI_PGRP_RESP_CODE(msg->code) |
QI_PGRP_RESP_TYPE;
desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
desc.qw2 = 0;
desc.qw3 = 0;
qi_submit_sync(iommu, &desc, 1, 0);
}
static void intel_svm_domain_free(struct iommu_domain *domain)
{
struct dmar_domain *dmar_domain = to_dmar_domain(domain);


@@ -166,7 +166,6 @@ struct arm_v7s_io_pgtable {
arm_v7s_iopte *pgd;
struct kmem_cache *l2_tables;
spinlock_t split_lock;
};
static bool arm_v7s_pte_is_cont(arm_v7s_iopte pte, int lvl);
@@ -363,25 +362,6 @@ static arm_v7s_iopte arm_v7s_prot_to_pte(int prot, int lvl,
return pte;
}
static int arm_v7s_pte_to_prot(arm_v7s_iopte pte, int lvl)
{
int prot = IOMMU_READ;
arm_v7s_iopte attr = pte >> ARM_V7S_ATTR_SHIFT(lvl);
if (!(attr & ARM_V7S_PTE_AP_RDONLY))
prot |= IOMMU_WRITE;
if (!(attr & ARM_V7S_PTE_AP_UNPRIV))
prot |= IOMMU_PRIV;
if ((attr & (ARM_V7S_TEX_MASK << ARM_V7S_TEX_SHIFT)) == 0)
prot |= IOMMU_MMIO;
else if (pte & ARM_V7S_ATTR_C)
prot |= IOMMU_CACHE;
if (pte & ARM_V7S_ATTR_XN(lvl))
prot |= IOMMU_NOEXEC;
return prot;
}
static arm_v7s_iopte arm_v7s_pte_to_cont(arm_v7s_iopte pte, int lvl)
{
if (lvl == 1) {
@@ -398,23 +378,6 @@ static arm_v7s_iopte arm_v7s_pte_to_cont(arm_v7s_iopte pte, int lvl)
return pte;
}
static arm_v7s_iopte arm_v7s_cont_to_pte(arm_v7s_iopte pte, int lvl)
{
if (lvl == 1) {
pte &= ~ARM_V7S_CONT_SECTION;
} else if (lvl == 2) {
arm_v7s_iopte xn = pte & BIT(ARM_V7S_CONT_PAGE_XN_SHIFT);
arm_v7s_iopte tex = pte & (ARM_V7S_CONT_PAGE_TEX_MASK <<
ARM_V7S_CONT_PAGE_TEX_SHIFT);
pte ^= xn | tex | ARM_V7S_PTE_TYPE_CONT_PAGE;
pte |= (xn >> ARM_V7S_CONT_PAGE_XN_SHIFT) |
(tex >> ARM_V7S_CONT_PAGE_TEX_SHIFT) |
ARM_V7S_PTE_TYPE_PAGE;
}
return pte;
}
static bool arm_v7s_pte_is_cont(arm_v7s_iopte pte, int lvl)
{
if (lvl == 1 && !ARM_V7S_PTE_IS_TABLE(pte, lvl))
@@ -591,77 +554,6 @@ static void arm_v7s_free_pgtable(struct io_pgtable *iop)
kfree(data);
}
static arm_v7s_iopte arm_v7s_split_cont(struct arm_v7s_io_pgtable *data,
unsigned long iova, int idx, int lvl,
arm_v7s_iopte *ptep)
{
struct io_pgtable *iop = &data->iop;
arm_v7s_iopte pte;
size_t size = ARM_V7S_BLOCK_SIZE(lvl);
int i;
/* Check that we didn't lose a race to get the lock */
pte = *ptep;
if (!arm_v7s_pte_is_cont(pte, lvl))
return pte;
ptep -= idx & (ARM_V7S_CONT_PAGES - 1);
pte = arm_v7s_cont_to_pte(pte, lvl);
for (i = 0; i < ARM_V7S_CONT_PAGES; i++)
ptep[i] = pte + i * size;
__arm_v7s_pte_sync(ptep, ARM_V7S_CONT_PAGES, &iop->cfg);
size *= ARM_V7S_CONT_PAGES;
io_pgtable_tlb_flush_walk(iop, iova, size, size);
return pte;
}
static size_t arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data,
struct iommu_iotlb_gather *gather,
unsigned long iova, size_t size,
arm_v7s_iopte blk_pte,
arm_v7s_iopte *ptep)
{
struct io_pgtable_cfg *cfg = &data->iop.cfg;
arm_v7s_iopte pte, *tablep;
int i, unmap_idx, num_entries, num_ptes;
tablep = __arm_v7s_alloc_table(2, GFP_ATOMIC, data);
if (!tablep)
return 0; /* Bytes unmapped */
num_ptes = ARM_V7S_PTES_PER_LVL(2, cfg);
num_entries = size >> ARM_V7S_LVL_SHIFT(2);
unmap_idx = ARM_V7S_LVL_IDX(iova, 2, cfg);
pte = arm_v7s_prot_to_pte(arm_v7s_pte_to_prot(blk_pte, 1), 2, cfg);
if (num_entries > 1)
pte = arm_v7s_pte_to_cont(pte, 2);
for (i = 0; i < num_ptes; i += num_entries, pte += size) {
/* Unmap! */
if (i == unmap_idx)
continue;
__arm_v7s_set_pte(&tablep[i], pte, num_entries, cfg);
}
pte = arm_v7s_install_table(tablep, ptep, blk_pte, cfg);
if (pte != blk_pte) {
__arm_v7s_free_table(tablep, 2, data);
if (!ARM_V7S_PTE_IS_TABLE(pte, 1))
return 0;
tablep = iopte_deref(pte, 1, data);
return __arm_v7s_unmap(data, gather, iova, size, 2, tablep);
}
io_pgtable_tlb_add_page(&data->iop, gather, iova, size);
return size;
}
static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
struct iommu_iotlb_gather *gather,
unsigned long iova, size_t size, int lvl,
@@ -694,11 +586,8 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
* case in a lock for the sake of correctness and be done with it.
*/
if (num_entries <= 1 && arm_v7s_pte_is_cont(pte[0], lvl)) {
unsigned long flags;
spin_lock_irqsave(&data->split_lock, flags);
pte[0] = arm_v7s_split_cont(data, iova, idx, lvl, ptep);
spin_unlock_irqrestore(&data->split_lock, flags);
WARN_ONCE(true, "Unmap of a partial large IOPTE is not allowed");
return 0;
}
/* If the size matches this level, we're in the right place */
@@ -721,12 +610,8 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
}
return size;
} else if (lvl == 1 && !ARM_V7S_PTE_IS_TABLE(pte[0], lvl)) {
/*
* Insert a table at the next level to map the old region,
* minus the part we want to unmap
*/
return arm_v7s_split_blk_unmap(data, gather, iova, size, pte[0],
ptep);
WARN_ONCE(true, "Unmap of a partial large IOPTE is not allowed");
return 0;
}
/* Keep on walkin' */
@@ -811,8 +696,6 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
if (!data)
return NULL;
spin_lock_init(&data->split_lock);
/*
* ARM_MTK_TTBR_EXT extend the translation table base support larger
* memory address.
@@ -936,8 +819,8 @@ static int __init arm_v7s_do_selftests(void)
.quirks = IO_PGTABLE_QUIRK_ARM_NS,
.pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M,
};
unsigned int iova, size, iova_start;
unsigned int i, loopnr = 0;
unsigned int iova, size;
unsigned int i;
size_t mapped;
selftest_running = true;
@@ -985,26 +868,6 @@ static int __init arm_v7s_do_selftests(void)
return __FAIL(ops);
iova += SZ_16M;
loopnr++;
}
/* Partial unmap */
i = 1;
size = 1UL << __ffs(cfg.pgsize_bitmap);
while (i < loopnr) {
iova_start = i * SZ_16M;
if (ops->unmap_pages(ops, iova_start + size, size, 1, NULL) != size)
return __FAIL(ops);
/* Remap of partial unmap */
if (ops->map_pages(ops, iova_start + size, size, size, 1,
IOMMU_READ, GFP_KERNEL, &mapped))
return __FAIL(ops);
if (ops->iova_to_phys(ops, iova_start + size + 42)
!= (size + 42))
return __FAIL(ops);
i++;
}
/* Full unmap */


@@ -211,6 +211,18 @@ static phys_addr_t iopte_to_paddr(arm_lpae_iopte pte,
return (paddr | (paddr << (48 - 12))) & (ARM_LPAE_PTE_ADDR_MASK << 4);
}
/*
* Convert an index returned by ARM_LPAE_PGD_IDX(), which can point into
* a concatenated PGD, into the maximum number of entries that can be
* mapped in the same table page.
*/
static inline int arm_lpae_max_entries(int i, struct arm_lpae_io_pgtable *data)
{
int ptes_per_table = ARM_LPAE_PTES_PER_TABLE(data);
return ptes_per_table - (i & (ptes_per_table - 1));
}
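/*
 * Worked example (illustrative, not taken from the code above): with a 4K
 * granule there are 512 PTEs per table page, so for a concatenated-PGD
 * index i = 1000 this returns 512 - (1000 & 511) = 24, i.e. only 24 leaf
 * entries can be installed before crossing into the next table page.
 */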
static bool selftest_running = false;
static dma_addr_t __arm_lpae_dma_addr(void *pages)
@@ -402,7 +414,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
/* If we can install a leaf entry at this level, then do so */
if (size == block_size) {
max_entries = ARM_LPAE_PTES_PER_TABLE(data) - map_idx_start;
max_entries = arm_lpae_max_entries(map_idx_start, data);
num_entries = min_t(int, pgcount, max_entries);
ret = arm_lpae_init_pte(data, iova, paddr, prot, lvl, num_entries, ptep);
if (!ret)
@@ -585,66 +597,6 @@ static void arm_lpae_free_pgtable(struct io_pgtable *iop)
kfree(data);
}
static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
struct iommu_iotlb_gather *gather,
unsigned long iova, size_t size,
arm_lpae_iopte blk_pte, int lvl,
arm_lpae_iopte *ptep, size_t pgcount)
{
struct io_pgtable_cfg *cfg = &data->iop.cfg;
arm_lpae_iopte pte, *tablep;
phys_addr_t blk_paddr;
size_t tablesz = ARM_LPAE_GRANULE(data);
size_t split_sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
int ptes_per_table = ARM_LPAE_PTES_PER_TABLE(data);
int i, unmap_idx_start = -1, num_entries = 0, max_entries;
if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
return 0;
tablep = __arm_lpae_alloc_pages(tablesz, GFP_ATOMIC, cfg, data->iop.cookie);
if (!tablep)
return 0; /* Bytes unmapped */
if (size == split_sz) {
unmap_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
max_entries = ptes_per_table - unmap_idx_start;
num_entries = min_t(int, pgcount, max_entries);
}
blk_paddr = iopte_to_paddr(blk_pte, data);
pte = iopte_prot(blk_pte);
for (i = 0; i < ptes_per_table; i++, blk_paddr += split_sz) {
/* Unmap! */
if (i >= unmap_idx_start && i < (unmap_idx_start + num_entries))
continue;
__arm_lpae_init_pte(data, blk_paddr, pte, lvl, 1, &tablep[i]);
}
pte = arm_lpae_install_table(tablep, ptep, blk_pte, data);
if (pte != blk_pte) {
__arm_lpae_free_pages(tablep, tablesz, cfg, data->iop.cookie);
/*
* We may race against someone unmapping another part of this
* block, but anything else is invalid. We can't misinterpret
* a page entry here since we're never at the last level.
*/
if (iopte_type(pte) != ARM_LPAE_PTE_TYPE_TABLE)
return 0;
tablep = iopte_deref(pte, data);
} else if (unmap_idx_start >= 0) {
for (i = 0; i < num_entries; i++)
io_pgtable_tlb_add_page(&data->iop, gather, iova + i * size, size);
return num_entries * size;
}
return __arm_lpae_unmap(data, gather, iova, size, pgcount, lvl, tablep);
}
static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
struct iommu_iotlb_gather *gather,
unsigned long iova, size_t size, size_t pgcount,
@@ -666,7 +618,7 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
/* If the size matches this level, we're in the right place */
if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) {
max_entries = ARM_LPAE_PTES_PER_TABLE(data) - unmap_idx_start;
max_entries = arm_lpae_max_entries(unmap_idx_start, data);
num_entries = min_t(int, pgcount, max_entries);
/* Find and handle non-leaf entries */
@@ -694,12 +646,8 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
return i * size;
} else if (iopte_leaf(pte, lvl, iop->fmt)) {
/*
* Insert a table at the next level to map the old region,
* minus the part we want to unmap
*/
return arm_lpae_split_blk_unmap(data, gather, iova, size, pte,
lvl + 1, ptep, pgcount);
WARN_ONCE(true, "Unmap of a partial large IOPTE is not allowed");
return 0;
}
/* Keep on walkin' */
@@ -1362,19 +1310,6 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg)
iova += SZ_1G;
}
/* Partial unmap */
size = 1UL << __ffs(cfg->pgsize_bitmap);
if (ops->unmap_pages(ops, SZ_1G + size, size, 1, NULL) != size)
return __FAIL(ops, i);
/* Remap of partial unmap */
if (ops->map_pages(ops, SZ_1G + size, size, size, 1,
IOMMU_READ, GFP_KERNEL, &mapped))
return __FAIL(ops, i);
if (ops->iova_to_phys(ops, SZ_1G + size + 42) != (size + 42))
return __FAIL(ops, i);
/* Full unmap */
iova = 0;
for_each_set_bit(j, &cfg->pgsize_bitmap, BITS_PER_LONG) {
@@ -1397,6 +1332,23 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg)
iova += SZ_1G;
}
/*
* Map/unmap the last largest supported page of the IAS, this can
* trigger corner cases in the concatenated page tables.
*/
mapped = 0;
size = 1UL << __fls(cfg->pgsize_bitmap);
iova = (1UL << cfg->ias) - size;
if (ops->map_pages(ops, iova, iova, size, 1,
IOMMU_READ | IOMMU_WRITE |
IOMMU_NOEXEC | IOMMU_CACHE,
GFP_KERNEL, &mapped))
return __FAIL(ops, i);
if (mapped != size)
return __FAIL(ops, i);
if (ops->unmap_pages(ops, iova, size, 1, NULL) != size)
return __FAIL(ops, i);
free_io_pgtable_ops(ops);
}


@@ -34,7 +34,7 @@ static void release_device(struct device *dev)
kfree(dev);
}
static struct class iommu_class = {
static const struct class iommu_class = {
.name = "iommu",
.dev_release = release_device,
.dev_groups = dev_groups,


@@ -32,6 +32,7 @@
#include <trace/events/iommu.h>
#include <linux/sched/mm.h>
#include <linux/msi.h>
#include <uapi/linux/iommufd.h>
#include "dma-iommu.h"
#include "iommu-priv.h"
@@ -90,15 +91,17 @@ static const char * const iommu_group_resv_type_string[] = {
#define IOMMU_CMD_LINE_DMA_API BIT(0)
#define IOMMU_CMD_LINE_STRICT BIT(1)
static int bus_iommu_probe(const struct bus_type *bus);
static int iommu_bus_notifier(struct notifier_block *nb,
unsigned long action, void *data);
static void iommu_release_device(struct device *dev);
static struct iommu_domain *
__iommu_group_domain_alloc(struct iommu_group *group, unsigned int type);
static int __iommu_attach_device(struct iommu_domain *domain,
struct device *dev);
static int __iommu_attach_group(struct iommu_domain *domain,
struct iommu_group *group);
static struct iommu_domain *__iommu_paging_domain_alloc_flags(struct device *dev,
unsigned int type,
unsigned int flags);
enum {
IOMMU_SET_DOMAIN_MUST_SUCCEED = 1 << 0,
@@ -133,6 +136,8 @@ static struct group_device *iommu_group_alloc_device(struct iommu_group *group,
struct device *dev);
static void __iommu_group_free_device(struct iommu_group *group,
struct group_device *grp_dev);
static void iommu_domain_init(struct iommu_domain *domain, unsigned int type,
const struct iommu_ops *ops);
#define IOMMU_GROUP_ATTR(_name, _mode, _show, _store) \
struct iommu_group_attribute iommu_group_attr_##_name = \
@@ -1141,10 +1146,6 @@ static int iommu_create_device_direct_mappings(struct iommu_domain *domain,
}
}
if (!list_empty(&mappings) && iommu_is_dma_domain(domain))
iommu_flush_iotlb_all(domain);
out:
iommu_put_resv_regions(dev, &mappings);
@@ -1586,12 +1587,59 @@ struct iommu_group *fsl_mc_device_group(struct device *dev)
}
EXPORT_SYMBOL_GPL(fsl_mc_device_group);
static struct iommu_domain *__iommu_alloc_identity_domain(struct device *dev)
{
const struct iommu_ops *ops = dev_iommu_ops(dev);
struct iommu_domain *domain;
if (ops->identity_domain)
return ops->identity_domain;
/* Older drivers create the identity domain via ops->domain_alloc() */
if (!ops->domain_alloc)
return ERR_PTR(-EOPNOTSUPP);
domain = ops->domain_alloc(IOMMU_DOMAIN_IDENTITY);
if (IS_ERR(domain))
return domain;
if (!domain)
return ERR_PTR(-ENOMEM);
iommu_domain_init(domain, IOMMU_DOMAIN_IDENTITY, ops);
return domain;
}
static struct iommu_domain *
__iommu_group_alloc_default_domain(struct iommu_group *group, int req_type)
{
struct device *dev = iommu_group_first_dev(group);
struct iommu_domain *dom;
if (group->default_domain && group->default_domain->type == req_type)
return group->default_domain;
return __iommu_group_domain_alloc(group, req_type);
/*
* When allocating the DMA API domain assume that the driver is going to
* use PASID and make sure the RID's domain is PASID compatible.
*/
if (req_type & __IOMMU_DOMAIN_PAGING) {
dom = __iommu_paging_domain_alloc_flags(dev, req_type,
dev->iommu->max_pasids ? IOMMU_HWPT_ALLOC_PASID : 0);
/*
* If driver does not support PASID feature then
* try to allocate non-PASID domain
*/
if (PTR_ERR(dom) == -EOPNOTSUPP)
dom = __iommu_paging_domain_alloc_flags(dev, req_type, 0);
return dom;
}
if (req_type == IOMMU_DOMAIN_IDENTITY)
return __iommu_alloc_identity_domain(dev);
return ERR_PTR(-EINVAL);
}
/*
@@ -1795,7 +1843,7 @@ static void iommu_group_do_probe_finalize(struct device *dev)
ops->probe_finalize(dev);
}
int bus_iommu_probe(const struct bus_type *bus)
static int bus_iommu_probe(const struct bus_type *bus)
{
struct iommu_group *group, *next;
LIST_HEAD(group_list);
@@ -1840,31 +1888,6 @@ int bus_iommu_probe(const struct bus_type *bus)
return 0;
}
/**
* iommu_present() - make platform-specific assumptions about an IOMMU
* @bus: bus to check
*
* Do not use this function. You want device_iommu_mapped() instead.
*
* Return: true if some IOMMU is present and aware of devices on the given bus;
* in general it may not be the only IOMMU, and it may not have anything to do
* with whatever device you are ultimately interested in.
*/
bool iommu_present(const struct bus_type *bus)
{
bool ret = false;
for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) {
if (iommu_buses[i] == bus) {
spin_lock(&iommu_device_lock);
ret = !list_empty(&iommu_device_list);
spin_unlock(&iommu_device_lock);
}
}
return ret;
}
EXPORT_SYMBOL_GPL(iommu_present);
/**
* device_iommu_capable() - check for a general IOMMU capability
* @dev: device to which the capability would be relevant, if available
@@ -1934,117 +1957,67 @@ void iommu_set_fault_handler(struct iommu_domain *domain,
}
EXPORT_SYMBOL_GPL(iommu_set_fault_handler);
static struct iommu_domain *__iommu_domain_alloc(const struct iommu_ops *ops,
struct device *dev,
unsigned int type)
static void iommu_domain_init(struct iommu_domain *domain, unsigned int type,
const struct iommu_ops *ops)
{
struct iommu_domain *domain;
unsigned int alloc_type = type & IOMMU_DOMAIN_ALLOC_FLAGS;
if (alloc_type == IOMMU_DOMAIN_IDENTITY && ops->identity_domain)
return ops->identity_domain;
else if (alloc_type == IOMMU_DOMAIN_BLOCKED && ops->blocked_domain)
return ops->blocked_domain;
else if (type & __IOMMU_DOMAIN_PAGING && ops->domain_alloc_paging)
domain = ops->domain_alloc_paging(dev);
else if (ops->domain_alloc)
domain = ops->domain_alloc(alloc_type);
else
return ERR_PTR(-EOPNOTSUPP);
/*
* Many domain_alloc ops now return ERR_PTR, make things easier for the
* driver by accepting ERR_PTR from all domain_alloc ops instead of
* having two rules.
*/
if (IS_ERR(domain))
return domain;
if (!domain)
return ERR_PTR(-ENOMEM);
domain->type = type;
domain->owner = ops;
if (!domain->ops)
domain->ops = ops->default_domain_ops;
/*
* If not already set, assume all sizes by default; the driver
* may override this later
*/
if (!domain->pgsize_bitmap)
domain->pgsize_bitmap = ops->pgsize_bitmap;
if (!domain->ops)
domain->ops = ops->default_domain_ops;
if (iommu_is_dma_domain(domain)) {
int rc;
rc = iommu_get_dma_cookie(domain);
if (rc) {
iommu_domain_free(domain);
return ERR_PTR(rc);
}
}
return domain;
}
static struct iommu_domain *
__iommu_group_domain_alloc(struct iommu_group *group, unsigned int type)
__iommu_paging_domain_alloc_flags(struct device *dev, unsigned int type,
unsigned int flags)
{
struct device *dev = iommu_group_first_dev(group);
return __iommu_domain_alloc(dev_iommu_ops(dev), dev, type);
}
static int __iommu_domain_alloc_dev(struct device *dev, void *data)
{
const struct iommu_ops **ops = data;
if (!dev_has_iommu(dev))
return 0;
if (WARN_ONCE(*ops && *ops != dev_iommu_ops(dev),
"Multiple IOMMU drivers present for bus %s, which the public IOMMU API can't fully support yet. You will still need to disable one or more for this to work, sorry!\n",
dev_bus_name(dev)))
return -EBUSY;
*ops = dev_iommu_ops(dev);
return 0;
}
/*
* The iommu ops in bus has been retired. Do not use this interface in
* new drivers.
*/
struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus)
{
const struct iommu_ops *ops = NULL;
int err = bus_for_each_dev(bus, NULL, &ops, __iommu_domain_alloc_dev);
const struct iommu_ops *ops;
struct iommu_domain *domain;
if (err || !ops)
return NULL;
domain = __iommu_domain_alloc(ops, NULL, IOMMU_DOMAIN_UNMANAGED);
if (IS_ERR(domain))
return NULL;
return domain;
}
EXPORT_SYMBOL_GPL(iommu_domain_alloc);
/**
* iommu_paging_domain_alloc() - Allocate a paging domain
* @dev: device for which the domain is allocated
*
* Allocate a paging domain which will be managed by a kernel driver. Return
* allocated domain if successful, or a ERR pointer for failure.
*/
struct iommu_domain *iommu_paging_domain_alloc(struct device *dev)
{
if (!dev_has_iommu(dev))
return ERR_PTR(-ENODEV);
return __iommu_domain_alloc(dev_iommu_ops(dev), dev, IOMMU_DOMAIN_UNMANAGED);
ops = dev_iommu_ops(dev);
if (ops->domain_alloc_paging && !flags)
domain = ops->domain_alloc_paging(dev);
else if (ops->domain_alloc_user)
domain = ops->domain_alloc_user(dev, flags, NULL, NULL);
else if (ops->domain_alloc && !flags)
domain = ops->domain_alloc(IOMMU_DOMAIN_UNMANAGED);
else
return ERR_PTR(-EOPNOTSUPP);
if (IS_ERR(domain))
return domain;
if (!domain)
return ERR_PTR(-ENOMEM);
iommu_domain_init(domain, type, ops);
return domain;
}
EXPORT_SYMBOL_GPL(iommu_paging_domain_alloc);
/**
* iommu_paging_domain_alloc_flags() - Allocate a paging domain
* @dev: device for which the domain is allocated
* @flags: Bitmap of iommufd_hwpt_alloc_flags
*
* Allocate a paging domain which will be managed by a kernel driver. Return
* allocated domain if successful, or an ERR pointer for failure.
*/
struct iommu_domain *iommu_paging_domain_alloc_flags(struct device *dev,
unsigned int flags)
{
return __iommu_paging_domain_alloc_flags(dev,
IOMMU_DOMAIN_UNMANAGED, flags);
}
EXPORT_SYMBOL_GPL(iommu_paging_domain_alloc_flags);
void iommu_domain_free(struct iommu_domain *domain)
{
@@ -2216,8 +2189,8 @@ EXPORT_SYMBOL_GPL(iommu_attach_group);
/**
* iommu_group_replace_domain - replace the domain that a group is attached to
* @new_domain: new IOMMU domain to replace with
* @group: IOMMU group that will be attached to the new domain
* @new_domain: new IOMMU domain to replace with
*
* This API allows the group to switch domains without being forced to go to
* the blocking domain in-between.
@@ -2586,6 +2559,20 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
return unmapped;
}
/**
* iommu_unmap() - Remove mappings from a range of IOVA
* @domain: Domain to manipulate
* @iova: IO virtual address to start
* @size: Length of the range starting from @iova
*
* iommu_unmap() will remove a translation created by iommu_map(). It cannot
* subdivide a mapping created by iommu_map(), so it should be called with IOVA
* ranges that match what was passed to iommu_map(). The range can aggregate
* contiguous iommu_map() calls so long as no individual range is split.
*
* Returns: Number of bytes of IOVA unmapped. iova + res will be the point
* unmapping stopped.
*/
size_t iommu_unmap(struct iommu_domain *domain,
unsigned long iova, size_t size)
{
@@ -2955,6 +2942,14 @@ static int iommu_setup_default_domain(struct iommu_group *group,
if (group->default_domain == dom)
return 0;
if (iommu_is_dma_domain(dom)) {
ret = iommu_get_dma_cookie(dom);
if (ret) {
iommu_domain_free(dom);
return ret;
}
}
/*
* IOMMU_RESV_DIRECT and IOMMU_RESV_DIRECT_RELAXABLE regions must be
* mapped before their device is attached, in order to guarantee
@@ -3142,22 +3137,25 @@ void iommu_device_unuse_default_domain(struct device *dev)
static int __iommu_group_alloc_blocking_domain(struct iommu_group *group)
{
struct device *dev = iommu_group_first_dev(group);
const struct iommu_ops *ops = dev_iommu_ops(dev);
struct iommu_domain *domain;
if (group->blocking_domain)
return 0;
domain = __iommu_group_domain_alloc(group, IOMMU_DOMAIN_BLOCKED);
if (IS_ERR(domain)) {
/*
* For drivers that do not yet understand IOMMU_DOMAIN_BLOCKED
* create an empty domain instead.
*/
domain = __iommu_group_domain_alloc(group,
IOMMU_DOMAIN_UNMANAGED);
if (IS_ERR(domain))
return PTR_ERR(domain);
if (ops->blocked_domain) {
group->blocking_domain = ops->blocked_domain;
return 0;
}
/*
* For drivers that do not yet understand IOMMU_DOMAIN_BLOCKED create an
* empty PAGING domain instead.
*/
domain = iommu_paging_domain_alloc(dev);
if (IS_ERR(domain))
return PTR_ERR(domain);
group->blocking_domain = domain;
return 0;
}
@@ -3321,7 +3319,8 @@ static int __iommu_set_group_pasid(struct iommu_domain *domain,
int ret;
for_each_group_device(group, device) {
ret = domain->ops->set_dev_pasid(domain, device->dev, pasid);
ret = domain->ops->set_dev_pasid(domain, device->dev,
pasid, NULL);
if (ret)
goto err_revert;
}


@@ -110,7 +110,8 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
const struct iommu_user_data *user_data)
{
const u32 valid_flags = IOMMU_HWPT_ALLOC_NEST_PARENT |
IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
IOMMU_HWPT_FAULT_ID_VALID;
const struct iommu_ops *ops = dev_iommu_ops(idev->dev);
struct iommufd_hwpt_paging *hwpt_paging;
struct iommufd_hw_pagetable *hwpt;


@@ -506,7 +506,7 @@ __adjust_overlap_range(struct iova *iova,
* reserve_iova - reserves an iova in the given range
* @iovad: - iova domain pointer
* @pfn_lo: - lower page frame address
* @pfn_hi:- higher pfn adderss
* @pfn_hi:- higher pfn address
* This function allocates reserves the address range from pfn_lo to pfn_hi so
* that this address is not dished out as part of alloc_iova.
*/


@@ -1599,7 +1599,7 @@ static const unsigned int mt8186_larb_region_msk[MT8192_MULTI_REGION_NR_MAX][MTK
static const struct mtk_iommu_plat_data mt8186_data_mm = {
.m4u_plat = M4U_MT8186,
.flags = HAS_BCLK | HAS_SUB_COMM_2BITS | OUT_ORDER_WR_EN |
WR_THROT_EN | IOVA_34_EN | MTK_IOMMU_TYPE_MM,
WR_THROT_EN | IOVA_34_EN | MTK_IOMMU_TYPE_MM | PGTABLE_PA_35_EN,
.larbid_remap = {{0}, {1, MTK_INVALID_LARBID, 8}, {4}, {7}, {2}, {9, 11, 19, 20},
{MTK_INVALID_LARBID, 14, 16},
{MTK_INVALID_LARBID, 13, MTK_INVALID_LARBID, 17}},


@@ -1230,25 +1230,24 @@ static int omap_iommu_probe(struct platform_device *pdev)
if (err)
return err;
err = iommu_device_register(&obj->iommu, &omap_iommu_ops, &pdev->dev);
if (err)
goto out_sysfs;
obj->has_iommu_driver = true;
}
err = iommu_device_register(&obj->iommu, &omap_iommu_ops, &pdev->dev);
if (err)
goto out_sysfs;
pm_runtime_enable(obj->dev);
omap_iommu_debugfs_add(obj);
dev_info(&pdev->dev, "%s registered\n", obj->name);
/* Re-probe bus to probe device attached to this IOMMU */
bus_iommu_probe(&platform_bus_type);
return 0;
out_sysfs:
iommu_device_sysfs_remove(&obj->iommu);
if (obj->has_iommu_driver)
iommu_device_sysfs_remove(&obj->iommu);
return err;
}
@@ -1256,10 +1255,10 @@ static void omap_iommu_remove(struct platform_device *pdev)
{
struct omap_iommu *obj = platform_get_drvdata(pdev);
if (obj->has_iommu_driver) {
if (obj->has_iommu_driver)
iommu_device_sysfs_remove(&obj->iommu);
iommu_device_unregister(&obj->iommu);
}
iommu_device_unregister(&obj->iommu);
omap_iommu_debugfs_remove(obj);
@@ -1723,12 +1722,19 @@ static void omap_iommu_release_device(struct device *dev)
}
static int omap_iommu_of_xlate(struct device *dev, const struct of_phandle_args *args)
{
/* TODO: collect args->np to save re-parsing in probe above */
return 0;
}
static const struct iommu_ops omap_iommu_ops = {
.identity_domain = &omap_iommu_identity_domain,
.domain_alloc_paging = omap_iommu_domain_alloc_paging,
.probe_device = omap_iommu_probe_device,
.release_device = omap_iommu_release_device,
.device_group = generic_single_device_group,
.of_xlate = omap_iommu_of_xlate,
.pgsize_bitmap = OMAP_IOMMU_PGSIZES,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = omap_iommu_attach_dev,


@@ -0,0 +1,20 @@
# SPDX-License-Identifier: GPL-2.0-only
# RISC-V IOMMU support
config RISCV_IOMMU
bool "RISC-V IOMMU Support"
depends on RISCV && 64BIT
default y
select IOMMU_API
help
Support for implementations of the RISC-V IOMMU architecture that
complements the RISC-V MMU capabilities, providing similar address
translation and protection functions for accesses from I/O devices.
Say Y here if your SoC includes an IOMMU device implementing
the RISC-V IOMMU architecture.
config RISCV_IOMMU_PCI
def_bool y if RISCV_IOMMU && PCI_MSI
help
Support for the PCIe implementation of RISC-V IOMMU architecture.


@@ -0,0 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_RISCV_IOMMU) += iommu.o iommu-platform.o
obj-$(CONFIG_RISCV_IOMMU_PCI) += iommu-pci.o


@@ -0,0 +1,784 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright © 2022-2024 Rivos Inc.
* Copyright © 2023 FORTH-ICS/CARV
* Copyright © 2023 RISC-V IOMMU Task Group
*
* RISC-V IOMMU - Register Layout and Data Structures.
*
* Based on the 'RISC-V IOMMU Architecture Specification', Version 1.0
* Published at https://github.com/riscv-non-isa/riscv-iommu
*
*/
#ifndef _RISCV_IOMMU_BITS_H_
#define _RISCV_IOMMU_BITS_H_
#include <linux/types.h>
#include <linux/bitfield.h>
#include <linux/bits.h>
/*
* Chapter 5: Memory Mapped register interface
*/
/* Common field positions */
#define RISCV_IOMMU_PPN_FIELD GENMASK_ULL(53, 10)
#define RISCV_IOMMU_QUEUE_LOG2SZ_FIELD GENMASK_ULL(4, 0)
#define RISCV_IOMMU_QUEUE_INDEX_FIELD GENMASK_ULL(31, 0)
#define RISCV_IOMMU_QUEUE_ENABLE BIT(0)
#define RISCV_IOMMU_QUEUE_INTR_ENABLE BIT(1)
#define RISCV_IOMMU_QUEUE_MEM_FAULT BIT(8)
#define RISCV_IOMMU_QUEUE_OVERFLOW BIT(9)
#define RISCV_IOMMU_QUEUE_ACTIVE BIT(16)
#define RISCV_IOMMU_QUEUE_BUSY BIT(17)
#define RISCV_IOMMU_ATP_PPN_FIELD GENMASK_ULL(43, 0)
#define RISCV_IOMMU_ATP_MODE_FIELD GENMASK_ULL(63, 60)
/* 5.3 IOMMU Capabilities (64bits) */
#define RISCV_IOMMU_REG_CAPABILITIES 0x0000
#define RISCV_IOMMU_CAPABILITIES_VERSION GENMASK_ULL(7, 0)
#define RISCV_IOMMU_CAPABILITIES_SV32 BIT_ULL(8)
#define RISCV_IOMMU_CAPABILITIES_SV39 BIT_ULL(9)
#define RISCV_IOMMU_CAPABILITIES_SV48 BIT_ULL(10)
#define RISCV_IOMMU_CAPABILITIES_SV57 BIT_ULL(11)
#define RISCV_IOMMU_CAPABILITIES_SVPBMT BIT_ULL(15)
#define RISCV_IOMMU_CAPABILITIES_SV32X4 BIT_ULL(16)
#define RISCV_IOMMU_CAPABILITIES_SV39X4 BIT_ULL(17)
#define RISCV_IOMMU_CAPABILITIES_SV48X4 BIT_ULL(18)
#define RISCV_IOMMU_CAPABILITIES_SV57X4 BIT_ULL(19)
#define RISCV_IOMMU_CAPABILITIES_AMO_MRIF BIT_ULL(21)
#define RISCV_IOMMU_CAPABILITIES_MSI_FLAT BIT_ULL(22)
#define RISCV_IOMMU_CAPABILITIES_MSI_MRIF BIT_ULL(23)
#define RISCV_IOMMU_CAPABILITIES_AMO_HWAD BIT_ULL(24)
#define RISCV_IOMMU_CAPABILITIES_ATS BIT_ULL(25)
#define RISCV_IOMMU_CAPABILITIES_T2GPA BIT_ULL(26)
#define RISCV_IOMMU_CAPABILITIES_END BIT_ULL(27)
#define RISCV_IOMMU_CAPABILITIES_IGS GENMASK_ULL(29, 28)
#define RISCV_IOMMU_CAPABILITIES_HPM BIT_ULL(30)
#define RISCV_IOMMU_CAPABILITIES_DBG BIT_ULL(31)
#define RISCV_IOMMU_CAPABILITIES_PAS GENMASK_ULL(37, 32)
#define RISCV_IOMMU_CAPABILITIES_PD8 BIT_ULL(38)
#define RISCV_IOMMU_CAPABILITIES_PD17 BIT_ULL(39)
#define RISCV_IOMMU_CAPABILITIES_PD20 BIT_ULL(40)
/**
* enum riscv_iommu_igs_settings - Interrupt Generation Support Settings
* @RISCV_IOMMU_CAPABILITIES_IGS_MSI: IOMMU supports only MSI generation
* @RISCV_IOMMU_CAPABILITIES_IGS_WSI: IOMMU supports only Wired-Signaled interrupt
* @RISCV_IOMMU_CAPABILITIES_IGS_BOTH: IOMMU supports both MSI and WSI generation
* @RISCV_IOMMU_CAPABILITIES_IGS_RSRV: Reserved for standard use
*/
enum riscv_iommu_igs_settings {
RISCV_IOMMU_CAPABILITIES_IGS_MSI = 0,
RISCV_IOMMU_CAPABILITIES_IGS_WSI = 1,
RISCV_IOMMU_CAPABILITIES_IGS_BOTH = 2,
RISCV_IOMMU_CAPABILITIES_IGS_RSRV = 3
};
/* 5.4 Features control register (32bits) */
#define RISCV_IOMMU_REG_FCTL 0x0008
#define RISCV_IOMMU_FCTL_BE BIT(0)
#define RISCV_IOMMU_FCTL_WSI BIT(1)
#define RISCV_IOMMU_FCTL_GXL BIT(2)
/* 5.5 Device-directory-table pointer (64bits) */
#define RISCV_IOMMU_REG_DDTP 0x0010
#define RISCV_IOMMU_DDTP_IOMMU_MODE GENMASK_ULL(3, 0)
#define RISCV_IOMMU_DDTP_BUSY BIT_ULL(4)
#define RISCV_IOMMU_DDTP_PPN RISCV_IOMMU_PPN_FIELD
/**
* enum riscv_iommu_ddtp_modes - IOMMU translation modes
* @RISCV_IOMMU_DDTP_IOMMU_MODE_OFF: No inbound transactions allowed
* @RISCV_IOMMU_DDTP_IOMMU_MODE_BARE: Pass-through mode
* @RISCV_IOMMU_DDTP_IOMMU_MODE_1LVL: One-level DDT
* @RISCV_IOMMU_DDTP_IOMMU_MODE_2LVL: Two-level DDT
* @RISCV_IOMMU_DDTP_IOMMU_MODE_3LVL: Three-level DDT
* @RISCV_IOMMU_DDTP_IOMMU_MODE_MAX: Max value allowed by specification
*/
enum riscv_iommu_ddtp_modes {
RISCV_IOMMU_DDTP_IOMMU_MODE_OFF = 0,
RISCV_IOMMU_DDTP_IOMMU_MODE_BARE = 1,
RISCV_IOMMU_DDTP_IOMMU_MODE_1LVL = 2,
RISCV_IOMMU_DDTP_IOMMU_MODE_2LVL = 3,
RISCV_IOMMU_DDTP_IOMMU_MODE_3LVL = 4,
RISCV_IOMMU_DDTP_IOMMU_MODE_MAX = 4
};
/* 5.6 Command Queue Base (64bits) */
#define RISCV_IOMMU_REG_CQB 0x0018
#define RISCV_IOMMU_CQB_ENTRIES RISCV_IOMMU_QUEUE_LOG2SZ_FIELD
#define RISCV_IOMMU_CQB_PPN RISCV_IOMMU_PPN_FIELD
/* 5.7 Command Queue head (32bits) */
#define RISCV_IOMMU_REG_CQH 0x0020
#define RISCV_IOMMU_CQH_INDEX RISCV_IOMMU_QUEUE_INDEX_FIELD
/* 5.8 Command Queue tail (32bits) */
#define RISCV_IOMMU_REG_CQT 0x0024
#define RISCV_IOMMU_CQT_INDEX RISCV_IOMMU_QUEUE_INDEX_FIELD
/* 5.9 Fault Queue Base (64bits) */
#define RISCV_IOMMU_REG_FQB 0x0028
#define RISCV_IOMMU_FQB_ENTRIES RISCV_IOMMU_QUEUE_LOG2SZ_FIELD
#define RISCV_IOMMU_FQB_PPN RISCV_IOMMU_PPN_FIELD
/* 5.10 Fault Queue Head (32bits) */
#define RISCV_IOMMU_REG_FQH 0x0030
#define RISCV_IOMMU_FQH_INDEX RISCV_IOMMU_QUEUE_INDEX_FIELD
/* 5.11 Fault Queue tail (32bits) */
#define RISCV_IOMMU_REG_FQT 0x0034
#define RISCV_IOMMU_FQT_INDEX RISCV_IOMMU_QUEUE_INDEX_FIELD
/* 5.12 Page Request Queue base (64bits) */
#define RISCV_IOMMU_REG_PQB 0x0038
#define RISCV_IOMMU_PQB_ENTRIES RISCV_IOMMU_QUEUE_LOG2SZ_FIELD
#define RISCV_IOMMU_PQB_PPN RISCV_IOMMU_PPN_FIELD
/* 5.13 Page Request Queue head (32bits) */
#define RISCV_IOMMU_REG_PQH 0x0040
#define RISCV_IOMMU_PQH_INDEX RISCV_IOMMU_QUEUE_INDEX_FIELD
/* 5.14 Page Request Queue tail (32bits) */
#define RISCV_IOMMU_REG_PQT 0x0044
#define RISCV_IOMMU_PQT_INDEX_MASK RISCV_IOMMU_QUEUE_INDEX_FIELD
/* 5.15 Command Queue CSR (32bits) */
#define RISCV_IOMMU_REG_CQCSR 0x0048
#define RISCV_IOMMU_CQCSR_CQEN RISCV_IOMMU_QUEUE_ENABLE
#define RISCV_IOMMU_CQCSR_CIE RISCV_IOMMU_QUEUE_INTR_ENABLE
#define RISCV_IOMMU_CQCSR_CQMF RISCV_IOMMU_QUEUE_MEM_FAULT
#define RISCV_IOMMU_CQCSR_CMD_TO BIT(9)
#define RISCV_IOMMU_CQCSR_CMD_ILL BIT(10)
#define RISCV_IOMMU_CQCSR_FENCE_W_IP BIT(11)
#define RISCV_IOMMU_CQCSR_CQON RISCV_IOMMU_QUEUE_ACTIVE
#define RISCV_IOMMU_CQCSR_BUSY RISCV_IOMMU_QUEUE_BUSY
/* 5.16 Fault Queue CSR (32bits) */
#define RISCV_IOMMU_REG_FQCSR 0x004C
#define RISCV_IOMMU_FQCSR_FQEN RISCV_IOMMU_QUEUE_ENABLE
#define RISCV_IOMMU_FQCSR_FIE RISCV_IOMMU_QUEUE_INTR_ENABLE
#define RISCV_IOMMU_FQCSR_FQMF RISCV_IOMMU_QUEUE_MEM_FAULT
#define RISCV_IOMMU_FQCSR_FQOF RISCV_IOMMU_QUEUE_OVERFLOW
#define RISCV_IOMMU_FQCSR_FQON RISCV_IOMMU_QUEUE_ACTIVE
#define RISCV_IOMMU_FQCSR_BUSY RISCV_IOMMU_QUEUE_BUSY
/* 5.17 Page Request Queue CSR (32bits) */
#define RISCV_IOMMU_REG_PQCSR 0x0050
#define RISCV_IOMMU_PQCSR_PQEN RISCV_IOMMU_QUEUE_ENABLE
#define RISCV_IOMMU_PQCSR_PIE RISCV_IOMMU_QUEUE_INTR_ENABLE
#define RISCV_IOMMU_PQCSR_PQMF RISCV_IOMMU_QUEUE_MEM_FAULT
#define RISCV_IOMMU_PQCSR_PQOF RISCV_IOMMU_QUEUE_OVERFLOW
#define RISCV_IOMMU_PQCSR_PQON RISCV_IOMMU_QUEUE_ACTIVE
#define RISCV_IOMMU_PQCSR_BUSY RISCV_IOMMU_QUEUE_BUSY
/* 5.18 Interrupt Pending Status (32bits) */
#define RISCV_IOMMU_REG_IPSR 0x0054
#define RISCV_IOMMU_INTR_CQ 0
#define RISCV_IOMMU_INTR_FQ 1
#define RISCV_IOMMU_INTR_PM 2
#define RISCV_IOMMU_INTR_PQ 3
#define RISCV_IOMMU_INTR_COUNT 4
#define RISCV_IOMMU_IPSR_CIP BIT(RISCV_IOMMU_INTR_CQ)
#define RISCV_IOMMU_IPSR_FIP BIT(RISCV_IOMMU_INTR_FQ)
#define RISCV_IOMMU_IPSR_PMIP BIT(RISCV_IOMMU_INTR_PM)
#define RISCV_IOMMU_IPSR_PIP BIT(RISCV_IOMMU_INTR_PQ)
/* 5.19 Performance monitoring counter overflow status (32bits) */
#define RISCV_IOMMU_REG_IOCOUNTOVF 0x0058
#define RISCV_IOMMU_IOCOUNTOVF_CY BIT(0)
#define RISCV_IOMMU_IOCOUNTOVF_HPM GENMASK_ULL(31, 1)
/* 5.20 Performance monitoring counter inhibits (32bits) */
#define RISCV_IOMMU_REG_IOCOUNTINH 0x005C
#define RISCV_IOMMU_IOCOUNTINH_CY BIT(0)
#define RISCV_IOMMU_IOCOUNTINH_HPM GENMASK(31, 1)
/* 5.21 Performance monitoring cycles counter (64bits) */
#define RISCV_IOMMU_REG_IOHPMCYCLES 0x0060
#define RISCV_IOMMU_IOHPMCYCLES_COUNTER GENMASK_ULL(62, 0)
#define RISCV_IOMMU_IOHPMCYCLES_OF BIT_ULL(63)
/* 5.22 Performance monitoring event counters (31 * 64bits) */
#define RISCV_IOMMU_REG_IOHPMCTR_BASE 0x0068
#define RISCV_IOMMU_REG_IOHPMCTR(_n) (RISCV_IOMMU_REG_IOHPMCTR_BASE + ((_n) * 0x8))
/* 5.23 Performance monitoring event selectors (31 * 64bits) */
#define RISCV_IOMMU_REG_IOHPMEVT_BASE 0x0160
#define RISCV_IOMMU_REG_IOHPMEVT(_n) (RISCV_IOMMU_REG_IOHPMEVT_BASE + ((_n) * 0x8))
#define RISCV_IOMMU_IOHPMEVT_EVENTID GENMASK_ULL(14, 0)
#define RISCV_IOMMU_IOHPMEVT_DMASK BIT_ULL(15)
#define RISCV_IOMMU_IOHPMEVT_PID_PSCID GENMASK_ULL(35, 16)
#define RISCV_IOMMU_IOHPMEVT_DID_GSCID GENMASK_ULL(59, 36)
#define RISCV_IOMMU_IOHPMEVT_PV_PSCV BIT_ULL(60)
#define RISCV_IOMMU_IOHPMEVT_DV_GSCV BIT_ULL(61)
#define RISCV_IOMMU_IOHPMEVT_IDT BIT_ULL(62)
#define RISCV_IOMMU_IOHPMEVT_OF BIT_ULL(63)
/* Number of defined performance-monitoring event selectors */
#define RISCV_IOMMU_IOHPMEVT_CNT 31
/**
* enum riscv_iommu_hpmevent_id - Performance-monitoring event identifier
*
* @RISCV_IOMMU_HPMEVENT_INVALID: Invalid event, do not count
* @RISCV_IOMMU_HPMEVENT_URQ: Untranslated requests
* @RISCV_IOMMU_HPMEVENT_TRQ: Translated requests
* @RISCV_IOMMU_HPMEVENT_ATS_RQ: ATS translation requests
* @RISCV_IOMMU_HPMEVENT_TLB_MISS: TLB misses
* @RISCV_IOMMU_HPMEVENT_DD_WALK: Device directory walks
* @RISCV_IOMMU_HPMEVENT_PD_WALK: Process directory walks
* @RISCV_IOMMU_HPMEVENT_S_VS_WALKS: First-stage page table walks
* @RISCV_IOMMU_HPMEVENT_G_WALKS: Second-stage page table walks
* @RISCV_IOMMU_HPMEVENT_MAX: Value to denote maximum Event IDs
*/
enum riscv_iommu_hpmevent_id {
RISCV_IOMMU_HPMEVENT_INVALID = 0,
RISCV_IOMMU_HPMEVENT_URQ = 1,
RISCV_IOMMU_HPMEVENT_TRQ = 2,
RISCV_IOMMU_HPMEVENT_ATS_RQ = 3,
RISCV_IOMMU_HPMEVENT_TLB_MISS = 4,
RISCV_IOMMU_HPMEVENT_DD_WALK = 5,
RISCV_IOMMU_HPMEVENT_PD_WALK = 6,
RISCV_IOMMU_HPMEVENT_S_VS_WALKS = 7,
RISCV_IOMMU_HPMEVENT_G_WALKS = 8,
RISCV_IOMMU_HPMEVENT_MAX = 9
};
/* 5.24 Translation request IOVA (64bits) */
#define RISCV_IOMMU_REG_TR_REQ_IOVA 0x0258
#define RISCV_IOMMU_TR_REQ_IOVA_VPN GENMASK_ULL(63, 12)
/* 5.25 Translation request control (64bits) */
#define RISCV_IOMMU_REG_TR_REQ_CTL 0x0260
#define RISCV_IOMMU_TR_REQ_CTL_GO_BUSY BIT_ULL(0)
#define RISCV_IOMMU_TR_REQ_CTL_PRIV BIT_ULL(1)
#define RISCV_IOMMU_TR_REQ_CTL_EXE BIT_ULL(2)
#define RISCV_IOMMU_TR_REQ_CTL_NW BIT_ULL(3)
#define RISCV_IOMMU_TR_REQ_CTL_PID GENMASK_ULL(31, 12)
#define RISCV_IOMMU_TR_REQ_CTL_PV BIT_ULL(32)
#define RISCV_IOMMU_TR_REQ_CTL_DID GENMASK_ULL(63, 40)
/* 5.26 Translation request response (64bits) */
#define RISCV_IOMMU_REG_TR_RESPONSE 0x0268
#define RISCV_IOMMU_TR_RESPONSE_FAULT BIT_ULL(0)
#define RISCV_IOMMU_TR_RESPONSE_PBMT GENMASK_ULL(8, 7)
#define RISCV_IOMMU_TR_RESPONSE_SZ BIT_ULL(9)
#define RISCV_IOMMU_TR_RESPONSE_PPN RISCV_IOMMU_PPN_FIELD
/* 5.27 Interrupt cause to vector (64bits) */
#define RISCV_IOMMU_REG_ICVEC 0x02F8
#define RISCV_IOMMU_ICVEC_CIV GENMASK_ULL(3, 0)
#define RISCV_IOMMU_ICVEC_FIV GENMASK_ULL(7, 4)
#define RISCV_IOMMU_ICVEC_PMIV GENMASK_ULL(11, 8)
#define RISCV_IOMMU_ICVEC_PIV GENMASK_ULL(15, 12)
/* 5.28 MSI Configuration table (32 * 64bits) */
#define RISCV_IOMMU_REG_MSI_CFG_TBL 0x0300
#define RISCV_IOMMU_REG_MSI_CFG_TBL_ADDR(_n) \
(RISCV_IOMMU_REG_MSI_CFG_TBL + ((_n) * 0x10))
#define RISCV_IOMMU_MSI_CFG_TBL_ADDR GENMASK_ULL(55, 2)
#define RISCV_IOMMU_REG_MSI_CFG_TBL_DATA(_n) \
(RISCV_IOMMU_REG_MSI_CFG_TBL + ((_n) * 0x10) + 0x08)
#define RISCV_IOMMU_MSI_CFG_TBL_DATA GENMASK_ULL(31, 0)
#define RISCV_IOMMU_REG_MSI_CFG_TBL_CTRL(_n) \
(RISCV_IOMMU_REG_MSI_CFG_TBL + ((_n) * 0x10) + 0x0C)
#define RISCV_IOMMU_MSI_CFG_TBL_CTRL_M BIT_ULL(0)
#define RISCV_IOMMU_REG_SIZE 0x1000
/*
* Chapter 2: Data structures
*/
/*
* Device Directory Table macros for non-leaf nodes
*/
#define RISCV_IOMMU_DDTE_V BIT_ULL(0)
#define RISCV_IOMMU_DDTE_PPN RISCV_IOMMU_PPN_FIELD
/**
* struct riscv_iommu_dc - Device Context
* @tc: Translation Control
* @iohgatp: I/O Hypervisor guest address translation and protection
* (Second stage context)
* @ta: Translation Attributes
* @fsc: First stage context
* @msiptp: MSI page table pointer
* @msi_addr_mask: MSI address mask
* @msi_addr_pattern: MSI address pattern
* @_reserved: Reserved for future use, padding
*
* This structure is used for leaf nodes on the Device Directory Table,
* in case RISCV_IOMMU_CAPABILITIES_MSI_FLAT is not set, the bottom 4 fields
* are not present and are skipped with pointer arithmetic to avoid
* casting, check out riscv_iommu_get_dc().
* See section 2.1 for more details
*/
struct riscv_iommu_dc {
u64 tc;
u64 iohgatp;
u64 ta;
u64 fsc;
u64 msiptp;
u64 msi_addr_mask;
u64 msi_addr_pattern;
u64 _reserved;
};
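/*
 * Size note (illustrative, derived from the layout above): with
 * RISCV_IOMMU_CAPABILITIES_MSI_FLAT set a device context uses all eight
 * 64-bit fields (64 bytes); without it only tc/iohgatp/ta/fsc are present,
 * so a leaf DDT page holds twice as many 32-byte contexts and the index
 * arithmetic must use the smaller stride (see riscv_iommu_get_dc()).
 */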
/* Translation control fields */
#define RISCV_IOMMU_DC_TC_V BIT_ULL(0)
#define RISCV_IOMMU_DC_TC_EN_ATS BIT_ULL(1)
#define RISCV_IOMMU_DC_TC_EN_PRI BIT_ULL(2)
#define RISCV_IOMMU_DC_TC_T2GPA BIT_ULL(3)
#define RISCV_IOMMU_DC_TC_DTF BIT_ULL(4)
#define RISCV_IOMMU_DC_TC_PDTV BIT_ULL(5)
#define RISCV_IOMMU_DC_TC_PRPR BIT_ULL(6)
#define RISCV_IOMMU_DC_TC_GADE BIT_ULL(7)
#define RISCV_IOMMU_DC_TC_SADE BIT_ULL(8)
#define RISCV_IOMMU_DC_TC_DPE BIT_ULL(9)
#define RISCV_IOMMU_DC_TC_SBE BIT_ULL(10)
#define RISCV_IOMMU_DC_TC_SXL BIT_ULL(11)
/* Second-stage (aka G-stage) context fields */
#define RISCV_IOMMU_DC_IOHGATP_PPN RISCV_IOMMU_ATP_PPN_FIELD
#define RISCV_IOMMU_DC_IOHGATP_GSCID GENMASK_ULL(59, 44)
#define RISCV_IOMMU_DC_IOHGATP_MODE RISCV_IOMMU_ATP_MODE_FIELD
/**
* enum riscv_iommu_dc_iohgatp_modes - Guest address translation/protection modes
* @RISCV_IOMMU_DC_IOHGATP_MODE_BARE: No translation/protection
* @RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4: Sv32x4 (2-bit extension of Sv32), when fctl.GXL == 1
* @RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4: Sv39x4 (2-bit extension of Sv39), when fctl.GXL == 0
* @RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4: Sv48x4 (2-bit extension of Sv48), when fctl.GXL == 0
* @RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4: Sv57x4 (2-bit extension of Sv57), when fctl.GXL == 0
*/
enum riscv_iommu_dc_iohgatp_modes {
RISCV_IOMMU_DC_IOHGATP_MODE_BARE = 0,
RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4 = 8,
RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4 = 8,
RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4 = 9,
RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4 = 10
};
/* Translation attributes fields */
#define RISCV_IOMMU_DC_TA_PSCID GENMASK_ULL(31, 12)
/* First-stage context fields */
#define RISCV_IOMMU_DC_FSC_PPN RISCV_IOMMU_ATP_PPN_FIELD
#define RISCV_IOMMU_DC_FSC_MODE RISCV_IOMMU_ATP_MODE_FIELD
/**
* enum riscv_iommu_dc_fsc_atp_modes - First stage address translation/protection modes
* @RISCV_IOMMU_DC_FSC_MODE_BARE: No translation/protection
* @RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV32: Sv32, when dc.tc.SXL == 1
* @RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39: Sv39, when dc.tc.SXL == 0
* @RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48: Sv48, when dc.tc.SXL == 0
* @RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57: Sv57, when dc.tc.SXL == 0
* @RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8: 1lvl PDT, 8bit process ids
* @RISCV_IOMMU_DC_FSC_PDTP_MODE_PD17: 2lvl PDT, 17bit process ids
* @RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20: 3lvl PDT, 20bit process ids
*
* FSC holds IOSATP when RISCV_IOMMU_DC_TC_PDTV is 0 and PDTP otherwise.
* IOSATP controls the first stage address translation (same as the satp register on
* the RISC-V MMU), and PDTP holds the process directory table, used to select a
* first stage page table based on a process id (for devices that support multiple
* process ids).
*/
enum riscv_iommu_dc_fsc_atp_modes {
RISCV_IOMMU_DC_FSC_MODE_BARE = 0,
RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV32 = 8,
RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39 = 8,
RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48 = 9,
RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57 = 10,
RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8 = 1,
RISCV_IOMMU_DC_FSC_PDTP_MODE_PD17 = 2,
RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20 = 3
};
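/*
 * Minimal sketch (illustrative, assumes FIELD_PREP() from <linux/bitfield.h>
 * and a first-stage page table whose physical address is pgd_phys, a name
 * used here only for the example): an Sv39 IOSATP value for dc->fsc would
 * be composed roughly as
 *
 *   fsc = FIELD_PREP(RISCV_IOMMU_DC_FSC_MODE,
 *                    RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39) |
 *         FIELD_PREP(RISCV_IOMMU_DC_FSC_PPN, pgd_phys >> PAGE_SHIFT);
 *
 * with RISCV_IOMMU_DC_TC_PDTV clear so the field is interpreted as IOSATP.
 */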
/* MSI page table pointer */
#define RISCV_IOMMU_DC_MSIPTP_PPN RISCV_IOMMU_ATP_PPN_FIELD
#define RISCV_IOMMU_DC_MSIPTP_MODE RISCV_IOMMU_ATP_MODE_FIELD
#define RISCV_IOMMU_DC_MSIPTP_MODE_OFF 0
#define RISCV_IOMMU_DC_MSIPTP_MODE_FLAT 1
/* MSI address mask */
#define RISCV_IOMMU_DC_MSI_ADDR_MASK GENMASK_ULL(51, 0)
/* MSI address pattern */
#define RISCV_IOMMU_DC_MSI_PATTERN GENMASK_ULL(51, 0)
/**
* struct riscv_iommu_pc - Process Context
* @ta: Translation Attributes
* @fsc: First stage context
*
* This structure is used for leaf nodes on the Process Directory Table
* See section 2.3 for more details
*/
struct riscv_iommu_pc {
u64 ta;
u64 fsc;
};
/* Translation attributes fields */
#define RISCV_IOMMU_PC_TA_V BIT_ULL(0)
#define RISCV_IOMMU_PC_TA_ENS BIT_ULL(1)
#define RISCV_IOMMU_PC_TA_SUM BIT_ULL(2)
#define RISCV_IOMMU_PC_TA_PSCID GENMASK_ULL(31, 12)
/* First stage context fields */
#define RISCV_IOMMU_PC_FSC_PPN RISCV_IOMMU_ATP_PPN_FIELD
#define RISCV_IOMMU_PC_FSC_MODE RISCV_IOMMU_ATP_MODE_FIELD
/*
* Chapter 3: In-memory queue interface
*/
/**
* struct riscv_iommu_command - Generic IOMMU command structure
* @dword0: Includes the opcode and the function identifier
* @dword1: Opcode specific data
*
* The commands are interpreted as two 64bit fields, where the first
* 7bits of the first field are the opcode which also defines the
* command's format, followed by a 3bit field that specifies the
* function invoked by that command, and the rest is opcode-specific.
* This is a generic struct which will be populated differently
* according to each command. For more infos on the commands and
* the command queue check section 3.1.
*/
struct riscv_iommu_command {
u64 dword0;
u64 dword1;
};
/* Fields on dword0, common for all commands */
#define RISCV_IOMMU_CMD_OPCODE GENMASK_ULL(6, 0)
#define RISCV_IOMMU_CMD_FUNC GENMASK_ULL(9, 7)
/* 3.1.1 IOMMU Page-table cache invalidation */
/* Fields on dword0 */
#define RISCV_IOMMU_CMD_IOTINVAL_OPCODE 1
#define RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA 0
#define RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA 1
#define RISCV_IOMMU_CMD_IOTINVAL_AV BIT_ULL(10)
#define RISCV_IOMMU_CMD_IOTINVAL_PSCID GENMASK_ULL(31, 12)
#define RISCV_IOMMU_CMD_IOTINVAL_PSCV BIT_ULL(32)
#define RISCV_IOMMU_CMD_IOTINVAL_GV BIT_ULL(33)
#define RISCV_IOMMU_CMD_IOTINVAL_GSCID GENMASK_ULL(59, 44)
/* dword1[61:10] is the 4K-aligned page address */
#define RISCV_IOMMU_CMD_IOTINVAL_ADDR GENMASK_ULL(61, 10)
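/*
 * Minimal sketch (illustrative; the real builders such as
 * riscv_iommu_cmd_inval_vma(), declared at the end of this header, may
 * differ): an IOTINVAL.VMA command covering all addresses and PSCIDs would
 * start as
 *
 *   cmd.dword0 = FIELD_PREP(RISCV_IOMMU_CMD_OPCODE,
 *                           RISCV_IOMMU_CMD_IOTINVAL_OPCODE) |
 *                FIELD_PREP(RISCV_IOMMU_CMD_FUNC,
 *                           RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA);
 *   cmd.dword1 = 0;
 *
 * with AV/PSCV/GV left clear; scoping it to one address or PSCID means
 * setting the corresponding valid bit and field defined above.
 */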
/* 3.1.2 IOMMU Command Queue Fences */
/* Fields on dword0 */
#define RISCV_IOMMU_CMD_IOFENCE_OPCODE 2
#define RISCV_IOMMU_CMD_IOFENCE_FUNC_C 0
#define RISCV_IOMMU_CMD_IOFENCE_AV BIT_ULL(10)
#define RISCV_IOMMU_CMD_IOFENCE_WSI BIT_ULL(11)
#define RISCV_IOMMU_CMD_IOFENCE_PR BIT_ULL(12)
#define RISCV_IOMMU_CMD_IOFENCE_PW BIT_ULL(13)
#define RISCV_IOMMU_CMD_IOFENCE_DATA GENMASK_ULL(63, 32)
/* dword1 is the address, word-size aligned and shifted to the right by two bits. */
/* 3.1.3 IOMMU Directory cache invalidation */
/* Fields on dword0 */
#define RISCV_IOMMU_CMD_IODIR_OPCODE 3
#define RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT 0
#define RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT 1
#define RISCV_IOMMU_CMD_IODIR_PID GENMASK_ULL(31, 12)
#define RISCV_IOMMU_CMD_IODIR_DV BIT_ULL(33)
#define RISCV_IOMMU_CMD_IODIR_DID GENMASK_ULL(63, 40)
/* dword1 is reserved for standard use */
/* 3.1.4 IOMMU PCIe ATS */
/* Fields on dword0 */
#define RISCV_IOMMU_CMD_ATS_OPCODE 4
#define RISCV_IOMMU_CMD_ATS_FUNC_INVAL 0
#define RISCV_IOMMU_CMD_ATS_FUNC_PRGR 1
#define RISCV_IOMMU_CMD_ATS_PID GENMASK_ULL(31, 12)
#define RISCV_IOMMU_CMD_ATS_PV BIT_ULL(32)
#define RISCV_IOMMU_CMD_ATS_DSV BIT_ULL(33)
#define RISCV_IOMMU_CMD_ATS_RID GENMASK_ULL(55, 40)
#define RISCV_IOMMU_CMD_ATS_DSEG GENMASK_ULL(63, 56)
/* dword1 is the ATS payload, two different payload types for INVAL and PRGR */
/* ATS.INVAL payload*/
#define RISCV_IOMMU_CMD_ATS_INVAL_G BIT_ULL(0)
/* Bits 1 - 10 are zeroed */
#define RISCV_IOMMU_CMD_ATS_INVAL_S BIT_ULL(11)
#define RISCV_IOMMU_CMD_ATS_INVAL_UADDR GENMASK_ULL(63, 12)
/* ATS.PRGR payload */
/* Bits 0 - 31 are zeroed */
#define RISCV_IOMMU_CMD_ATS_PRGR_PRG_INDEX GENMASK_ULL(40, 32)
/* Bits 41 - 43 are zeroed */
#define RISCV_IOMMU_CMD_ATS_PRGR_RESP_CODE GENMASK_ULL(47, 44)
#define RISCV_IOMMU_CMD_ATS_PRGR_DST_ID GENMASK_ULL(63, 48)
/**
* struct riscv_iommu_fq_record - Fault/Event Queue Record
* @hdr: Header, includes fault/event cause, PID/DID, transaction type etc
* @_reserved: Low 32bits for custom use, high 32bits for standard use
* @iotval: Transaction-type/cause specific format
* @iotval2: Cause specific format
*
* The fault/event queue reports events and failures raised when
* processing transactions. Each record is a 32byte structure where
* the first dword has a fixed format for providing generic infos
* regarding the fault/event, and two more dwords are there for
* fault/event-specific information. For more details see section
* 3.2.
*/
struct riscv_iommu_fq_record {
u64 hdr;
u64 _reserved;
u64 iotval;
u64 iotval2;
};
/* Fields on header */
#define RISCV_IOMMU_FQ_HDR_CAUSE GENMASK_ULL(11, 0)
#define RISCV_IOMMU_FQ_HDR_PID GENMASK_ULL(31, 12)
#define RISCV_IOMMU_FQ_HDR_PV BIT_ULL(32)
#define RISCV_IOMMU_FQ_HDR_PRIV BIT_ULL(33)
#define RISCV_IOMMU_FQ_HDR_TTYP GENMASK_ULL(39, 34)
#define RISCV_IOMMU_FQ_HDR_DID GENMASK_ULL(63, 40)
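/*
 * Illustrative decode (assumes FIELD_GET() from <linux/bitfield.h>; not a
 * helper defined in this header): a fault handler would typically extract
 *
 *   cause = FIELD_GET(RISCV_IOMMU_FQ_HDR_CAUSE, rec.hdr);
 *   devid = FIELD_GET(RISCV_IOMMU_FQ_HDR_DID, rec.hdr);
 *
 * and then interpret iotval/iotval2 according to the cause values below.
 */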
/**
* enum riscv_iommu_fq_causes - Fault/event cause values
* @RISCV_IOMMU_FQ_CAUSE_INST_FAULT: Instruction access fault
* @RISCV_IOMMU_FQ_CAUSE_RD_ADDR_MISALIGNED: Read address misaligned
* @RISCV_IOMMU_FQ_CAUSE_RD_FAULT: Read load fault
* @RISCV_IOMMU_FQ_CAUSE_WR_ADDR_MISALIGNED: Write/AMO address misaligned
* @RISCV_IOMMU_FQ_CAUSE_WR_FAULT: Write/AMO access fault
* @RISCV_IOMMU_FQ_CAUSE_INST_FAULT_S: Instruction page fault
* @RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S: Read page fault
* @RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S: Write/AMO page fault
* @RISCV_IOMMU_FQ_CAUSE_INST_FAULT_VS: Instruction guest page fault
* @RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS: Read guest page fault
* @RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS: Write/AMO guest page fault
* @RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED: All inbound transactions disallowed
* @RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT: DDT entry load access fault
* @RISCV_IOMMU_FQ_CAUSE_DDT_INVALID: DDT entry invalid
* @RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED: DDT entry misconfigured
* @RISCV_IOMMU_FQ_CAUSE_TTYP_BLOCKED: Transaction type disallowed
* @RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT: MSI PTE load access fault
* @RISCV_IOMMU_FQ_CAUSE_MSI_INVALID: MSI PTE invalid
* @RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED: MSI PTE misconfigured
* @RISCV_IOMMU_FQ_CAUSE_MRIF_FAULT: MRIF access fault
* @RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT: PDT entry load access fault
* @RISCV_IOMMU_FQ_CAUSE_PDT_INVALID: PDT entry invalid
* @RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED: PDT entry misconfigured
* @RISCV_IOMMU_FQ_CAUSE_DDT_CORRUPTED: DDT data corruption
* @RISCV_IOMMU_FQ_CAUSE_PDT_CORRUPTED: PDT data corruption
* @RISCV_IOMMU_FQ_CAUSE_MSI_PT_CORRUPTED: MSI page table data corruption
* @RISCV_IOMMU_FQ_CAUSE_MRIF_CORRUIPTED: MRIF data corruption
* @RISCV_IOMMU_FQ_CAUSE_INTERNAL_DP_ERROR: Internal data path error
* @RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT: IOMMU MSI write access fault
* @RISCV_IOMMU_FQ_CAUSE_PT_CORRUPTED: First/second stage page table data corruption
*
* Values are on table 11 of the spec, encodings 275 - 2047 are reserved for standard
* use, and 2048 - 4095 for custom use.
*/
enum riscv_iommu_fq_causes {
RISCV_IOMMU_FQ_CAUSE_INST_FAULT = 1,
RISCV_IOMMU_FQ_CAUSE_RD_ADDR_MISALIGNED = 4,
RISCV_IOMMU_FQ_CAUSE_RD_FAULT = 5,
RISCV_IOMMU_FQ_CAUSE_WR_ADDR_MISALIGNED = 6,
RISCV_IOMMU_FQ_CAUSE_WR_FAULT = 7,
RISCV_IOMMU_FQ_CAUSE_INST_FAULT_S = 12,
RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S = 13,
RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S = 15,
RISCV_IOMMU_FQ_CAUSE_INST_FAULT_VS = 20,
RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS = 21,
RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS = 23,
RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED = 256,
RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT = 257,
RISCV_IOMMU_FQ_CAUSE_DDT_INVALID = 258,
RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED = 259,
RISCV_IOMMU_FQ_CAUSE_TTYP_BLOCKED = 260,
RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT = 261,
RISCV_IOMMU_FQ_CAUSE_MSI_INVALID = 262,
RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED = 263,
RISCV_IOMMU_FQ_CAUSE_MRIF_FAULT = 264,
RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT = 265,
RISCV_IOMMU_FQ_CAUSE_PDT_INVALID = 266,
RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED = 267,
RISCV_IOMMU_FQ_CAUSE_DDT_CORRUPTED = 268,
RISCV_IOMMU_FQ_CAUSE_PDT_CORRUPTED = 269,
RISCV_IOMMU_FQ_CAUSE_MSI_PT_CORRUPTED = 270,
RISCV_IOMMU_FQ_CAUSE_MRIF_CORRUIPTED = 271,
RISCV_IOMMU_FQ_CAUSE_INTERNAL_DP_ERROR = 272,
RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT = 273,
RISCV_IOMMU_FQ_CAUSE_PT_CORRUPTED = 274
};
/**
* enum riscv_iommu_fq_ttypes - Fault/event transaction types
* @RISCV_IOMMU_FQ_TTYP_NONE: None. Fault not caused by an inbound transaction.
* @RISCV_IOMMU_FQ_TTYP_UADDR_INST_FETCH: Instruction fetch from untranslated address
* @RISCV_IOMMU_FQ_TTYP_UADDR_RD: Read from untranslated address
* @RISCV_IOMMU_FQ_TTYP_UADDR_WR: Write/AMO to untranslated address
* @RISCV_IOMMU_FQ_TTYP_TADDR_INST_FETCH: Instruction fetch from translated address
* @RISCV_IOMMU_FQ_TTYP_TADDR_RD: Read from translated address
* @RISCV_IOMMU_FQ_TTYP_TADDR_WR: Write/AMO to translated address
* @RISCV_IOMMU_FQ_TTYP_PCIE_ATS_REQ: PCIe ATS translation request
* @RISCV_IOMMU_FQ_TTYP_PCIE_MSG_REQ: PCIe message request
*
* Values are defined in table 12 of the spec; type 4 and types 10 - 31 are reserved
* for standard use, and 32 - 63 for custom use.
*/
enum riscv_iommu_fq_ttypes {
RISCV_IOMMU_FQ_TTYP_NONE = 0,
RISCV_IOMMU_FQ_TTYP_UADDR_INST_FETCH = 1,
RISCV_IOMMU_FQ_TTYP_UADDR_RD = 2,
RISCV_IOMMU_FQ_TTYP_UADDR_WR = 3,
RISCV_IOMMU_FQ_TTYP_TADDR_INST_FETCH = 5,
RISCV_IOMMU_FQ_TTYP_TADDR_RD = 6,
RISCV_IOMMU_FQ_TTYP_TADDR_WR = 7,
RISCV_IOMMU_FQ_TTYP_PCIE_ATS_REQ = 8,
RISCV_IOMMU_FQ_TTYP_PCIE_MSG_REQ = 9,
};
/**
* struct riscv_iommu_pq_record - PCIe Page Request record
* @hdr: Header; includes PID, DID, etc.
* @payload: Holds the page address, request group and permission bits
*
* For more information on the PCIe Page Request queue see chapter 3.3 of the spec.
*/
struct riscv_iommu_pq_record {
u64 hdr;
u64 payload;
};
/* Header fields */
#define RISCV_IOMMU_PQ_HDR_PID GENMASK_ULL(31, 12)
#define RISCV_IOMMU_PQ_HDR_PV BIT_ULL(32)
#define RISCV_IOMMU_PQ_HDR_PRIV BIT_ULL(33)
#define RISCV_IOMMU_PQ_HDR_EXEC BIT_ULL(34)
#define RISCV_IOMMU_PQ_HDR_DID GENMASK_ULL(63, 40)
/* Payload fields */
#define RISCV_IOMMU_PQ_PAYLOAD_R BIT_ULL(0)
#define RISCV_IOMMU_PQ_PAYLOAD_W BIT_ULL(1)
#define RISCV_IOMMU_PQ_PAYLOAD_L BIT_ULL(2)
#define RISCV_IOMMU_PQ_PAYLOAD_RWL_MASK GENMASK_ULL(2, 0)
#define RISCV_IOMMU_PQ_PAYLOAD_PRGI GENMASK_ULL(11, 3) /* Page Request Group Index */
#define RISCV_IOMMU_PQ_PAYLOAD_ADDR GENMASK_ULL(63, 12)
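
The payload layout maps directly onto the pieces a page-fault report needs: the page address in bits 63:12, the group index, and the "last request in group" bit. A small illustrative helper under the same caveat (the function is hypothetical):

#include <linux/bitfield.h>
#include "iommu-bits.h"

/* Hypothetical example: pull the address, group index and L bit out of a
 * page request payload. The address field holds bits 63:12 of the page
 * address, so the result is shifted back up by 12.
 */
static void example_parse_page_request(const struct riscv_iommu_pq_record *req,
                                       u64 *addr, u16 *prgi, bool *last)
{
        *addr = FIELD_GET(RISCV_IOMMU_PQ_PAYLOAD_ADDR, req->payload) << 12;
        *prgi = FIELD_GET(RISCV_IOMMU_PQ_PAYLOAD_PRGI, req->payload);
        *last = !!(req->payload & RISCV_IOMMU_PQ_PAYLOAD_L);
}
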
/**
* struct riscv_iommu_msipte - MSI Page Table Entry
* @pte: MSI PTE
* @mrif_info: Memory-resident interrupt file info
*
* The MSI Page Table is used for virtualizing MSIs, so that when
* a device sends an MSI to a guest, the IOMMU can reroute it
* by translating the MSI address, either to a guest interrupt file
* or a memory-resident interrupt file (MRIF). Note that this page table
* is a flat array of MSI PTEs rather than a multi-level page table; each
* entry is a leaf entry. For more information see the AIA spec, chapter 9.5.
*
* In basic mode the mrif_info field is ignored by the IOMMU and can be
* used by software; any other reserved fields in the pte must be zeroed
* out by software.
*/
struct riscv_iommu_msipte {
u64 pte;
u64 mrif_info;
};
/* Fields on pte */
#define RISCV_IOMMU_MSIPTE_V BIT_ULL(0)
#define RISCV_IOMMU_MSIPTE_M GENMASK_ULL(2, 1)
#define RISCV_IOMMU_MSIPTE_MRIF_ADDR GENMASK_ULL(53, 7) /* When M == 1 (MRIF mode) */
#define RISCV_IOMMU_MSIPTE_PPN RISCV_IOMMU_PPN_FIELD /* When M == 3 (basic mode) */
#define RISCV_IOMMU_MSIPTE_C BIT_ULL(63)
/* Fields on mrif_info */
#define RISCV_IOMMU_MSIPTE_MRIF_NID GENMASK_ULL(9, 0)
#define RISCV_IOMMU_MSIPTE_MRIF_NPPN RISCV_IOMMU_PPN_FIELD
#define RISCV_IOMMU_MSIPTE_MRIF_NID_MSB BIT_ULL(60)
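
To make the basic-mode layout concrete: an MSI PTE that redirects the MSI page straight to a physical interrupt-file page only needs V set, M set to 3 and the PPN filled in. A sketch under those assumptions (the helper is hypothetical; mode value 3 for basic mode comes from the RISCV_IOMMU_MSIPTE_PPN comment above):

#include <linux/bitfield.h>
#include "iommu-bits.h"

/* Hypothetical example: write a valid basic-mode (M == 3) MSI PTE pointing
 * at the physical page @target_pa.
 */
static void example_set_msipte_basic(struct riscv_iommu_msipte *pte,
                                     phys_addr_t target_pa)
{
        pte->pte = RISCV_IOMMU_MSIPTE_V |
                   FIELD_PREP(RISCV_IOMMU_MSIPTE_M, 3) |
                   FIELD_PREP(RISCV_IOMMU_MSIPTE_PPN, phys_to_pfn(target_pa));
        /* In basic mode mrif_info is ignored by the IOMMU; keep it zeroed. */
        pte->mrif_info = 0;
}
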
/* Helper functions: command structure builders. */
static inline void riscv_iommu_cmd_inval_vma(struct riscv_iommu_command *cmd)
{
cmd->dword0 = FIELD_PREP(RISCV_IOMMU_CMD_OPCODE, RISCV_IOMMU_CMD_IOTINVAL_OPCODE) |
FIELD_PREP(RISCV_IOMMU_CMD_FUNC, RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA);
cmd->dword1 = 0;
}
static inline void riscv_iommu_cmd_inval_set_addr(struct riscv_iommu_command *cmd,
u64 addr)
{
cmd->dword1 = FIELD_PREP(RISCV_IOMMU_CMD_IOTINVAL_ADDR, phys_to_pfn(addr));
cmd->dword0 |= RISCV_IOMMU_CMD_IOTINVAL_AV;
}
static inline void riscv_iommu_cmd_inval_set_pscid(struct riscv_iommu_command *cmd,
int pscid)
{
cmd->dword0 |= FIELD_PREP(RISCV_IOMMU_CMD_IOTINVAL_PSCID, pscid) |
RISCV_IOMMU_CMD_IOTINVAL_PSCV;
}
static inline void riscv_iommu_cmd_inval_set_gscid(struct riscv_iommu_command *cmd,
int gscid)
{
cmd->dword0 |= FIELD_PREP(RISCV_IOMMU_CMD_IOTINVAL_GSCID, gscid) |
RISCV_IOMMU_CMD_IOTINVAL_GV;
}
static inline void riscv_iommu_cmd_iofence(struct riscv_iommu_command *cmd)
{
cmd->dword0 = FIELD_PREP(RISCV_IOMMU_CMD_OPCODE, RISCV_IOMMU_CMD_IOFENCE_OPCODE) |
FIELD_PREP(RISCV_IOMMU_CMD_FUNC, RISCV_IOMMU_CMD_IOFENCE_FUNC_C) |
RISCV_IOMMU_CMD_IOFENCE_PR | RISCV_IOMMU_CMD_IOFENCE_PW;
cmd->dword1 = 0;
}
static inline void riscv_iommu_cmd_iofence_set_av(struct riscv_iommu_command *cmd,
u64 addr, u32 data)
{
cmd->dword0 = FIELD_PREP(RISCV_IOMMU_CMD_OPCODE, RISCV_IOMMU_CMD_IOFENCE_OPCODE) |
FIELD_PREP(RISCV_IOMMU_CMD_FUNC, RISCV_IOMMU_CMD_IOFENCE_FUNC_C) |
FIELD_PREP(RISCV_IOMMU_CMD_IOFENCE_DATA, data) |
RISCV_IOMMU_CMD_IOFENCE_AV;
cmd->dword1 = addr >> 2;
}
static inline void riscv_iommu_cmd_iodir_inval_ddt(struct riscv_iommu_command *cmd)
{
cmd->dword0 = FIELD_PREP(RISCV_IOMMU_CMD_OPCODE, RISCV_IOMMU_CMD_IODIR_OPCODE) |
FIELD_PREP(RISCV_IOMMU_CMD_FUNC, RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT);
cmd->dword1 = 0;
}
static inline void riscv_iommu_cmd_iodir_inval_pdt(struct riscv_iommu_command *cmd)
{
cmd->dword0 = FIELD_PREP(RISCV_IOMMU_CMD_OPCODE, RISCV_IOMMU_CMD_IODIR_OPCODE) |
FIELD_PREP(RISCV_IOMMU_CMD_FUNC, RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT);
cmd->dword1 = 0;
}
static inline void riscv_iommu_cmd_iodir_set_did(struct riscv_iommu_command *cmd,
unsigned int devid)
{
cmd->dword0 |= FIELD_PREP(RISCV_IOMMU_CMD_IODIR_DID, devid) |
RISCV_IOMMU_CMD_IODIR_DV;
}
static inline void riscv_iommu_cmd_iodir_set_pid(struct riscv_iommu_command *cmd,
unsigned int pasid)
{
cmd->dword0 |= FIELD_PREP(RISCV_IOMMU_CMD_IODIR_PID, pasid);
}
#endif /* _RISCV_IOMMU_BITS_H_ */
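
Taken together, the builders compose into the usual invalidate-then-fence sequence. A minimal sketch of flushing a single IOVA for one process context; riscv_iommu_post() stands in for the driver's real command-queue submission path in iommu.c and is a hypothetical name:

#include "iommu.h"

/* Hypothetical example: build and submit IOTINVAL.VMA followed by IOFENCE.C. */
static void example_flush_iova(struct riscv_iommu_device *iommu,
                               unsigned int pscid, u64 iova)
{
        struct riscv_iommu_command cmd;

        riscv_iommu_cmd_inval_vma(&cmd);
        riscv_iommu_cmd_inval_set_pscid(&cmd, pscid);
        riscv_iommu_cmd_inval_set_addr(&cmd, iova);
        riscv_iommu_post(iommu, &cmd);          /* hypothetical submission helper */

        riscv_iommu_cmd_iofence(&cmd);
        riscv_iommu_post(iommu, &cmd);          /* hypothetical submission helper */
}
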


@@ -0,0 +1,120 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright © 2022-2024 Rivos Inc.
* Copyright © 2023 FORTH-ICS/CARV
*
* RISCV IOMMU as a PCIe device
*
* Authors
* Tomasz Jeznach <tjeznach@rivosinc.com>
* Nick Kossifidis <mick@ics.forth.gr>
*/
#include <linux/compiler.h>
#include <linux/init.h>
#include <linux/iommu.h>
#include <linux/kernel.h>
#include <linux/pci.h>
#include "iommu-bits.h"
#include "iommu.h"
/* QEMU RISC-V IOMMU implementation */
#define PCI_DEVICE_ID_REDHAT_RISCV_IOMMU 0x0014
/* Rivos Inc. assigned PCI Vendor and Device IDs */
#ifndef PCI_VENDOR_ID_RIVOS
#define PCI_VENDOR_ID_RIVOS 0x1efd
#endif
#define PCI_DEVICE_ID_RIVOS_RISCV_IOMMU_GA 0x0008
static int riscv_iommu_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
struct device *dev = &pdev->dev;
struct riscv_iommu_device *iommu;
int rc, vec;
rc = pcim_enable_device(pdev);
if (rc)
return rc;
if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM))
return -ENODEV;
if (pci_resource_len(pdev, 0) < RISCV_IOMMU_REG_SIZE)
return -ENODEV;
rc = pcim_iomap_regions(pdev, BIT(0), pci_name(pdev));
if (rc)
return dev_err_probe(dev, rc, "pcim_iomap_regions failed\n");
iommu = devm_kzalloc(dev, sizeof(*iommu), GFP_KERNEL);
if (!iommu)
return -ENOMEM;
iommu->dev = dev;
iommu->reg = pcim_iomap_table(pdev)[0];
pci_set_master(pdev);
dev_set_drvdata(dev, iommu);
/* Check device reported capabilities / features. */
iommu->caps = riscv_iommu_readq(iommu, RISCV_IOMMU_REG_CAPABILITIES);
iommu->fctl = riscv_iommu_readl(iommu, RISCV_IOMMU_REG_FCTL);
/* The PCI driver only uses MSIs, make sure the IOMMU supports this */
switch (FIELD_GET(RISCV_IOMMU_CAPABILITIES_IGS, iommu->caps)) {
case RISCV_IOMMU_CAPABILITIES_IGS_MSI:
case RISCV_IOMMU_CAPABILITIES_IGS_BOTH:
break;
default:
return dev_err_probe(dev, -ENODEV,
"unable to use message-signaled interrupts\n");
}
/* Allocate and assign IRQ vectors for the various events */
rc = pci_alloc_irq_vectors(pdev, 1, RISCV_IOMMU_INTR_COUNT,
PCI_IRQ_MSIX | PCI_IRQ_MSI);
if (rc <= 0)
return dev_err_probe(dev, -ENODEV,
"unable to allocate irq vectors\n");
iommu->irqs_count = rc;
for (vec = 0; vec < iommu->irqs_count; vec++)
iommu->irqs[vec] = msi_get_virq(dev, vec);
/* Use message-signaled interrupts: clear fctl.WSI if the IOMMU came up with it set */
if (iommu->fctl & RISCV_IOMMU_FCTL_WSI) {
iommu->fctl ^= RISCV_IOMMU_FCTL_WSI;
riscv_iommu_writel(iommu, RISCV_IOMMU_REG_FCTL, iommu->fctl);
}
return riscv_iommu_init(iommu);
}
static void riscv_iommu_pci_remove(struct pci_dev *pdev)
{
struct riscv_iommu_device *iommu = dev_get_drvdata(&pdev->dev);
riscv_iommu_remove(iommu);
}
static const struct pci_device_id riscv_iommu_pci_tbl[] = {
{PCI_VDEVICE(REDHAT, PCI_DEVICE_ID_REDHAT_RISCV_IOMMU), 0},
{PCI_VDEVICE(RIVOS, PCI_DEVICE_ID_RIVOS_RISCV_IOMMU_GA), 0},
{0,}
};
static struct pci_driver riscv_iommu_pci_driver = {
.name = KBUILD_MODNAME,
.id_table = riscv_iommu_pci_tbl,
.probe = riscv_iommu_pci_probe,
.remove = riscv_iommu_pci_remove,
.driver = {
.suppress_bind_attrs = true,
},
};
builtin_pci_driver(riscv_iommu_pci_driver);


@@ -0,0 +1,92 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* RISC-V IOMMU as a platform device
*
* Copyright © 2023 FORTH-ICS/CARV
* Copyright © 2023-2024 Rivos Inc.
*
* Authors
* Nick Kossifidis <mick@ics.forth.gr>
* Tomasz Jeznach <tjeznach@rivosinc.com>
*/
#include <linux/kernel.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include "iommu-bits.h"
#include "iommu.h"
static int riscv_iommu_platform_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct riscv_iommu_device *iommu = NULL;
struct resource *res = NULL;
int vec;
iommu = devm_kzalloc(dev, sizeof(*iommu), GFP_KERNEL);
if (!iommu)
return -ENOMEM;
iommu->dev = dev;
iommu->reg = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
if (IS_ERR(iommu->reg))
return dev_err_probe(dev, PTR_ERR(iommu->reg),
"could not map register region\n");
dev_set_drvdata(dev, iommu);
/* Check device reported capabilities / features. */
iommu->caps = riscv_iommu_readq(iommu, RISCV_IOMMU_REG_CAPABILITIES);
iommu->fctl = riscv_iommu_readl(iommu, RISCV_IOMMU_REG_FCTL);
/* For now we only support WSI */
switch (FIELD_GET(RISCV_IOMMU_CAPABILITIES_IGS, iommu->caps)) {
case RISCV_IOMMU_CAPABILITIES_IGS_WSI:
case RISCV_IOMMU_CAPABILITIES_IGS_BOTH:
break;
default:
return dev_err_probe(dev, -ENODEV,
"unable to use wire-signaled interrupts\n");
}
iommu->irqs_count = platform_irq_count(pdev);
if (iommu->irqs_count <= 0)
return dev_err_probe(dev, -ENODEV,
"no IRQ resources provided\n");
if (iommu->irqs_count > RISCV_IOMMU_INTR_COUNT)
iommu->irqs_count = RISCV_IOMMU_INTR_COUNT;
for (vec = 0; vec < iommu->irqs_count; vec++)
iommu->irqs[vec] = platform_get_irq(pdev, vec);
/* Enable wire-signaled interrupts, fctl.WSI */
if (!(iommu->fctl & RISCV_IOMMU_FCTL_WSI)) {
iommu->fctl |= RISCV_IOMMU_FCTL_WSI;
riscv_iommu_writel(iommu, RISCV_IOMMU_REG_FCTL, iommu->fctl);
}
return riscv_iommu_init(iommu);
}
static void riscv_iommu_platform_remove(struct platform_device *pdev)
{
riscv_iommu_remove(dev_get_drvdata(&pdev->dev));
}
static const struct of_device_id riscv_iommu_of_match[] = {
{.compatible = "riscv,iommu",},
{},
};
static struct platform_driver riscv_iommu_platform_driver = {
.probe = riscv_iommu_platform_probe,
.remove_new = riscv_iommu_platform_remove,
.driver = {
.name = "riscv,iommu",
.of_match_table = riscv_iommu_of_match,
.suppress_bind_attrs = true,
},
};
builtin_platform_driver(riscv_iommu_platform_driver);

drivers/iommu/riscv/iommu.c: new file, 1661 lines (diff suppressed because it is too large).


@@ -0,0 +1,88 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright © 2022-2024 Rivos Inc.
* Copyright © 2023 FORTH-ICS/CARV
*
* Authors
* Tomasz Jeznach <tjeznach@rivosinc.com>
* Nick Kossifidis <mick@ics.forth.gr>
*/
#ifndef _RISCV_IOMMU_H_
#define _RISCV_IOMMU_H_
#include <linux/iommu.h>
#include <linux/types.h>
#include <linux/iopoll.h>
#include "iommu-bits.h"
struct riscv_iommu_device;
struct riscv_iommu_queue {
atomic_t prod; /* unbounded producer allocation index */
atomic_t head; /* unbounded shadow ring buffer consumer index */
atomic_t tail; /* unbounded shadow ring buffer producer index */
unsigned int mask; /* index mask, queue length - 1 */
unsigned int irq; /* allocated interrupt number */
struct riscv_iommu_device *iommu; /* iommu device handling the queue when active */
void *base; /* ring buffer kernel pointer */
dma_addr_t phys; /* ring buffer physical address */
u16 qbr; /* base register offset, head and tail reference */
u16 qcr; /* control and status register offset */
u8 qid; /* queue identifier, same as RISCV_IOMMU_INTR_XX */
};
struct riscv_iommu_device {
/* iommu core interface */
struct iommu_device iommu;
/* iommu hardware */
struct device *dev;
/* hardware control register space */
void __iomem *reg;
/* supported and enabled hardware capabilities */
u64 caps;
u32 fctl;
/* available interrupt numbers, MSI or WSI */
unsigned int irqs[RISCV_IOMMU_INTR_COUNT];
unsigned int irqs_count;
unsigned int icvec;
/* hardware queues */
struct riscv_iommu_queue cmdq;
struct riscv_iommu_queue fltq;
/* device directory */
unsigned int ddt_mode;
dma_addr_t ddt_phys;
u64 *ddt_root;
};
int riscv_iommu_init(struct riscv_iommu_device *iommu);
void riscv_iommu_remove(struct riscv_iommu_device *iommu);
#define riscv_iommu_readl(iommu, addr) \
readl_relaxed((iommu)->reg + (addr))
#define riscv_iommu_readq(iommu, addr) \
readq_relaxed((iommu)->reg + (addr))
#define riscv_iommu_writel(iommu, addr, val) \
writel_relaxed((val), (iommu)->reg + (addr))
#define riscv_iommu_writeq(iommu, addr, val) \
writeq_relaxed((val), (iommu)->reg + (addr))
#define riscv_iommu_readq_timeout(iommu, addr, val, cond, delay_us, timeout_us) \
readx_poll_timeout(readq_relaxed, (iommu)->reg + (addr), val, cond, \
delay_us, timeout_us)
#define riscv_iommu_readl_timeout(iommu, addr, val, cond, delay_us, timeout_us) \
readx_poll_timeout(readl_relaxed, (iommu)->reg + (addr), val, cond, \
delay_us, timeout_us)
#endif
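
The two *_timeout wrappers are thin layers over readx_poll_timeout(), so waiting on a hardware status bit is a single statement. A sketch with placeholder register and bit definitions (the offset and bit below are illustrative, not definitions from iommu-bits.h):

#include <linux/bits.h>
#include "iommu.h"

/* Placeholder definitions for illustration only. */
#define EXAMPLE_REG_QUEUE_CSR   0x0048
#define EXAMPLE_QUEUE_CSR_ON    BIT(16)

/* Hypothetical example: poll a control/status register until hardware
 * reports the queue as active, polling every 10us and giving up after 100ms.
 */
static int example_wait_queue_on(struct riscv_iommu_device *iommu)
{
        u32 csr;

        return riscv_iommu_readl_timeout(iommu, EXAMPLE_REG_QUEUE_CSR, csr,
                                         csr & EXAMPLE_QUEUE_CSR_ON,
                                         10, 100000);
}
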


@@ -33,6 +33,8 @@ struct s390_domain {
struct rcu_head rcu;
};
static struct iommu_domain blocking_domain;
static inline unsigned int calc_rtx(dma_addr_t ptr)
{
return ((unsigned long)ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK;
@@ -369,20 +371,36 @@ static void s390_domain_free(struct iommu_domain *domain)
call_rcu(&s390_domain->rcu, s390_iommu_rcu_free_domain);
}
static void s390_iommu_detach_device(struct iommu_domain *domain,
struct device *dev)
static void zdev_s390_domain_update(struct zpci_dev *zdev,
struct iommu_domain *domain)
{
struct s390_domain *s390_domain = to_s390_domain(domain);
struct zpci_dev *zdev = to_zpci_dev(dev);
unsigned long flags;
spin_lock_irqsave(&zdev->dom_lock, flags);
zdev->s390_domain = domain;
spin_unlock_irqrestore(&zdev->dom_lock, flags);
}
static int blocking_domain_attach_device(struct iommu_domain *domain,
struct device *dev)
{
struct zpci_dev *zdev = to_zpci_dev(dev);
struct s390_domain *s390_domain;
unsigned long flags;
if (zdev->s390_domain->type == IOMMU_DOMAIN_BLOCKED)
return 0;
s390_domain = to_s390_domain(zdev->s390_domain);
spin_lock_irqsave(&s390_domain->list_lock, flags);
list_del_rcu(&zdev->iommu_list);
spin_unlock_irqrestore(&s390_domain->list_lock, flags);
zpci_unregister_ioat(zdev, 0);
zdev->s390_domain = NULL;
zdev->dma_table = NULL;
zdev_s390_domain_update(zdev, domain);
return 0;
}
static int s390_iommu_attach_device(struct iommu_domain *domain,
@@ -401,20 +419,15 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
domain->geometry.aperture_end < zdev->start_dma))
return -EINVAL;
if (zdev->s390_domain)
s390_iommu_detach_device(&zdev->s390_domain->domain, dev);
blocking_domain_attach_device(&blocking_domain, dev);
/* If we fail now DMA remains blocked via blocking domain */
cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
virt_to_phys(s390_domain->dma_table), &status);
/*
* If the device is undergoing error recovery the reset code
* will re-establish the new domain.
*/
if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL)
return -EIO;
zdev->dma_table = s390_domain->dma_table;
zdev->s390_domain = s390_domain;
zdev_s390_domain_update(zdev, domain);
spin_lock_irqsave(&s390_domain->list_lock, flags);
list_add_rcu(&zdev->iommu_list, &s390_domain->devices);
@@ -466,21 +479,13 @@ static struct iommu_device *s390_iommu_probe_device(struct device *dev)
if (zdev->tlb_refresh)
dev->iommu->shadow_on_flush = 1;
/* Start with DMA blocked */
spin_lock_init(&zdev->dom_lock);
zdev_s390_domain_update(zdev, &blocking_domain);
return &zdev->iommu_dev;
}
static void s390_iommu_release_device(struct device *dev)
{
struct zpci_dev *zdev = to_zpci_dev(dev);
/*
* release_device is expected to detach any domain currently attached
* to the device, but keep it attached to other devices in the group.
*/
if (zdev)
s390_iommu_detach_device(&zdev->s390_domain->domain, dev);
}
static int zpci_refresh_all(struct zpci_dev *zdev)
{
return zpci_refresh_trans((u64)zdev->fh << 32, zdev->start_dma,
@@ -697,9 +702,15 @@ static size_t s390_iommu_unmap_pages(struct iommu_domain *domain,
struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev)
{
if (!zdev || !zdev->s390_domain)
struct s390_domain *s390_domain;
lockdep_assert_held(&zdev->dom_lock);
if (zdev->s390_domain->type == IOMMU_DOMAIN_BLOCKED)
return NULL;
return &zdev->s390_domain->ctrs;
s390_domain = to_s390_domain(zdev->s390_domain);
return &s390_domain->ctrs;
}
int zpci_init_iommu(struct zpci_dev *zdev)
@@ -776,11 +787,19 @@ static int __init s390_iommu_init(void)
}
subsys_initcall(s390_iommu_init);
static struct iommu_domain blocking_domain = {
.type = IOMMU_DOMAIN_BLOCKED,
.ops = &(const struct iommu_domain_ops) {
.attach_dev = blocking_domain_attach_device,
}
};
static const struct iommu_ops s390_iommu_ops = {
.blocked_domain = &blocking_domain,
.release_domain = &blocking_domain,
.capable = s390_iommu_capable,
.domain_alloc_paging = s390_domain_alloc_paging,
.probe_device = s390_iommu_probe_device,
.release_device = s390_iommu_release_device,
.device_group = generic_device_group,
.pgsize_bitmap = SZ_4K,
.get_resv_regions = s390_iommu_get_resv_regions,


@@ -109,10 +109,10 @@ static int rproc_enable_iommu(struct rproc *rproc)
return 0;
}
domain = iommu_domain_alloc(dev->bus);
if (!domain) {
domain = iommu_paging_domain_alloc(dev);
if (IS_ERR(domain)) {
dev_err(dev, "can't alloc iommu domain\n");
return -ENOMEM;
return PTR_ERR(domain);
}
iommu_set_fault_handler(domain, rproc_iommu_fault, rproc);


@@ -292,7 +292,6 @@ static inline void dmar_copy_shared_irte(struct irte *dst, struct irte *src)
struct irq_data;
extern void dmar_msi_unmask(struct irq_data *data);
extern void dmar_msi_mask(struct irq_data *data);
extern void dmar_msi_read(int irq, struct msi_msg *msg);
extern void dmar_msi_write(int irq, struct msi_msg *msg);
extern int dmar_set_interrupt(struct intel_iommu *iommu);
extern irqreturn_t dmar_fault(int irq, void *dev_id);


@@ -559,8 +559,6 @@ iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size,
* the caller iommu_domain_alloc() returns.
* @domain_alloc_user: Allocate an iommu domain corresponding to the input
* parameters as defined in include/uapi/linux/iommufd.h.
* Unlike @domain_alloc, it is called only by IOMMUFD and
* must fully initialize the new domain before return.
* Upon success, if the @user_data is valid and the @parent
* points to a kernel-managed domain, the new domain must be
* IOMMU_DOMAIN_NESTED type; otherwise, the @parent must be
@@ -676,7 +674,8 @@ struct iommu_ops {
* * EBUSY - device is attached to a domain and cannot be changed
* * ENODEV - device specific errors, not able to be attached
* * <others> - treated as ENODEV by the caller. Use is discouraged
* @set_dev_pasid: set an iommu domain to a pasid of device
* @set_dev_pasid: set or replace an iommu domain for a pasid of a device. On error,
* the pasid of the device should be left in its old configuration.
* @map_pages: map a physically contiguous set of pages of the same size to
* an iommu domain.
* @unmap_pages: unmap a number of pages of the same size from an iommu domain
@@ -701,7 +700,7 @@ struct iommu_domain_ops {
struct iommu_domain_ops {
int (*attach_dev)(struct iommu_domain *domain, struct device *dev);
int (*set_dev_pasid)(struct iommu_domain *domain, struct device *dev,
ioasid_t pasid);
ioasid_t pasid, struct iommu_domain *old);
int (*map_pages)(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t pgsize, size_t pgcount,
@@ -842,12 +841,14 @@ static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather)
};
}
extern int bus_iommu_probe(const struct bus_type *bus);
extern bool iommu_present(const struct bus_type *bus);
extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap);
extern bool iommu_group_has_isolated_msi(struct iommu_group *group);
extern struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus);
struct iommu_domain *iommu_paging_domain_alloc(struct device *dev);
struct iommu_domain *iommu_paging_domain_alloc_flags(struct device *dev, unsigned int flags);
static inline struct iommu_domain *iommu_paging_domain_alloc(struct device *dev)
{
return iommu_paging_domain_alloc_flags(dev, 0);
}
extern void iommu_domain_free(struct iommu_domain *domain);
extern int iommu_attach_device(struct iommu_domain *domain,
struct device *dev);
@@ -1140,19 +1141,15 @@ struct iommu_iotlb_gather {};
struct iommu_dirty_bitmap {};
struct iommu_dirty_ops {};
static inline bool iommu_present(const struct bus_type *bus)
{
return false;
}
static inline bool device_iommu_capable(struct device *dev, enum iommu_cap cap)
{
return false;
}
static inline struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus)
static inline struct iommu_domain *iommu_paging_domain_alloc_flags(struct device *dev,
unsigned int flags)
{
return NULL;
return ERR_PTR(-ENODEV);
}
static inline struct iommu_domain *iommu_paging_domain_alloc(struct device *dev)


@@ -387,11 +387,19 @@ struct iommu_vfio_ioas {
* enforced on device attachment
* @IOMMU_HWPT_FAULT_ID_VALID: The fault_id field of hwpt allocation data is
* valid.
* @IOMMU_HWPT_ALLOC_PASID: Requests a domain that can be used with PASID. The
* domain can be attached to any PASID on the device.
* Any domain attached to the non-PASID part of the
* device must also be flagged, otherwise attaching a
* PASID will be blocked.
* If the IOMMU does not support PASID it will return
* an error (-EOPNOTSUPP).
*/
enum iommufd_hwpt_alloc_flags {
IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0,
IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1,
IOMMU_HWPT_FAULT_ID_VALID = 1 << 2,
IOMMU_HWPT_ALLOC_PASID = 1 << 3,
};
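
From user space the flag is passed in the flags field of the hwpt allocation command. A hedged sketch of requesting a PASID-capable paging domain through iommufd, with error handling trimmed and the IOAS assumed to already exist (the wrapper function itself is hypothetical):

#include <sys/ioctl.h>
#include <linux/iommufd.h>

/* Hypothetical example: allocate a PASID-capable hw pagetable for @dev_id,
 * backed by the IOAS @ioas_id. The ioctl fails with EOPNOTSUPP if the IOMMU
 * has no PASID support.
 */
static int example_alloc_pasid_hwpt(int iommufd, __u32 dev_id, __u32 ioas_id,
                                    __u32 *out_hwpt_id)
{
        struct iommu_hwpt_alloc cmd = {
                .size = sizeof(cmd),
                .flags = IOMMU_HWPT_ALLOC_PASID,
                .dev_id = dev_id,
                .pt_id = ioas_id,
        };

        if (ioctl(iommufd, IOMMU_HWPT_ALLOC, &cmd))
                return -1;

        *out_hwpt_id = cmd.out_hwpt_id;
        return 0;
}
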
/**