Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux.git

Stephen Rothwell 2024-12-20 13:26:07 +11:00
commit 3cb77937e6
9 changed files with 573 additions and 260 deletions

View File

@ -16,7 +16,6 @@ irqreturn_t amd_iommu_int_thread_evtlog(int irq, void *data);
irqreturn_t amd_iommu_int_thread_pprlog(int irq, void *data);
irqreturn_t amd_iommu_int_thread_galog(int irq, void *data);
irqreturn_t amd_iommu_int_handler(int irq, void *data);
void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid);
void amd_iommu_restart_log(struct amd_iommu *iommu, const char *evt_type,
u8 cntrl_intr, u8 cntrl_log,
u32 status_run_mask, u32 status_overflow_mask);
@ -184,3 +183,6 @@ void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
struct dev_table_entry *get_dev_table(struct amd_iommu *iommu);
#endif
struct dev_table_entry *amd_iommu_get_ivhd_dte_flags(u16 segid, u16 devid);
struct iommu_dev_data *search_dev_data(struct amd_iommu *iommu, u16 devid);

View File

@ -220,6 +220,8 @@
#define DEV_ENTRY_EX 0x67
#define DEV_ENTRY_SYSMGT1 0x68
#define DEV_ENTRY_SYSMGT2 0x69
#define DTE_DATA1_SYSMGT_MASK GENMASK_ULL(41, 40)
#define DEV_ENTRY_IRQ_TBL_EN 0x80
#define DEV_ENTRY_INIT_PASS 0xb8
#define DEV_ENTRY_EINT_PASS 0xb9
@ -407,8 +409,7 @@
#define DTE_FLAG_HAD (3ULL << 7)
#define DTE_FLAG_GIOV BIT_ULL(54)
#define DTE_FLAG_GV BIT_ULL(55)
#define DTE_GLX_SHIFT (56)
#define DTE_GLX_MASK (3)
#define DTE_GLX GENMASK_ULL(57, 56)
#define DTE_FLAG_IR BIT_ULL(61)
#define DTE_FLAG_IW BIT_ULL(62)
@ -416,18 +417,18 @@
#define DTE_FLAG_MASK (0x3ffULL << 32)
#define DEV_DOMID_MASK 0xffffULL
#define DTE_GCR3_VAL_A(x) (((x) >> 12) & 0x00007ULL)
#define DTE_GCR3_VAL_B(x) (((x) >> 15) & 0x0ffffULL)
#define DTE_GCR3_VAL_C(x) (((x) >> 31) & 0x1fffffULL)
#define DTE_GCR3_SHIFT_A 58
#define DTE_GCR3_SHIFT_B 16
#define DTE_GCR3_SHIFT_C 43
#define DTE_GCR3_14_12 GENMASK_ULL(60, 58)
#define DTE_GCR3_30_15 GENMASK_ULL(31, 16)
#define DTE_GCR3_51_31 GENMASK_ULL(63, 43)
#define DTE_GPT_LEVEL_SHIFT 54
#define DTE_GPT_LEVEL_MASK GENMASK_ULL(55, 54)
#define GCR3_VALID 0x01ULL
/* DTE[128:179] | DTE[184:191] */
#define DTE_DATA2_INTR_MASK ~GENMASK_ULL(55, 52)
#define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL)
#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_PR)
#define IOMMU_PTE_DIRTY(pte) ((pte) & IOMMU_PTE_HD)
@ -468,7 +469,7 @@ extern bool amd_iommu_dump;
#define DUMP_printk(format, arg...) \
do { \
if (amd_iommu_dump) \
pr_info("AMD-Vi: " format, ## arg); \
pr_info(format, ## arg); \
} while(0);
/* global flag if IOMMUs cache non-present entries */
@ -516,6 +517,9 @@ extern struct kmem_cache *amd_iommu_irq_cache;
#define for_each_pdom_dev_data_safe(pdom_dev_data, next, pdom) \
list_for_each_entry_safe((pdom_dev_data), (next), &pdom->dev_data_list, list)
#define for_each_ivhd_dte_flags(entry) \
list_for_each_entry((entry), &amd_ivhd_dev_flags_list, list)
struct amd_iommu;
struct iommu_domain;
struct irq_domain;
@ -837,6 +841,7 @@ struct devid_map {
struct iommu_dev_data {
/*Protect against attach/detach races */
struct mutex mutex;
spinlock_t dte_lock; /* DTE lock for 256-bit access */
struct list_head list; /* For domain->dev_list */
struct llist_node dev_data_list; /* For global dev_data_list */
@ -881,7 +886,21 @@ extern struct list_head amd_iommu_list;
* Structure defining one entry in the device table
*/
struct dev_table_entry {
u64 data[4];
union {
u64 data[4];
u128 data128[2];
};
};
/*
* Structure to store persistent DTE flags from IVHD
*/
struct ivhd_dte_flags {
struct list_head list;
u16 segid;
u16 devid_first;
u16 devid_last;
struct dev_table_entry dte;
};
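As an aside (sketch only, not part of this series): the union lets the driver address the same 256-bit entry either as four 64-bit words or as two 128-bit halves. A minimal illustration of the intended aliasing on a little-endian build, assuming <linux/types.h> and <linux/bug.h> are available:

/* Sketch only: data128[1] aliases data[2] (low half) and data[3] (high half). */
static void dte_union_example(struct dev_table_entry *dte)
{
	WARN_ON((u64)dte->data128[1] != dte->data[2]);
	WARN_ON((u64)(dte->data128[1] >> 64) != dte->data[3]);
}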
/*

View File

@ -174,8 +174,8 @@ bool amd_iommu_snp_en;
EXPORT_SYMBOL(amd_iommu_snp_en);
LIST_HEAD(amd_iommu_pci_seg_list); /* list of all PCI segments */
LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the
system */
LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the system */
LIST_HEAD(amd_ivhd_dev_flags_list); /* list of all IVHD device entry settings */
/* Number of IOMMUs present in the system */
static int amd_iommus_present;
@ -984,36 +984,12 @@ static void iommu_enable_gt(struct amd_iommu *iommu)
}
/* sets a specific bit in the device table entry. */
static void __set_dev_entry_bit(struct dev_table_entry *dev_table,
u16 devid, u8 bit)
static void set_dte_bit(struct dev_table_entry *dte, u8 bit)
{
int i = (bit >> 6) & 0x03;
int _bit = bit & 0x3f;
dev_table[devid].data[i] |= (1UL << _bit);
}
static void set_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit)
{
struct dev_table_entry *dev_table = get_dev_table(iommu);
return __set_dev_entry_bit(dev_table, devid, bit);
}
static int __get_dev_entry_bit(struct dev_table_entry *dev_table,
u16 devid, u8 bit)
{
int i = (bit >> 6) & 0x03;
int _bit = bit & 0x3f;
return (dev_table[devid].data[i] & (1UL << _bit)) >> _bit;
}
static int get_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit)
{
struct dev_table_entry *dev_table = get_dev_table(iommu);
return __get_dev_entry_bit(dev_table, devid, bit);
dte->data[i] |= (1UL << _bit);
}
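For orientation (sketch only, not part of the patch): the DEV_ENTRY_* constants are absolute bit positions within the 256-bit DTE, so set_dte_bit() selects the 64-bit word with bit >> 6 and the bit within that word with bit & 0x3f. For example, DEV_ENTRY_IRQ_TBL_EN (0x80 = bit 128) lands in data[2], bit 0:

/* Sketch only: 0x80 >> 6 = 2 and 0x80 & 0x3f = 0. */
static void set_dte_bit_example(void)
{
	struct dev_table_entry dte = {};

	set_dte_bit(&dte, DEV_ENTRY_IRQ_TBL_EN);
	WARN_ON(dte.data[2] != 1ULL);
}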
static bool __copy_device_table(struct amd_iommu *iommu)
@ -1081,11 +1057,9 @@ static bool __copy_device_table(struct amd_iommu *iommu)
}
/* If gcr3 table existed, mask it out */
if (old_devtb[devid].data[0] & DTE_FLAG_GV) {
tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B;
tmp |= DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C;
tmp = (DTE_GCR3_30_15 | DTE_GCR3_51_31);
pci_seg->old_dev_tbl_cpy[devid].data[1] &= ~tmp;
tmp = DTE_GCR3_VAL_A(~0ULL) << DTE_GCR3_SHIFT_A;
tmp |= DTE_FLAG_GV;
tmp = (DTE_GCR3_14_12 | DTE_FLAG_GV);
pci_seg->old_dev_tbl_cpy[devid].data[0] &= ~tmp;
}
}
@ -1136,42 +1110,107 @@ static bool copy_device_table(void)
return true;
}
void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid)
struct dev_table_entry *amd_iommu_get_ivhd_dte_flags(u16 segid, u16 devid)
{
int sysmgt;
struct ivhd_dte_flags *e;
unsigned int best_len = UINT_MAX;
struct dev_table_entry *dte = NULL;
sysmgt = get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1) |
(get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2) << 1);
for_each_ivhd_dte_flags(e) {
/*
* Need to go through the whole list to find the smallest range
* that contains the devid.
*/
if ((e->segid == segid) &&
(e->devid_first <= devid) && (devid <= e->devid_last)) {
unsigned int len = e->devid_last - e->devid_first;
if (sysmgt == 0x01)
set_dev_entry_bit(iommu, devid, DEV_ENTRY_IW);
if (len < best_len) {
dte = &(e->dte);
best_len = len;
}
}
}
return dte;
}
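A hypothetical usage sketch (segment and devids invented for illustration): if both a DEV_ALL entry covering 0x0000-0xffff and a narrower 0x0010-0x001f entry were recorded for segment 0, a lookup for devid 0x0015 returns the flags of the narrower range:

/* Sketch only: the narrowest matching range wins. */
static void ivhd_dte_flags_lookup_example(void)
{
	struct dev_table_entry *dte;

	dte = amd_iommu_get_ivhd_dte_flags(0, 0x0015);
	if (!dte)
		pr_debug("no IVHD flags recorded for this devid\n");
}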
static bool search_ivhd_dte_flags(u16 segid, u16 first, u16 last)
{
struct ivhd_dte_flags *e;
for_each_ivhd_dte_flags(e) {
if ((e->segid == segid) &&
(e->devid_first == first) &&
(e->devid_last == last))
return true;
}
return false;
}
/*
* This function takes the device specific flags read from the ACPI
* table and sets up the device table entry with that information
*/
static void __init
set_dev_entry_from_acpi_range(struct amd_iommu *iommu, u16 first, u16 last,
u32 flags, u32 ext_flags)
{
int i;
struct dev_table_entry dte = {};
/* Parse IVHD DTE setting flags and store information */
if (flags) {
struct ivhd_dte_flags *d;
if (search_ivhd_dte_flags(iommu->pci_seg->id, first, last))
return;
d = kzalloc(sizeof(struct ivhd_dte_flags), GFP_KERNEL);
if (!d)
return;
pr_debug("%s: devid range %#x:%#x\n", __func__, first, last);
if (flags & ACPI_DEVFLAG_INITPASS)
set_dte_bit(&dte, DEV_ENTRY_INIT_PASS);
if (flags & ACPI_DEVFLAG_EXTINT)
set_dte_bit(&dte, DEV_ENTRY_EINT_PASS);
if (flags & ACPI_DEVFLAG_NMI)
set_dte_bit(&dte, DEV_ENTRY_NMI_PASS);
if (flags & ACPI_DEVFLAG_SYSMGT1)
set_dte_bit(&dte, DEV_ENTRY_SYSMGT1);
if (flags & ACPI_DEVFLAG_SYSMGT2)
set_dte_bit(&dte, DEV_ENTRY_SYSMGT2);
if (flags & ACPI_DEVFLAG_LINT0)
set_dte_bit(&dte, DEV_ENTRY_LINT0_PASS);
if (flags & ACPI_DEVFLAG_LINT1)
set_dte_bit(&dte, DEV_ENTRY_LINT1_PASS);
/* Apply erratum 63, which needs info in initial_dte */
if (FIELD_GET(DTE_DATA1_SYSMGT_MASK, dte.data[1]) == 0x1)
dte.data[0] |= DTE_FLAG_IW;
memcpy(&d->dte, &dte, sizeof(dte));
d->segid = iommu->pci_seg->id;
d->devid_first = first;
d->devid_last = last;
list_add_tail(&d->list, &amd_ivhd_dev_flags_list);
}
for (i = first; i <= last; i++) {
if (flags) {
struct dev_table_entry *dev_table = get_dev_table(iommu);
memcpy(&dev_table[i], &dte, sizeof(dte));
}
amd_iommu_set_rlookup_table(iommu, i);
}
}
static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu,
u16 devid, u32 flags, u32 ext_flags)
{
if (flags & ACPI_DEVFLAG_INITPASS)
set_dev_entry_bit(iommu, devid, DEV_ENTRY_INIT_PASS);
if (flags & ACPI_DEVFLAG_EXTINT)
set_dev_entry_bit(iommu, devid, DEV_ENTRY_EINT_PASS);
if (flags & ACPI_DEVFLAG_NMI)
set_dev_entry_bit(iommu, devid, DEV_ENTRY_NMI_PASS);
if (flags & ACPI_DEVFLAG_SYSMGT1)
set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1);
if (flags & ACPI_DEVFLAG_SYSMGT2)
set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2);
if (flags & ACPI_DEVFLAG_LINT0)
set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT0_PASS);
if (flags & ACPI_DEVFLAG_LINT1)
set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT1_PASS);
amd_iommu_apply_erratum_63(iommu, devid);
amd_iommu_set_rlookup_table(iommu, devid);
set_dev_entry_from_acpi_range(iommu, devid, devid, flags, ext_flags);
}
int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line)
@ -1239,7 +1278,7 @@ static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u32 *devid,
entry->cmd_line = cmd_line;
entry->root_devid = (entry->devid & (~0x7));
pr_info("%s, add hid:%s, uid:%s, rdevid:%d\n",
pr_info("%s, add hid:%s, uid:%s, rdevid:%#x\n",
entry->cmd_line ? "cmd" : "ivrs",
entry->hid, entry->uid, entry->root_devid);
@ -1331,15 +1370,12 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
switch (e->type) {
case IVHD_DEV_ALL:
DUMP_printk(" DEV_ALL\t\t\tflags: %02x\n", e->flags);
for (dev_i = 0; dev_i <= pci_seg->last_bdf; ++dev_i)
set_dev_entry_from_acpi(iommu, dev_i, e->flags, 0);
DUMP_printk(" DEV_ALL\t\t\tsetting: %#02x\n", e->flags);
set_dev_entry_from_acpi_range(iommu, 0, pci_seg->last_bdf, e->flags, 0);
break;
case IVHD_DEV_SELECT:
DUMP_printk(" DEV_SELECT\t\t\t devid: %04x:%02x:%02x.%x "
"flags: %02x\n",
DUMP_printk(" DEV_SELECT\t\t\tdevid: %04x:%02x:%02x.%x flags: %#02x\n",
seg_id, PCI_BUS_NUM(e->devid),
PCI_SLOT(e->devid),
PCI_FUNC(e->devid),
@ -1350,8 +1386,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
break;
case IVHD_DEV_SELECT_RANGE_START:
DUMP_printk(" DEV_SELECT_RANGE_START\t "
"devid: %04x:%02x:%02x.%x flags: %02x\n",
DUMP_printk(" DEV_SELECT_RANGE_START\tdevid: %04x:%02x:%02x.%x flags: %#02x\n",
seg_id, PCI_BUS_NUM(e->devid),
PCI_SLOT(e->devid),
PCI_FUNC(e->devid),
@ -1364,8 +1399,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
break;
case IVHD_DEV_ALIAS:
DUMP_printk(" DEV_ALIAS\t\t\t devid: %04x:%02x:%02x.%x "
"flags: %02x devid_to: %02x:%02x.%x\n",
DUMP_printk(" DEV_ALIAS\t\t\tdevid: %04x:%02x:%02x.%x flags: %#02x devid_to: %02x:%02x.%x\n",
seg_id, PCI_BUS_NUM(e->devid),
PCI_SLOT(e->devid),
PCI_FUNC(e->devid),
@ -1382,9 +1416,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
break;
case IVHD_DEV_ALIAS_RANGE:
DUMP_printk(" DEV_ALIAS_RANGE\t\t "
"devid: %04x:%02x:%02x.%x flags: %02x "
"devid_to: %04x:%02x:%02x.%x\n",
DUMP_printk(" DEV_ALIAS_RANGE\t\tdevid: %04x:%02x:%02x.%x flags: %#02x devid_to: %04x:%02x:%02x.%x\n",
seg_id, PCI_BUS_NUM(e->devid),
PCI_SLOT(e->devid),
PCI_FUNC(e->devid),
@ -1401,8 +1433,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
break;
case IVHD_DEV_EXT_SELECT:
DUMP_printk(" DEV_EXT_SELECT\t\t devid: %04x:%02x:%02x.%x "
"flags: %02x ext: %08x\n",
DUMP_printk(" DEV_EXT_SELECT\t\tdevid: %04x:%02x:%02x.%x flags: %#02x ext: %08x\n",
seg_id, PCI_BUS_NUM(e->devid),
PCI_SLOT(e->devid),
PCI_FUNC(e->devid),
@ -1414,8 +1445,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
break;
case IVHD_DEV_EXT_SELECT_RANGE:
DUMP_printk(" DEV_EXT_SELECT_RANGE\t devid: "
"%04x:%02x:%02x.%x flags: %02x ext: %08x\n",
DUMP_printk(" DEV_EXT_SELECT_RANGE\tdevid: %04x:%02x:%02x.%x flags: %#02x ext: %08x\n",
seg_id, PCI_BUS_NUM(e->devid),
PCI_SLOT(e->devid),
PCI_FUNC(e->devid),
@ -1428,21 +1458,18 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
break;
case IVHD_DEV_RANGE_END:
DUMP_printk(" DEV_RANGE_END\t\t devid: %04x:%02x:%02x.%x\n",
DUMP_printk(" DEV_RANGE_END\t\tdevid: %04x:%02x:%02x.%x\n",
seg_id, PCI_BUS_NUM(e->devid),
PCI_SLOT(e->devid),
PCI_FUNC(e->devid));
devid = e->devid;
for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
if (alias) {
if (alias)
pci_seg->alias_table[dev_i] = devid_to;
set_dev_entry_from_acpi(iommu,
devid_to, flags, ext_flags);
}
set_dev_entry_from_acpi(iommu, dev_i,
flags, ext_flags);
}
set_dev_entry_from_acpi_range(iommu, devid_start, devid, flags, ext_flags);
set_dev_entry_from_acpi(iommu, devid_to, flags, ext_flags);
break;
case IVHD_DEV_SPECIAL: {
u8 handle, type;
@ -1461,11 +1488,12 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
else
var = "UNKNOWN";
DUMP_printk(" DEV_SPECIAL(%s[%d])\t\tdevid: %04x:%02x:%02x.%x\n",
DUMP_printk(" DEV_SPECIAL(%s[%d])\t\tdevid: %04x:%02x:%02x.%x, flags: %#02x\n",
var, (int)handle,
seg_id, PCI_BUS_NUM(devid),
PCI_SLOT(devid),
PCI_FUNC(devid));
PCI_FUNC(devid),
e->flags);
ret = add_special_device(type, handle, &devid, false);
if (ret)
@ -1525,11 +1553,12 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
}
devid = PCI_SEG_DEVID_TO_SBDF(seg_id, e->devid);
DUMP_printk(" DEV_ACPI_HID(%s[%s])\t\tdevid: %04x:%02x:%02x.%x\n",
DUMP_printk(" DEV_ACPI_HID(%s[%s])\t\tdevid: %04x:%02x:%02x.%x, flags: %#02x\n",
hid, uid, seg_id,
PCI_BUS_NUM(devid),
PCI_SLOT(devid),
PCI_FUNC(devid));
PCI_FUNC(devid),
e->flags);
flags = e->flags;
@ -1757,13 +1786,8 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h,
else
iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
/*
* Note: GA (128-bit IRTE) mode requires cmpxchg16b supports.
* GAM also requires GA mode. Therefore, we need to
* check cmpxchg16b support before enabling it.
*/
if (!boot_cpu_has(X86_FEATURE_CX16) ||
((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0))
/* GAM requires GA mode. */
if ((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0)
amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
break;
case 0x11:
@ -1773,13 +1797,8 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h,
else
iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
/*
* Note: GA (128-bit IRTE) mode requires cmpxchg16b supports.
* XT, GAM also requires GA mode. Therefore, we need to
* check cmpxchg16b support before enabling them.
*/
if (!boot_cpu_has(X86_FEATURE_CX16) ||
((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0)) {
/* XT and GAM require GA mode. */
if ((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0) {
amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
break;
}
@ -2575,9 +2594,9 @@ static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg)
return;
for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
__set_dev_entry_bit(dev_table, devid, DEV_ENTRY_VALID);
set_dte_bit(&dev_table[devid], DEV_ENTRY_VALID);
if (!amd_iommu_snp_en)
__set_dev_entry_bit(dev_table, devid, DEV_ENTRY_TRANSLATION);
set_dte_bit(&dev_table[devid], DEV_ENTRY_TRANSLATION);
}
}
@ -2605,8 +2624,7 @@ static void init_device_table(void)
for_each_pci_segment(pci_seg) {
for (devid = 0; devid <= pci_seg->last_bdf; ++devid)
__set_dev_entry_bit(pci_seg->dev_table,
devid, DEV_ENTRY_IRQ_TBL_EN);
set_dte_bit(&pci_seg->dev_table[devid], DEV_ENTRY_IRQ_TBL_EN);
}
}
@ -3033,6 +3051,11 @@ static int __init early_amd_iommu_init(void)
return -EINVAL;
}
if (!boot_cpu_has(X86_FEATURE_CX16)) {
pr_err("Failed to initialize. The CMPXCHG16B feature is required.\n");
return -EINVAL;
}
/*
* Validate checksum here so we don't need to do it when
* we actually parse the table

View File

@ -83,12 +83,142 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
static void set_dte_entry(struct amd_iommu *iommu,
struct iommu_dev_data *dev_data);
static void iommu_flush_dte_sync(struct amd_iommu *iommu, u16 devid);
static struct iommu_dev_data *find_dev_data(struct amd_iommu *iommu, u16 devid);
/****************************************************************************
*
* Helper functions
*
****************************************************************************/
static __always_inline void amd_iommu_atomic128_set(__int128 *ptr, __int128 val)
{
/*
* Note:
* We use arch_cmpxchg128_local() because:
* - The 128-bit store to the DTE needs the cmpxchg16b instruction
* (a full compare-and-exchange is not necessary, since this function
* is already protected by a spin_lock for this DTE).
* - Neither the LOCK prefix nor a retry loop is needed, thanks to the spin_lock.
*/
arch_cmpxchg128_local(ptr, *ptr, val);
}
static void write_dte_upper128(struct dev_table_entry *ptr, struct dev_table_entry *new)
{
struct dev_table_entry old;
old.data128[1] = ptr->data128[1];
/*
* Preserve DTE_DATA2_INTR_MASK. This needs to be
* done here because it must run inside the
* spin_lock(&dev_data->dte_lock) context.
*/
new->data[2] &= ~DTE_DATA2_INTR_MASK;
new->data[2] |= old.data[2] & DTE_DATA2_INTR_MASK;
amd_iommu_atomic128_set(&ptr->data128[1], new->data128[1]);
}
static void write_dte_lower128(struct dev_table_entry *ptr, struct dev_table_entry *new)
{
amd_iommu_atomic128_set(&ptr->data128[0], new->data128[0]);
}
/*
* Note:
* The IOMMU reads the entire Device Table entry in a single 256-bit transaction,
* but the driver programs the DTE using two 128-bit cmpxchg operations. So the
* driver needs to ensure the following:
* - The DTE[V|GV] bit is written last when setting.
* - The DTE[V|GV] bit is written first when clearing.
*
* This function is used only by code that updates the DMA translation part of
* the DTE, so only control bits related to DMA are considered when updating the entry.
*/
static void update_dte256(struct amd_iommu *iommu, struct iommu_dev_data *dev_data,
struct dev_table_entry *new)
{
unsigned long flags;
struct dev_table_entry *dev_table = get_dev_table(iommu);
struct dev_table_entry *ptr = &dev_table[dev_data->devid];
spin_lock_irqsave(&dev_data->dte_lock, flags);
if (!(ptr->data[0] & DTE_FLAG_V)) {
/* Existing DTE is not valid. */
write_dte_upper128(ptr, new);
write_dte_lower128(ptr, new);
iommu_flush_dte_sync(iommu, dev_data->devid);
} else if (!(new->data[0] & DTE_FLAG_V)) {
/* Existing DTE is valid. New DTE is not valid. */
write_dte_lower128(ptr, new);
write_dte_upper128(ptr, new);
iommu_flush_dte_sync(iommu, dev_data->devid);
} else if (!FIELD_GET(DTE_FLAG_GV, ptr->data[0])) {
/*
* Both DTEs are valid.
* Existing DTE has no guest page table.
*/
write_dte_upper128(ptr, new);
write_dte_lower128(ptr, new);
iommu_flush_dte_sync(iommu, dev_data->devid);
} else if (!FIELD_GET(DTE_FLAG_GV, new->data[0])) {
/*
* Both DTEs are valid.
* Existing DTE has guest page table,
* new DTE has no guest page table,
*/
write_dte_lower128(ptr, new);
write_dte_upper128(ptr, new);
iommu_flush_dte_sync(iommu, dev_data->devid);
} else if (FIELD_GET(DTE_GPT_LEVEL_MASK, ptr->data[2]) !=
FIELD_GET(DTE_GPT_LEVEL_MASK, new->data[2])) {
/*
* Both DTEs are valid and have a guest page table,
* but with different numbers of levels. So, we need
* to update both the upper and lower 128-bit values,
* which requires disabling and flushing the entry.
*/
struct dev_table_entry clear = {};
/* First disable DTE */
write_dte_lower128(ptr, &clear);
iommu_flush_dte_sync(iommu, dev_data->devid);
/* Then update DTE */
write_dte_upper128(ptr, new);
write_dte_lower128(ptr, new);
iommu_flush_dte_sync(iommu, dev_data->devid);
} else {
/*
* Both DTEs are valid, have a guest page table, and use the
* same number of levels. We only need to update the lower
* 128 bits, so there is no need to disable the DTE.
*/
write_dte_lower128(ptr, new);
}
spin_unlock_irqrestore(&dev_data->dte_lock, flags);
}
static void get_dte256(struct amd_iommu *iommu, struct iommu_dev_data *dev_data,
struct dev_table_entry *dte)
{
unsigned long flags;
struct dev_table_entry *ptr;
struct dev_table_entry *dev_table = get_dev_table(iommu);
ptr = &dev_table[dev_data->devid];
spin_lock_irqsave(&dev_data->dte_lock, flags);
dte->data128[0] = ptr->data128[0];
dte->data128[1] = ptr->data128[1];
spin_unlock_irqrestore(&dev_data->dte_lock, flags);
}
static inline bool pdom_is_v2_pgtbl_mode(struct protection_domain *pdom)
{
return (pdom && (pdom->pd_mode == PD_MODE_V2));
@ -209,6 +339,7 @@ static struct iommu_dev_data *alloc_dev_data(struct amd_iommu *iommu, u16 devid)
return NULL;
mutex_init(&dev_data->mutex);
spin_lock_init(&dev_data->dte_lock);
dev_data->devid = devid;
ratelimit_default_init(&dev_data->rs);
@ -216,7 +347,7 @@ static struct iommu_dev_data *alloc_dev_data(struct amd_iommu *iommu, u16 devid)
return dev_data;
}
static struct iommu_dev_data *search_dev_data(struct amd_iommu *iommu, u16 devid)
struct iommu_dev_data *search_dev_data(struct amd_iommu *iommu, u16 devid)
{
struct iommu_dev_data *dev_data;
struct llist_node *node;
@ -236,9 +367,11 @@ static struct iommu_dev_data *search_dev_data(struct amd_iommu *iommu, u16 devid
static int clone_alias(struct pci_dev *pdev, u16 alias, void *data)
{
struct dev_table_entry new;
struct amd_iommu *iommu;
struct dev_table_entry *dev_table;
struct iommu_dev_data *dev_data, *alias_data;
u16 devid = pci_dev_id(pdev);
int ret = 0;
if (devid == alias)
return 0;
@ -247,13 +380,27 @@ static int clone_alias(struct pci_dev *pdev, u16 alias, void *data)
if (!iommu)
return 0;
amd_iommu_set_rlookup_table(iommu, alias);
dev_table = get_dev_table(iommu);
memcpy(dev_table[alias].data,
dev_table[devid].data,
sizeof(dev_table[alias].data));
/* Copy the data from pdev */
dev_data = dev_iommu_priv_get(&pdev->dev);
if (!dev_data) {
pr_err("%s : Failed to get dev_data for 0x%x\n", __func__, devid);
ret = -EINVAL;
goto out;
}
get_dte256(iommu, dev_data, &new);
return 0;
/* Setup alias */
alias_data = find_dev_data(iommu, alias);
if (!alias_data) {
pr_err("%s : Failed to get alias dev_data for 0x%x\n", __func__, alias);
ret = -EINVAL;
goto out;
}
update_dte256(iommu, alias_data, &new);
amd_iommu_set_rlookup_table(iommu, alias);
out:
return ret;
}
static void clone_aliases(struct amd_iommu *iommu, struct device *dev)
@ -526,6 +673,12 @@ static int iommu_init_device(struct amd_iommu *iommu, struct device *dev)
return -ENOMEM;
dev_data->dev = dev;
/*
* dev_iommu_priv_set() needs to be called before setup_aliases().
* Otherwise, a subsequent call to dev_iommu_priv_get() will fail.
*/
dev_iommu_priv_set(dev, dev_data);
setup_aliases(iommu, dev);
/*
@ -539,8 +692,6 @@ static int iommu_init_device(struct amd_iommu *iommu, struct device *dev)
dev_data->flags = pdev_get_caps(to_pci_dev(dev));
}
dev_iommu_priv_set(dev, dev_data);
return 0;
}
@ -571,10 +722,13 @@ static void iommu_ignore_device(struct amd_iommu *iommu, struct device *dev)
static void dump_dte_entry(struct amd_iommu *iommu, u16 devid)
{
int i;
struct dev_table_entry *dev_table = get_dev_table(iommu);
struct dev_table_entry dte;
struct iommu_dev_data *dev_data = find_dev_data(iommu, devid);
get_dte256(iommu, dev_data, &dte);
for (i = 0; i < 4; ++i)
pr_err("DTE[%d]: %016llx\n", i, dev_table[devid].data[i]);
pr_err("DTE[%d]: %016llx\n", i, dte.data[i]);
}
static void dump_command(unsigned long phys_addr)
@ -1261,6 +1415,15 @@ static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid)
return iommu_queue_command(iommu, &cmd);
}
static void iommu_flush_dte_sync(struct amd_iommu *iommu, u16 devid)
{
int ret;
ret = iommu_flush_dte(iommu, devid);
if (!ret)
iommu_completion_wait(iommu);
}
static void amd_iommu_flush_dte_all(struct amd_iommu *iommu)
{
u32 devid;
@ -1826,90 +1989,109 @@ int amd_iommu_clear_gcr3(struct iommu_dev_data *dev_data, ioasid_t pasid)
return ret;
}
static void make_clear_dte(struct iommu_dev_data *dev_data, struct dev_table_entry *ptr,
struct dev_table_entry *new)
{
/* All existing DTE must have V bit set */
new->data128[0] = DTE_FLAG_V;
new->data128[1] = 0;
}
/*
* Note:
* The old values for the GCR3 table and GPT have already been cleared by the caller.
*/
static void set_dte_gcr3_table(struct amd_iommu *iommu,
struct iommu_dev_data *dev_data,
struct dev_table_entry *target)
{
struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info;
u64 gcr3;
if (!gcr3_info->gcr3_tbl)
return;
pr_debug("%s: devid=%#x, glx=%#x, gcr3_tbl=%#llx\n",
__func__, dev_data->devid, gcr3_info->glx,
(unsigned long long)gcr3_info->gcr3_tbl);
gcr3 = iommu_virt_to_phys(gcr3_info->gcr3_tbl);
target->data[0] |= DTE_FLAG_GV |
FIELD_PREP(DTE_GLX, gcr3_info->glx) |
FIELD_PREP(DTE_GCR3_14_12, gcr3 >> 12);
if (pdom_is_v2_pgtbl_mode(dev_data->domain))
target->data[0] |= DTE_FLAG_GIOV;
target->data[1] |= FIELD_PREP(DTE_GCR3_30_15, gcr3 >> 15) |
FIELD_PREP(DTE_GCR3_51_31, gcr3 >> 31);
/* Guest page table can only support 4 and 5 levels */
if (amd_iommu_gpt_level == PAGE_MODE_5_LEVEL)
target->data[2] |= FIELD_PREP(DTE_GPT_LEVEL_MASK, GUEST_PGTABLE_5_LEVEL);
else
target->data[2] |= FIELD_PREP(DTE_GPT_LEVEL_MASK, GUEST_PGTABLE_4_LEVEL);
}
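As a reading aid (sketch only, not part of the patch), the three DTE_GCR3_* fields carve bits 14:12, 30:15 and 51:31 of the GCR3 root pointer into data[0] and data[1]; recombining them with FIELD_GET() from <linux/bitfield.h> recovers the original table address:

/* Sketch only: reassemble the GCR3 root pointer from a programmed DTE. */
static u64 dte_to_gcr3_example(const struct dev_table_entry *dte)
{
	u64 gcr3;

	gcr3  = FIELD_GET(DTE_GCR3_14_12, dte->data[0]) << 12;
	gcr3 |= FIELD_GET(DTE_GCR3_30_15, dte->data[1]) << 15;
	gcr3 |= FIELD_GET(DTE_GCR3_51_31, dte->data[1]) << 31;

	return gcr3;
}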
static void set_dte_entry(struct amd_iommu *iommu,
struct iommu_dev_data *dev_data)
{
u64 pte_root = 0;
u64 flags = 0;
u32 old_domid;
u16 devid = dev_data->devid;
u16 domid;
u32 old_domid;
struct dev_table_entry *initial_dte;
struct dev_table_entry new = {};
struct protection_domain *domain = dev_data->domain;
struct dev_table_entry *dev_table = get_dev_table(iommu);
struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info;
struct dev_table_entry *dte = &get_dev_table(iommu)[dev_data->devid];
if (gcr3_info && gcr3_info->gcr3_tbl)
domid = dev_data->gcr3_info.domid;
else
domid = domain->id;
if (domain->iop.mode != PAGE_MODE_NONE)
pte_root = iommu_virt_to_phys(domain->iop.root);
make_clear_dte(dev_data, dte, &new);
pte_root |= (domain->iop.mode & DEV_ENTRY_MODE_MASK)
if (domain->iop.mode != PAGE_MODE_NONE)
new.data[0] = iommu_virt_to_phys(domain->iop.root);
new.data[0] |= (domain->iop.mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V;
new.data[0] |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V;
/*
* When SNP is enabled, Only set TV bit when IOMMU
* page translation is in use.
* When SNP is enabled, TV=1 is only supported with a non-zero domain ID.
* The zero-domain-ID case is prevented by the SNP-enable and
* IOMMU_DOMAIN_IDENTITY check in do_iommu_domain_alloc().
*/
if (!amd_iommu_snp_en || (domid != 0))
pte_root |= DTE_FLAG_TV;
flags = dev_table[devid].data[1];
if (dev_data->ats_enabled)
flags |= DTE_FLAG_IOTLB;
WARN_ON(amd_iommu_snp_en && (domid == 0));
new.data[0] |= DTE_FLAG_TV;
if (dev_data->ppr)
pte_root |= 1ULL << DEV_ENTRY_PPR;
new.data[0] |= 1ULL << DEV_ENTRY_PPR;
if (domain->dirty_tracking)
pte_root |= DTE_FLAG_HAD;
new.data[0] |= DTE_FLAG_HAD;
if (gcr3_info && gcr3_info->gcr3_tbl) {
u64 gcr3 = iommu_virt_to_phys(gcr3_info->gcr3_tbl);
u64 glx = gcr3_info->glx;
u64 tmp;
if (dev_data->ats_enabled)
new.data[1] |= DTE_FLAG_IOTLB;
pte_root |= DTE_FLAG_GV;
pte_root |= (glx & DTE_GLX_MASK) << DTE_GLX_SHIFT;
old_domid = READ_ONCE(dte->data[1]) & DEV_DOMID_MASK;
new.data[1] |= domid;
/* First mask out possible old values for GCR3 table */
tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B;
flags &= ~tmp;
tmp = DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C;
flags &= ~tmp;
/* Encode GCR3 table into DTE */
tmp = DTE_GCR3_VAL_A(gcr3) << DTE_GCR3_SHIFT_A;
pte_root |= tmp;
tmp = DTE_GCR3_VAL_B(gcr3) << DTE_GCR3_SHIFT_B;
flags |= tmp;
tmp = DTE_GCR3_VAL_C(gcr3) << DTE_GCR3_SHIFT_C;
flags |= tmp;
if (amd_iommu_gpt_level == PAGE_MODE_5_LEVEL) {
dev_table[devid].data[2] |=
((u64)GUEST_PGTABLE_5_LEVEL << DTE_GPT_LEVEL_SHIFT);
}
/* GIOV is supported with V2 page table mode only */
if (pdom_is_v2_pgtbl_mode(domain))
pte_root |= DTE_FLAG_GIOV;
/*
* Restore cached persistent DTE bits, which can be set from information
* in the IVRS table. See set_dev_entry_from_acpi().
*/
initial_dte = amd_iommu_get_ivhd_dte_flags(iommu->pci_seg->id, dev_data->devid);
if (initial_dte) {
new.data128[0] |= initial_dte->data128[0];
new.data128[1] |= initial_dte->data128[1];
}
flags &= ~DEV_DOMID_MASK;
flags |= domid;
set_dte_gcr3_table(iommu, dev_data, &new);
old_domid = dev_table[devid].data[1] & DEV_DOMID_MASK;
dev_table[devid].data[1] = flags;
dev_table[devid].data[0] = pte_root;
update_dte256(iommu, dev_data, &new);
/*
* A kdump kernel might be replacing a domain ID that was copied from
@ -1921,19 +2103,16 @@ static void set_dte_entry(struct amd_iommu *iommu,
}
}
static void clear_dte_entry(struct amd_iommu *iommu, u16 devid)
/*
* Clear DMA-remap-related flags to block all DMA (blocked domain).
*/
static void clear_dte_entry(struct amd_iommu *iommu, struct iommu_dev_data *dev_data)
{
struct dev_table_entry *dev_table = get_dev_table(iommu);
struct dev_table_entry new = {};
struct dev_table_entry *dte = &get_dev_table(iommu)[dev_data->devid];
/* remove entry from the device table seen by the hardware */
dev_table[devid].data[0] = DTE_FLAG_V;
if (!amd_iommu_snp_en)
dev_table[devid].data[0] |= DTE_FLAG_TV;
dev_table[devid].data[1] &= DTE_FLAG_MASK;
amd_iommu_apply_erratum_63(iommu, devid);
make_clear_dte(dev_data, dte, &new);
update_dte256(iommu, dev_data, &new);
}
/* Update and flush DTE for the given device */
@ -1944,7 +2123,7 @@ static void dev_update_dte(struct iommu_dev_data *dev_data, bool set)
if (set)
set_dte_entry(iommu, dev_data);
else
clear_dte_entry(iommu, dev_data->devid);
clear_dte_entry(iommu, dev_data);
clone_aliases(iommu, dev_data->dev);
device_flush_dte(dev_data);
@ -2475,10 +2654,19 @@ static int blocked_domain_attach_device(struct iommu_domain *domain,
return 0;
}
static int blocked_domain_set_dev_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid,
struct iommu_domain *old)
{
amd_iommu_remove_dev_pasid(dev, pasid, old);
return 0;
}
static struct iommu_domain blocked_domain = {
.type = IOMMU_DOMAIN_BLOCKED,
.ops = &(const struct iommu_domain_ops) {
.attach_dev = blocked_domain_attach_device,
.set_dev_pasid = blocked_domain_set_dev_pasid,
}
};
@ -2666,12 +2854,12 @@ static int amd_iommu_set_dirty_tracking(struct iommu_domain *domain,
bool enable)
{
struct protection_domain *pdomain = to_pdomain(domain);
struct dev_table_entry *dev_table;
struct dev_table_entry *dte;
struct iommu_dev_data *dev_data;
bool domain_flush = false;
struct amd_iommu *iommu;
unsigned long flags;
u64 pte_root;
u64 new;
spin_lock_irqsave(&pdomain->lock, flags);
if (!(pdomain->dirty_tracking ^ enable)) {
@ -2680,16 +2868,15 @@ static int amd_iommu_set_dirty_tracking(struct iommu_domain *domain,
}
list_for_each_entry(dev_data, &pdomain->dev_list, list) {
spin_lock(&dev_data->dte_lock);
iommu = get_amd_iommu_from_dev_data(dev_data);
dev_table = get_dev_table(iommu);
pte_root = dev_table[dev_data->devid].data[0];
pte_root = (enable ? pte_root | DTE_FLAG_HAD :
pte_root & ~DTE_FLAG_HAD);
dte = &get_dev_table(iommu)[dev_data->devid];
new = dte->data[0];
new = (enable ? new | DTE_FLAG_HAD : new & ~DTE_FLAG_HAD);
dte->data[0] = new;
spin_unlock(&dev_data->dte_lock);
/* Flush device DTE */
dev_table[dev_data->devid].data[0] = pte_root;
device_flush_dte(dev_data);
domain_flush = true;
}
@ -2901,7 +3088,6 @@ const struct iommu_ops amd_iommu_ops = {
.def_domain_type = amd_iommu_def_domain_type,
.dev_enable_feat = amd_iommu_dev_enable_feature,
.dev_disable_feat = amd_iommu_dev_disable_feature,
.remove_dev_pasid = amd_iommu_remove_dev_pasid,
.page_response = amd_iommu_page_response,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = amd_iommu_attach_device,
@ -2956,17 +3142,23 @@ static void iommu_flush_irt_and_complete(struct amd_iommu *iommu, u16 devid)
static void set_dte_irq_entry(struct amd_iommu *iommu, u16 devid,
struct irq_remap_table *table)
{
u64 dte;
struct dev_table_entry *dev_table = get_dev_table(iommu);
u64 new;
struct dev_table_entry *dte = &get_dev_table(iommu)[devid];
struct iommu_dev_data *dev_data = search_dev_data(iommu, devid);
dte = dev_table[devid].data[2];
dte &= ~DTE_IRQ_PHYS_ADDR_MASK;
dte |= iommu_virt_to_phys(table->table);
dte |= DTE_IRQ_REMAP_INTCTL;
dte |= DTE_INTTABLEN;
dte |= DTE_IRQ_REMAP_ENABLE;
if (dev_data)
spin_lock(&dev_data->dte_lock);
dev_table[devid].data[2] = dte;
new = READ_ONCE(dte->data[2]);
new &= ~DTE_IRQ_PHYS_ADDR_MASK;
new |= iommu_virt_to_phys(table->table);
new |= DTE_IRQ_REMAP_INTCTL;
new |= DTE_INTTABLEN;
new |= DTE_IRQ_REMAP_ENABLE;
WRITE_ONCE(dte->data[2], new);
if (dev_data)
spin_unlock(&dev_data->dte_lock);
}
static struct irq_remap_table *get_irq_table(struct amd_iommu *iommu, u16 devid)

View File

@ -3016,13 +3016,12 @@ int arm_smmu_set_pasid(struct arm_smmu_master *master,
return ret;
}
static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
struct iommu_domain *domain)
static int arm_smmu_blocking_set_dev_pasid(struct iommu_domain *new_domain,
struct device *dev, ioasid_t pasid,
struct iommu_domain *old_domain)
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(old_domain);
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
struct arm_smmu_domain *smmu_domain;
smmu_domain = to_smmu_domain(domain);
mutex_lock(&arm_smmu_asid_lock);
arm_smmu_clear_cd(master, pasid);
@ -3043,6 +3042,7 @@ static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
sid_domain->type == IOMMU_DOMAIN_BLOCKED)
sid_domain->ops->attach_dev(sid_domain, dev);
}
return 0;
}
static void arm_smmu_attach_dev_ste(struct iommu_domain *domain,
@ -3124,6 +3124,7 @@ static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain,
static const struct iommu_domain_ops arm_smmu_blocked_ops = {
.attach_dev = arm_smmu_attach_dev_blocked,
.set_dev_pasid = arm_smmu_blocking_set_dev_pasid,
};
static struct iommu_domain arm_smmu_blocked_domain = {
@ -3551,7 +3552,6 @@ static struct iommu_ops arm_smmu_ops = {
.device_group = arm_smmu_device_group,
.of_xlate = arm_smmu_of_xlate,
.get_resv_regions = arm_smmu_get_resv_regions,
.remove_dev_pasid = arm_smmu_remove_dev_pasid,
.dev_enable_feat = arm_smmu_dev_enable_feature,
.dev_disable_feat = arm_smmu_dev_disable_feature,
.page_response = arm_smmu_page_response,

View File

@ -3250,10 +3250,15 @@ static int blocking_domain_attach_dev(struct iommu_domain *domain,
return 0;
}
static int blocking_domain_set_dev_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid,
struct iommu_domain *old);
static struct iommu_domain blocking_domain = {
.type = IOMMU_DOMAIN_BLOCKED,
.ops = &(const struct iommu_domain_ops) {
.attach_dev = blocking_domain_attach_dev,
.set_dev_pasid = blocking_domain_set_dev_pasid,
}
};
@ -4099,13 +4104,16 @@ void domain_remove_dev_pasid(struct iommu_domain *domain,
kfree(dev_pasid);
}
static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
struct iommu_domain *domain)
static int blocking_domain_set_dev_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid,
struct iommu_domain *old)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
intel_pasid_tear_down_entry(info->iommu, dev, pasid, false);
domain_remove_dev_pasid(domain, dev, pasid);
domain_remove_dev_pasid(old, dev, pasid);
return 0;
}
struct dev_pasid_info *
@ -4478,7 +4486,6 @@ const struct iommu_ops intel_iommu_ops = {
.dev_disable_feat = intel_iommu_dev_disable_feat,
.is_attach_deferred = intel_iommu_is_attach_deferred,
.def_domain_type = device_def_domain_type,
.remove_dev_pasid = intel_iommu_remove_dev_pasid,
.pgsize_bitmap = SZ_4K,
.page_response = intel_iommu_page_response,
.default_domain_ops = &(const struct iommu_domain_ops) {

View File

@ -3312,6 +3312,16 @@ bool iommu_group_dma_owner_claimed(struct iommu_group *group)
}
EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed);
static void iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
struct iommu_domain *domain)
{
const struct iommu_ops *ops = dev_iommu_ops(dev);
struct iommu_domain *blocked_domain = ops->blocked_domain;
WARN_ON(blocked_domain->ops->set_dev_pasid(blocked_domain,
dev, pasid, domain));
}
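In other words (sketch only, driver names hypothetical): with the remove_dev_pasid op gone, a driver that supports PASID detach through the core is expected to expose a static blocked domain whose ops implement set_dev_pasid, roughly:

/* Sketch of the driver-side contract assumed by iommu_remove_dev_pasid(). */
static int foo_blocked_set_dev_pasid(struct iommu_domain *domain,
				     struct device *dev, ioasid_t pasid,
				     struct iommu_domain *old)
{
	/* Tear down whatever 'old' programmed for this PASID. */
	return 0;
}

static struct iommu_domain foo_blocked_domain = {
	.type = IOMMU_DOMAIN_BLOCKED,
	.ops = &(const struct iommu_domain_ops) {
		.set_dev_pasid = foo_blocked_set_dev_pasid,
	},
};

/* The driver then publishes it via its iommu_ops, e.g. .blocked_domain = &foo_blocked_domain. */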
static int __iommu_set_group_pasid(struct iommu_domain *domain,
struct iommu_group *group, ioasid_t pasid)
{
@ -3330,11 +3340,9 @@ static int __iommu_set_group_pasid(struct iommu_domain *domain,
err_revert:
last_gdev = device;
for_each_group_device(group, device) {
const struct iommu_ops *ops = dev_iommu_ops(device->dev);
if (device == last_gdev)
break;
ops->remove_dev_pasid(device->dev, pasid, domain);
iommu_remove_dev_pasid(device->dev, pasid, domain);
}
return ret;
}
@ -3344,12 +3352,9 @@ static void __iommu_remove_group_pasid(struct iommu_group *group,
struct iommu_domain *domain)
{
struct group_device *device;
const struct iommu_ops *ops;
for_each_group_device(group, device) {
ops = dev_iommu_ops(device->dev);
ops->remove_dev_pasid(device->dev, pasid, domain);
}
for_each_group_device(group, device)
iommu_remove_dev_pasid(device->dev, pasid, domain);
}
/*
@ -3368,16 +3373,20 @@ int iommu_attach_device_pasid(struct iommu_domain *domain,
/* Caller must be a probed driver on dev */
struct iommu_group *group = dev->iommu_group;
struct group_device *device;
const struct iommu_ops *ops;
int ret;
if (!domain->ops->set_dev_pasid)
return -EOPNOTSUPP;
if (!group)
return -ENODEV;
if (!dev_has_iommu(dev) || dev_iommu_ops(dev) != domain->owner ||
pasid == IOMMU_NO_PASID)
ops = dev_iommu_ops(dev);
if (!domain->ops->set_dev_pasid ||
!ops->blocked_domain ||
!ops->blocked_domain->ops->set_dev_pasid)
return -EOPNOTSUPP;
if (ops != domain->owner || pasid == IOMMU_NO_PASID)
return -EINVAL;
mutex_lock(&group->mutex);

View File

@ -11,18 +11,43 @@
*/
#include <linux/kernel.h>
#include <linux/msi.h>
#include <linux/of_irq.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include "iommu-bits.h"
#include "iommu.h"
static void riscv_iommu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
{
struct device *dev = msi_desc_to_dev(desc);
struct riscv_iommu_device *iommu = dev_get_drvdata(dev);
u16 idx = desc->msi_index;
u64 addr;
addr = ((u64)msg->address_hi << 32) | msg->address_lo;
if (addr != (addr & RISCV_IOMMU_MSI_CFG_TBL_ADDR)) {
dev_err_once(dev,
"uh oh, the IOMMU can't send MSIs to 0x%llx, sending to 0x%llx instead\n",
addr, addr & RISCV_IOMMU_MSI_CFG_TBL_ADDR);
}
addr &= RISCV_IOMMU_MSI_CFG_TBL_ADDR;
riscv_iommu_writeq(iommu, RISCV_IOMMU_REG_MSI_CFG_TBL_ADDR(idx), addr);
riscv_iommu_writel(iommu, RISCV_IOMMU_REG_MSI_CFG_TBL_DATA(idx), msg->data);
riscv_iommu_writel(iommu, RISCV_IOMMU_REG_MSI_CFG_TBL_CTRL(idx), 0);
}
static int riscv_iommu_platform_probe(struct platform_device *pdev)
{
enum riscv_iommu_igs_settings igs;
struct device *dev = &pdev->dev;
struct riscv_iommu_device *iommu = NULL;
struct resource *res = NULL;
int vec;
int vec, ret;
iommu = devm_kzalloc(dev, sizeof(*iommu), GFP_KERNEL);
if (!iommu)
@ -40,16 +65,6 @@ static int riscv_iommu_platform_probe(struct platform_device *pdev)
iommu->caps = riscv_iommu_readq(iommu, RISCV_IOMMU_REG_CAPABILITIES);
iommu->fctl = riscv_iommu_readl(iommu, RISCV_IOMMU_REG_FCTL);
/* For now we only support WSI */
switch (FIELD_GET(RISCV_IOMMU_CAPABILITIES_IGS, iommu->caps)) {
case RISCV_IOMMU_CAPABILITIES_IGS_WSI:
case RISCV_IOMMU_CAPABILITIES_IGS_BOTH:
break;
default:
return dev_err_probe(dev, -ENODEV,
"unable to use wire-signaled interrupts\n");
}
iommu->irqs_count = platform_irq_count(pdev);
if (iommu->irqs_count <= 0)
return dev_err_probe(dev, -ENODEV,
@ -57,13 +72,58 @@ static int riscv_iommu_platform_probe(struct platform_device *pdev)
if (iommu->irqs_count > RISCV_IOMMU_INTR_COUNT)
iommu->irqs_count = RISCV_IOMMU_INTR_COUNT;
for (vec = 0; vec < iommu->irqs_count; vec++)
iommu->irqs[vec] = platform_get_irq(pdev, vec);
igs = FIELD_GET(RISCV_IOMMU_CAPABILITIES_IGS, iommu->caps);
switch (igs) {
case RISCV_IOMMU_CAPABILITIES_IGS_BOTH:
case RISCV_IOMMU_CAPABILITIES_IGS_MSI:
if (is_of_node(dev->fwnode))
of_msi_configure(dev, to_of_node(dev->fwnode));
/* Enable wire-signaled interrupts, fctl.WSI */
if (!(iommu->fctl & RISCV_IOMMU_FCTL_WSI)) {
iommu->fctl |= RISCV_IOMMU_FCTL_WSI;
riscv_iommu_writel(iommu, RISCV_IOMMU_REG_FCTL, iommu->fctl);
if (!dev_get_msi_domain(dev)) {
dev_warn(dev, "failed to find an MSI domain\n");
goto msi_fail;
}
ret = platform_device_msi_init_and_alloc_irqs(dev, iommu->irqs_count,
riscv_iommu_write_msi_msg);
if (ret) {
dev_warn(dev, "failed to allocate MSIs\n");
goto msi_fail;
}
for (vec = 0; vec < iommu->irqs_count; vec++)
iommu->irqs[vec] = msi_get_virq(dev, vec);
/* Enable message-signaled interrupts by clearing fctl.WSI */
if (iommu->fctl & RISCV_IOMMU_FCTL_WSI) {
iommu->fctl ^= RISCV_IOMMU_FCTL_WSI;
riscv_iommu_writel(iommu, RISCV_IOMMU_REG_FCTL, iommu->fctl);
}
dev_info(dev, "using MSIs\n");
break;
msi_fail:
if (igs != RISCV_IOMMU_CAPABILITIES_IGS_BOTH) {
return dev_err_probe(dev, -ENODEV,
"unable to use wire-signaled interrupts\n");
}
fallthrough;
case RISCV_IOMMU_CAPABILITIES_IGS_WSI:
for (vec = 0; vec < iommu->irqs_count; vec++)
iommu->irqs[vec] = platform_get_irq(pdev, vec);
/* Enable wire-signaled interrupts, fctl.WSI */
if (!(iommu->fctl & RISCV_IOMMU_FCTL_WSI)) {
iommu->fctl |= RISCV_IOMMU_FCTL_WSI;
riscv_iommu_writel(iommu, RISCV_IOMMU_REG_FCTL, iommu->fctl);
}
dev_info(dev, "using wire-signaled interrupts\n");
break;
default:
return dev_err_probe(dev, -ENODEV, "invalid IGS\n");
}
return riscv_iommu_init(iommu);
@ -71,7 +131,13 @@ static int riscv_iommu_platform_probe(struct platform_device *pdev)
static void riscv_iommu_platform_remove(struct platform_device *pdev)
{
riscv_iommu_remove(dev_get_drvdata(&pdev->dev));
struct riscv_iommu_device *iommu = dev_get_drvdata(&pdev->dev);
bool msi = !(iommu->fctl & RISCV_IOMMU_FCTL_WSI);
riscv_iommu_remove(iommu);
if (msi)
platform_device_msi_free_irqs_all(&pdev->dev);
};
static const struct of_device_id riscv_iommu_of_match[] = {

View File

@ -587,9 +587,6 @@ iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size,
* - IOMMU_DOMAIN_DMA: must use a dma domain
* - 0: use the default setting
* @default_domain_ops: the default ops for domains
* @remove_dev_pasid: Remove any translation configurations of a specific
* pasid, so that any DMA transactions with this pasid
* will be blocked by the hardware.
* @viommu_alloc: Allocate an iommufd_viommu on a physical IOMMU instance behind
* the @dev, as the set of virtualization resources shared/passed
* to user space IOMMU instance. And associate it with a nesting
@ -647,8 +644,6 @@ struct iommu_ops {
struct iommu_page_response *msg);
int (*def_domain_type)(struct device *dev);
void (*remove_dev_pasid)(struct device *dev, ioasid_t pasid,
struct iommu_domain *domain);
struct iommufd_viommu *(*viommu_alloc)(
struct device *dev, struct iommu_domain *parent_domain,