From 06c375053cefc3a2f383d200596abe5ab3fb35f9 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Sat, 13 Apr 2024 00:25:12 +0000 Subject: [PATCH 01/11] iommu/vt-d: add wrapper functions for page allocations In order to improve observability and accountability of IOMMU layer, we must account the number of pages that are allocated by functions that are calling directly into buddy allocator. This is achieved by first wrapping the allocation related functions into a separate inline functions in new file: drivers/iommu/iommu-pages.h Convert all page allocation calls under iommu/intel to use these new functions. Signed-off-by: Pasha Tatashin Acked-by: David Rientjes Tested-by: Bagas Sanjaya Link: https://lore.kernel.org/r/20240413002522.1101315-2-pasha.tatashin@soleen.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/dmar.c | 16 +-- drivers/iommu/intel/iommu.c | 47 +++------ drivers/iommu/intel/iommu.h | 2 - drivers/iommu/intel/irq_remapping.c | 16 +-- drivers/iommu/intel/pasid.c | 18 ++-- drivers/iommu/intel/svm.c | 11 +- drivers/iommu/iommu-pages.h | 154 ++++++++++++++++++++++++++++ 7 files changed, 201 insertions(+), 63 deletions(-) create mode 100644 drivers/iommu/iommu-pages.h diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 36d7427b1202..87ad996e5257 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -32,6 +32,7 @@ #include "iommu.h" #include "../irq_remapping.h" +#include "../iommu-pages.h" #include "perf.h" #include "trace.h" #include "perfmon.h" @@ -1187,7 +1188,7 @@ static void free_iommu(struct intel_iommu *iommu) } if (iommu->qi) { - free_page((unsigned long)iommu->qi->desc); + iommu_free_page(iommu->qi->desc); kfree(iommu->qi->desc_status); kfree(iommu->qi); } @@ -1755,7 +1756,8 @@ static void __dmar_enable_qi(struct intel_iommu *iommu) int dmar_enable_qi(struct intel_iommu *iommu) { struct q_inval *qi; - struct page *desc_page; + void *desc; + int order; if (!ecap_qis(iommu->ecap)) return -ENOENT; @@ -1776,19 +1778,19 @@ int dmar_enable_qi(struct intel_iommu *iommu) * Need two pages to accommodate 256 descriptors of 256 bits each * if the remapping hardware supports scalable mode translation. */ - desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, - !!ecap_smts(iommu->ecap)); - if (!desc_page) { + order = ecap_smts(iommu->ecap) ? 1 : 0; + desc = iommu_alloc_pages_node(iommu->node, GFP_ATOMIC, order); + if (!desc) { kfree(qi); iommu->qi = NULL; return -ENOMEM; } - qi->desc = page_address(desc_page); + qi->desc = desc; qi->desc_status = kcalloc(QI_LENGTH, sizeof(int), GFP_ATOMIC); if (!qi->desc_status) { - free_page((unsigned long) qi->desc); + iommu_free_page(qi->desc); kfree(qi); iommu->qi = NULL; return -ENOMEM; diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index a7ecd90303dc..daaa7a22595e 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -27,6 +27,7 @@ #include "iommu.h" #include "../dma-iommu.h" #include "../irq_remapping.h" +#include "../iommu-pages.h" #include "pasid.h" #include "cap_audit.h" #include "perfmon.h" @@ -298,22 +299,6 @@ static int __init intel_iommu_setup(char *str) } __setup("intel_iommu=", intel_iommu_setup); -void *alloc_pgtable_page(int node, gfp_t gfp) -{ - struct page *page; - void *vaddr = NULL; - - page = alloc_pages_node(node, gfp | __GFP_ZERO, 0); - if (page) - vaddr = page_address(page); - return vaddr; -} - -void free_pgtable_page(void *vaddr) -{ - free_page((unsigned long)vaddr); -} - static int domain_type_is_si(struct dmar_domain *domain) { return domain->domain.type == IOMMU_DOMAIN_IDENTITY; @@ -545,7 +530,7 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, if (!alloc) return NULL; - context = alloc_pgtable_page(iommu->node, GFP_ATOMIC); + context = iommu_alloc_page_node(iommu->node, GFP_ATOMIC); if (!context) return NULL; @@ -719,17 +704,17 @@ static void free_context_table(struct intel_iommu *iommu) for (i = 0; i < ROOT_ENTRY_NR; i++) { context = iommu_context_addr(iommu, i, 0, 0); if (context) - free_pgtable_page(context); + iommu_free_page(context); if (!sm_supported(iommu)) continue; context = iommu_context_addr(iommu, i, 0x80, 0); if (context) - free_pgtable_page(context); + iommu_free_page(context); } - free_pgtable_page(iommu->root_entry); + iommu_free_page(iommu->root_entry); iommu->root_entry = NULL; } @@ -867,7 +852,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, if (!dma_pte_present(pte)) { uint64_t pteval; - tmp_page = alloc_pgtable_page(domain->nid, gfp); + tmp_page = iommu_alloc_page_node(domain->nid, gfp); if (!tmp_page) return NULL; @@ -879,7 +864,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, if (cmpxchg64(&pte->val, 0ULL, pteval)) /* Someone else set it while we were thinking; use theirs. */ - free_pgtable_page(tmp_page); + iommu_free_page(tmp_page); else domain_flush_cache(domain, pte, sizeof(*pte)); } @@ -992,7 +977,7 @@ static void dma_pte_free_level(struct dmar_domain *domain, int level, last_pfn < level_pfn + level_size(level) - 1)) { dma_clear_pte(pte); domain_flush_cache(domain, pte, sizeof(*pte)); - free_pgtable_page(level_pte); + iommu_free_page(level_pte); } next: pfn += level_size(level); @@ -1016,7 +1001,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, /* free pgd */ if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) { - free_pgtable_page(domain->pgd); + iommu_free_page(domain->pgd); domain->pgd = NULL; } } @@ -1118,7 +1103,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu) { struct root_entry *root; - root = alloc_pgtable_page(iommu->node, GFP_ATOMIC); + root = iommu_alloc_page_node(iommu->node, GFP_ATOMIC); if (!root) { pr_err("Allocating root entry for %s failed\n", iommu->name); @@ -1841,7 +1826,7 @@ static void domain_exit(struct dmar_domain *domain) LIST_HEAD(freelist); domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw), &freelist); - put_pages_list(&freelist); + iommu_put_pages_list(&freelist); } if (WARN_ON(!list_empty(&domain->devices))) @@ -2497,7 +2482,7 @@ static int copy_context_table(struct intel_iommu *iommu, if (!old_ce) goto out; - new_ce = alloc_pgtable_page(iommu->node, GFP_KERNEL); + new_ce = iommu_alloc_page_node(iommu->node, GFP_KERNEL); if (!new_ce) goto out_unmap; @@ -3426,7 +3411,7 @@ static int intel_iommu_memory_notifier(struct notifier_block *nb, start_vpfn, mhp->nr_pages, list_empty(&freelist), 0); rcu_read_unlock(); - put_pages_list(&freelist); + iommu_put_pages_list(&freelist); } break; } @@ -3833,7 +3818,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) domain->max_addr = 0; /* always allocate the top pgd */ - domain->pgd = alloc_pgtable_page(domain->nid, GFP_ATOMIC); + domain->pgd = iommu_alloc_page_node(domain->nid, GFP_ATOMIC); if (!domain->pgd) return -ENOMEM; domain_flush_cache(domain, domain->pgd, PAGE_SIZE); @@ -3987,7 +3972,7 @@ int prepare_domain_attach_device(struct iommu_domain *domain, pte = dmar_domain->pgd; if (dma_pte_present(pte)) { dmar_domain->pgd = phys_to_virt(dma_pte_addr(pte)); - free_pgtable_page(pte); + iommu_free_page(pte); } dmar_domain->agaw--; } @@ -4141,7 +4126,7 @@ static void intel_iommu_tlb_sync(struct iommu_domain *domain, if (dmar_domain->nested_parent) parent_domain_flush(dmar_domain, start_pfn, nrpages, list_empty(&gather->freelist)); - put_pages_list(&gather->freelist); + iommu_put_pages_list(&gather->freelist); } static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 404d2476a877..8d081d8c6f41 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -1085,8 +1085,6 @@ void domain_update_iommu_cap(struct dmar_domain *domain); int dmar_ir_support(void); -void *alloc_pgtable_page(int node, gfp_t gfp); -void free_pgtable_page(void *vaddr); void iommu_flush_write_buffer(struct intel_iommu *iommu); struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *parent, const struct iommu_user_data *user_data); diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index 566297bc87dd..39cd9626eb8d 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -22,6 +22,7 @@ #include "iommu.h" #include "../irq_remapping.h" +#include "../iommu-pages.h" #include "cap_audit.h" enum irq_mode { @@ -527,7 +528,7 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) struct ir_table *ir_table; struct fwnode_handle *fn; unsigned long *bitmap; - struct page *pages; + void *ir_table_base; if (iommu->ir_table) return 0; @@ -536,9 +537,9 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) if (!ir_table) return -ENOMEM; - pages = alloc_pages_node(iommu->node, GFP_KERNEL | __GFP_ZERO, - INTR_REMAP_PAGE_ORDER); - if (!pages) { + ir_table_base = iommu_alloc_pages_node(iommu->node, GFP_KERNEL, + INTR_REMAP_PAGE_ORDER); + if (!ir_table_base) { pr_err("IR%d: failed to allocate pages of order %d\n", iommu->seq_id, INTR_REMAP_PAGE_ORDER); goto out_free_table; @@ -573,7 +574,7 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) else iommu->ir_domain->msi_parent_ops = &dmar_msi_parent_ops; - ir_table->base = page_address(pages); + ir_table->base = ir_table_base; ir_table->bitmap = bitmap; iommu->ir_table = ir_table; @@ -622,7 +623,7 @@ out_free_fwnode: out_free_bitmap: bitmap_free(bitmap); out_free_pages: - __free_pages(pages, INTR_REMAP_PAGE_ORDER); + iommu_free_pages(ir_table_base, INTR_REMAP_PAGE_ORDER); out_free_table: kfree(ir_table); @@ -643,8 +644,7 @@ static void intel_teardown_irq_remapping(struct intel_iommu *iommu) irq_domain_free_fwnode(fn); iommu->ir_domain = NULL; } - free_pages((unsigned long)iommu->ir_table->base, - INTR_REMAP_PAGE_ORDER); + iommu_free_pages(iommu->ir_table->base, INTR_REMAP_PAGE_ORDER); bitmap_free(iommu->ir_table->bitmap); kfree(iommu->ir_table); iommu->ir_table = NULL; diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 11f0b856d74c..abce19e2ad6f 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -20,6 +20,7 @@ #include "iommu.h" #include "pasid.h" +#include "../iommu-pages.h" /* * Intel IOMMU system wide PASID name space: @@ -38,7 +39,7 @@ int intel_pasid_alloc_table(struct device *dev) { struct device_domain_info *info; struct pasid_table *pasid_table; - struct page *pages; + struct pasid_dir_entry *dir; u32 max_pasid = 0; int order, size; @@ -59,14 +60,13 @@ int intel_pasid_alloc_table(struct device *dev) size = max_pasid >> (PASID_PDE_SHIFT - 3); order = size ? get_order(size) : 0; - pages = alloc_pages_node(info->iommu->node, - GFP_KERNEL | __GFP_ZERO, order); - if (!pages) { + dir = iommu_alloc_pages_node(info->iommu->node, GFP_KERNEL, order); + if (!dir) { kfree(pasid_table); return -ENOMEM; } - pasid_table->table = page_address(pages); + pasid_table->table = dir; pasid_table->order = order; pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3); info->pasid_table = pasid_table; @@ -97,10 +97,10 @@ void intel_pasid_free_table(struct device *dev) max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT; for (i = 0; i < max_pde; i++) { table = get_pasid_table_from_pde(&dir[i]); - free_pgtable_page(table); + iommu_free_page(table); } - free_pages((unsigned long)pasid_table->table, pasid_table->order); + iommu_free_pages(pasid_table->table, pasid_table->order); kfree(pasid_table); } @@ -146,7 +146,7 @@ static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid) retry: entries = get_pasid_table_from_pde(&dir[dir_index]); if (!entries) { - entries = alloc_pgtable_page(info->iommu->node, GFP_ATOMIC); + entries = iommu_alloc_page_node(info->iommu->node, GFP_ATOMIC); if (!entries) return NULL; @@ -158,7 +158,7 @@ retry: */ if (cmpxchg64(&dir[dir_index].val, 0ULL, (u64)virt_to_phys(entries) | PASID_PTE_PRESENT)) { - free_pgtable_page(entries); + iommu_free_page(entries); goto retry; } if (!ecap_coherent(info->iommu->ecap)) { diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index ee3b469e2da1..71c1b2a0ca16 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -22,6 +22,7 @@ #include "iommu.h" #include "pasid.h" #include "perf.h" +#include "../iommu-pages.h" #include "trace.h" static irqreturn_t prq_event_thread(int irq, void *d); @@ -63,16 +64,14 @@ svm_lookup_device_by_dev(struct intel_svm *svm, struct device *dev) int intel_svm_enable_prq(struct intel_iommu *iommu) { struct iopf_queue *iopfq; - struct page *pages; int irq, ret; - pages = alloc_pages_node(iommu->node, GFP_KERNEL | __GFP_ZERO, PRQ_ORDER); - if (!pages) { + iommu->prq = iommu_alloc_pages_node(iommu->node, GFP_KERNEL, PRQ_ORDER); + if (!iommu->prq) { pr_warn("IOMMU: %s: Failed to allocate page request queue\n", iommu->name); return -ENOMEM; } - iommu->prq = page_address(pages); irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PRQ + iommu->seq_id, iommu->node, iommu); if (irq <= 0) { @@ -117,7 +116,7 @@ free_hwirq: dmar_free_hwirq(irq); iommu->pr_irq = 0; free_prq: - free_pages((unsigned long)iommu->prq, PRQ_ORDER); + iommu_free_pages(iommu->prq, PRQ_ORDER); iommu->prq = NULL; return ret; @@ -140,7 +139,7 @@ int intel_svm_finish_prq(struct intel_iommu *iommu) iommu->iopf_queue = NULL; } - free_pages((unsigned long)iommu->prq, PRQ_ORDER); + iommu_free_pages(iommu->prq, PRQ_ORDER); iommu->prq = NULL; return 0; diff --git a/drivers/iommu/iommu-pages.h b/drivers/iommu/iommu-pages.h new file mode 100644 index 000000000000..5a222d0ad25c --- /dev/null +++ b/drivers/iommu/iommu-pages.h @@ -0,0 +1,154 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, Google LLC. + * Pasha Tatashin + */ + +#ifndef __IOMMU_PAGES_H +#define __IOMMU_PAGES_H + +#include +#include +#include + +/* + * All page allocations that should be reported to as "iommu-pagetables" to + * userspace must use one of the functions below. This includes allocations of + * page-tables and other per-iommu_domain configuration structures. + * + * This is necessary for the proper accounting as IOMMU state can be rather + * large, i.e. multiple gigabytes in size. + */ + +/** + * __iommu_alloc_pages - allocate a zeroed page of a given order. + * @gfp: buddy allocator flags + * @order: page order + * + * returns the head struct page of the allocated page. + */ +static inline struct page *__iommu_alloc_pages(gfp_t gfp, int order) +{ + struct page *page; + + page = alloc_pages(gfp | __GFP_ZERO, order); + if (unlikely(!page)) + return NULL; + + return page; +} + +/** + * __iommu_free_pages - free page of a given order + * @page: head struct page of the page + * @order: page order + */ +static inline void __iommu_free_pages(struct page *page, int order) +{ + if (!page) + return; + + __free_pages(page, order); +} + +/** + * iommu_alloc_pages_node - allocate a zeroed page of a given order from + * specific NUMA node. + * @nid: memory NUMA node id + * @gfp: buddy allocator flags + * @order: page order + * + * returns the virtual address of the allocated page + */ +static inline void *iommu_alloc_pages_node(int nid, gfp_t gfp, int order) +{ + struct page *page = alloc_pages_node(nid, gfp | __GFP_ZERO, order); + + if (unlikely(!page)) + return NULL; + + return page_address(page); +} + +/** + * iommu_alloc_pages - allocate a zeroed page of a given order + * @gfp: buddy allocator flags + * @order: page order + * + * returns the virtual address of the allocated page + */ +static inline void *iommu_alloc_pages(gfp_t gfp, int order) +{ + struct page *page = __iommu_alloc_pages(gfp, order); + + if (unlikely(!page)) + return NULL; + + return page_address(page); +} + +/** + * iommu_alloc_page_node - allocate a zeroed page at specific NUMA node. + * @nid: memory NUMA node id + * @gfp: buddy allocator flags + * + * returns the virtual address of the allocated page + */ +static inline void *iommu_alloc_page_node(int nid, gfp_t gfp) +{ + return iommu_alloc_pages_node(nid, gfp, 0); +} + +/** + * iommu_alloc_page - allocate a zeroed page + * @gfp: buddy allocator flags + * + * returns the virtual address of the allocated page + */ +static inline void *iommu_alloc_page(gfp_t gfp) +{ + return iommu_alloc_pages(gfp, 0); +} + +/** + * iommu_free_pages - free page of a given order + * @virt: virtual address of the page to be freed. + * @order: page order + */ +static inline void iommu_free_pages(void *virt, int order) +{ + if (!virt) + return; + + __iommu_free_pages(virt_to_page(virt), order); +} + +/** + * iommu_free_page - free page + * @virt: virtual address of the page to be freed. + */ +static inline void iommu_free_page(void *virt) +{ + iommu_free_pages(virt, 0); +} + +/** + * iommu_put_pages_list - free a list of pages. + * @page: the head of the lru list to be freed. + * + * There are no locking requirement for these pages, as they are going to be + * put on a free list as soon as refcount reaches 0. Pages are put on this LRU + * list once they are removed from the IOMMU page tables. However, they can + * still be access through debugfs. + */ +static inline void iommu_put_pages_list(struct list_head *page) +{ + while (!list_empty(page)) { + struct page *p = list_entry(page->prev, struct page, lru); + + list_del(&p->lru); + put_page(p); + } +} + +#endif /* __IOMMU_PAGES_H */ From 95b18ef9c69157ded5ece1136377cf8123b597f0 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Sat, 13 Apr 2024 00:25:13 +0000 Subject: [PATCH 02/11] iommu/dma: use iommu_put_pages_list() to releae freelist Free the IOMMU page tables via iommu_put_pages_list(). The page tables were allocated via iommu_alloc_* functions in architecture specific places, but are released in dma-iommu if the freelist is gathered during map/unmap operations into iommu_iotlb_gather data structure. Currently, only iommu/intel that does that. Signed-off-by: Pasha Tatashin Acked-by: David Rientjes Link: https://lore.kernel.org/r/20240413002522.1101315-3-pasha.tatashin@soleen.com Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index e4cb26f6a943..16a7c4a4f3db 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -32,6 +32,7 @@ #include #include "dma-iommu.h" +#include "iommu-pages.h" struct iommu_dma_msi_page { struct list_head list; @@ -156,7 +157,7 @@ static void fq_ring_free_locked(struct iommu_dma_cookie *cookie, struct iova_fq if (fq->entries[idx].counter >= counter) break; - put_pages_list(&fq->entries[idx].freelist); + iommu_put_pages_list(&fq->entries[idx].freelist); free_iova_fast(&cookie->iovad, fq->entries[idx].iova_pfn, fq->entries[idx].pages); @@ -254,7 +255,7 @@ static void iommu_dma_free_fq_single(struct iova_fq *fq) int idx; fq_ring_for_each(idx, fq) - put_pages_list(&fq->entries[idx].freelist); + iommu_put_pages_list(&fq->entries[idx].freelist); vfree(fq); } @@ -267,7 +268,7 @@ static void iommu_dma_free_fq_percpu(struct iova_fq __percpu *percpu_fq) struct iova_fq *fq = per_cpu_ptr(percpu_fq, cpu); fq_ring_for_each(idx, fq) - put_pages_list(&fq->entries[idx].freelist); + iommu_put_pages_list(&fq->entries[idx].freelist); } free_percpu(percpu_fq); From 75114cbaa136fc50de3a339c85124b20466a7c46 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Sat, 13 Apr 2024 00:25:14 +0000 Subject: [PATCH 03/11] iommu/amd: use page allocation function provided by iommu-pages.h Convert iommu/amd/* files to use the new page allocation functions provided in iommu-pages.h. Signed-off-by: Pasha Tatashin Acked-by: David Rientjes Tested-by: Bagas Sanjaya Link: https://lore.kernel.org/r/20240413002522.1101315-4-pasha.tatashin@soleen.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 8 --- drivers/iommu/amd/init.c | 91 ++++++++++++++----------------- drivers/iommu/amd/io_pgtable.c | 13 +++-- drivers/iommu/amd/io_pgtable_v2.c | 18 +++--- drivers/iommu/amd/iommu.c | 11 ++-- 5 files changed, 62 insertions(+), 79 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index f482aab420f7..e01409037206 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -134,14 +134,6 @@ static inline int get_pci_sbdf_id(struct pci_dev *pdev) return PCI_SEG_DEVID_TO_SBDF(seg, devid); } -static inline void *alloc_pgtable_page(int nid, gfp_t gfp) -{ - struct page *page; - - page = alloc_pages_node(nid, gfp | __GFP_ZERO, 0); - return page ? page_address(page) : NULL; -} - /* * This must be called after device probe completes. During probe * use rlookup_amd_iommu() get the iommu. diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index ac6754a85f35..40b3b9cffade 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -36,6 +36,7 @@ #include "amd_iommu.h" #include "../irq_remapping.h" +#include "../iommu-pages.h" /* * definitions for the ACPI scanning code @@ -649,8 +650,8 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table, u16 pci_ /* Allocate per PCI segment device table */ static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg) { - pci_seg->dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO | GFP_DMA32, - get_order(pci_seg->dev_table_size)); + pci_seg->dev_table = iommu_alloc_pages(GFP_KERNEL | GFP_DMA32, + get_order(pci_seg->dev_table_size)); if (!pci_seg->dev_table) return -ENOMEM; @@ -659,17 +660,16 @@ static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg) static inline void free_dev_table(struct amd_iommu_pci_seg *pci_seg) { - free_pages((unsigned long)pci_seg->dev_table, - get_order(pci_seg->dev_table_size)); + iommu_free_pages(pci_seg->dev_table, + get_order(pci_seg->dev_table_size)); pci_seg->dev_table = NULL; } /* Allocate per PCI segment IOMMU rlookup table. */ static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg) { - pci_seg->rlookup_table = (void *)__get_free_pages( - GFP_KERNEL | __GFP_ZERO, - get_order(pci_seg->rlookup_table_size)); + pci_seg->rlookup_table = iommu_alloc_pages(GFP_KERNEL, + get_order(pci_seg->rlookup_table_size)); if (pci_seg->rlookup_table == NULL) return -ENOMEM; @@ -678,16 +678,15 @@ static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg) static inline void free_rlookup_table(struct amd_iommu_pci_seg *pci_seg) { - free_pages((unsigned long)pci_seg->rlookup_table, - get_order(pci_seg->rlookup_table_size)); + iommu_free_pages(pci_seg->rlookup_table, + get_order(pci_seg->rlookup_table_size)); pci_seg->rlookup_table = NULL; } static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg) { - pci_seg->irq_lookup_table = (void *)__get_free_pages( - GFP_KERNEL | __GFP_ZERO, - get_order(pci_seg->rlookup_table_size)); + pci_seg->irq_lookup_table = iommu_alloc_pages(GFP_KERNEL, + get_order(pci_seg->rlookup_table_size)); kmemleak_alloc(pci_seg->irq_lookup_table, pci_seg->rlookup_table_size, 1, GFP_KERNEL); if (pci_seg->irq_lookup_table == NULL) @@ -699,8 +698,8 @@ static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_se static inline void free_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg) { kmemleak_free(pci_seg->irq_lookup_table); - free_pages((unsigned long)pci_seg->irq_lookup_table, - get_order(pci_seg->rlookup_table_size)); + iommu_free_pages(pci_seg->irq_lookup_table, + get_order(pci_seg->rlookup_table_size)); pci_seg->irq_lookup_table = NULL; } @@ -708,8 +707,8 @@ static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg) { int i; - pci_seg->alias_table = (void *)__get_free_pages(GFP_KERNEL, - get_order(pci_seg->alias_table_size)); + pci_seg->alias_table = iommu_alloc_pages(GFP_KERNEL, + get_order(pci_seg->alias_table_size)); if (!pci_seg->alias_table) return -ENOMEM; @@ -724,8 +723,8 @@ static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg) static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg) { - free_pages((unsigned long)pci_seg->alias_table, - get_order(pci_seg->alias_table_size)); + iommu_free_pages(pci_seg->alias_table, + get_order(pci_seg->alias_table_size)); pci_seg->alias_table = NULL; } @@ -736,8 +735,8 @@ static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg) */ static int __init alloc_command_buffer(struct amd_iommu *iommu) { - iommu->cmd_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(CMD_BUFFER_SIZE)); + iommu->cmd_buf = iommu_alloc_pages(GFP_KERNEL, + get_order(CMD_BUFFER_SIZE)); return iommu->cmd_buf ? 0 : -ENOMEM; } @@ -845,19 +844,19 @@ static void iommu_disable_command_buffer(struct amd_iommu *iommu) static void __init free_command_buffer(struct amd_iommu *iommu) { - free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE)); + iommu_free_pages(iommu->cmd_buf, get_order(CMD_BUFFER_SIZE)); } static void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, gfp_t gfp, size_t size) { int order = get_order(size); - void *buf = (void *)__get_free_pages(gfp, order); + void *buf = iommu_alloc_pages(gfp, order); if (buf && check_feature(FEATURE_SNP) && set_memory_4k((unsigned long)buf, (1 << order))) { - free_pages((unsigned long)buf, order); + iommu_free_pages(buf, order); buf = NULL; } @@ -867,7 +866,7 @@ static void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, /* allocates the memory where the IOMMU will log its events to */ static int __init alloc_event_buffer(struct amd_iommu *iommu) { - iommu->evt_buf = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, + iommu->evt_buf = iommu_alloc_4k_pages(iommu, GFP_KERNEL, EVT_BUFFER_SIZE); return iommu->evt_buf ? 0 : -ENOMEM; @@ -901,14 +900,13 @@ static void iommu_disable_event_buffer(struct amd_iommu *iommu) static void __init free_event_buffer(struct amd_iommu *iommu) { - free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE)); + iommu_free_pages(iommu->evt_buf, get_order(EVT_BUFFER_SIZE)); } /* allocates the memory where the IOMMU will log its events to */ static int __init alloc_ppr_log(struct amd_iommu *iommu) { - iommu->ppr_log = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, - PPR_LOG_SIZE); + iommu->ppr_log = iommu_alloc_4k_pages(iommu, GFP_KERNEL, PPR_LOG_SIZE); return iommu->ppr_log ? 0 : -ENOMEM; } @@ -937,14 +935,14 @@ static void iommu_enable_ppr_log(struct amd_iommu *iommu) static void __init free_ppr_log(struct amd_iommu *iommu) { - free_pages((unsigned long)iommu->ppr_log, get_order(PPR_LOG_SIZE)); + iommu_free_pages(iommu->ppr_log, get_order(PPR_LOG_SIZE)); } static void free_ga_log(struct amd_iommu *iommu) { #ifdef CONFIG_IRQ_REMAP - free_pages((unsigned long)iommu->ga_log, get_order(GA_LOG_SIZE)); - free_pages((unsigned long)iommu->ga_log_tail, get_order(8)); + iommu_free_pages(iommu->ga_log, get_order(GA_LOG_SIZE)); + iommu_free_pages(iommu->ga_log_tail, get_order(8)); #endif } @@ -989,13 +987,11 @@ static int iommu_init_ga_log(struct amd_iommu *iommu) if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) return 0; - iommu->ga_log = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(GA_LOG_SIZE)); + iommu->ga_log = iommu_alloc_pages(GFP_KERNEL, get_order(GA_LOG_SIZE)); if (!iommu->ga_log) goto err_out; - iommu->ga_log_tail = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(8)); + iommu->ga_log_tail = iommu_alloc_pages(GFP_KERNEL, get_order(8)); if (!iommu->ga_log_tail) goto err_out; @@ -1008,7 +1004,7 @@ err_out: static int __init alloc_cwwb_sem(struct amd_iommu *iommu) { - iommu->cmd_sem = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, 1); + iommu->cmd_sem = iommu_alloc_4k_pages(iommu, GFP_KERNEL, 1); return iommu->cmd_sem ? 0 : -ENOMEM; } @@ -1016,7 +1012,7 @@ static int __init alloc_cwwb_sem(struct amd_iommu *iommu) static void __init free_cwwb_sem(struct amd_iommu *iommu) { if (iommu->cmd_sem) - free_page((unsigned long)iommu->cmd_sem); + iommu_free_page((void *)iommu->cmd_sem); } static void iommu_enable_xt(struct amd_iommu *iommu) @@ -1081,7 +1077,6 @@ static bool __copy_device_table(struct amd_iommu *iommu) u32 lo, hi, devid, old_devtb_size; phys_addr_t old_devtb_phys; u16 dom_id, dte_v, irq_v; - gfp_t gfp_flag; u64 tmp; /* Each IOMMU use separate device table with the same size */ @@ -1115,9 +1110,8 @@ static bool __copy_device_table(struct amd_iommu *iommu) if (!old_devtb) return false; - gfp_flag = GFP_KERNEL | __GFP_ZERO | GFP_DMA32; - pci_seg->old_dev_tbl_cpy = (void *)__get_free_pages(gfp_flag, - get_order(pci_seg->dev_table_size)); + pci_seg->old_dev_tbl_cpy = iommu_alloc_pages(GFP_KERNEL | GFP_DMA32, + get_order(pci_seg->dev_table_size)); if (pci_seg->old_dev_tbl_cpy == NULL) { pr_err("Failed to allocate memory for copying old device table!\n"); memunmap(old_devtb); @@ -2805,8 +2799,8 @@ static void early_enable_iommus(void) for_each_pci_segment(pci_seg) { if (pci_seg->old_dev_tbl_cpy != NULL) { - free_pages((unsigned long)pci_seg->old_dev_tbl_cpy, - get_order(pci_seg->dev_table_size)); + iommu_free_pages(pci_seg->old_dev_tbl_cpy, + get_order(pci_seg->dev_table_size)); pci_seg->old_dev_tbl_cpy = NULL; } } @@ -2819,8 +2813,8 @@ static void early_enable_iommus(void) pr_info("Copied DEV table from previous kernel.\n"); for_each_pci_segment(pci_seg) { - free_pages((unsigned long)pci_seg->dev_table, - get_order(pci_seg->dev_table_size)); + iommu_free_pages(pci_seg->dev_table, + get_order(pci_seg->dev_table_size)); pci_seg->dev_table = pci_seg->old_dev_tbl_cpy; } @@ -3022,8 +3016,8 @@ static bool __init check_ioapic_information(void) static void __init free_dma_resources(void) { - free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, - get_order(MAX_DOMAIN_ID/8)); + iommu_free_pages(amd_iommu_pd_alloc_bitmap, + get_order(MAX_DOMAIN_ID / 8)); amd_iommu_pd_alloc_bitmap = NULL; free_unity_maps(); @@ -3095,9 +3089,8 @@ static int __init early_amd_iommu_init(void) /* Device table - directly used by all IOMMUs */ ret = -ENOMEM; - amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages( - GFP_KERNEL | __GFP_ZERO, - get_order(MAX_DOMAIN_ID/8)); + amd_iommu_pd_alloc_bitmap = iommu_alloc_pages(GFP_KERNEL, + get_order(MAX_DOMAIN_ID / 8)); if (amd_iommu_pd_alloc_bitmap == NULL) goto out; diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 2a0d1e97e52f..9d9a7fde59e7 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -22,6 +22,7 @@ #include "amd_iommu_types.h" #include "amd_iommu.h" +#include "../iommu-pages.h" static void v1_tlb_flush_all(void *cookie) { @@ -156,7 +157,7 @@ static bool increase_address_space(struct protection_domain *domain, bool ret = true; u64 *pte; - pte = alloc_pgtable_page(domain->nid, gfp); + pte = iommu_alloc_page_node(domain->nid, gfp); if (!pte) return false; @@ -187,7 +188,7 @@ static bool increase_address_space(struct protection_domain *domain, out: spin_unlock_irqrestore(&domain->lock, flags); - free_page((unsigned long)pte); + iommu_free_page(pte); return ret; } @@ -250,7 +251,7 @@ static u64 *alloc_pte(struct protection_domain *domain, if (!IOMMU_PTE_PRESENT(__pte) || pte_level == PAGE_MODE_NONE) { - page = alloc_pgtable_page(domain->nid, gfp); + page = iommu_alloc_page_node(domain->nid, gfp); if (!page) return NULL; @@ -259,7 +260,7 @@ static u64 *alloc_pte(struct protection_domain *domain, /* pte could have been changed somewhere. */ if (!try_cmpxchg64(pte, &__pte, __npte)) - free_page((unsigned long)page); + iommu_free_page(page); else if (IOMMU_PTE_PRESENT(__pte)) *updated = true; @@ -431,7 +432,7 @@ out: } /* Everything flushed out, free pages now */ - put_pages_list(&freelist); + iommu_put_pages_list(&freelist); return ret; } @@ -580,7 +581,7 @@ static void v1_free_pgtable(struct io_pgtable *iop) /* Make changes visible to IOMMUs */ amd_iommu_domain_update(dom); - put_pages_list(&freelist); + iommu_put_pages_list(&freelist); } static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c index 93489d2db4e8..78ac37c5ccc1 100644 --- a/drivers/iommu/amd/io_pgtable_v2.c +++ b/drivers/iommu/amd/io_pgtable_v2.c @@ -18,6 +18,7 @@ #include "amd_iommu_types.h" #include "amd_iommu.h" +#include "../iommu-pages.h" #define IOMMU_PAGE_PRESENT BIT_ULL(0) /* Is present */ #define IOMMU_PAGE_RW BIT_ULL(1) /* Writeable */ @@ -99,11 +100,6 @@ static inline int page_size_to_level(u64 pg_size) return PAGE_MODE_1_LEVEL; } -static inline void free_pgtable_page(u64 *pt) -{ - free_page((unsigned long)pt); -} - static void free_pgtable(u64 *pt, int level) { u64 *p; @@ -125,10 +121,10 @@ static void free_pgtable(u64 *pt, int level) if (level > 2) free_pgtable(p, level - 1); else - free_pgtable_page(p); + iommu_free_page(p); } - free_pgtable_page(pt); + iommu_free_page(pt); } /* Allocate page table */ @@ -156,14 +152,14 @@ static u64 *v2_alloc_pte(int nid, u64 *pgd, unsigned long iova, } if (!IOMMU_PTE_PRESENT(__pte)) { - page = alloc_pgtable_page(nid, gfp); + page = iommu_alloc_page_node(nid, gfp); if (!page) return NULL; __npte = set_pgtable_attr(page); /* pte could have been changed somewhere. */ if (cmpxchg64(pte, __pte, __npte) != __pte) - free_pgtable_page(page); + iommu_free_page(page); else if (IOMMU_PTE_PRESENT(__pte)) *updated = true; @@ -185,7 +181,7 @@ static u64 *v2_alloc_pte(int nid, u64 *pgd, unsigned long iova, if (pg_size == IOMMU_PAGE_SIZE_1G) free_pgtable(__pte, end_level - 1); else if (pg_size == IOMMU_PAGE_SIZE_2M) - free_pgtable_page(__pte); + iommu_free_page(__pte); } return pte; @@ -366,7 +362,7 @@ static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo struct protection_domain *pdom = (struct protection_domain *)cookie; int ias = IOMMU_IN_ADDR_BIT_SIZE; - pgtable->pgd = alloc_pgtable_page(pdom->nid, GFP_ATOMIC); + pgtable->pgd = iommu_alloc_page_node(pdom->nid, GFP_ATOMIC); if (!pgtable->pgd) return NULL; diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index e692217fcb28..922f5d42c18e 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -42,6 +42,7 @@ #include "amd_iommu.h" #include "../dma-iommu.h" #include "../irq_remapping.h" +#include "../iommu-pages.h" #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) @@ -1728,7 +1729,7 @@ static void free_gcr3_tbl_level1(u64 *tbl) ptr = iommu_phys_to_virt(tbl[i] & PAGE_MASK); - free_page((unsigned long)ptr); + iommu_free_page(ptr); } } @@ -1761,7 +1762,7 @@ static void free_gcr3_table(struct gcr3_tbl_info *gcr3_info) /* Free per device domain ID */ domain_id_free(gcr3_info->domid); - free_page((unsigned long)gcr3_info->gcr3_tbl); + iommu_free_page(gcr3_info->gcr3_tbl); gcr3_info->gcr3_tbl = NULL; } @@ -1796,7 +1797,7 @@ static int setup_gcr3_table(struct gcr3_tbl_info *gcr3_info, /* Allocate per device domain ID */ gcr3_info->domid = domain_id_alloc(); - gcr3_info->gcr3_tbl = alloc_pgtable_page(nid, GFP_ATOMIC); + gcr3_info->gcr3_tbl = iommu_alloc_page_node(nid, GFP_ATOMIC); if (gcr3_info->gcr3_tbl == NULL) { domain_id_free(gcr3_info->domid); return -ENOMEM; @@ -2245,7 +2246,7 @@ static void protection_domain_free(struct protection_domain *domain) free_io_pgtable_ops(&domain->iop.iop.ops); if (domain->iop.root) - free_page((unsigned long)domain->iop.root); + iommu_free_page(domain->iop.root); if (domain->id) domain_id_free(domain->id); @@ -2260,7 +2261,7 @@ static int protection_domain_init_v1(struct protection_domain *domain, int mode) BUG_ON(mode < PAGE_MODE_NONE || mode > PAGE_MODE_6_LEVEL); if (mode != PAGE_MODE_NONE) { - pt_root = (void *)get_zeroed_page(GFP_KERNEL); + pt_root = iommu_alloc_page(GFP_KERNEL); if (!pt_root) return -ENOMEM; } From 9a3dd4c1ee7a183229163320eb38f15b17342f78 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Sat, 13 Apr 2024 00:25:15 +0000 Subject: [PATCH 04/11] iommu/io-pgtable-arm: use page allocation function provided by iommu-pages.h Convert iommu/io-pgtable-arm.c to use the new page allocation functions provided in iommu-pages.h. Signed-off-by: Pasha Tatashin Acked-by: David Rientjes Tested-by: Bagas Sanjaya Link: https://lore.kernel.org/r/20240413002522.1101315-5-pasha.tatashin@soleen.com Signed-off-by: Joerg Roedel --- drivers/iommu/io-pgtable-arm.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index f7828a7aad41..3d23b924cec1 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -21,6 +21,7 @@ #include #include "io-pgtable-arm.h" +#include "iommu-pages.h" #define ARM_LPAE_MAX_ADDR_BITS 52 #define ARM_LPAE_S2_MAX_CONCAT_PAGES 16 @@ -198,14 +199,10 @@ static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp, VM_BUG_ON((gfp & __GFP_HIGHMEM)); - if (cfg->alloc) { + if (cfg->alloc) pages = cfg->alloc(cookie, size, gfp); - } else { - struct page *p; - - p = alloc_pages_node(dev_to_node(dev), gfp | __GFP_ZERO, order); - pages = p ? page_address(p) : NULL; - } + else + pages = iommu_alloc_pages_node(dev_to_node(dev), gfp, order); if (!pages) return NULL; @@ -233,7 +230,7 @@ out_free: if (cfg->free) cfg->free(cookie, pages, size); else - free_pages((unsigned long)pages, order); + iommu_free_pages(pages, order); return NULL; } @@ -249,7 +246,7 @@ static void __arm_lpae_free_pages(void *pages, size_t size, if (cfg->free) cfg->free(cookie, pages, size); else - free_pages((unsigned long)pages, get_order(size)); + iommu_free_pages(pages, get_order(size)); } static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep, int num_entries, From 4a0b77e7c899d9a64b597ae8c9624a066e95f555 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Sat, 13 Apr 2024 00:25:16 +0000 Subject: [PATCH 05/11] iommu/io-pgtable-dart: use page allocation function provided by iommu-pages.h Convert iommu/io-pgtable-dart.c to use the new page allocation functions provided in iommu-pages.h., and remove unnecessary struct io_pgtable_cfg argument from __dart_alloc_pages(). Signed-off-by: Pasha Tatashin Reviewed-by: Janne Grunau Acked-by: David Rientjes Tested-by: Bagas Sanjaya Link: https://lore.kernel.org/r/20240413002522.1101315-6-pasha.tatashin@soleen.com Signed-off-by: Joerg Roedel --- drivers/iommu/io-pgtable-dart.c | 37 +++++++++++++-------------------- 1 file changed, 14 insertions(+), 23 deletions(-) diff --git a/drivers/iommu/io-pgtable-dart.c b/drivers/iommu/io-pgtable-dart.c index 74b1ef2b96be..ad28031e1e93 100644 --- a/drivers/iommu/io-pgtable-dart.c +++ b/drivers/iommu/io-pgtable-dart.c @@ -23,6 +23,7 @@ #include #include +#include "iommu-pages.h" #define DART1_MAX_ADDR_BITS 36 @@ -106,18 +107,12 @@ static phys_addr_t iopte_to_paddr(dart_iopte pte, return paddr; } -static void *__dart_alloc_pages(size_t size, gfp_t gfp, - struct io_pgtable_cfg *cfg) +static void *__dart_alloc_pages(size_t size, gfp_t gfp) { int order = get_order(size); - struct page *p; VM_BUG_ON((gfp & __GFP_HIGHMEM)); - p = alloc_pages(gfp | __GFP_ZERO, order); - if (!p) - return NULL; - - return page_address(p); + return iommu_alloc_pages(gfp, order); } static int dart_init_pte(struct dart_io_pgtable *data, @@ -262,13 +257,13 @@ static int dart_map_pages(struct io_pgtable_ops *ops, unsigned long iova, /* no L2 table present */ if (!pte) { - cptep = __dart_alloc_pages(tblsz, gfp, cfg); + cptep = __dart_alloc_pages(tblsz, gfp); if (!cptep) return -ENOMEM; pte = dart_install_table(cptep, ptep, 0, data); if (pte) - free_pages((unsigned long)cptep, get_order(tblsz)); + iommu_free_pages(cptep, get_order(tblsz)); /* L2 table is present (now) */ pte = READ_ONCE(*ptep); @@ -419,8 +414,7 @@ apple_dart_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) cfg->apple_dart_cfg.n_ttbrs = 1 << data->tbl_bits; for (i = 0; i < cfg->apple_dart_cfg.n_ttbrs; ++i) { - data->pgd[i] = __dart_alloc_pages(DART_GRANULE(data), GFP_KERNEL, - cfg); + data->pgd[i] = __dart_alloc_pages(DART_GRANULE(data), GFP_KERNEL); if (!data->pgd[i]) goto out_free_data; cfg->apple_dart_cfg.ttbr[i] = virt_to_phys(data->pgd[i]); @@ -429,9 +423,10 @@ apple_dart_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) return &data->iop; out_free_data: - while (--i >= 0) - free_pages((unsigned long)data->pgd[i], - get_order(DART_GRANULE(data))); + while (--i >= 0) { + iommu_free_pages(data->pgd[i], + get_order(DART_GRANULE(data))); + } kfree(data); return NULL; } @@ -439,6 +434,7 @@ out_free_data: static void apple_dart_free_pgtable(struct io_pgtable *iop) { struct dart_io_pgtable *data = io_pgtable_to_data(iop); + int order = get_order(DART_GRANULE(data)); dart_iopte *ptep, *end; int i; @@ -449,15 +445,10 @@ static void apple_dart_free_pgtable(struct io_pgtable *iop) while (ptep != end) { dart_iopte pte = *ptep++; - if (pte) { - unsigned long page = - (unsigned long)iopte_deref(pte, data); - - free_pages(page, get_order(DART_GRANULE(data))); - } + if (pte) + iommu_free_pages(iopte_deref(pte, data), order); } - free_pages((unsigned long)data->pgd[i], - get_order(DART_GRANULE(data))); + iommu_free_pages(data->pgd[i], order); } kfree(data); From fe046f1bf820ab721fbd49aba6e5db39329c9eaa Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Sat, 13 Apr 2024 00:25:17 +0000 Subject: [PATCH 06/11] iommu/exynos: use page allocation function provided by iommu-pages.h Convert iommu/exynos-iommu.c to use the new page allocation functions provided in iommu-pages.h. Signed-off-by: Pasha Tatashin Acked-by: David Rientjes Acked-by: Marek Szyprowski Tested-by: Bagas Sanjaya Link: https://lore.kernel.org/r/20240413002522.1101315-7-pasha.tatashin@soleen.com Signed-off-by: Joerg Roedel --- drivers/iommu/exynos-iommu.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index d98c9161948a..c666ecab955d 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -22,6 +22,8 @@ #include #include +#include "iommu-pages.h" + typedef u32 sysmmu_iova_t; typedef u32 sysmmu_pte_t; static struct iommu_domain exynos_identity_domain; @@ -900,11 +902,11 @@ static struct iommu_domain *exynos_iommu_domain_alloc_paging(struct device *dev) if (!domain) return NULL; - domain->pgtable = (sysmmu_pte_t *)__get_free_pages(GFP_KERNEL, 2); + domain->pgtable = iommu_alloc_pages(GFP_KERNEL, 2); if (!domain->pgtable) goto err_pgtable; - domain->lv2entcnt = (short *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1); + domain->lv2entcnt = iommu_alloc_pages(GFP_KERNEL, 1); if (!domain->lv2entcnt) goto err_counter; @@ -930,9 +932,9 @@ static struct iommu_domain *exynos_iommu_domain_alloc_paging(struct device *dev) return &domain->domain; err_lv2ent: - free_pages((unsigned long)domain->lv2entcnt, 1); + iommu_free_pages(domain->lv2entcnt, 1); err_counter: - free_pages((unsigned long)domain->pgtable, 2); + iommu_free_pages(domain->pgtable, 2); err_pgtable: kfree(domain); return NULL; @@ -973,8 +975,8 @@ static void exynos_iommu_domain_free(struct iommu_domain *iommu_domain) phys_to_virt(base)); } - free_pages((unsigned long)domain->pgtable, 2); - free_pages((unsigned long)domain->lv2entcnt, 1); + iommu_free_pages(domain->pgtable, 2); + iommu_free_pages(domain->lv2entcnt, 1); kfree(domain); } From 5404ccaaff357d2d8d40e1a879446da9c9da5879 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Sat, 13 Apr 2024 00:25:18 +0000 Subject: [PATCH 07/11] iommu/rockchip: use page allocation function provided by iommu-pages.h Convert iommu/rockchip-iommu.c to use the new page allocation functions provided in iommu-pages.h. Signed-off-by: Pasha Tatashin Acked-by: David Rientjes Tested-by: Bagas Sanjaya Link: https://lore.kernel.org/r/20240413002522.1101315-8-pasha.tatashin@soleen.com Signed-off-by: Joerg Roedel --- drivers/iommu/rockchip-iommu.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index da79d9f4cf63..4b369419b32c 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -26,6 +26,8 @@ #include #include +#include "iommu-pages.h" + /** MMU register offsets */ #define RK_MMU_DTE_ADDR 0x00 /* Directory table address */ #define RK_MMU_STATUS 0x04 @@ -727,14 +729,14 @@ static u32 *rk_dte_get_page_table(struct rk_iommu_domain *rk_domain, if (rk_dte_is_pt_valid(dte)) goto done; - page_table = (u32 *)get_zeroed_page(GFP_ATOMIC | rk_ops->gfp_flags); + page_table = iommu_alloc_page(GFP_ATOMIC | rk_ops->gfp_flags); if (!page_table) return ERR_PTR(-ENOMEM); pt_dma = dma_map_single(dma_dev, page_table, SPAGE_SIZE, DMA_TO_DEVICE); if (dma_mapping_error(dma_dev, pt_dma)) { dev_err(dma_dev, "DMA mapping error while allocating page table\n"); - free_page((unsigned long)page_table); + iommu_free_page(page_table); return ERR_PTR(-ENOMEM); } @@ -1061,7 +1063,7 @@ static struct iommu_domain *rk_iommu_domain_alloc_paging(struct device *dev) * Each level1 (dt) and level2 (pt) table has 1024 4-byte entries. * Allocate one 4 KiB page for each table. */ - rk_domain->dt = (u32 *)get_zeroed_page(GFP_KERNEL | rk_ops->gfp_flags); + rk_domain->dt = iommu_alloc_page(GFP_KERNEL | rk_ops->gfp_flags); if (!rk_domain->dt) goto err_free_domain; @@ -1083,7 +1085,7 @@ static struct iommu_domain *rk_iommu_domain_alloc_paging(struct device *dev) return &rk_domain->domain; err_free_dt: - free_page((unsigned long)rk_domain->dt); + iommu_free_page(rk_domain->dt); err_free_domain: kfree(rk_domain); @@ -1104,13 +1106,13 @@ static void rk_iommu_domain_free(struct iommu_domain *domain) u32 *page_table = phys_to_virt(pt_phys); dma_unmap_single(dma_dev, pt_phys, SPAGE_SIZE, DMA_TO_DEVICE); - free_page((unsigned long)page_table); + iommu_free_page(page_table); } } dma_unmap_single(dma_dev, rk_domain->dt_dma, SPAGE_SIZE, DMA_TO_DEVICE); - free_page((unsigned long)rk_domain->dt); + iommu_free_page(rk_domain->dt); kfree(rk_domain); } From cb06b259e166e69eaa07b348202a6205346e2791 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Sat, 13 Apr 2024 00:25:19 +0000 Subject: [PATCH 08/11] iommu/sun50i: use page allocation function provided by iommu-pages.h Convert iommu/sun50i-iommu.c to use the new page allocation functions provided in iommu-pages.h. Signed-off-by: Pasha Tatashin Acked-by: David Rientjes Acked-by: Jernej Skrabec Tested-by: Bagas Sanjaya Link: https://lore.kernel.org/r/20240413002522.1101315-9-pasha.tatashin@soleen.com Signed-off-by: Joerg Roedel --- drivers/iommu/sun50i-iommu.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c index decd52cba998..c519b991749d 100644 --- a/drivers/iommu/sun50i-iommu.c +++ b/drivers/iommu/sun50i-iommu.c @@ -26,6 +26,8 @@ #include #include +#include "iommu-pages.h" + #define IOMMU_RESET_REG 0x010 #define IOMMU_RESET_RELEASE_ALL 0xffffffff #define IOMMU_ENABLE_REG 0x020 @@ -679,8 +681,7 @@ sun50i_iommu_domain_alloc_paging(struct device *dev) if (!sun50i_domain) return NULL; - sun50i_domain->dt = (u32 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(DT_SIZE)); + sun50i_domain->dt = iommu_alloc_pages(GFP_KERNEL, get_order(DT_SIZE)); if (!sun50i_domain->dt) goto err_free_domain; @@ -702,7 +703,7 @@ static void sun50i_iommu_domain_free(struct iommu_domain *domain) { struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain); - free_pages((unsigned long)sun50i_domain->dt, get_order(DT_SIZE)); + iommu_free_pages(sun50i_domain->dt, get_order(DT_SIZE)); sun50i_domain->dt = NULL; kfree(sun50i_domain); From 8e8b4ac5b0ab759fffcd4d802ab3e084e3609191 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Sat, 13 Apr 2024 00:25:20 +0000 Subject: [PATCH 09/11] iommu/tegra-smmu: use page allocation function provided by iommu-pages.h Convert iommu/tegra-smmu.c to use the new page allocation functions provided in iommu-pages.h. Signed-off-by: Pasha Tatashin Acked-by: David Rientjes Acked-by: Thierry Reding Tested-by: Bagas Sanjaya Link: https://lore.kernel.org/r/20240413002522.1101315-10-pasha.tatashin@soleen.com Signed-off-by: Joerg Roedel --- drivers/iommu/tegra-smmu.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 14e525bd0d9b..f86c7ae91814 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -19,6 +19,8 @@ #include #include +#include "iommu-pages.h" + struct tegra_smmu_group { struct list_head list; struct tegra_smmu *smmu; @@ -282,7 +284,7 @@ static struct iommu_domain *tegra_smmu_domain_alloc_paging(struct device *dev) as->attr = SMMU_PD_READABLE | SMMU_PD_WRITABLE | SMMU_PD_NONSECURE; - as->pd = alloc_page(GFP_KERNEL | __GFP_DMA | __GFP_ZERO); + as->pd = __iommu_alloc_pages(GFP_KERNEL | __GFP_DMA, 0); if (!as->pd) { kfree(as); return NULL; @@ -290,7 +292,7 @@ static struct iommu_domain *tegra_smmu_domain_alloc_paging(struct device *dev) as->count = kcalloc(SMMU_NUM_PDE, sizeof(u32), GFP_KERNEL); if (!as->count) { - __free_page(as->pd); + __iommu_free_pages(as->pd, 0); kfree(as); return NULL; } @@ -298,7 +300,7 @@ static struct iommu_domain *tegra_smmu_domain_alloc_paging(struct device *dev) as->pts = kcalloc(SMMU_NUM_PDE, sizeof(*as->pts), GFP_KERNEL); if (!as->pts) { kfree(as->count); - __free_page(as->pd); + __iommu_free_pages(as->pd, 0); kfree(as); return NULL; } @@ -599,14 +601,14 @@ static u32 *as_get_pte(struct tegra_smmu_as *as, dma_addr_t iova, dma = dma_map_page(smmu->dev, page, 0, SMMU_SIZE_PT, DMA_TO_DEVICE); if (dma_mapping_error(smmu->dev, dma)) { - __free_page(page); + __iommu_free_pages(page, 0); return NULL; } if (!smmu_dma_addr_valid(smmu, dma)) { dma_unmap_page(smmu->dev, dma, SMMU_SIZE_PT, DMA_TO_DEVICE); - __free_page(page); + __iommu_free_pages(page, 0); return NULL; } @@ -649,7 +651,7 @@ static void tegra_smmu_pte_put_use(struct tegra_smmu_as *as, unsigned long iova) tegra_smmu_set_pde(as, iova, 0); dma_unmap_page(smmu->dev, pte_dma, SMMU_SIZE_PT, DMA_TO_DEVICE); - __free_page(page); + __iommu_free_pages(page, 0); as->pts[pde] = NULL; } } @@ -688,7 +690,7 @@ static struct page *as_get_pde_page(struct tegra_smmu_as *as, if (gfpflags_allow_blocking(gfp)) spin_unlock_irqrestore(&as->lock, *flags); - page = alloc_page(gfp | __GFP_DMA | __GFP_ZERO); + page = __iommu_alloc_pages(gfp | __GFP_DMA, 0); if (gfpflags_allow_blocking(gfp)) spin_lock_irqsave(&as->lock, *flags); @@ -700,7 +702,7 @@ static struct page *as_get_pde_page(struct tegra_smmu_as *as, */ if (as->pts[pde]) { if (page) - __free_page(page); + __iommu_free_pages(page, 0); page = as->pts[pde]; } From bd3520a93a84cd8c3897283e5891a9106fcf5acc Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Sat, 13 Apr 2024 00:25:21 +0000 Subject: [PATCH 10/11] iommu: observability of the IOMMU allocations Add NR_IOMMU_PAGES into node_stat_item that counts number of pages that are allocated by the IOMMU subsystem. The allocations can be view per-node via: /sys/devices/system/node/nodeN/vmstat. For example: $ grep iommu /sys/devices/system/node/node*/vmstat /sys/devices/system/node/node0/vmstat:nr_iommu_pages 106025 /sys/devices/system/node/node1/vmstat:nr_iommu_pages 3464 The value is in page-count, therefore, in the above example the iommu allocations amount to ~428M. Signed-off-by: Pasha Tatashin Acked-by: David Rientjes Tested-by: Bagas Sanjaya Link: https://lore.kernel.org/r/20240413002522.1101315-11-pasha.tatashin@soleen.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu-pages.h | 30 ++++++++++++++++++++++++++++++ include/linux/mmzone.h | 3 +++ mm/vmstat.c | 3 +++ 3 files changed, 36 insertions(+) diff --git a/drivers/iommu/iommu-pages.h b/drivers/iommu/iommu-pages.h index 5a222d0ad25c..1264b0f6b6c3 100644 --- a/drivers/iommu/iommu-pages.h +++ b/drivers/iommu/iommu-pages.h @@ -20,6 +20,30 @@ * large, i.e. multiple gigabytes in size. */ +/** + * __iommu_alloc_account - account for newly allocated page. + * @page: head struct page of the page. + * @order: order of the page + */ +static inline void __iommu_alloc_account(struct page *page, int order) +{ + const long pgcnt = 1l << order; + + mod_node_page_state(page_pgdat(page), NR_IOMMU_PAGES, pgcnt); +} + +/** + * __iommu_free_account - account a page that is about to be freed. + * @page: head struct page of the page. + * @order: order of the page + */ +static inline void __iommu_free_account(struct page *page, int order) +{ + const long pgcnt = 1l << order; + + mod_node_page_state(page_pgdat(page), NR_IOMMU_PAGES, -pgcnt); +} + /** * __iommu_alloc_pages - allocate a zeroed page of a given order. * @gfp: buddy allocator flags @@ -35,6 +59,8 @@ static inline struct page *__iommu_alloc_pages(gfp_t gfp, int order) if (unlikely(!page)) return NULL; + __iommu_alloc_account(page, order); + return page; } @@ -48,6 +74,7 @@ static inline void __iommu_free_pages(struct page *page, int order) if (!page) return; + __iommu_free_account(page, order); __free_pages(page, order); } @@ -67,6 +94,8 @@ static inline void *iommu_alloc_pages_node(int nid, gfp_t gfp, int order) if (unlikely(!page)) return NULL; + __iommu_alloc_account(page, order); + return page_address(page); } @@ -147,6 +176,7 @@ static inline void iommu_put_pages_list(struct list_head *page) struct page *p = list_entry(page->prev, struct page, lru); list_del(&p->lru); + __iommu_free_account(p, 0); put_page(p); } } diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index c11b7cde81ef..be17195f6f86 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -206,6 +206,9 @@ enum node_stat_item { #endif NR_PAGETABLE, /* used for pagetables */ NR_SECONDARY_PAGETABLE, /* secondary pagetables, e.g. KVM pagetables */ +#ifdef CONFIG_IOMMU_SUPPORT + NR_IOMMU_PAGES, /* # of pages allocated by IOMMU */ +#endif #ifdef CONFIG_SWAP NR_SWAPCACHE, #endif diff --git a/mm/vmstat.c b/mm/vmstat.c index db79935e4a54..8507c497218b 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1242,6 +1242,9 @@ const char * const vmstat_text[] = { #endif "nr_page_table_pages", "nr_sec_page_table_pages", +#ifdef CONFIG_IOMMU_SUPPORT + "nr_iommu_pages", +#endif #ifdef CONFIG_SWAP "nr_swapcached", #endif From 212c5c078d83d780cf2873ca931df135771e8bb7 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Sat, 13 Apr 2024 00:25:22 +0000 Subject: [PATCH 11/11] iommu: account IOMMU allocated memory In order to be able to limit the amount of memory that is allocated by IOMMU subsystem, the memory must be accounted. Account IOMMU as part of the secondary pagetables as it was discussed at LPC. The value of SecPageTables now contains mmeory allocation by IOMMU and KVM. There is a difference between GFP_ACCOUNT and what NR_IOMMU_PAGES shows. GFP_ACCOUNT is set only where it makes sense to charge to user processes, i.e. IOMMU Page Tables, but there more IOMMU shared data that should not really be charged to a specific process. Signed-off-by: Pasha Tatashin Acked-by: David Rientjes Tested-by: Bagas Sanjaya Link: https://lore.kernel.org/r/20240413002522.1101315-12-pasha.tatashin@soleen.com Signed-off-by: Joerg Roedel --- Documentation/admin-guide/cgroup-v2.rst | 2 +- Documentation/filesystems/proc.rst | 4 ++-- drivers/iommu/iommu-pages.h | 2 ++ include/linux/mmzone.h | 2 +- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 17e6e9565156..15f80fea8df7 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1432,7 +1432,7 @@ PAGE_SIZE multiple when read back. sec_pagetables Amount of memory allocated for secondary page tables, this currently includes KVM mmu allocations on x86 - and arm64. + and arm64 and IOMMU page tables. percpu (npn) Amount of memory used for storing per-cpu kernel diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst index c6a6b9df2104..707e39280a9a 100644 --- a/Documentation/filesystems/proc.rst +++ b/Documentation/filesystems/proc.rst @@ -1110,8 +1110,8 @@ KernelStack PageTables Memory consumed by userspace page tables SecPageTables - Memory consumed by secondary page tables, this currently - currently includes KVM mmu allocations on x86 and arm64. + Memory consumed by secondary page tables, this currently includes + KVM mmu and IOMMU allocations on x86 and arm64. NFS_Unstable Always zero. Previous counted pages which had been written to the server, but has not been committed to stable storage. diff --git a/drivers/iommu/iommu-pages.h b/drivers/iommu/iommu-pages.h index 1264b0f6b6c3..82ebf0033081 100644 --- a/drivers/iommu/iommu-pages.h +++ b/drivers/iommu/iommu-pages.h @@ -30,6 +30,7 @@ static inline void __iommu_alloc_account(struct page *page, int order) const long pgcnt = 1l << order; mod_node_page_state(page_pgdat(page), NR_IOMMU_PAGES, pgcnt); + mod_lruvec_page_state(page, NR_SECONDARY_PAGETABLE, pgcnt); } /** @@ -42,6 +43,7 @@ static inline void __iommu_free_account(struct page *page, int order) const long pgcnt = 1l << order; mod_node_page_state(page_pgdat(page), NR_IOMMU_PAGES, -pgcnt); + mod_lruvec_page_state(page, NR_SECONDARY_PAGETABLE, -pgcnt); } /** diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index be17195f6f86..8f9c9590a42c 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -205,7 +205,7 @@ enum node_stat_item { NR_KERNEL_SCS_KB, /* measured in KiB */ #endif NR_PAGETABLE, /* used for pagetables */ - NR_SECONDARY_PAGETABLE, /* secondary pagetables, e.g. KVM pagetables */ + NR_SECONDARY_PAGETABLE, /* secondary pagetables, KVM & IOMMU */ #ifdef CONFIG_IOMMU_SUPPORT NR_IOMMU_PAGES, /* # of pages allocated by IOMMU */ #endif