mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-01 10:43:43 +00:00
hugetlb: convert hugetlb_fault() to use struct vm_fault
Patch series "Hugetlb fault path to use struct vm_fault", v2.

This patchset converts the hugetlb fault path to use struct vm_fault. This helps make the code more readable, and alleviates the stack by allowing us to consolidate many fault-related variables into an individual pointer.

This patch (of 3):

Now that hugetlb_fault() has a vm_fault available for fault tracking, use it throughout. This cleans up the code by removing 2 variables, and prepares hugetlb_fault() to take in a struct vm_fault argument.

Link: https://lkml.kernel.org/r/20240401202651.31440-1-vishal.moola@gmail.com
Link: https://lkml.kernel.org/r/20240401202651.31440-2-vishal.moola@gmail.com
Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Reviewed-by: Muchun Song <muchun.song@linux.dev>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
parent
7edea4c6fd
commit
9b42fa1619
84
mm/hugetlb.c
84
mm/hugetlb.c
@ -6427,8 +6427,6 @@ u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx)
|
||||
vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||
unsigned long address, unsigned int flags)
|
||||
{
|
||||
pte_t *ptep, entry;
|
||||
spinlock_t *ptl;
|
||||
vm_fault_t ret;
|
||||
u32 hash;
|
||||
struct folio *folio = NULL;
|
||||
@ -6436,13 +6434,13 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||
struct hstate *h = hstate_vma(vma);
|
||||
struct address_space *mapping;
|
||||
int need_wait_lock = 0;
|
||||
unsigned long haddr = address & huge_page_mask(h);
|
||||
struct vm_fault vmf = {
|
||||
.vma = vma,
|
||||
.address = haddr,
|
||||
.address = address & huge_page_mask(h),
|
||||
.real_address = address,
|
||||
.flags = flags,
|
||||
.pgoff = vma_hugecache_offset(h, vma, haddr),
|
||||
.pgoff = vma_hugecache_offset(h, vma,
|
||||
address & huge_page_mask(h)),
|
||||
/* TODO: Track hugetlb faults using vm_fault */
|
||||
|
||||
/*
|
||||
@ -6462,22 +6460,22 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||
|
||||
/*
|
||||
* Acquire vma lock before calling huge_pte_alloc and hold
|
||||
* until finished with ptep. This prevents huge_pmd_unshare from
|
||||
* being called elsewhere and making the ptep no longer valid.
|
||||
* until finished with vmf.pte. This prevents huge_pmd_unshare from
|
||||
* being called elsewhere and making vmf.pte no longer valid.
|
||||
*/
|
||||
hugetlb_vma_lock_read(vma);
|
||||
ptep = huge_pte_alloc(mm, vma, haddr, huge_page_size(h));
|
||||
if (!ptep) {
|
||||
vmf.pte = huge_pte_alloc(mm, vma, vmf.address, huge_page_size(h));
|
||||
if (!vmf.pte) {
|
||||
hugetlb_vma_unlock_read(vma);
|
||||
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
|
||||
return VM_FAULT_OOM;
|
||||
}
|
||||
|
||||
entry = huge_ptep_get(ptep);
|
||||
if (huge_pte_none_mostly(entry)) {
|
||||
if (is_pte_marker(entry)) {
|
||||
vmf.orig_pte = huge_ptep_get(vmf.pte);
|
||||
if (huge_pte_none_mostly(vmf.orig_pte)) {
|
||||
if (is_pte_marker(vmf.orig_pte)) {
|
||||
pte_marker marker =
|
||||
pte_marker_get(pte_to_swp_entry(entry));
|
||||
pte_marker_get(pte_to_swp_entry(vmf.orig_pte));
|
||||
|
||||
if (marker & PTE_MARKER_POISONED) {
|
||||
ret = VM_FAULT_HWPOISON_LARGE;
|
||||
@ -6492,20 +6490,20 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||
* mutex internally, which makes us return immediately.
|
||||
*/
|
||||
return hugetlb_no_page(mm, vma, mapping, vmf.pgoff, address,
|
||||
ptep, entry, flags, &vmf);
|
||||
vmf.pte, vmf.orig_pte, flags, &vmf);
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
|
||||
/*
|
||||
* entry could be a migration/hwpoison entry at this point, so this
|
||||
* check prevents the kernel from going below assuming that we have
|
||||
* an active hugepage in pagecache. This goto expects the 2nd page
|
||||
* fault, and is_hugetlb_entry_(migration|hwpoisoned) check will
|
||||
* properly handle it.
|
||||
* vmf.orig_pte could be a migration/hwpoison entry at this
|
||||
* point, so this check prevents the kernel from going below assuming
|
||||
* that we have an active hugepage in pagecache. This goto expects
|
||||
* the 2nd page fault, and is_hugetlb_entry_(migration|hwpoisoned)
|
||||
* check will properly handle it.
|
||||
*/
|
||||
if (!pte_present(entry)) {
|
||||
if (unlikely(is_hugetlb_entry_migration(entry))) {
|
||||
if (!pte_present(vmf.orig_pte)) {
|
||||
if (unlikely(is_hugetlb_entry_migration(vmf.orig_pte))) {
|
||||
/*
|
||||
* Release the hugetlb fault lock now, but retain
|
||||
* the vma lock, because it is needed to guard the
|
||||
@ -6514,9 +6512,9 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||
* be released there.
|
||||
*/
|
||||
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
|
||||
migration_entry_wait_huge(vma, ptep);
|
||||
migration_entry_wait_huge(vma, vmf.pte);
|
||||
return 0;
|
||||
} else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
|
||||
} else if (unlikely(is_hugetlb_entry_hwpoisoned(vmf.orig_pte)))
|
||||
ret = VM_FAULT_HWPOISON_LARGE |
|
||||
VM_FAULT_SET_HINDEX(hstate_index(h));
|
||||
goto out_mutex;
|
||||
@ -6530,13 +6528,13 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||
* determine if a reservation has been consumed.
|
||||
*/
|
||||
if ((flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) &&
|
||||
!(vma->vm_flags & VM_MAYSHARE) && !huge_pte_write(entry)) {
|
||||
if (vma_needs_reservation(h, vma, haddr) < 0) {
|
||||
!(vma->vm_flags & VM_MAYSHARE) && !huge_pte_write(vmf.orig_pte)) {
|
||||
if (vma_needs_reservation(h, vma, vmf.address) < 0) {
|
||||
ret = VM_FAULT_OOM;
|
||||
goto out_mutex;
|
||||
}
|
||||
/* Just decrements count, does not deallocate */
|
||||
vma_end_reservation(h, vma, haddr);
|
||||
vma_end_reservation(h, vma, vmf.address);
|
||||
|
||||
pagecache_folio = filemap_lock_hugetlb_folio(h, mapping,
|
||||
vmf.pgoff);
|
||||
@ -6544,17 +6542,17 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||
pagecache_folio = NULL;
|
||||
}
|
||||
|
||||
ptl = huge_pte_lock(h, mm, ptep);
|
||||
vmf.ptl = huge_pte_lock(h, mm, vmf.pte);
|
||||
|
||||
/* Check for a racing update before calling hugetlb_wp() */
|
||||
if (unlikely(!pte_same(entry, huge_ptep_get(ptep))))
|
||||
if (unlikely(!pte_same(vmf.orig_pte, huge_ptep_get(vmf.pte))))
|
||||
goto out_ptl;
|
||||
|
||||
/* Handle userfault-wp first, before trying to lock more pages */
|
||||
if (userfaultfd_wp(vma) && huge_pte_uffd_wp(huge_ptep_get(ptep)) &&
|
||||
(flags & FAULT_FLAG_WRITE) && !huge_pte_write(entry)) {
|
||||
if (userfaultfd_wp(vma) && huge_pte_uffd_wp(huge_ptep_get(vmf.pte)) &&
|
||||
(flags & FAULT_FLAG_WRITE) && !huge_pte_write(vmf.orig_pte)) {
|
||||
if (!userfaultfd_wp_async(vma)) {
|
||||
spin_unlock(ptl);
|
||||
spin_unlock(vmf.ptl);
|
||||
if (pagecache_folio) {
|
||||
folio_unlock(pagecache_folio);
|
||||
folio_put(pagecache_folio);
|
||||
@ -6564,18 +6562,18 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||
return handle_userfault(&vmf, VM_UFFD_WP);
|
||||
}
|
||||
|
||||
entry = huge_pte_clear_uffd_wp(entry);
|
||||
set_huge_pte_at(mm, haddr, ptep, entry,
|
||||
vmf.orig_pte = huge_pte_clear_uffd_wp(vmf.orig_pte);
|
||||
set_huge_pte_at(mm, vmf.address, vmf.pte, vmf.orig_pte,
|
||||
huge_page_size(hstate_vma(vma)));
|
||||
/* Fallthrough to CoW */
|
||||
}
|
||||
|
||||
/*
|
||||
* hugetlb_wp() requires page locks of pte_page(entry) and
|
||||
* hugetlb_wp() requires page locks of pte_page(vmf.orig_pte) and
|
||||
* pagecache_folio, so here we need to take the former one
|
||||
* when folio != pagecache_folio or !pagecache_folio.
|
||||
*/
|
||||
folio = page_folio(pte_page(entry));
|
||||
folio = page_folio(pte_page(vmf.orig_pte));
|
||||
if (folio != pagecache_folio)
|
||||
if (!folio_trylock(folio)) {
|
||||
need_wait_lock = 1;
|
||||
@ -6585,24 +6583,24 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||
folio_get(folio);
|
||||
|
||||
if (flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) {
|
||||
if (!huge_pte_write(entry)) {
|
||||
ret = hugetlb_wp(mm, vma, address, ptep, flags,
|
||||
pagecache_folio, ptl, &vmf);
|
||||
if (!huge_pte_write(vmf.orig_pte)) {
|
||||
ret = hugetlb_wp(mm, vma, address, vmf.pte, flags,
|
||||
pagecache_folio, vmf.ptl, &vmf);
|
||||
goto out_put_page;
|
||||
} else if (likely(flags & FAULT_FLAG_WRITE)) {
|
||||
entry = huge_pte_mkdirty(entry);
|
||||
vmf.orig_pte = huge_pte_mkdirty(vmf.orig_pte);
|
||||
}
|
||||
}
|
||||
entry = pte_mkyoung(entry);
|
||||
if (huge_ptep_set_access_flags(vma, haddr, ptep, entry,
|
||||
vmf.orig_pte = pte_mkyoung(vmf.orig_pte);
|
||||
if (huge_ptep_set_access_flags(vma, vmf.address, vmf.pte, vmf.orig_pte,
|
||||
flags & FAULT_FLAG_WRITE))
|
||||
update_mmu_cache(vma, haddr, ptep);
|
||||
update_mmu_cache(vma, vmf.address, vmf.pte);
|
||||
out_put_page:
|
||||
if (folio != pagecache_folio)
|
||||
folio_unlock(folio);
|
||||
folio_put(folio);
|
||||
out_ptl:
|
||||
spin_unlock(ptl);
|
||||
spin_unlock(vmf.ptl);
|
||||
|
||||
if (pagecache_folio) {
|
||||
folio_unlock(pagecache_folio);
|
||||
|
Loading…
Reference in New Issue
Block a user