From 7c53dfbdb024915f23f03f972b33744309d4608b Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 28 Oct 2024 14:13:28 +0000 Subject: [PATCH] mm: add PTE_MARKER_GUARD PTE marker Add a new PTE marker that results in any access causing the accessing process to segfault. This is preferable to PTE_MARKER_POISONED, which results in the same handling as hardware poisoned memory, and is thus undesirable for cases where we simply wish to 'soft' poison a range. This is in preparation for implementing the ability to specify guard pages at the page table level, i.e. ranges that, when accessed, should cause process termination. Additionally, rename zap_drop_file_uffd_wp() to zap_drop_markers() - the function checks the ZAP_FLAG_DROP_MARKER flag so naming it for this single purpose was simply incorrect. We then reuse the same logic to determine whether a zap should clear a guard entry - this should only be performed on teardown and never on MADV_DONTNEED or MADV_FREE. We additionally add a WARN_ON_ONCE() in hugetlb logic should a guard marker be encountered there, as we explicitly do not support this operation and this should not occur. Link: https://lkml.kernel.org/r/f47f3d5acca2dcf9bbf655b6d33f3dc713e4a4a0.1730123433.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Acked-by: Vlastimil Babka Suggested-by: Vlastimil Babka Suggested-by: Jann Horn Suggested-by: David Hildenbrand Cc: Arnd Bergmann Cc: Christian Brauner Cc: Christoph Hellwig Cc: Chris Zankel Cc: Helge Deller Cc: James E.J. Bottomley Cc: Jeff Xu Cc: John Hubbard Cc: Liam R. Howlett Cc: Matthew Wilcox (Oracle) Cc: Matt Turner Cc: Max Filippov Cc: Muchun Song Cc: Paul E. McKenney Cc: Richard Henderson Cc: Shuah Khan Cc: Shuah Khan Cc: Sidhartha Kumar Cc: Suren Baghdasaryan Cc: Thomas Bogendoerfer Signed-off-by: Andrew Morton --- include/linux/mm_inline.h | 2 +- include/linux/swapops.h | 24 +++++++++++++++++++++++- mm/hugetlb.c | 4 ++++ mm/memory.c | 18 +++++++++++++++--- mm/mprotect.c | 6 ++++-- 5 files changed, 47 insertions(+), 7 deletions(-) diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 355cf46a01a6..1b6a917fffa4 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -544,7 +544,7 @@ static inline pte_marker copy_pte_marker( { pte_marker srcm = pte_marker_get(entry); /* Always copy error entries. */ - pte_marker dstm = srcm & PTE_MARKER_POISONED; + pte_marker dstm = srcm & (PTE_MARKER_POISONED | PTE_MARKER_GUARD); /* Only copy PTE markers if UFFD register matches. */ if ((srcm & PTE_MARKER_UFFD_WP) && userfaultfd_wp(dst_vma)) diff --git a/include/linux/swapops.h b/include/linux/swapops.h index cb468e418ea1..96f26e29fefe 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -426,9 +426,19 @@ typedef unsigned long pte_marker; * "Poisoned" here is meant in the very general sense of "future accesses are * invalid", instead of referring very specifically to hardware memory errors. * This marker is meant to represent any of various different causes of this. + * + * Note that, when encountered by the faulting logic, PTEs with this marker will + * result in VM_FAULT_HWPOISON and thus regardless trigger hardware memory error + * logic. */ #define PTE_MARKER_POISONED BIT(1) -#define PTE_MARKER_MASK (BIT(2) - 1) +/* + * Indicates that, on fault, this PTE will case a SIGSEGV signal to be + * sent. This means guard markers behave in effect as if the region were mapped + * PROT_NONE, rather than if they were a memory hole or equivalent. + */ +#define PTE_MARKER_GUARD BIT(2) +#define PTE_MARKER_MASK (BIT(3) - 1) static inline swp_entry_t make_pte_marker_entry(pte_marker marker) { @@ -464,6 +474,18 @@ static inline int is_poisoned_swp_entry(swp_entry_t entry) { return is_pte_marker_entry(entry) && (pte_marker_get(entry) & PTE_MARKER_POISONED); + +} + +static inline swp_entry_t make_guard_swp_entry(void) +{ + return make_pte_marker_entry(PTE_MARKER_GUARD); +} + +static inline int is_guard_swp_entry(swp_entry_t entry) +{ + return is_pte_marker_entry(entry) && + (pte_marker_get(entry) & PTE_MARKER_GUARD); } /* diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 906294ac85dc..2c8c5da0f5d3 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -6353,6 +6353,10 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, ret = VM_FAULT_HWPOISON_LARGE | VM_FAULT_SET_HINDEX(hstate_index(h)); goto out_mutex; + } else if (WARN_ON_ONCE(marker & PTE_MARKER_GUARD)) { + /* This isn't supported in hugetlb. */ + ret = VM_FAULT_SIGSEGV; + goto out_mutex; } } diff --git a/mm/memory.c b/mm/memory.c index 2d32023d4eb8..75c2dfd04f72 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1455,7 +1455,7 @@ static inline bool should_zap_folio(struct zap_details *details, return !folio_test_anon(folio); } -static inline bool zap_drop_file_uffd_wp(struct zap_details *details) +static inline bool zap_drop_markers(struct zap_details *details) { if (!details) return false; @@ -1476,7 +1476,7 @@ zap_install_uffd_wp_if_needed(struct vm_area_struct *vma, if (vma_is_anonymous(vma)) return; - if (zap_drop_file_uffd_wp(details)) + if (zap_drop_markers(details)) return; for (;;) { @@ -1671,7 +1671,15 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, * drop the marker if explicitly requested. */ if (!vma_is_anonymous(vma) && - !zap_drop_file_uffd_wp(details)) + !zap_drop_markers(details)) + continue; + } else if (is_guard_swp_entry(entry)) { + /* + * Ordinary zapping should not remove guard PTE + * markers. Only do so if we should remove PTE markers + * in general. + */ + if (!zap_drop_markers(details)) continue; } else if (is_hwpoison_entry(entry) || is_poisoned_swp_entry(entry)) { @@ -4003,6 +4011,10 @@ static vm_fault_t handle_pte_marker(struct vm_fault *vmf) if (marker & PTE_MARKER_POISONED) return VM_FAULT_HWPOISON; + /* Hitting a guard page is always a fatal condition. */ + if (marker & PTE_MARKER_GUARD) + return VM_FAULT_SIGSEGV; + if (pte_marker_entry_uffd_wp(entry)) return pte_marker_handle_uffd_wp(vmf); diff --git a/mm/mprotect.c b/mm/mprotect.c index 6f450af3252e..516b1d847e2c 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -236,9 +236,11 @@ static long change_pte_range(struct mmu_gather *tlb, } else if (is_pte_marker_entry(entry)) { /* * Ignore error swap entries unconditionally, - * because any access should sigbus anyway. + * because any access should sigbus/sigsegv + * anyway. */ - if (is_poisoned_swp_entry(entry)) + if (is_poisoned_swp_entry(entry) || + is_guard_swp_entry(entry)) continue; /* * If this is uffd-wp pte marker and we'd like