5 files changed, 555 insertions, 0 deletions
diff --git a/queue-5.10/hugetlb-unshare-some-pmds-when-splitting-vmas.patch b/queue-5.10/hugetlb-unshare-some-pmds-when-splitting-vmas.patch
new file mode 100644
index 00000000000..de3eb52642e
--- /dev/null
+++ b/queue-5.10/hugetlb-unshare-some-pmds-when-splitting-vmas.patch
@@ -0,0 +1,131 @@
+From b30c14cd61025eeea2f2e8569606cd167ba9ad2d Mon Sep 17 00:00:00 2001
+From: James Houghton <jthoughton@google.com>
+Date: Wed, 4 Jan 2023 23:19:10 +0000
+Subject: hugetlb: unshare some PMDs when splitting VMAs
+
+From: James Houghton <jthoughton@google.com>
+
+commit b30c14cd61025eeea2f2e8569606cd167ba9ad2d upstream.
+
+PMD sharing can only be done in PUD_SIZE-aligned pieces of VMAs; however,
+it is possible that HugeTLB VMAs are split without unsharing the PMDs
+first.
+
+Without this fix, it is possible to hit the uffd-wp-related WARN_ON_ONCE
+in hugetlb_change_protection [1]. The key there is that
+hugetlb_unshare_all_pmds will not attempt to unshare PMDs in
+non-PUD_SIZE-aligned sections of the VMA.
+
+It might seem ideal to unshare in hugetlb_vm_op_open, but we need to
+unshare in both the new and old VMAs, so unsharing in hugetlb_vm_op_split
+seems natural.
+
+[1]: https://lore.kernel.org/linux-mm/CADrL8HVeOkj0QH5VZZbRzybNE8CG-tEGFshnA+bG9nMgcWtBSg@mail.gmail.com/
+
+Link: https://lkml.kernel.org/r/20230104231910.1464197-1-jthoughton@google.com
+Fixes: 6dfeaff93be1 ("hugetlb/userfaultfd: unshare all pmds for hugetlbfs when register wp")
+Signed-off-by: James Houghton <jthoughton@google.com>
+Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
+Acked-by: Peter Xu <peterx@redhat.com>
+Cc: Axel Rasmussen <axelrasmussen@google.com>
+Cc: Muchun Song <songmuchun@bytedance.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+[backport notes: I believe the "Fixes" tag is somewhat wrong - kernels
+before that commit already had an adjust_range_if_pmd_sharing_possible()
+that assumes that shared PMDs can't straddle page table boundaries.
+huge_pmd_unshare() takes different parameter type]
+Signed-off-by: Jann Horn <jannh@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/hugetlb.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 65 insertions(+)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -96,6 +96,8 @@ static inline void ClearPageHugeFreed(st
+
+ /* Forward declaration */
+ static int hugetlb_acct_memory(struct hstate *h, long delta);
++static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
++		unsigned long start, unsigned long end);
+
+ static inline void unlock_or_release_subpool(struct hugepage_subpool *spool)
+ {
+@@ -3697,6 +3699,25 @@ static int hugetlb_vm_op_split(struct vm
+ {
+ 	if (addr & ~(huge_page_mask(hstate_vma(vma))))
+ 		return -EINVAL;
++
++	/*
++	 * PMD sharing is only possible for PUD_SIZE-aligned address ranges
++	 * in HugeTLB VMAs. If we will lose PUD_SIZE alignment due to this
++	 * split, unshare PMDs in the PUD_SIZE interval surrounding addr now.
++	 */
++	if (addr & ~PUD_MASK) {
++		/*
++		 * hugetlb_vm_op_split is called right before we attempt to
++		 * split the VMA. We will need to unshare PMDs in the old and
++		 * new VMAs, so let's unshare before we split.
++		 */
++		unsigned long floor = addr & PUD_MASK;
++		unsigned long ceil = floor + PUD_SIZE;
++
++		if (floor >= vma->vm_start && ceil <= vma->vm_end)
++			hugetlb_unshare_pmds(vma, floor, ceil);
++	}
++
+ 	return 0;
+ }
+
+@@ -5706,6 +5727,50 @@ void move_hugetlb_state(struct page *old
+ 	}
+ }
+
++static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
++				unsigned long start,
++				unsigned long end)
++{
++	struct hstate *h = hstate_vma(vma);
++	unsigned long sz = huge_page_size(h);
++	struct mm_struct *mm = vma->vm_mm;
++	struct mmu_notifier_range range;
++	unsigned long address;
++	spinlock_t *ptl;
++	pte_t *ptep;
++
++	if (!(vma->vm_flags & VM_MAYSHARE))
++		return;
++
++	if (start >= end)
++		return;
++
++	flush_cache_range(vma, start, end);
++	/*
++	 * No need to call adjust_range_if_pmd_sharing_possible(), because
++	 * we have already done the PUD_SIZE alignment.
++	 */
++	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm,
++				start, end);
++	mmu_notifier_invalidate_range_start(&range);
++	i_mmap_lock_write(vma->vm_file->f_mapping);
++	for (address = start; address < end; address += PUD_SIZE) {
++		ptep = huge_pte_offset(mm, address, sz);
++		if (!ptep)
++			continue;
++		ptl = huge_pte_lock(h, mm, ptep);
++		huge_pmd_unshare(mm, vma, &address, ptep);
++		spin_unlock(ptl);
++	}
++	flush_hugetlb_tlb_range(vma, start, end);
++	i_mmap_unlock_write(vma->vm_file->f_mapping);
++	/*
++	 * No need to call mmu_notifier_invalidate_range(), see
++	 * Documentation/mm/mmu_notifier.rst.
++	 */
++	mmu_notifier_invalidate_range_end(&range);
++}
++
+ #ifdef CONFIG_CMA
+ static bool cma_reserve_called __initdata;
+
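For context on the patch above: PMD sharing only operates on PUD_SIZE-aligned, PUD_SIZE-sized pieces of a shared hugetlb mapping, while an mprotect() or munmap() on a sub-range splits the VMA at an address that need not be PUD_SIZE-aligned. The standalone C sketch below only illustrates how such a non-aligned split is produced from userspace; it assumes x86-64 with 2 MiB hugepages, a 1 GiB PUD_SIZE and enough pages reserved via /proc/sys/vm/nr_hugepages, and it is not the reproducer referenced in the changelog.

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

#define PMD_SZ (2UL << 20)	/* 2 MiB hugepage (assumption: x86-64 defaults) */
#define PUD_SZ (1UL << 30)	/* 1 GiB: the granularity at which PMD sharing works */

int main(void)
{
	/*
	 * A MAP_SHARED hugetlb mapping spanning a full PUD entry is a
	 * candidate for PMD sharing once another mm maps the same file
	 * at a compatible address.
	 */
	char *p = mmap(NULL, PUD_SZ, PROT_READ | PROT_WRITE,
		       MAP_SHARED | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	memset(p, 0x41, PMD_SZ);	/* populate at least one hugepage */

	/*
	 * mprotect() on a sub-range splits the VMA at p + PMD_SZ, which is
	 * not PUD_SIZE-aligned; this is the situation in which the kernel
	 * has to unshare PMDs in the surrounding PUD_SIZE interval first.
	 */
	if (mprotect(p, PMD_SZ, PROT_READ))
		perror("mprotect");

	munmap(p, PUD_SZ);
	return 0;
}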
diff --git a/queue-5.10/mm-hugetlb-fix-huge_pmd_unshare-vs-gup-fast-race.patch b/queue-5.10/mm-hugetlb-fix-huge_pmd_unshare-vs-gup-fast-race.patch
new file mode 100644
index 00000000000..d54d5bd1c81
--- /dev/null
+++ b/queue-5.10/mm-hugetlb-fix-huge_pmd_unshare-vs-gup-fast-race.patch
@@ -0,0 +1,55 @@
+From 1013af4f585fccc4d3e5c5824d174de2257f7d6d Mon Sep 17 00:00:00 2001
+From: Jann Horn <jannh@google.com>
+Date: Tue, 27 May 2025 23:23:54 +0200
+Subject: mm/hugetlb: fix huge_pmd_unshare() vs GUP-fast race
+
+From: Jann Horn <jannh@google.com>
+
+commit 1013af4f585fccc4d3e5c5824d174de2257f7d6d upstream.
+
+huge_pmd_unshare() drops a reference on a page table that may have
+previously been shared across processes, potentially turning it into a
+normal page table used in another process in which unrelated VMAs can
+afterwards be installed.
+
+If this happens in the middle of a concurrent gup_fast(), gup_fast() could
+end up walking the page tables of another process. While I don't see any
+way in which that immediately leads to kernel memory corruption, it is
+really weird and unexpected.
+
+Fix it with an explicit broadcast IPI through tlb_remove_table_sync_one(),
+just like we do in khugepaged when removing page tables for a THP
+collapse.
+
+Link: https://lkml.kernel.org/r/20250528-hugetlb-fixes-splitrace-v2-2-1329349bad1a@google.com
+Link: https://lkml.kernel.org/r/20250527-hugetlb-fixes-splitrace-v1-2-f4136f5ec58a@google.com
+Fixes: 39dde65c9940 ("[PATCH] shared page table for hugetlb page")
+Signed-off-by: Jann Horn <jannh@google.com>
+Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+Cc: Liam Howlett <liam.howlett@oracle.com>
+Cc: Muchun Song <muchun.song@linux.dev>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
+ mm/hugetlb.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -5488,6 +5488,13 @@ int huge_pmd_unshare(struct mm_struct *m
+ 		return 0;
+
+ 	pud_clear(pud);
++	/*
++	 * Once our caller drops the rmap lock, some other process might be
++	 * using this page table as a normal, non-hugetlb page table.
++	 * Wait for pending gup_fast() in other threads to finish before letting
++	 * that happen.
++	 */
++	tlb_remove_table_sync_one();
+ 	atomic_dec(&virt_to_page(ptep)->pt_share_count);
+ 	mm_dec_nr_pmds(mm);
+ 	/*
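To see why the broadcast IPI issued by tlb_remove_table_sync_one() is enough here: gup_fast() walks page tables locklessly with local interrupts disabled, so once every CPU has acknowledged the IPI, any lockless walk that could still see the just-unshared page table has finished. The program below is a hypothetical userspace model of that ordering (pthreads plus C11 atomics standing in for CPUs, interrupt disabling and the IPI); it is a sketch of the synchronization pattern, not kernel code.

/* Build: cc -std=c11 -pthread gup_sync_model.c */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define NR_WALKERS 4

static _Atomic(int *) shared_table;	/* stands in for the shared PMD page table */
static atomic_bool walking[NR_WALKERS];	/* "interrupts disabled" flag per walker */
static atomic_bool stop_walkers;

/* Analogue of gup_fast(): walk the table locklessly, relying only on the
 * freeing side waiting for the walking flag before reusing the memory. */
static void *walker(void *arg)
{
	int id = (int)(intptr_t)arg;

	while (!atomic_load(&stop_walkers)) {
		atomic_store(&walking[id], true);
		int *table = atomic_load(&shared_table);
		if (table)
			(void)*(volatile int *)table;	/* safe: see unshare() below */
		atomic_store(&walking[id], false);
	}
	return NULL;
}

/* Analogue of huge_pmd_unshare(): disconnect the table, then wait for all
 * in-flight lockless walks to drain -- the role tlb_remove_table_sync_one()
 * plays with its broadcast IPI -- before the page table may be reused. */
static void unshare(int *table)
{
	atomic_store(&shared_table, NULL);	/* pud_clear()              */
	for (int i = 0; i < NR_WALKERS; i++)	/* "IPI every CPU and wait" */
		while (atomic_load(&walking[i]))
			;
	free(table);				/* reuse is safe now        */
}

int main(void)
{
	pthread_t tid[NR_WALKERS];
	int *table = calloc(512, sizeof(int));

	atomic_store(&shared_table, table);
	for (intptr_t i = 0; i < NR_WALKERS; i++)
		pthread_create(&tid[i], NULL, walker, (void *)i);

	usleep(10 * 1000);
	unshare(table);

	atomic_store(&stop_walkers, true);
	for (int i = 0; i < NR_WALKERS; i++)
		pthread_join(tid[i], NULL);
	puts("table reused only after all lockless walkers were done");
	return 0;
}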
diff --git a/queue-5.10/mm-hugetlb-independent-pmd-page-table-shared-count.patch b/queue-5.10/mm-hugetlb-independent-pmd-page-table-shared-count.patch
new file mode 100644
index 00000000000..3beeede6275
--- /dev/null
+++ b/queue-5.10/mm-hugetlb-independent-pmd-page-table-shared-count.patch
@@ -0,0 +1,162 @@
+From 59d9094df3d79443937add8700b2ef1a866b1081 Mon Sep 17 00:00:00 2001
+From: Liu Shixin <liushixin2@huawei.com>
+Date: Mon, 16 Dec 2024 15:11:47 +0800
+Subject: mm: hugetlb: independent PMD page table shared count
+
+From: Liu Shixin <liushixin2@huawei.com>
+
+commit 59d9094df3d79443937add8700b2ef1a866b1081 upstream.
+
+The folio refcount may be increased unexpectly through try_get_folio() by
+caller such as split_huge_pages. In huge_pmd_unshare(), we use refcount
+to check whether a pmd page table is shared. The check is incorrect if
+the refcount is increased by the above caller, and this can cause the page
+table leaked:
+
+ BUG: Bad page state in process sh pfn:109324
+ page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x66 pfn:0x109324
+ flags: 0x17ffff800000000(node=0|zone=2|lastcpupid=0xfffff)
+ page_type: f2(table)
+ raw: 017ffff800000000 0000000000000000 0000000000000000 0000000000000000
+ raw: 0000000000000066 0000000000000000 00000000f2000000 0000000000000000
+ page dumped because: nonzero mapcount
+ ...
+ CPU: 31 UID: 0 PID: 7515 Comm: sh Kdump: loaded Tainted: G B 6.13.0-rc2master+ #7
+ Tainted: [B]=BAD_PAGE
+ Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015
+ Call trace:
+  show_stack+0x20/0x38 (C)
+  dump_stack_lvl+0x80/0xf8
+  dump_stack+0x18/0x28
+  bad_page+0x8c/0x130
+  free_page_is_bad_report+0xa4/0xb0
+  free_unref_page+0x3cc/0x620
+  __folio_put+0xf4/0x158
+  split_huge_pages_all+0x1e0/0x3e8
+  split_huge_pages_write+0x25c/0x2d8
+  full_proxy_write+0x64/0xd8
+  vfs_write+0xcc/0x280
+  ksys_write+0x70/0x110
+  __arm64_sys_write+0x24/0x38
+  invoke_syscall+0x50/0x120
+  el0_svc_common.constprop.0+0xc8/0xf0
+  do_el0_svc+0x24/0x38
+  el0_svc+0x34/0x128
+  el0t_64_sync_handler+0xc8/0xd0
+  el0t_64_sync+0x190/0x198
+
+The issue may be triggered by damon, offline_page, page_idle, etc, which
+will increase the refcount of page table.
+
+1. The page table itself will be discarded after reporting the
+   "nonzero mapcount".
+
+2. The HugeTLB page mapped by the page table miss freeing since we
+   treat the page table as shared and a shared page table will not be
+   unmapped.
+
+Fix it by introducing independent PMD page table shared count. As
+described by comment, pt_index/pt_mm/pt_frag_refcount are used for s390
+gmap, x86 pgds and powerpc, pt_share_count is used for x86/arm64/riscv
+pmds, so we can reuse the field as pt_share_count.
+
+Link: https://lkml.kernel.org/r/20241216071147.3984217-1-liushixin2@huawei.com
+Fixes: 39dde65c9940 ("[PATCH] shared page table for hugetlb page")
+Signed-off-by: Liu Shixin <liushixin2@huawei.com>
+Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
+Cc: Ken Chen <kenneth.w.chen@intel.com>
+Cc: Muchun Song <muchun.song@linux.dev>
+Cc: Nanyong Sun <sunnanyong@huawei.com>
+Cc: Jane Chu <jane.chu@oracle.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+[backport note: struct ptdesc did not exist yet, stuff it equivalently
+into struct page instead]
+Signed-off-by: Jann Horn <jannh@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
+ include/linux/mm.h       |  3 +++
+ include/linux/mm_types.h |  3 +++
+ mm/hugetlb.c             | 18 ++++++++----------
+ 3 files changed, 14 insertions(+), 10 deletions(-)
+
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -2318,6 +2318,9 @@ static inline bool pgtable_pmd_page_ctor
+ 	if (!pmd_ptlock_init(page))
+ 		return false;
+ 	__SetPageTable(page);
++#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
++	atomic_set(&page->pt_share_count, 0);
++#endif
+ 	inc_zone_page_state(page, NR_PAGETABLE);
+ 	return true;
+ }
+--- a/include/linux/mm_types.h
++++ b/include/linux/mm_types.h
+@@ -151,6 +151,9 @@ struct page {
+ 		union {
+ 			struct mm_struct *pt_mm; /* x86 pgds only */
+ 			atomic_t pt_frag_refcount; /* powerpc */
++#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
++			atomic_t pt_share_count;
++#endif
+ 		};
+ #if ALLOC_SPLIT_PTLOCKS
+ 		spinlock_t *ptl;
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -5442,7 +5442,7 @@ pte_t *huge_pmd_share(struct mm_struct *
+ 		spte = huge_pte_offset(svma->vm_mm, saddr,
+ 				vma_mmu_pagesize(svma));
+ 		if (spte) {
+-			get_page(virt_to_page(spte));
++			atomic_inc(&virt_to_page(spte)->pt_share_count);
+ 			break;
+ 		}
+ 	}
+@@ -5457,7 +5457,7 @@ pte_t *huge_pmd_share(struct mm_struct *
+ 			(pmd_t *)((unsigned long)spte & PAGE_MASK));
+ 		mm_inc_nr_pmds(mm);
+ 	} else {
+-		put_page(virt_to_page(spte));
++		atomic_dec(&virt_to_page(spte)->pt_share_count);
+ 	}
+ 	spin_unlock(ptl);
+ out:
+@@ -5468,11 +5468,7 @@ out:
+ /*
+  * unmap huge page backed by shared pte.
+  *
+- * Hugetlb pte page is ref counted at the time of mapping. If pte is shared
+- * indicated by page_count > 1, unmap is achieved by clearing pud and
+- * decrementing the ref count. If count == 1, the pte page is not shared.
+- *
+- * Called with page table lock held and i_mmap_rwsem held in write mode.
++ * Called with page table lock held.
+  *
+  * returns: 1 successfully unmapped a shared pte page
+  *	    0 the underlying pte page is not shared, or it is the last user
+@@ -5480,17 +5476,19 @@ out:
+ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
+ 					unsigned long *addr, pte_t *ptep)
+ {
++	unsigned long sz = huge_page_size(hstate_vma(vma));
+ 	pgd_t *pgd = pgd_offset(mm, *addr);
+ 	p4d_t *p4d = p4d_offset(pgd, *addr);
+ 	pud_t *pud = pud_offset(p4d, *addr);
+
+ 	i_mmap_assert_write_locked(vma->vm_file->f_mapping);
+-	BUG_ON(page_count(virt_to_page(ptep)) == 0);
+-	if (page_count(virt_to_page(ptep)) == 1)
++	if (sz != PMD_SIZE)
++		return 0;
++	if (!atomic_read(&virt_to_page(ptep)->pt_share_count))
+ 		return 0;
+
+ 	pud_clear(pud);
+-	put_page(virt_to_page(ptep));
++	atomic_dec(&virt_to_page(ptep)->pt_share_count);
+ 	mm_dec_nr_pmds(mm);
+ 	/*
+ 	 * This update of passed address optimizes loops sequentially
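The core of the fix above is a bookkeeping change: "is this PMD page table shared?" must not be answered from the generic page refcount, because unrelated code (try_get_folio() via split_huge_pages, damon, page_idle, ...) may pin that page at any time. The minimal model below (hypothetical userspace C, not the real kernel structures) contrasts the old refcount heuristic with a dedicated pt_share_count and shows how a speculative pin makes the former report a false "shared".

#include <stdatomic.h>
#include <stdio.h>

/* Simplified model of the bookkeeping for a PMD page table page. */
struct pt_page {
	atomic_int refcount;		/* generic page refcount: anyone may pin it   */
	atomic_int pt_share_count;	/* only the share/unshare paths touch this    */
};

/* Old scheme: "shared" was inferred from refcount > 1. */
static int is_shared_by_refcount(const struct pt_page *pt)
{
	return atomic_load(&pt->refcount) > 1;
}

/* New scheme: sharing has its own counter. */
static int is_shared_by_share_count(const struct pt_page *pt)
{
	return atomic_load(&pt->pt_share_count) != 0;
}

int main(void)
{
	struct pt_page pt = {
		.refcount = 1,		/* allocation reference          */
		.pt_share_count = 0,	/* not shared with any other mm  */
	};

	/*
	 * A speculative pin (e.g. try_get_folio() from split_huge_pages,
	 * damon or page_idle) bumps the refcount of the page table page
	 * even though no second mm shares it.
	 */
	atomic_fetch_add(&pt.refcount, 1);

	printf("refcount heuristic says shared: %d (wrong)\n",
	       is_shared_by_refcount(&pt));
	printf("pt_share_count says shared:     %d (right)\n",
	       is_shared_by_share_count(&pt));
	return 0;
}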
diff --git a/queue-5.10/mm-hugetlb-unshare-page-tables-during-vma-split-not-before.patch b/queue-5.10/mm-hugetlb-unshare-page-tables-during-vma-split-not-before.patch
new file mode 100644
index 00000000000..9a7ad113835
--- /dev/null
+++ b/queue-5.10/mm-hugetlb-unshare-page-tables-during-vma-split-not-before.patch
@@ -0,0 +1,203 @@
+From 081056dc00a27bccb55ccc3c6f230a3d5fd3f7e0 Mon Sep 17 00:00:00 2001
+From: Jann Horn <jannh@google.com>
+Date: Tue, 27 May 2025 23:23:53 +0200
+Subject: mm/hugetlb: unshare page tables during VMA split, not before
+
+From: Jann Horn <jannh@google.com>
+
+commit 081056dc00a27bccb55ccc3c6f230a3d5fd3f7e0 upstream.
+
+Currently, __split_vma() triggers hugetlb page table unsharing through
+vm_ops->may_split(). This happens before the VMA lock and rmap locks are
+taken - which is too early, it allows racing VMA-locked page faults in our
+process and racing rmap walks from other processes to cause page tables to
+be shared again before we actually perform the split.
+
+Fix it by explicitly calling into the hugetlb unshare logic from
+__split_vma() in the same place where THP splitting also happens. At that
+point, both the VMA and the rmap(s) are write-locked.
+
+An annoying detail is that we can now call into the helper
+hugetlb_unshare_pmds() from two different locking contexts:
+
+1. from hugetlb_split(), holding:
+    - mmap lock (exclusively)
+    - VMA lock
+    - file rmap lock (exclusively)
+2. hugetlb_unshare_all_pmds(), which I think is designed to be able to
+   call us with only the mmap lock held (in shared mode), but currently
+   only runs while holding mmap lock (exclusively) and VMA lock
+
+Backporting note:
+This commit fixes a racy protection that was introduced in commit
+b30c14cd6102 ("hugetlb: unshare some PMDs when splitting VMAs"); that
+commit claimed to fix an issue introduced in 5.13, but it should actually
+also go all the way back.
+
+[jannh@google.com: v2]
+ Link: https://lkml.kernel.org/r/20250528-hugetlb-fixes-splitrace-v2-1-1329349bad1a@google.com
+Link: https://lkml.kernel.org/r/20250528-hugetlb-fixes-splitrace-v2-0-1329349bad1a@google.com
+Link: https://lkml.kernel.org/r/20250527-hugetlb-fixes-splitrace-v1-1-f4136f5ec58a@google.com
+Fixes: 39dde65c9940 ("[PATCH] shared page table for hugetlb page")
+Signed-off-by: Jann Horn <jannh@google.com>
+Cc: Liam Howlett <liam.howlett@oracle.com>
+Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+Reviewed-by: Oscar Salvador <osalvador@suse.de>
+Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: <stable@vger.kernel.org> [b30c14cd6102: hugetlb: unshare some PMDs when splitting VMAs]
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+[stable backport: code got moved around, VMA splitting is in
+__vma_adjust, hugetlb lock wasn't used back then]
+Signed-off-by: Jann Horn <jannh@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
+ include/linux/hugetlb.h |  6 +++++
+ mm/hugetlb.c            | 53 ++++++++++++++++++++++++++++++++++++------------
+ mm/mmap.c               |  8 +++++++
+ 3 files changed, 54 insertions(+), 13 deletions(-)
+
+--- a/include/linux/hugetlb.h
++++ b/include/linux/hugetlb.h
+@@ -188,6 +188,8 @@ unsigned long hugetlb_change_protection(
+ 		unsigned long address, unsigned long end, pgprot_t newprot);
+
+ bool is_hugetlb_entry_migration(pte_t pte);
++void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
++void hugetlb_split(struct vm_area_struct *vma, unsigned long addr);
+
+ #else /* !CONFIG_HUGETLB_PAGE */
+
+@@ -369,6 +371,10 @@ static inline vm_fault_t hugetlb_fault(s
+ 	return 0;
+ }
+
++static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { }
++
++static inline void hugetlb_split(struct vm_area_struct *vma, unsigned long addr) {}
++
+ #endif /* !CONFIG_HUGETLB_PAGE */
+ /*
+  * hugepages at page global directory. If arch support
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -97,7 +97,7 @@ static inline void ClearPageHugeFreed(st
+ /* Forward declaration */
+ static int hugetlb_acct_memory(struct hstate *h, long delta);
+ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
+-		unsigned long start, unsigned long end);
++		unsigned long start, unsigned long end, bool take_locks);
+
+ static inline void unlock_or_release_subpool(struct hugepage_subpool *spool)
+ {
+@@ -3699,26 +3699,40 @@ static int hugetlb_vm_op_split(struct vm
+ {
+ 	if (addr & ~(huge_page_mask(hstate_vma(vma))))
+ 		return -EINVAL;
++	return 0;
++}
+
++void hugetlb_split(struct vm_area_struct *vma, unsigned long addr)
++{
+ 	/*
+ 	 * PMD sharing is only possible for PUD_SIZE-aligned address ranges
+ 	 * in HugeTLB VMAs. If we will lose PUD_SIZE alignment due to this
+ 	 * split, unshare PMDs in the PUD_SIZE interval surrounding addr now.
++	 * This function is called in the middle of a VMA split operation, with
++	 * MM, VMA and rmap all write-locked to prevent concurrent page table
++	 * walks (except hardware and gup_fast()).
+ 	 */
++	mmap_assert_write_locked(vma->vm_mm);
++	i_mmap_assert_write_locked(vma->vm_file->f_mapping);
++
+ 	if (addr & ~PUD_MASK) {
+-		/*
+-		 * hugetlb_vm_op_split is called right before we attempt to
+-		 * split the VMA. We will need to unshare PMDs in the old and
+-		 * new VMAs, so let's unshare before we split.
+-		 */
+ 		unsigned long floor = addr & PUD_MASK;
+ 		unsigned long ceil = floor + PUD_SIZE;
+
+-		if (floor >= vma->vm_start && ceil <= vma->vm_end)
+-			hugetlb_unshare_pmds(vma, floor, ceil);
++		if (floor >= vma->vm_start && ceil <= vma->vm_end) {
++			/*
++			 * Locking:
++			 * Use take_locks=false here.
++			 * The file rmap lock is already held.
++			 * The hugetlb VMA lock can't be taken when we already
++			 * hold the file rmap lock, and we don't need it because
++			 * its purpose is to synchronize against concurrent page
++			 * table walks, which are not possible thanks to the
++			 * locks held by our caller.
++			 */
++			hugetlb_unshare_pmds(vma, floor, ceil, /* take_locks = */ false);
++		}
+ 	}
+-
+-	return 0;
+ }
+
+ static unsigned long hugetlb_vm_op_pagesize(struct vm_area_struct *vma)
+@@ -5727,9 +5741,16 @@ void move_hugetlb_state(struct page *old
+ 	}
+ }
+
++/*
++ * If @take_locks is false, the caller must ensure that no concurrent page table
++ * access can happen (except for gup_fast() and hardware page walks).
++ * If @take_locks is true, we take the hugetlb VMA lock (to lock out things like
++ * concurrent page fault handling) and the file rmap lock.
++ */
+ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
+ 				unsigned long start,
+-				unsigned long end)
++				unsigned long end,
++				bool take_locks)
+ {
+ 	struct hstate *h = hstate_vma(vma);
+ 	unsigned long sz = huge_page_size(h);
+@@ -5753,7 +5774,11 @@ static void hugetlb_unshare_pmds(struct
+ 	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm,
+ 				start, end);
+ 	mmu_notifier_invalidate_range_start(&range);
+-	i_mmap_lock_write(vma->vm_file->f_mapping);
++	if (take_locks) {
++		i_mmap_lock_write(vma->vm_file->f_mapping);
++	} else {
++		i_mmap_assert_write_locked(vma->vm_file->f_mapping);
++	}
+ 	for (address = start; address < end; address += PUD_SIZE) {
+ 		ptep = huge_pte_offset(mm, address, sz);
+ 		if (!ptep)
+@@ -5763,7 +5788,9 @@ static void hugetlb_unshare_pmds(struct
+ 		spin_unlock(ptl);
+ 	}
+ 	flush_hugetlb_tlb_range(vma, start, end);
+-	i_mmap_unlock_write(vma->vm_file->f_mapping);
++	if (take_locks) {
++		i_mmap_unlock_write(vma->vm_file->f_mapping);
++	}
+ 	/*
+ 	 * No need to call mmu_notifier_invalidate_range(), see
+ 	 * Documentation/mm/mmu_notifier.rst.
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -832,7 +832,15 @@ int __vma_adjust(struct vm_area_struct *
+ 		}
+ 	}
+ again:
++	/*
++	 * Get rid of huge pages and shared page tables straddling the split
++	 * boundary.
++	 */
+ 	vma_adjust_trans_huge(orig_vma, start, end, adjust_next);
++	if (is_vm_hugetlb_page(orig_vma)) {
++		hugetlb_split(orig_vma, start);
++		hugetlb_split(orig_vma, end);
++	}
+
+ 	if (file) {
+ 		mapping = file->f_mapping;
diff --git a/queue-5.10/series b/queue-5.10/series
index 157d0873877..c83948c30a7 100644
--- a/queue-5.10/series
+++ b/queue-5.10/series
@@ -296,3 +296,7 @@ arm64-restrict-pagetable-teardown-to-avoid-false-warning.patch
 alsa-usb-audio-rename-alsa-kcontrol-pcm-and-pcm1-for-the-ktmicro-sound-card.patch
 alsa-hda-intel-add-thinkpad-e15-to-pm-deny-list.patch
 alsa-hda-realtek-enable-headset-mic-on-latitude-5420-rugged.patch
+hugetlb-unshare-some-pmds-when-splitting-vmas.patch
+mm-hugetlb-unshare-page-tables-during-vma-split-not-before.patch
+mm-hugetlb-independent-pmd-page-table-shared-count.patch
+mm-hugetlb-fix-huge_pmd_unshare-vs-gup-fast-race.patch
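A note on the take_locks parameter introduced in mm-hugetlb-unshare-page-tables-during-vma-split-not-before.patch: hugetlb_unshare_pmds() is now reachable from a context that already holds the file rmap lock (the VMA split path via hugetlb_split()) and from one that does not (hugetlb_unshare_all_pmds()), so the helper either takes the lock itself or only asserts that the caller holds it. The program below is a hypothetical userspace analogue of that pattern, with a pthread mutex and a crude "held" flag standing in for the rmap lock and lockdep; it is a sketch of the design choice, not kernel code.

#include <assert.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* 'lock' stands in for the file rmap lock; 'lock_held' is a crude stand-in
 * for what a lockdep assertion checks in the kernel. */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static bool lock_held;
static int shared_pmds = 4;

/* Helper callable from two locking contexts, mirroring the take_locks
 * parameter of hugetlb_unshare_pmds(): either acquire the lock here, or
 * require that the caller already holds it. */
static void unshare_pmds(bool take_locks)
{
	if (take_locks) {
		pthread_mutex_lock(&lock);
		lock_held = true;
	} else {
		assert(lock_held);	/* caller must already hold the lock */
	}

	shared_pmds = 0;		/* the actual unsharing work */

	if (take_locks) {
		lock_held = false;
		pthread_mutex_unlock(&lock);
	}
}

/* Analogue of hugetlb_split(): the split path already holds the lock and
 * keeps holding it across unshare + split, so nothing can re-share the
 * page tables in between. */
static void split_vma(void)
{
	pthread_mutex_lock(&lock);
	lock_held = true;
	unshare_pmds(/* take_locks = */ false);
	/* ... perform the actual split while still holding the lock ... */
	lock_held = false;
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	split_vma();				/* caller-held-lock context */
	unshare_pmds(/* take_locks = */ true);	/* standalone context       */
	printf("shared_pmds = %d\n", shared_pmds);
	return 0;
}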