about summary refs log tree commit diff stats
diff options
-rw-r--r-- queue-5.10/hugetlb-unshare-some-pmds-when-splitting-vmas.patch 131
-rw-r--r-- queue-5.10/mm-hugetlb-fix-huge_pmd_unshare-vs-gup-fast-race.patch 55
-rw-r--r-- queue-5.10/mm-hugetlb-independent-pmd-page-table-shared-count.patch 162
-rw-r--r-- queue-5.10/mm-hugetlb-unshare-page-tables-during-vma-split-not-before.patch 203
-rw-r--r-- queue-5.10/series 4
5 files changed, 555 insertions, 0 deletions
diff --git a/queue-5.10/hugetlb-unshare-some-pmds-when-splitting-vmas.patch b/queue-5.10/hugetlb-unshare-some-pmds-when-splitting-vmas.patch
new file mode 100644
index 00000000000..de3eb52642e
--- /dev/null
+++ b/queue-5.10/hugetlb-unshare-some-pmds-when-splitting-vmas.patch
@@ -0,0 +1,131 @@
+From b30c14cd61025eeea2f2e8569606cd167ba9ad2d Mon Sep 17 00:00:00 2001
+From: James Houghton <jthoughton@google.com>
+Date: Wed, 4 Jan 2023 23:19:10 +0000
+Subject: hugetlb: unshare some PMDs when splitting VMAs
+
+From: James Houghton <jthoughton@google.com>
+
+commit b30c14cd61025eeea2f2e8569606cd167ba9ad2d upstream.
+
+PMD sharing can only be done in PUD_SIZE-aligned pieces of VMAs; however,
+it is possible that HugeTLB VMAs are split without unsharing the PMDs
+first.
+
+Without this fix, it is possible to hit the uffd-wp-related WARN_ON_ONCE
+in hugetlb_change_protection [1]. The key there is that
+hugetlb_unshare_all_pmds will not attempt to unshare PMDs in
+non-PUD_SIZE-aligned sections of the VMA.
+
+It might seem ideal to unshare in hugetlb_vm_op_open, but we need to
+unshare in both the new and old VMAs, so unsharing in hugetlb_vm_op_split
+seems natural.
+
+[1]: https://lore.kernel.org/linux-mm/CADrL8HVeOkj0QH5VZZbRzybNE8CG-tEGFshnA+bG9nMgcWtBSg@mail.gmail.com/
+
+Link: https://lkml.kernel.org/r/20230104231910.1464197-1-jthoughton@google.com
+Fixes: 6dfeaff93be1 ("hugetlb/userfaultfd: unshare all pmds for hugetlbfs when register wp")
+Signed-off-by: James Houghton <jthoughton@google.com>
+Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
+Acked-by: Peter Xu <peterx@redhat.com>
+Cc: Axel Rasmussen <axelrasmussen@google.com>
+Cc: Muchun Song <songmuchun@bytedance.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+[backport notes: I believe the "Fixes" tag is somewhat wrong - kernels
+before that commit already had an adjust_range_if_pmd_sharing_possible()
+that assumes that shared PMDs can't straddle page table boundaries.
+Also, huge_pmd_unshare() takes a different parameter type.]
+Signed-off-by: Jann Horn <jannh@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/hugetlb.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 65 insertions(+)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -96,6 +96,8 @@ static inline void ClearPageHugeFreed(st
+
+ /* Forward declaration */
+ static int hugetlb_acct_memory(struct hstate *h, long delta);
++static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
++ unsigned long start, unsigned long end);
+
+ static inline void unlock_or_release_subpool(struct hugepage_subpool *spool)
+ {
+@@ -3697,6 +3699,25 @@ static int hugetlb_vm_op_split(struct vm
+ {
+ if (addr & ~(huge_page_mask(hstate_vma(vma))))
+ return -EINVAL;
++
++ /*
++ * PMD sharing is only possible for PUD_SIZE-aligned address ranges
++ * in HugeTLB VMAs. If we will lose PUD_SIZE alignment due to this
++ * split, unshare PMDs in the PUD_SIZE interval surrounding addr now.
++ */
++ if (addr & ~PUD_MASK) {
++ /*
++ * hugetlb_vm_op_split is called right before we attempt to
++ * split the VMA. We will need to unshare PMDs in the old and
++ * new VMAs, so let's unshare before we split.
++ */
++ unsigned long floor = addr & PUD_MASK;
++ unsigned long ceil = floor + PUD_SIZE;
++
++ if (floor >= vma->vm_start && ceil <= vma->vm_end)
++ hugetlb_unshare_pmds(vma, floor, ceil);
++ }
++
+ return 0;
+ }
+
+@@ -5706,6 +5727,50 @@ void move_hugetlb_state(struct page *old
+ }
+ }
+
++static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
++ unsigned long start,
++ unsigned long end)
++{
++ struct hstate *h = hstate_vma(vma);
++ unsigned long sz = huge_page_size(h);
++ struct mm_struct *mm = vma->vm_mm;
++ struct mmu_notifier_range range;
++ unsigned long address;
++ spinlock_t *ptl;
++ pte_t *ptep;
++
++ if (!(vma->vm_flags & VM_MAYSHARE))
++ return;
++
++ if (start >= end)
++ return;
++
++ flush_cache_range(vma, start, end);
++ /*
++ * No need to call adjust_range_if_pmd_sharing_possible(), because
++ * we have already done the PUD_SIZE alignment.
++ */
++ mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm,
++ start, end);
++ mmu_notifier_invalidate_range_start(&range);
++ i_mmap_lock_write(vma->vm_file->f_mapping);
++ for (address = start; address < end; address += PUD_SIZE) {
++ ptep = huge_pte_offset(mm, address, sz);
++ if (!ptep)
++ continue;
++ ptl = huge_pte_lock(h, mm, ptep);
++ huge_pmd_unshare(mm, vma, &address, ptep);
++ spin_unlock(ptl);
++ }
++ flush_hugetlb_tlb_range(vma, start, end);
++ i_mmap_unlock_write(vma->vm_file->f_mapping);
++ /*
++ * No need to call mmu_notifier_invalidate_range(), see
++ * Documentation/mm/mmu_notifier.rst.
++ */
++ mmu_notifier_invalidate_range_end(&range);
++}
++
+ #ifdef CONFIG_CMA
+ static bool cma_reserve_called __initdata;
+
diff --git a/queue-5.10/mm-hugetlb-fix-huge_pmd_unshare-vs-gup-fast-race.patch b/queue-5.10/mm-hugetlb-fix-huge_pmd_unshare-vs-gup-fast-race.patch
new file mode 100644
index 00000000000..d54d5bd1c81
--- /dev/null
+++ b/queue-5.10/mm-hugetlb-fix-huge_pmd_unshare-vs-gup-fast-race.patch
@@ -0,0 +1,55 @@
+From 1013af4f585fccc4d3e5c5824d174de2257f7d6d Mon Sep 17 00:00:00 2001
+From: Jann Horn <jannh@google.com>
+Date: Tue, 27 May 2025 23:23:54 +0200
+Subject: mm/hugetlb: fix huge_pmd_unshare() vs GUP-fast race
+
+From: Jann Horn <jannh@google.com>
+
+commit 1013af4f585fccc4d3e5c5824d174de2257f7d6d upstream.
+
+huge_pmd_unshare() drops a reference on a page table that may have
+previously been shared across processes, potentially turning it into a
+normal page table used in another process in which unrelated VMAs can
+afterwards be installed.
+
+If this happens in the middle of a concurrent gup_fast(), gup_fast() could
+end up walking the page tables of another process. While I don't see any
+way in which that immediately leads to kernel memory corruption, it is
+really weird and unexpected.
+
+Fix it with an explicit broadcast IPI through tlb_remove_table_sync_one(),
+just like we do in khugepaged when removing page tables for a THP
+collapse.
+
+Link: https://lkml.kernel.org/r/20250528-hugetlb-fixes-splitrace-v2-2-1329349bad1a@google.com
+Link: https://lkml.kernel.org/r/20250527-hugetlb-fixes-splitrace-v1-2-f4136f5ec58a@google.com
+Fixes: 39dde65c9940 ("[PATCH] shared page table for hugetlb page")
+Signed-off-by: Jann Horn <jannh@google.com>
+Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+Cc: Liam Howlett <liam.howlett@oracle.com>
+Cc: Muchun Song <muchun.song@linux.dev>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/hugetlb.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -5488,6 +5488,13 @@ int huge_pmd_unshare(struct mm_struct *m
+ return 0;
+
+ pud_clear(pud);
++ /*
++ * Once our caller drops the rmap lock, some other process might be
++ * using this page table as a normal, non-hugetlb page table.
++ * Wait for pending gup_fast() in other threads to finish before letting
++ * that happen.
++ */
++ tlb_remove_table_sync_one();
+ atomic_dec(&virt_to_page(ptep)->pt_share_count);
+ mm_dec_nr_pmds(mm);
+ /*
diff --git a/queue-5.10/mm-hugetlb-independent-pmd-page-table-shared-count.patch b/queue-5.10/mm-hugetlb-independent-pmd-page-table-shared-count.patch
new file mode 100644
index 00000000000..3beeede6275
--- /dev/null
+++ b/queue-5.10/mm-hugetlb-independent-pmd-page-table-shared-count.patch
@@ -0,0 +1,162 @@
+From 59d9094df3d79443937add8700b2ef1a866b1081 Mon Sep 17 00:00:00 2001
+From: Liu Shixin <liushixin2@huawei.com>
+Date: Mon, 16 Dec 2024 15:11:47 +0800
+Subject: mm: hugetlb: independent PMD page table shared count
+
+From: Liu Shixin <liushixin2@huawei.com>
+
+commit 59d9094df3d79443937add8700b2ef1a866b1081 upstream.
+
+The folio refcount may be increased unexpectedly through try_get_folio() by
+callers such as split_huge_pages. In huge_pmd_unshare(), we use refcount
+to check whether a pmd page table is shared. The check is incorrect if
+the refcount is increased by the above caller, and this can cause the page
+table to be leaked:
+
+ BUG: Bad page state in process sh pfn:109324
+ page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x66 pfn:0x109324
+ flags: 0x17ffff800000000(node=0|zone=2|lastcpupid=0xfffff)
+ page_type: f2(table)
+ raw: 017ffff800000000 0000000000000000 0000000000000000 0000000000000000
+ raw: 0000000000000066 0000000000000000 00000000f2000000 0000000000000000
+ page dumped because: nonzero mapcount
+ ...
+ CPU: 31 UID: 0 PID: 7515 Comm: sh Kdump: loaded Tainted: G B 6.13.0-rc2master+ #7
+ Tainted: [B]=BAD_PAGE
+ Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015
+ Call trace:
+ show_stack+0x20/0x38 (C)
+ dump_stack_lvl+0x80/0xf8
+ dump_stack+0x18/0x28
+ bad_page+0x8c/0x130
+ free_page_is_bad_report+0xa4/0xb0
+ free_unref_page+0x3cc/0x620
+ __folio_put+0xf4/0x158
+ split_huge_pages_all+0x1e0/0x3e8
+ split_huge_pages_write+0x25c/0x2d8
+ full_proxy_write+0x64/0xd8
+ vfs_write+0xcc/0x280
+ ksys_write+0x70/0x110
+ __arm64_sys_write+0x24/0x38
+ invoke_syscall+0x50/0x120
+ el0_svc_common.constprop.0+0xc8/0xf0
+ do_el0_svc+0x24/0x38
+ el0_svc+0x34/0x128
+ el0t_64_sync_handler+0xc8/0xd0
+ el0t_64_sync+0x190/0x198
+
+The issue may be triggered by damon, offline_page, page_idle, etc., which
+will increase the refcount of the page table.
+
+1. The page table itself will be discarded after reporting the
+ "nonzero mapcount".
+
+2. The HugeTLB page mapped by the page table is never freed, since we
+ treat the page table as shared and a shared page table will not be
+ unmapped.
+
+Fix it by introducing an independent PMD page table shared count. As
+described by the comment, pt_index/pt_mm/pt_frag_refcount are used for s390
+gmap, x86 pgds and powerpc; pt_share_count is used for x86/arm64/riscv
+pmds, so we can reuse the field as pt_share_count.
+
+Link: https://lkml.kernel.org/r/20241216071147.3984217-1-liushixin2@huawei.com
+Fixes: 39dde65c9940 ("[PATCH] shared page table for hugetlb page")
+Signed-off-by: Liu Shixin <liushixin2@huawei.com>
+Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
+Cc: Ken Chen <kenneth.w.chen@intel.com>
+Cc: Muchun Song <muchun.song@linux.dev>
+Cc: Nanyong Sun <sunnanyong@huawei.com>
+Cc: Jane Chu <jane.chu@oracle.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+[backport note: struct ptdesc did not exist yet, stuff it equivalently
+into struct page instead]
+Signed-off-by: Jann Horn <jannh@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/mm.h | 3 +++
+ include/linux/mm_types.h | 3 +++
+ mm/hugetlb.c | 18 ++++++++----------
+ 3 files changed, 14 insertions(+), 10 deletions(-)
+
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -2318,6 +2318,9 @@ static inline bool pgtable_pmd_page_ctor
+ if (!pmd_ptlock_init(page))
+ return false;
+ __SetPageTable(page);
++#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
++ atomic_set(&page->pt_share_count, 0);
++#endif
+ inc_zone_page_state(page, NR_PAGETABLE);
+ return true;
+ }
+--- a/include/linux/mm_types.h
++++ b/include/linux/mm_types.h
+@@ -151,6 +151,9 @@ struct page {
+ union {
+ struct mm_struct *pt_mm; /* x86 pgds only */
+ atomic_t pt_frag_refcount; /* powerpc */
++#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
++ atomic_t pt_share_count;
++#endif
+ };
+ #if ALLOC_SPLIT_PTLOCKS
+ spinlock_t *ptl;
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -5442,7 +5442,7 @@ pte_t *huge_pmd_share(struct mm_struct *
+ spte = huge_pte_offset(svma->vm_mm, saddr,
+ vma_mmu_pagesize(svma));
+ if (spte) {
+- get_page(virt_to_page(spte));
++ atomic_inc(&virt_to_page(spte)->pt_share_count);
+ break;
+ }
+ }
+@@ -5457,7 +5457,7 @@ pte_t *huge_pmd_share(struct mm_struct *
+ (pmd_t *)((unsigned long)spte & PAGE_MASK));
+ mm_inc_nr_pmds(mm);
+ } else {
+- put_page(virt_to_page(spte));
++ atomic_dec(&virt_to_page(spte)->pt_share_count);
+ }
+ spin_unlock(ptl);
+ out:
+@@ -5468,11 +5468,7 @@ out:
+ /*
+ * unmap huge page backed by shared pte.
+ *
+- * Hugetlb pte page is ref counted at the time of mapping. If pte is shared
+- * indicated by page_count > 1, unmap is achieved by clearing pud and
+- * decrementing the ref count. If count == 1, the pte page is not shared.
+- *
+- * Called with page table lock held and i_mmap_rwsem held in write mode.
++ * Called with page table lock held.
+ *
+ * returns: 1 successfully unmapped a shared pte page
+ * 0 the underlying pte page is not shared, or it is the last user
+@@ -5480,17 +5476,19 @@ out:
+ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long *addr, pte_t *ptep)
+ {
++ unsigned long sz = huge_page_size(hstate_vma(vma));
+ pgd_t *pgd = pgd_offset(mm, *addr);
+ p4d_t *p4d = p4d_offset(pgd, *addr);
+ pud_t *pud = pud_offset(p4d, *addr);
+
+ i_mmap_assert_write_locked(vma->vm_file->f_mapping);
+- BUG_ON(page_count(virt_to_page(ptep)) == 0);
+- if (page_count(virt_to_page(ptep)) == 1)
++ if (sz != PMD_SIZE)
++ return 0;
++ if (!atomic_read(&virt_to_page(ptep)->pt_share_count))
+ return 0;
+
+ pud_clear(pud);
+- put_page(virt_to_page(ptep));
++ atomic_dec(&virt_to_page(ptep)->pt_share_count);
+ mm_dec_nr_pmds(mm);
+ /*
+ * This update of passed address optimizes loops sequentially
diff --git a/queue-5.10/mm-hugetlb-unshare-page-tables-during-vma-split-not-before.patch b/queue-5.10/mm-hugetlb-unshare-page-tables-during-vma-split-not-before.patch
new file mode 100644
index 00000000000..9a7ad113835
--- /dev/null
+++ b/queue-5.10/mm-hugetlb-unshare-page-tables-during-vma-split-not-before.patch
@@ -0,0 +1,203 @@
+From 081056dc00a27bccb55ccc3c6f230a3d5fd3f7e0 Mon Sep 17 00:00:00 2001
+From: Jann Horn <jannh@google.com>
+Date: Tue, 27 May 2025 23:23:53 +0200
+Subject: mm/hugetlb: unshare page tables during VMA split, not before
+
+From: Jann Horn <jannh@google.com>
+
+commit 081056dc00a27bccb55ccc3c6f230a3d5fd3f7e0 upstream.
+
+Currently, __split_vma() triggers hugetlb page table unsharing through
+vm_ops->may_split(). This happens before the VMA lock and rmap locks are
+taken - which is too early, it allows racing VMA-locked page faults in our
+process and racing rmap walks from other processes to cause page tables to
+be shared again before we actually perform the split.
+
+Fix it by explicitly calling into the hugetlb unshare logic from
+__split_vma() in the same place where THP splitting also happens. At that
+point, both the VMA and the rmap(s) are write-locked.
+
+An annoying detail is that we can now call into the helper
+hugetlb_unshare_pmds() from two different locking contexts:
+
+1. from hugetlb_split(), holding:
+ - mmap lock (exclusively)
+ - VMA lock
+ - file rmap lock (exclusively)
+2. from hugetlb_unshare_all_pmds(), which I think is designed to be able
+ to call us with only the mmap lock held (in shared mode), but currently
+ only runs while holding mmap lock (exclusively) and VMA lock
+
+Backporting note:
+This commit fixes a racy protection that was introduced in commit
+b30c14cd6102 ("hugetlb: unshare some PMDs when splitting VMAs"); that
+commit claimed to fix an issue introduced in 5.13, but it should actually
+also go all the way back.
+
+[jannh@google.com: v2]
+ Link: https://lkml.kernel.org/r/20250528-hugetlb-fixes-splitrace-v2-1-1329349bad1a@google.com
+Link: https://lkml.kernel.org/r/20250528-hugetlb-fixes-splitrace-v2-0-1329349bad1a@google.com
+Link: https://lkml.kernel.org/r/20250527-hugetlb-fixes-splitrace-v1-1-f4136f5ec58a@google.com
+Fixes: 39dde65c9940 ("[PATCH] shared page table for hugetlb page")
+Signed-off-by: Jann Horn <jannh@google.com>
+Cc: Liam Howlett <liam.howlett@oracle.com>
+Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+Reviewed-by: Oscar Salvador <osalvador@suse.de>
+Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: <stable@vger.kernel.org> [b30c14cd6102: hugetlb: unshare some PMDs when splitting VMAs]
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+[stable backport: code got moved around, VMA splitting is in
+__vma_adjust, hugetlb lock wasn't used back then]
+Signed-off-by: Jann Horn <jannh@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/hugetlb.h | 6 +++++
+ mm/hugetlb.c | 53 ++++++++++++++++++++++++++++++++++++------------
+ mm/mmap.c | 8 +++++++
+ 3 files changed, 54 insertions(+), 13 deletions(-)
+
+--- a/include/linux/hugetlb.h
++++ b/include/linux/hugetlb.h
+@@ -188,6 +188,8 @@ unsigned long hugetlb_change_protection(
+ unsigned long address, unsigned long end, pgprot_t newprot);
+
+ bool is_hugetlb_entry_migration(pte_t pte);
++void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
++void hugetlb_split(struct vm_area_struct *vma, unsigned long addr);
+
+ #else /* !CONFIG_HUGETLB_PAGE */
+
+@@ -369,6 +371,10 @@ static inline vm_fault_t hugetlb_fault(s
+ return 0;
+ }
+
++static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { }
++
++static inline void hugetlb_split(struct vm_area_struct *vma, unsigned long addr) {}
++
+ #endif /* !CONFIG_HUGETLB_PAGE */
+ /*
+ * hugepages at page global directory. If arch support
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -97,7 +97,7 @@ static inline void ClearPageHugeFreed(st
+ /* Forward declaration */
+ static int hugetlb_acct_memory(struct hstate *h, long delta);
+ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
+- unsigned long start, unsigned long end);
++ unsigned long start, unsigned long end, bool take_locks);
+
+ static inline void unlock_or_release_subpool(struct hugepage_subpool *spool)
+ {
+@@ -3699,26 +3699,40 @@ static int hugetlb_vm_op_split(struct vm
+ {
+ if (addr & ~(huge_page_mask(hstate_vma(vma))))
+ return -EINVAL;
++ return 0;
++}
+
++void hugetlb_split(struct vm_area_struct *vma, unsigned long addr)
++{
+ /*
+ * PMD sharing is only possible for PUD_SIZE-aligned address ranges
+ * in HugeTLB VMAs. If we will lose PUD_SIZE alignment due to this
+ * split, unshare PMDs in the PUD_SIZE interval surrounding addr now.
++ * This function is called in the middle of a VMA split operation, with
++ * MM, VMA and rmap all write-locked to prevent concurrent page table
++ * walks (except hardware and gup_fast()).
+ */
++ mmap_assert_write_locked(vma->vm_mm);
++ i_mmap_assert_write_locked(vma->vm_file->f_mapping);
++
+ if (addr & ~PUD_MASK) {
+- /*
+- * hugetlb_vm_op_split is called right before we attempt to
+- * split the VMA. We will need to unshare PMDs in the old and
+- * new VMAs, so let's unshare before we split.
+- */
+ unsigned long floor = addr & PUD_MASK;
+ unsigned long ceil = floor + PUD_SIZE;
+
+- if (floor >= vma->vm_start && ceil <= vma->vm_end)
+- hugetlb_unshare_pmds(vma, floor, ceil);
++ if (floor >= vma->vm_start && ceil <= vma->vm_end) {
++ /*
++ * Locking:
++ * Use take_locks=false here.
++ * The file rmap lock is already held.
++ * The hugetlb VMA lock can't be taken when we already
++ * hold the file rmap lock, and we don't need it because
++ * its purpose is to synchronize against concurrent page
++ * table walks, which are not possible thanks to the
++ * locks held by our caller.
++ */
++ hugetlb_unshare_pmds(vma, floor, ceil, /* take_locks = */ false);
++ }
+ }
+-
+- return 0;
+ }
+
+ static unsigned long hugetlb_vm_op_pagesize(struct vm_area_struct *vma)
+@@ -5727,9 +5741,16 @@ void move_hugetlb_state(struct page *old
+ }
+ }
+
++/*
++ * If @take_locks is false, the caller must ensure that no concurrent page table
++ * access can happen (except for gup_fast() and hardware page walks).
++ * If @take_locks is true, we take the hugetlb VMA lock (to lock out things like
++ * concurrent page fault handling) and the file rmap lock.
++ */
+ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
+ unsigned long start,
+- unsigned long end)
++ unsigned long end,
++ bool take_locks)
+ {
+ struct hstate *h = hstate_vma(vma);
+ unsigned long sz = huge_page_size(h);
+@@ -5753,7 +5774,11 @@ static void hugetlb_unshare_pmds(struct
+ mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm,
+ start, end);
+ mmu_notifier_invalidate_range_start(&range);
+- i_mmap_lock_write(vma->vm_file->f_mapping);
++ if (take_locks) {
++ i_mmap_lock_write(vma->vm_file->f_mapping);
++ } else {
++ i_mmap_assert_write_locked(vma->vm_file->f_mapping);
++ }
+ for (address = start; address < end; address += PUD_SIZE) {
+ ptep = huge_pte_offset(mm, address, sz);
+ if (!ptep)
+@@ -5763,7 +5788,9 @@ static void hugetlb_unshare_pmds(struct
+ spin_unlock(ptl);
+ }
+ flush_hugetlb_tlb_range(vma, start, end);
+- i_mmap_unlock_write(vma->vm_file->f_mapping);
++ if (take_locks) {
++ i_mmap_unlock_write(vma->vm_file->f_mapping);
++ }
+ /*
+ * No need to call mmu_notifier_invalidate_range(), see
+ * Documentation/mm/mmu_notifier.rst.
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -832,7 +832,15 @@ int __vma_adjust(struct vm_area_struct *
+ }
+ }
+ again:
++ /*
++ * Get rid of huge pages and shared page tables straddling the split
++ * boundary.
++ */
+ vma_adjust_trans_huge(orig_vma, start, end, adjust_next);
++ if (is_vm_hugetlb_page(orig_vma)) {
++ hugetlb_split(orig_vma, start);
++ hugetlb_split(orig_vma, end);
++ }
+
+ if (file) {
+ mapping = file->f_mapping;
diff --git a/queue-5.10/series b/queue-5.10/series
index 157d0873877..c83948c30a7 100644
--- a/queue-5.10/series
+++ b/queue-5.10/series
@@ -296,3 +296,7 @@ arm64-restrict-pagetable-teardown-to-avoid-false-warning.patch
alsa-usb-audio-rename-alsa-kcontrol-pcm-and-pcm1-for-the-ktmicro-sound-card.patch
alsa-hda-intel-add-thinkpad-e15-to-pm-deny-list.patch
alsa-hda-realtek-enable-headset-mic-on-latitude-5420-rugged.patch
+hugetlb-unshare-some-pmds-when-splitting-vmas.patch
+mm-hugetlb-unshare-page-tables-during-vma-split-not-before.patch
+mm-hugetlb-independent-pmd-page-table-shared-count.patch
+mm-hugetlb-fix-huge_pmd_unshare-vs-gup-fast-race.patch