aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
authorNico Pache <npache@redhat.com>2026-05-22 09:00:00 -0600
committerAndrew Morton <akpm@linux-foundation.org>2026-05-28 21:31:34 -0700
commitf64fe31f1d981c5259c9bb6f85f656c39fdd3f48 (patch)
treebf9487bb2085d9617c9fc08f3e0f0a7180d0d9c8 /mm
parentdefe6e949b888028363abf2b452e2a07bb0b9031 (diff)
downloadlinux-next-history-f64fe31f1d981c5259c9bb6f85f656c39fdd3f48.tar.gz
mm/khugepaged: require collapse_huge_page to enter/exit with the lock dropped
Currently the collapse_huge_page function requires the mmap_read_lock to enter with it held, and exit with it dropped. This function moves the unlock into its parent caller, and changes this semantic to requiring it to enter/exit with it always unlocked. In future patches, we need this expectation, as for in mTHP collapse, we may have already have dropped the lock, and do not want to conditionally check for this by passing through the lock_dropped variable. No functional change is expected as one of the first things the collapse_huge_page function does is drop this lock before allocating the hugepage. Link: https://lore.kernel.org/20260522150009.121603-6-npache@redhat.com Signed-off-by: Nico Pache <npache@redhat.com> Acked-by: David Hildenbrand (Arm) <david@kernel.org> Cc: Alistair Popple <apopple@nvidia.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Anshuman Khandual <anshuman.khandual@arm.com> Cc: Bagas Sanjaya <bagasdotme@gmail.com> Cc: Baolin Wang <baolin.wang@linux.alibaba.com> Cc: Barry Song <baohua@kernel.org> Cc: Brendan Jackman <jackmanb@google.com> Cc: Byungchul Park <byungchul@sk.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: David Rientjes <rientjes@google.com> Cc: Dev Jain <dev.jain@arm.com> Cc: Gregory Price <gourry@gourry.net> Cc: "Huang, Ying" <ying.huang@linux.alibaba.com> Cc: Hugh Dickins <hughd@google.com> Cc: Jan Kara <jack@suse.cz> Cc: Jann Horn <jannh@google.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Joshua Hahn <joshua.hahnjy@gmail.com> Cc: Kefeng Wang <wangkefeng.wang@huawei.com> Cc: Lance Yang <lance.yang@linux.dev> Cc: Liam R. Howlett <liam@infradead.org> Cc: Lorenzo Stoakes <ljs@kernel.org> Cc: "Masami Hiramatsu (Google)" <mhiramat@kernel.org> Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> Cc: Matthew Brost <matthew.brost@intel.com> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: Michal Hocko <mhocko@suse.com> Cc: Mike Rapoport <rppt@kernel.org> Cc: Nanyong Sun <sunnanyong@huawei.com> Cc: Pedro Falcato <pfalcato@suse.de> Cc: Peter Xu <peterx@redhat.com> Cc: Rafael Aquini <raquini@redhat.com> Cc: Rakie Kim <rakie.kim@sk.com> Cc: Randy Dunlap <rdunlap@infradead.org> Cc: Ryan Roberts <ryan.roberts@arm.com> Cc: Shivank Garg <shivankg@amd.com> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Suren Baghdasaryan <surenb@google.com> Cc: Takashi Iwai (SUSE) <tiwai@suse.de> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com> Cc: Usama Arif <usamaarif642@gmail.com> Cc: Usama Arif <usama.arif@linux.dev> Cc: Vishal Moola (Oracle) <vishal.moola@gmail.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Wei Yang <richard.weiyang@gmail.com> Cc: Will Deacon <will@kernel.org> Cc: Yang Shi <yang@os.amperecomputing.com> Cc: Zach O'Keefe <zokeefe@google.com> Cc: Zi Yan <ziy@nvidia.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/khugepaged.c16
1 files changed, 8 insertions, 8 deletions
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 6b442f760ce1e..ce69611e2fb26 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1210,6 +1210,12 @@ static enum scan_result alloc_charge_folio(struct folio **foliop, struct mm_stru
return SCAN_SUCCEED;
}
+/*
+ * collapse_huge_page expects the mmap_lock to be unlocked before entering and
+ * will always return with the lock unlocked, to avoid holding the mmap_lock
+ * while allocating a THP, as that could trigger direct reclaim/compaction.
+ * Note that the VMA must be rechecked after grabbing the mmap_lock again.
+ */
static enum scan_result collapse_huge_page(struct mm_struct *mm, unsigned long address,
int referenced, int unmapped, struct collapse_control *cc)
{
@@ -1225,14 +1231,6 @@ static enum scan_result collapse_huge_page(struct mm_struct *mm, unsigned long a
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
- /*
- * Before allocating the hugepage, release the mmap_lock read lock.
- * The allocation can take potentially a long time if it involves
- * sync compaction, and we do not need to hold the mmap_lock during
- * that. We will recheck the vma after taking it again in write mode.
- */
- mmap_read_unlock(mm);
-
result = alloc_charge_folio(&folio, mm, cc, HPAGE_PMD_ORDER);
if (result != SCAN_SUCCEED)
goto out_nolock;
@@ -1537,6 +1535,8 @@ static enum scan_result collapse_scan_pmd(struct mm_struct *mm,
out_unmap:
pte_unmap_unlock(pte, ptl);
if (result == SCAN_SUCCEED) {
+ /* collapse_huge_page expects the lock to be dropped before calling */
+ mmap_read_unlock(mm);
result = collapse_huge_page(mm, start_addr, referenced,
unmapped, cc);
/* collapse_huge_page will return with the mmap_lock released */