about | summary | refs | log | tree | commit | diff | stats
path: root/mm
diff options
author Zi Yan <ziy@nvidia.com> 2026-05-17 09:54:04 -0400
committer Andrew Morton <akpm@linux-foundation.org> 2026-06-21 11:37:14 -0700
commit 4e3d769bf0fc13f2b44c9e693e587176b15200b8 (patch)
tree 9cdc8136632cae17611d16c105bb0b6aa9bdff82 /mm
parent cd2d3d1f26c27eace8c70bd481889afd3c34a42c (diff)
download ath-4e3d769bf0fc13f2b44c9e693e587176b15200b8.tar.gz
mm/khugepaged: add folio dirty check after try_to_unmap()
This check ensures the correctness of read-only PMD folio collapse after
it is enabled for all FSes supporting PMD pagecache folios and replaces
READ_ONLY_THP_FOR_FS.

READ_ONLY_THP_FOR_FS only supports read-only fds and uses
mapping->nr_thps and inode->i_writecount to prevent any write to
read-only to-be-collapsed folios. In upcoming commits,
READ_ONLY_THP_FOR_FS will be removed and the aforementioned mechanism
will go away too. To ensure khugepaged functions as expected after the
changes, skip if any folio is dirty after try_to_unmap(), since a dirty
folio at that point means this read-only folio can get writes between
try_to_unmap() and try_to_unmap_flush() via cached TLB entries and
khugepaged does not support writable pagecache folio collapse yet.

Link: https://lore.kernel.org/20260517135416.1434539-3-ziy@nvidia.com
Signed-off-by: Zi Yan <ziy@nvidia.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Lance Yang <lance.yang@linux.dev>
Reviewed-by: Nico Pache <npache@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Barry Song <baohua@kernel.org>
Cc: Chris Mason <clm@fb.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: David Sterba <dsterba@suse.com>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Liam Howlett <liam@infradead.org>
Cc: Lorenzo Stoakes <ljs@kernel.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Song Liu <songliubraving@fb.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- mm/khugepaged.c | 28
1 files changed, 24 insertions, 4 deletions
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 06be690fe8d23..eca22f1185d9f 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -2338,8 +2338,7 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr,
}
} else if (folio_test_dirty(folio)) {
/*
- * khugepaged only works on read-only fd,
- * so this page is dirty because it hasn't
+ * This page is dirty because it hasn't
* been flushed since first write. There
* won't be new dirty pages.
*
@@ -2397,8 +2396,8 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr,
if (!is_shmem && (folio_test_dirty(folio) ||
folio_test_writeback(folio))) {
/*
- * khugepaged only works on read-only fd, so this
- * folio is dirty because it hasn't been flushed
+ * khugepaged only works on clean file-backed folios,
+ * so this folio is dirty because it hasn't been flushed
* since first write.
*/
result = SCAN_PAGE_DIRTY_OR_WRITEBACK;
@@ -2443,6 +2442,27 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr,
}
/*
+ * At this point, the folio is locked and unmapped. If the PTE
+ * was dirty, try_to_unmap() has transferred the dirty bit to
+ * the folio and we must not collapse it into a clean
+ * file-backed folio.
+ *
+ * If the folio is clean here, no one can write it until we
+ * drop the folio lock. A write through a stale TLB entry came
+ * from a clean PTE and must fault because the PTE has been
+ * cleared; the fault path has to take the folio lock before
+ * installing a writable mapping. Buffered write paths also
+ * have to take the folio lock before modifying file contents
+ * without a mapping, typically via write_begin_get_folio().
+ */
+ if (!is_shmem && folio_test_dirty(folio)) {
+ result = SCAN_PAGE_DIRTY_OR_WRITEBACK;
+ xas_unlock_irq(&xas);
+ folio_putback_lru(folio);
+ goto out_unlock;
+ }
+
+ /*
* Accumulate the folios that are being collapsed.
*/
list_add_tail(&folio->lru, &pagelist);