aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
authorBarry Song (Xiaomi) <baohua@kernel.org>2026-05-26 21:09:38 +0800
committerAndrew Morton <akpm@linux-foundation.org>2026-05-28 21:31:27 -0700
commit1aeb2103004eacb2d98cef3a49c63f883621bd9b (patch)
tree6b4349f7713f2c20d96904215d93b16ffd3d6dd4 /mm
parentb298cb5a2a6f54d4f1aff976416ea664983ee076 (diff)
downloadlinux-next-history-1aeb2103004eacb2d98cef3a49c63f883621bd9b.tar.gz
mm/mglru: use folio_mark_accessed to replace folio_set_active
MGLRU gives high priority to folios mapped in page tables. As a result, folio_set_active() is invoked for all folios read during page faults. In practice, however, readahead can bring in many folios that are never accessed via page tables. A previous attempt by Lei Liu proposed introducing a separate LRU for readahead[1] to make readahead pages easier to reclaim, but that approach is likely over-engineered. Before commit 4d5d14a01e2c ("mm/mglru: rework workingset protection"), folios with PG_active were always placed in the youngest generation, leading to over-protection and increased refaults. After that commit, PG_active folios are placed in the second youngest generation, which is still too optimistic given the presence of readahead. In contrast, the classic active/inactive scheme is more conservative. This patch switches to using folio_mark_accessed() and begins prefaulted file folios from the second oldest generation instead of active generations. We should also adjust the following accordingly: - WORKINGSET_ACTIVATE: aligned with setting active for refaulted workingset folios; - lru_gen_folio_seq(): place (pre)faulted file folios into the second oldest generation; - promote second-scanned folios to workingset in folio_check_references(): we now have to depend on folio_lru_refs() > 1, since we previously relied on PG_referenced being set during the first scan, but PG_referenced is now set earlier. On x86, running a kernel build inside a memcg with a 1GB memory limit using 20 threads. w/o patch: real 1m50.764s user 25m32.305s sys 4m0.012s pswpin: 1333245 pswpout: 4366443 pgpgin: 6962592 pgpgout: 17780712 swpout_zero: 1019603 swpin_zero: 14764 refault_file: 287794 refault_anon: 1347963 w/ patch: real 1m48.879s user 25m29.224s sys 3m37.421s pswpin: 568480 pswpout: 2322657 pgpgin: 4073416 pgpgout: 9613408 swpout_zero: 593275 swpin_zero: 9118 refault_file: 262505 refault_anon: 577550 active/inactive LRU: real 1m49.928s user 25m28.196s sys 3m40.740s pswpin: 463452 pswpout: 2309119 pgpgin: 4438856 pgpgout: 9568628 swpout_zero: 743704 swpin_zero: 7244 refault_file: 562555 refault_anon: 470694 Lance and Xueyuan made a huge contribution to this patch through testing. Link: https://lore.kernel.org/20260526130938.66253-1-baohua@kernel.org Link: https://lore.kernel.org/linux-mm/20250916072226.220426-1-liulei.rjpt@vivo.com/ [1] Signed-off-by: Barry Song (Xiaomi) <baohua@kernel.org> Tested-by: Lance Yang <lance.yang@linux.dev> Tested-by: Xueyuan Chen <xueyuan.chen21@gmail.com> Cc: Pedro Falcato <pfalcato@suse.de> Cc: Kairui Song <kasong@tencent.com> Cc: Qi Zheng <qi.zheng@linux.dev> Cc: Shakeel Butt <shakeel.butt@linux.dev> Cc: wangzicheng <wangzicheng@honor.com> Cc: Suren Baghdasaryan <surenb@google.com> Cc: Lei Liu <liulei.rjpt@vivo.com> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: Axel Rasmussen <axelrasmussen@google.com> Cc: Yuanchu Xie <yuanchu@google.com> Cc: Wei Xu <weixugc@google.com> Cc: Will Deacon <will@kernel.org> Cc: Kalesh Singh <kaleshsingh@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/swap.c16
-rw-r--r--mm/vmscan.c6
-rw-r--r--mm/workingset.c10
3 files changed, 24 insertions, 8 deletions
diff --git a/mm/swap.c b/mm/swap.c
index 2dd84813f4dde..588f50d8f1a8c 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -544,10 +544,20 @@ void folio_add_lru(struct folio *folio)
folio_test_unevictable(folio), folio);
VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
- /* see the comment in lru_gen_folio_seq() */
+ /*
+ * For refaulted workingset folios, set PG_active so they
+ * can be added to active generations.
+ * For prefaulted file folios, folio_mark_accessed() sets
+ * PG_referenced so lru_gen_folio_seq() places them into
+ * the second oldest generation.
+ */
if (lru_gen_enabled() && !folio_test_unevictable(folio) &&
- lru_gen_in_fault() && !(current->flags & PF_MEMALLOC))
- folio_set_active(folio);
+ lru_gen_in_fault() && !(current->flags & PF_MEMALLOC)) {
+ if (folio_test_workingset(folio))
+ folio_set_active(folio);
+ else if (!folio_test_referenced(folio))
+ folio_mark_accessed(folio);
+ }
folio_batch_add_and_move(folio, lru_add);
}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 3c856a78c0a59..76193a84a2afc 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -850,7 +850,11 @@ static bool lru_gen_set_refs(struct folio *folio)
return false;
}
- set_mask_bits(&folio->flags.f, LRU_REFS_FLAGS, BIT(PG_workingset));
+ /* Promote on second access */
+ if (folio_lru_refs(folio) > 1)
+ set_mask_bits(&folio->flags.f, LRU_REFS_FLAGS, BIT(PG_workingset));
+ else
+ folio_mark_accessed(folio);
return true;
}
#else
diff --git a/mm/workingset.c b/mm/workingset.c
index 07e6836d05020..f351798e723ac 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -319,11 +319,13 @@ static void lru_gen_refault(struct folio *folio, void *shadow)
atomic_long_add(delta, &lrugen->refaulted[hist][type][tier]);
- /* see folio_add_lru() where folio_set_active() will be called */
- if (lru_gen_in_fault())
- mod_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + type, delta);
-
if (workingset) {
+ /*
+ * see folio_add_lru(), where folio_set_active() is
+ * called for workingset folios
+ */
+ if (lru_gen_in_fault())
+ mod_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + type, delta);
folio_set_workingset(folio);
mod_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + type, delta);
} else