aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
author: Kairui Song <kasong@tencent.com> 2026-05-17 23:39:46 +0800
committer: Andrew Morton <akpm@linux-foundation.org> 2026-05-28 21:31:07 -0700
commit: 406d964f8eecf29bcaa44021b05821671050e610 (patch)
tree: d2f91325ca1d1eb821d88b0e20724bb3e65793b7 /mm
parent: c42326c0563cbde56a3c43312ac88f51a3b7eb66 (diff)
download: linux-next-history-406d964f8eecf29bcaa44021b05821671050e610.tar.gz
mm, swap: support flexible batch freeing of slots in different memcgs
Instead of requiring the caller to ensure all slots are in the same memcg, make the function handle different memcgs at once.

This is both a micro optimization and required for removing the memcg lookup in the page table layer, so it can be unified at the swap layer.

We are not removing the memcg lookup in the page table in this commit. It has to be done after the memcg lookup is deferred to the swap layer.

Link: https://lore.kernel.org/20260517-swap-table-p4-v5-7-88ae43e064c7@tencent.com
Signed-off-by: Kairui Song <kasong@tencent.com>
Acked-by: Chris Li <chrisl@kernel.org>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: David Hildenbrand <david@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kemeng Shi <shikemeng@huaweicloud.com>
Cc: Lorenzo Stoakes <ljs@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Youngjun Park <youngjun.park@lge.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- mm/swapfile.c | 33
1 files changed, 29 insertions, 4 deletions
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 4875b3d3e6588..60d8f0df3f32c 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1899,21 +1899,46 @@ void __swap_cluster_free_entries(struct swap_info_struct *si,
unsigned int ci_start, unsigned int nr_pages)
{
unsigned long old_tb;
+ unsigned int type = si->type;
+ unsigned short batch_id = 0, id_cur;
unsigned int ci_off = ci_start, ci_end = ci_start + nr_pages;
- unsigned long offset = cluster_offset(si, ci) + ci_start;
+ unsigned long ci_head = cluster_offset(si, ci);
+ unsigned int batch_off = ci_off;
+ swp_entry_t entry;
VM_WARN_ON(ci->count < nr_pages);
ci->count -= nr_pages;
do {
old_tb = __swap_table_get(ci, ci_off);
- /* Release the last ref, or after swap cache is dropped */
+ /*
+ * Freeing is done after release of the last swap count
+ * ref, or after swap cache is dropped
+ */
VM_WARN_ON(!swp_tb_is_shadow(old_tb) || __swp_tb_get_count(old_tb) > 1);
__swap_table_set(ci, ci_off, null_to_swp_tb());
+
+ /*
+ * Uncharge swap slots by memcg in batches. Consecutive
+ * slots with the same cgroup id are uncharged together.
+ */
+ entry = swp_entry(type, ci_head + ci_off);
+ id_cur = lookup_swap_cgroup_id(entry);
+ if (batch_id != id_cur) {
+ if (batch_id)
+ mem_cgroup_uncharge_swap(swp_entry(type, ci_head + batch_off),
+ ci_off - batch_off);
+ batch_id = id_cur;
+ batch_off = ci_off;
+ }
} while (++ci_off < ci_end);
- mem_cgroup_uncharge_swap(swp_entry(si->type, offset), nr_pages);
- swap_range_free(si, offset, nr_pages);
+ if (batch_id) {
+ mem_cgroup_uncharge_swap(swp_entry(type, ci_head + batch_off),
+ ci_off - batch_off);
+ }
+
+ swap_range_free(si, ci_head + ci_start, nr_pages);
swap_cluster_assert_empty(ci, ci_start, nr_pages, false);
if (!ci->count)