about | summary | refs | log | tree | commit | diff | stats
path: root/mm
diff options
author: Youngjun Park <youngjun.park@lge.com> 2026-03-24 01:08:21 +0900
committer: Andrew Morton <akpm@linux-foundation.org> 2026-06-08 18:21:31 -0700
commit: c13a0316aef5f4b73e8b4bf6943737f836d65e1d (patch)
tree: 4ea1353de01527b7da6e23cb0333f0ab19347519 /mm
parent: a51cbdf02aec619f90db7e9f06e295adb8009d4d (diff)
download: ath-c13a0316aef5f4b73e8b4bf6943737f836d65e1d.tar.gz
mm/swap, PM: hibernate: fix swapoff race in uswsusp by pinning swap device
Patch series "mm/swap, PM: hibernate: fix swapoff race in uswsusp by pinning swap device", v8.

Currently, in the uswsusp path, only the swap type value is retrieved at lookup time without holding a reference. If swapoff races after the type is acquired, subsequent slot allocations operate on a stale swap device.

Additionally, grabbing and releasing the swap device reference on every slot allocation is inefficient across the entire hibernation swap path.

This patch series addresses these issues:

- Patch 1: Fixes the swapoff race in uswsusp by pinning the swap device from the point it is looked up until the session completes.
- Patch 2: Removes the overhead of per-slot reference counting in alloc/free paths and cleans up the redundant SWP_WRITEOK check.

This patch (of 2):

Hibernation via uswsusp (/dev/snapshot ioctls) has a race window: after selecting the resume swap area but before user space is frozen, swapoff may run and invalidate the selected swap device.

Fix this by pinning the swap device with SWP_HIBERNATION while it is in use. The pin is exclusive, which is sufficient since hibernate_acquire() already prevents concurrent hibernation sessions.

The kernel swsusp path (sysfs-based hibernate/resume) uses find_hibernation_swap_type() which is not affected by the pin. It freezes user space before touching swap, so swapoff cannot race.

Introduce dedicated helpers:

- pin_hibernation_swap_type(): Look up and pin the swap device. Used by the uswsusp path.
- find_hibernation_swap_type(): Lookup without pinning. Used by the kernel swsusp path.
- unpin_hibernation_swap_type(): Clear the hibernation pin.

While a swap device is pinned, swapoff is prevented from proceeding.
Link: https://lore.kernel.org/20260323160822.1409904-1-youngjun.park@lge.com
Link: https://lore.kernel.org/20260323160822.1409904-2-youngjun.park@lge.com
Signed-off-by: Youngjun Park <youngjun.park@lge.com>
Reviewed-by: Kairui Song <kasong@tencent.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Chris Li <chrisl@kernel.org>
Cc: Kemeng Shi <shikemeng@huaweicloud.com>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: "Rafael J . Wysocki" <rafael@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- mm/swapfile.c 135
1 files changed, 119 insertions, 16 deletions
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 615d908671113..5e1e605ad9a1e 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -132,7 +132,7 @@ static DEFINE_PER_CPU(struct percpu_swap_cluster, percpu_swap_cluster) = {
/* May return NULL on invalid type, caller must check for NULL return */
static struct swap_info_struct *swap_type_to_info(int type)
{
- if (type >= MAX_SWAPFILES)
+ if (type < 0 || type >= MAX_SWAPFILES)
return NULL;
return READ_ONCE(swap_info[type]); /* rcu_dereference() */
}
@@ -2199,22 +2199,15 @@ void swap_free_hibernation_slot(swp_entry_t entry)
put_swap_device(si);
}
-/*
- * Find the swap type that corresponds to given device (if any).
- *
- * @offset - number of the PAGE_SIZE-sized block of the device, starting
- * from 0, in which the swap header is expected to be located.
- *
- * This is needed for the suspend to disk (aka swsusp).
- */
-int swap_type_of(dev_t device, sector_t offset)
+static int __find_hibernation_swap_type(dev_t device, sector_t offset)
{
int type;
+ lockdep_assert_held(&swap_lock);
+
if (!device)
- return -1;
+ return -EINVAL;
- spin_lock(&swap_lock);
for (type = 0; type < nr_swapfiles; type++) {
struct swap_info_struct *sis = swap_info[type];
@@ -2224,16 +2217,118 @@ int swap_type_of(dev_t device, sector_t offset)
if (device == sis->bdev->bd_dev) {
struct swap_extent *se = first_se(sis);
- if (se->start_block == offset) {
- spin_unlock(&swap_lock);
+ if (se->start_block == offset)
return type;
- }
}
}
- spin_unlock(&swap_lock);
return -ENODEV;
}
+/**
+ * pin_hibernation_swap_type - Pin the swap device for hibernation
+ * @device: Block device containing the resume image
+ * @offset: Offset identifying the swap area (start block of the first extent)
+ *
+ * Locate the swap device for @device/@offset and mark it as pinned
+ * for hibernation. While pinned, swapoff() fails with -EBUSY.
+ *
+ * Only one uswsusp context may pin a swap device at a time.
+ * If already pinned, this function warns once and returns -EBUSY.
+ *
+ * Return:
+ * >= 0 on success (swap type).
+ * -EINVAL if @device is 0.
+ * -ENODEV if the swap device is not found (or its swap_info slot is NULL).
+ * -EBUSY if the device is already pinned for hibernation.
+ */
+int pin_hibernation_swap_type(dev_t device, sector_t offset)
+{
+ int type;
+ struct swap_info_struct *si;
+
+ spin_lock(&swap_lock); /* lookup and flag update happen in one swap_lock section */
+
+ type = __find_hibernation_swap_type(device, offset);
+ if (type < 0) { /* propagate the lookup error unchanged */
+ spin_unlock(&swap_lock);
+ return type;
+ }
+
+ si = swap_type_to_info(type);
+ if (WARN_ON_ONCE(!si)) { /* lookup just succeeded, so a NULL slot is unexpected */
+ spin_unlock(&swap_lock);
+ return -ENODEV;
+ }
+
+ /*
+ * hibernate_acquire() prevents concurrent hibernation sessions.
+ * This check additionally guards against double-pinning within
+ * the same session.
+ */
+ if (WARN_ON_ONCE(si->flags & SWP_HIBERNATION)) {
+ spin_unlock(&swap_lock);
+ return -EBUSY;
+ }
+
+ si->flags |= SWP_HIBERNATION; /* swapoff tests this flag under swap_lock and bails out */
+
+ spin_unlock(&swap_lock);
+ return type;
+}
+
+/**
+ * unpin_hibernation_swap_type - Unpin the swap device for hibernation
+ * @type: Swap type previously returned by pin_hibernation_swap_type()
+ *
+ * Clear the hibernation pin on the given swap device, allowing
+ * swapoff() to proceed normally. The flag is cleared under swap_lock.
+ *
+ * If @type does not refer to a valid swap device (swap_type_to_info()
+ * returns NULL), this function does nothing.
+ */
+void unpin_hibernation_swap_type(int type)
+{
+ struct swap_info_struct *si;
+
+ spin_lock(&swap_lock);
+ si = swap_type_to_info(type);
+ if (!si) { /* out-of-range type or empty slot: nothing to unpin */
+ spin_unlock(&swap_lock);
+ return;
+ }
+ si->flags &= ~SWP_HIBERNATION; /* cleared unconditionally; no check that the pin was set */
+ spin_unlock(&swap_lock);
+}
+
+/**
+ * find_hibernation_swap_type - Find swap type for hibernation
+ * @device: Block device containing the resume image
+ * @offset: Offset within the device identifying the swap area
+ *
+ * Locate the swap device corresponding to @device and @offset.
+ *
+ * Unlike pin_hibernation_swap_type(), this function only performs a
+ * lookup and does not mark the swap device as pinned for hibernation.
+ *
+ * This is safe in the sysfs-based hibernation path where user space
+ * is already frozen and swapoff() cannot run concurrently.
+ *
+ * Return:
+ * A non-negative swap type on success.
+ * -EINVAL if @device is 0.
+ * -ENODEV if no matching swap device is found.
+ */
+int find_hibernation_swap_type(dev_t device, sector_t offset)
+{
+ int type;
+
+ spin_lock(&swap_lock); /* the lookup helper asserts swap_lock is held */
+ type = __find_hibernation_swap_type(device, offset);
+ spin_unlock(&swap_lock); /* result is not pinned once the lock is dropped */
+
+ return type;
+}
+
int find_first_swap(dev_t *device)
{
int type;
@@ -2996,6 +3091,14 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
spin_unlock(&swap_lock);
goto out_dput;
}
+
+ /* Refuse swapoff while the device is pinned for hibernation */
+ if (p->flags & SWP_HIBERNATION) {
+ err = -EBUSY;
+ spin_unlock(&swap_lock);
+ goto out_dput;
+ }
+
if (!security_vm_enough_memory_mm(current->mm, p->pages))
vm_unacct_memory(p->pages);
else {