diff options
| author | Youngjun Park <youngjun.park@lge.com> | 2026-03-24 01:08:21 +0900 |
|---|---|---|
| committer | Andrew Morton <akpm@linux-foundation.org> | 2026-06-08 18:21:31 -0700 |
| commit | c13a0316aef5f4b73e8b4bf6943737f836d65e1d (patch) | |
| tree | 4ea1353de01527b7da6e23cb0333f0ab19347519 /mm | |
| parent | a51cbdf02aec619f90db7e9f06e295adb8009d4d (diff) | |
| download | ath-c13a0316aef5f4b73e8b4bf6943737f836d65e1d.tar.gz | |
mm/swap, PM: hibernate: fix swapoff race in uswsusp by pinning swap device
Patch series "mm/swap, PM: hibernate: fix swapoff race in uswsusp by
pinning swap device", v8.
Currently, the uswsusp path retrieves only the swap type value at lookup
time and does not hold a reference to the swap device. If swapoff runs
after the type is acquired, subsequent slot allocations operate on a stale
swap device.
Additionally, grabbing and releasing the swap device reference on every
slot allocation is inefficient across the entire hibernation swap path.
This patch series addresses these issues:
- Patch 1: Fixes the swapoff race in uswsusp by pinning the swap device
from the point it is looked up until the session completes.
- Patch 2: Removes the overhead of per-slot reference counting in alloc/free
paths and cleans up the redundant SWP_WRITEOK check.
This patch (of 2):
Hibernation via uswsusp (/dev/snapshot ioctls) has a race window: after
selecting the resume swap area but before user space is frozen, swapoff
may run and invalidate the selected swap device.
Fix this by pinning the swap device with SWP_HIBERNATION while it is in
use. The pin is exclusive, which is sufficient since hibernate_acquire()
already prevents concurrent hibernation sessions.
The kernel swsusp path (sysfs-based hibernate/resume) uses
find_hibernation_swap_type(), which is not affected by the pin. It freezes
user space before touching swap, so swapoff cannot race.
Introduce dedicated helpers:
- pin_hibernation_swap_type(): Look up and pin the swap device.
Used by the uswsusp path.
- find_hibernation_swap_type(): Lookup without pinning.
Used by the kernel swsusp path.
- unpin_hibernation_swap_type(): Clear the hibernation pin.
While a swap device is pinned, swapoff is prevented from proceeding.
Link: https://lore.kernel.org/20260323160822.1409904-1-youngjun.park@lge.com
Link: https://lore.kernel.org/20260323160822.1409904-2-youngjun.park@lge.com
Signed-off-by: Youngjun Park <youngjun.park@lge.com>
Reviewed-by: Kairui Song <kasong@tencent.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Chris Li <chrisl@kernel.org>
Cc: Kemeng Shi <shikemeng@huaweicloud.com>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: "Rafael J . Wysocki" <rafael@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'mm')
| -rw-r--r-- | mm/swapfile.c | 135 |
1 file changed, 119 insertions, 16 deletions
diff --git a/mm/swapfile.c b/mm/swapfile.c index 615d908671113..5e1e605ad9a1e 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -132,7 +132,7 @@ static DEFINE_PER_CPU(struct percpu_swap_cluster, percpu_swap_cluster) = { /* May return NULL on invalid type, caller must check for NULL return */ static struct swap_info_struct *swap_type_to_info(int type) { - if (type >= MAX_SWAPFILES) + if (type < 0 || type >= MAX_SWAPFILES) return NULL; return READ_ONCE(swap_info[type]); /* rcu_dereference() */ } @@ -2199,22 +2199,15 @@ void swap_free_hibernation_slot(swp_entry_t entry) put_swap_device(si); } -/* - * Find the swap type that corresponds to given device (if any). - * - * @offset - number of the PAGE_SIZE-sized block of the device, starting - * from 0, in which the swap header is expected to be located. - * - * This is needed for the suspend to disk (aka swsusp). - */ -int swap_type_of(dev_t device, sector_t offset) +static int __find_hibernation_swap_type(dev_t device, sector_t offset) { int type; + lockdep_assert_held(&swap_lock); + if (!device) - return -1; + return -EINVAL; - spin_lock(&swap_lock); for (type = 0; type < nr_swapfiles; type++) { struct swap_info_struct *sis = swap_info[type]; @@ -2224,16 +2217,118 @@ int swap_type_of(dev_t device, sector_t offset) if (device == sis->bdev->bd_dev) { struct swap_extent *se = first_se(sis); - if (se->start_block == offset) { - spin_unlock(&swap_lock); + if (se->start_block == offset) return type; - } } } - spin_unlock(&swap_lock); return -ENODEV; } +/** + * pin_hibernation_swap_type - Pin the swap device for hibernation + * @device: Block device containing the resume image + * @offset: Offset identifying the swap area + * + * Locate the swap device for @device/@offset and mark it as pinned + * for hibernation. While pinned, swapoff() is prevented. + * + * Only one uswsusp context may pin a swap device at a time. + * If already pinned, this function returns -EBUSY. + * + * Return: + * >= 0 on success (swap type). 
+ * -EINVAL if @device is invalid. + * -ENODEV if the swap device is not found. + * -EBUSY if the device is already pinned for hibernation. + */ +int pin_hibernation_swap_type(dev_t device, sector_t offset) +{ + int type; + struct swap_info_struct *si; + + spin_lock(&swap_lock); + + type = __find_hibernation_swap_type(device, offset); + if (type < 0) { + spin_unlock(&swap_lock); + return type; + } + + si = swap_type_to_info(type); + if (WARN_ON_ONCE(!si)) { + spin_unlock(&swap_lock); + return -ENODEV; + } + + /* + * hibernate_acquire() prevents concurrent hibernation sessions. + * This check additionally guards against double-pinning within + * the same session. + */ + if (WARN_ON_ONCE(si->flags & SWP_HIBERNATION)) { + spin_unlock(&swap_lock); + return -EBUSY; + } + + si->flags |= SWP_HIBERNATION; + + spin_unlock(&swap_lock); + return type; +} + +/** + * unpin_hibernation_swap_type - Unpin the swap device for hibernation + * @type: Swap type previously returned by pin_hibernation_swap_type() + * + * Clear the hibernation pin on the given swap device, allowing + * swapoff() to proceed normally. + * + * If @type does not refer to a valid swap device, this function + * does nothing. + */ +void unpin_hibernation_swap_type(int type) +{ + struct swap_info_struct *si; + + spin_lock(&swap_lock); + si = swap_type_to_info(type); + if (!si) { + spin_unlock(&swap_lock); + return; + } + si->flags &= ~SWP_HIBERNATION; + spin_unlock(&swap_lock); +} + +/** + * find_hibernation_swap_type - Find swap type for hibernation + * @device: Block device containing the resume image + * @offset: Offset within the device identifying the swap area + * + * Locate the swap device corresponding to @device and @offset. + * + * Unlike pin_hibernation_swap_type(), this function only performs a + * lookup and does not mark the swap device as pinned for hibernation. + * + * This is safe in the sysfs-based hibernation path where user space + * is already frozen and swapoff() cannot run concurrently. 
+ * + * Return: + * A non-negative swap type on success. + * -EINVAL if @device is invalid. + * -ENODEV if no matching swap device is found. + */ +int find_hibernation_swap_type(dev_t device, sector_t offset) +{ + int type; + + spin_lock(&swap_lock); + type = __find_hibernation_swap_type(device, offset); + spin_unlock(&swap_lock); + + return type; +} + int find_first_swap(dev_t *device) { int type; @@ -2996,6 +3091,14 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) spin_unlock(&swap_lock); goto out_dput; } + + /* Refuse swapoff while the device is pinned for hibernation */ + if (p->flags & SWP_HIBERNATION) { + err = -EBUSY; + spin_unlock(&swap_lock); + goto out_dput; + } + if (!security_vm_enough_memory_mm(current->mm, p->pages)) vm_unacct_memory(p->pages); else { |
