aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
authorQu Wenruo <wqu@suse.com>2026-04-30 10:37:23 +0930
committerDavid Sterba <dsterba@suse.com>2026-05-24 03:01:04 +0200
commitfef9c3eefdc484c91ef6681241371c22cf450bde (patch)
tree7339427515b1969512cf9d1f81cea27f02ddde2c /fs
parent6c6415890df2122c59ce4116cbfaf967a4678858 (diff)
downloadlinux-next-history-fef9c3eefdc484c91ef6681241371c22cf450bde.tar.gz
btrfs: warn about extent buffer that can not be released
When we unmount the fs or during mount failures, btrfs will call invalidate_inode_pages2() to release all btree inode folios. However that function can return -EBUSY if any folios can not be invalidated. This can be caused by: - Some extent buffers are still held by btrfs This is a logic error, as we should release all tree root nodes during unmount and mount failure handling. - Some extent buffers are under readahead and haven't yet finished These are much rarer but valid cases. In that case we should wait for those extent buffers. Introduce a new helper invalidate_and_check_btree_folios() which will: - Call invalidate_inode_pages2() and catch its return value If it returned 0 as expected, that's great and we can call it a day. - Otherwise go through each extent buffer in buffer_tree Increase the ref by one first for the eb we're checking. This is to ensure the eb won't be freed after the readahead is finished. For ebs that still have EXTENT_BUFFER_READING flag, wait for them to finish first. After waiting for the readahead, check the refs of the eb and whether it's still dirty. If the eb ref count is greater than 2 (one for the buffer tree, one held by us), it means we are still holding the extent buffer somewhere else, which is a code bug. If the eb is still dirty, it means a bug in transaction handling, e.g. the bug fixed by patch "btrfs: only release the dirty pages io tree after successful writes". For either case, show a warning message about the eb, including its bytenr, owner, refs and flags. And if it's a debug build, also trigger WARN_ON_ONCE() so that fstests can properly catch such a situation. Link: https://bugzilla.kernel.org/show_bug.cgi?id=221270 Reported-by: AHN SEOK-YOUNG <iamsyahn@gmail.com> CC: Teng Liu <27rabbitlt@gmail.com> Tested-by: Teng Liu <27rabbitlt@gmail.com> Reviewed-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/disk-io.c53
-rw-r--r--fs/btrfs/extent_io.c6
-rw-r--r--fs/btrfs/extent_io.h6
3 files changed, 57 insertions, 8 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f28cef8217dea..ffeb1d7d8ad9d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3272,6 +3272,55 @@ static bool fs_is_full_ro(const struct btrfs_fs_info *fs_info)
return false;
}
+/*
+ * Invalidate all btree inode folios. If that fails, wait for extent buffers
+ * still being read, warn about each one that is dirty, under writeback or has
+ * extra references, then invalidate again (that result is not checked).
+ */
+static void invalidate_and_check_btree_folios(struct btrfs_fs_info *fs_info)
+{
+ unsigned long index = 0;
+ struct extent_buffer *eb;
+ int ret;
+
+ ret = invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
+ if (likely(ret == 0))
+ return;
+
+ /*
+ * Some btree folios could not be invalidated. This happens when some tree
+ * blocks are still held, either by readahead or by a task holding a ref.
+ */
+ rcu_read_lock();
+ xa_for_each(&fs_info->buffer_tree, index, eb) {
+ /* Pin the eb so it won't disappear; skip it if its refs already hit zero. */
+ if (!refcount_inc_not_zero(&eb->refs))
+ continue;
+ rcu_read_unlock();
+
+ /* Wait for any readahead first (outside the RCU read section, may sleep). */
+ if (test_bit(EXTENT_BUFFER_READING, &eb->bflags))
+ wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_READING,
+ TASK_UNINTERRUPTIBLE);
+ /*
+ * The refs threshold is 2, one held by us at the beginning
+ * of the loop, one for the ownership in the buffer tree.
+ */
+ if (unlikely(refcount_read(&eb->refs) > 2 || extent_buffer_under_io(eb))) {
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_BTRFS_DEBUG));
+ btrfs_warn(fs_info,
+ "unable to release extent buffer %llu owner %llu gen %llu refs %u flags 0x%lx",
+ eb->start, btrfs_header_owner(eb),
+ btrfs_header_generation(eb),
+ refcount_read(&eb->refs), eb->bflags);
+ }
+ free_extent_buffer(eb);
+ rcu_read_lock();
+ }
+ rcu_read_unlock();
+ invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
+}
+
int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices)
{
u32 sectorsize;
@@ -3709,7 +3758,7 @@ fail_tree_roots:
if (fs_info->data_reloc_root)
btrfs_drop_and_free_fs_root(fs_info, fs_info->data_reloc_root);
free_root_pointers(fs_info, true);
- invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
+ invalidate_and_check_btree_folios(fs_info);
fail_sb_buffer:
btrfs_stop_all_workers(fs_info);
@@ -4438,7 +4487,7 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
* We must make sure there is not any read request to
* submit after we stop all workers.
*/
- invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
+ invalidate_and_check_btree_folios(fs_info);
btrfs_stop_all_workers(fs_info);
/*
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index e9ca4f6f47d1e..1b7550b344ca5 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2882,12 +2882,6 @@ next:
return try_release_extent_state(io_tree, folio);
}
-static int extent_buffer_under_io(const struct extent_buffer *eb)
-{
- return (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
- test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
-}
-
static bool folio_range_has_eb(struct folio *folio)
{
struct btrfs_folio_state *bfs;
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index b310a5145cf69..7b4152387d886 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -327,6 +327,12 @@ static inline bool extent_buffer_uptodate(const struct extent_buffer *eb)
return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
}
+static inline bool extent_buffer_under_io(const struct extent_buffer *eb)
+{
+ return (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) || /* writeback flag set, or */
+ test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); /* dirty flag set */
+}
+
int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
unsigned long start, unsigned long len);
void read_extent_buffer(const struct extent_buffer *eb, void *dst,