diff options
| author | Jan Kara <jack@suse.cz> | 2026-04-23 11:30:53 +0200 |
|---|---|---|
| committer | David Sterba <dsterba@suse.com> | 2026-05-24 03:05:03 +0200 |
| commit | 964f569c14d7778c8f29ce81ec35d7a8fca31adf (patch) | |
| tree | 55b77f1a5b79e9fd585fd3460e16b0d7ca4c9714 /fs | |
| parent | f13342e15deafb7538a7a8577ed5f4c33c56f64e (diff) | |
| download | linux-next-history-964f569c14d7778c8f29ce81ec35d7a8fca31adf.tar.gz | |
btrfs: limit size of bios submitted from writeback
Currently btrfs_writepages() just accumulates as large a bio as possible
(within writeback_control constraints) and then submits it. This can,
however, lead to significant latency in writeback IO submission (I have
observed tens of milliseconds) because the submitted bio is easily over
a hundred megabytes in size. Consequently, this leads to IO pipeline
stalls and reduced throughput.
At the same time, beyond a certain size, submitting such a large bio
provides diminishing returns because the bio is split by the block layer
immediately anyway. So compute (an estimate of) the bio size beyond which
we are unlikely to improve performance, and submit the bio for writeback
once we have accumulated that much, to keep the IO pipeline busy.
This improves writeback throughput for sequential writes by about 15% on
the test machine I was using.
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Jan Kara <jack@suse.cz>
[ Fix the handling of missing device to avoid NULL pointer dereference. ]
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs')
| -rw-r--r-- | fs/btrfs/disk-io.c | 7 | ||||
| -rw-r--r-- | fs/btrfs/extent_io.c | 10 | ||||
| -rw-r--r-- | fs/btrfs/fs.h | 1 | ||||
| -rw-r--r-- | fs/btrfs/volumes.c | 31 | ||||
| -rw-r--r-- | fs/btrfs/volumes.h | 1 |
5 files changed, 50 insertions, 0 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c0a30bb213d7a..27d7a24ff97ae 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3591,6 +3591,13 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device } } + ret = btrfs_init_writeback_bio_size(fs_info); + if (ret) { + btrfs_err(fs_info, "failed to get optimum writeback size: %d", + ret); + goto fail_sysfs; + } + btrfs_free_zone_cache(fs_info); btrfs_check_active_zone_reservation(fs_info); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 45d56421ac508..56435d472dbbb 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -857,6 +857,16 @@ static void submit_extent_folio(struct btrfs_bio_ctrl *bio_ctrl, /* Ordered extent boundary: move on to a new bio. */ if (bio_ctrl->len_to_oe_boundary == 0) submit_one_bio(bio_ctrl); + /* + * If we have accumulated decent amount of IO, send it to the + * block layer so that IO can run while we are accumulating + * more folios to write. + */ + else if (bio_ctrl->wbc && + bio_ctrl->bbio->bio.bi_iter.bi_size >= + inode->root->fs_info->writeback_bio_size) + submit_one_bio(bio_ctrl); + } while (size); } diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h index a8aa086a4df86..1782f228c45c0 100644 --- a/fs/btrfs/fs.h +++ b/fs/btrfs/fs.h @@ -881,6 +881,7 @@ struct btrfs_fs_info { u32 block_min_order; u32 block_max_order; u32 stripesize; + u32 writeback_bio_size; u32 csum_size; u32 csums_per_leaf; u32 csum_type; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 16e9239879580..9fd828846fcd0 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -8179,6 +8179,37 @@ out: return ret; } +int btrfs_init_writeback_bio_size(struct btrfs_fs_info *fs_info) +{ + struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; + struct btrfs_device *device; + u32 writeback_bio_size = fs_info->sectorsize; + + mutex_lock(&fs_devices->device_list_mutex); + /* + * Let's take maximum over optimal request sizes for all devices. 
For + * RAID profiles writeback will submit stripe (64k) sized bios anyway + * so our value doesn't matter and for simple profiles this is a good + * approximation of sensible IO chunking. + */ + list_for_each_entry(device, &fs_devices->devices, dev_list) { + struct request_queue *queue; + unsigned int io_opt; + + if (!device->bdev || test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) + continue; + queue = bdev_get_queue(device->bdev); + io_opt = queue_io_opt(queue) ? : + queue_max_sectors(queue) << SECTOR_SHIFT; + writeback_bio_size = max(writeback_bio_size, io_opt); + } + mutex_unlock(&fs_devices->device_list_mutex); + + fs_info->writeback_bio_size = writeback_bio_size; + + return 0; +} + static int update_dev_stat_item(struct btrfs_trans_handle *trans, struct btrfs_device *device) { diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 0082c166af91f..96904d18f686b 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -784,6 +784,7 @@ int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info, struct btrfs_ioctl_get_dev_stats *stats); int btrfs_init_devices_late(struct btrfs_fs_info *fs_info); int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info); +int btrfs_init_writeback_bio_size(struct btrfs_fs_info *fs_info); int btrfs_run_dev_stats(struct btrfs_trans_handle *trans); void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev); void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev); |
