diff options
| author | Mark Brown <broonie@kernel.org> | 2026-05-29 12:59:20 +0100 |
|---|---|---|
| committer | Mark Brown <broonie@kernel.org> | 2026-05-29 12:59:20 +0100 |
| commit | 26da87fd412077a278884c397ef65fd33dabaf58 (patch) | |
| tree | d85478dda73e54670dc0ca34a98e216b3b1d5552 /fs | |
| parent | c2c3ead1e71425082397ef2e887a5ae2d2a47cdf (diff) | |
| parent | c0b65f6129c7fbb526e921dd60261650f1b2bef9 (diff) | |
| download | linux-next-history-26da87fd412077a278884c397ef65fd33dabaf58.tar.gz | |
Merge branch 'dev' of https://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git
Diffstat (limited to 'fs')
| -rw-r--r-- | fs/f2fs/compress.c | 2 | ||||
| -rw-r--r-- | fs/f2fs/data.c | 47 | ||||
| -rw-r--r-- | fs/f2fs/dir.c | 2 | ||||
| -rw-r--r-- | fs/f2fs/extent_cache.c | 19 | ||||
| -rw-r--r-- | fs/f2fs/f2fs.h | 32 | ||||
| -rw-r--r-- | fs/f2fs/file.c | 37 | ||||
| -rw-r--r-- | fs/f2fs/gc.c | 61 | ||||
| -rw-r--r-- | fs/f2fs/inline.c | 3 | ||||
| -rw-r--r-- | fs/f2fs/inode.c | 25 | ||||
| -rw-r--r-- | fs/f2fs/node.c | 16 | ||||
| -rw-r--r-- | fs/f2fs/node.h | 8 | ||||
| -rw-r--r-- | fs/f2fs/recovery.c | 2 | ||||
| -rw-r--r-- | fs/f2fs/segment.c | 23 | ||||
| -rw-r--r-- | fs/f2fs/super.c | 70 | ||||
| -rw-r--r-- | fs/f2fs/sysfs.c | 2 | ||||
| -rw-r--r-- | fs/f2fs/verity.c | 2 | ||||
| -rw-r--r-- | fs/f2fs/xattr.c | 6 |
17 files changed, 309 insertions, 48 deletions
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 881e76158b967..caf522d667d61 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -14,6 +14,7 @@ #include <linux/lz4.h> #include <linux/zstd.h> #include <linux/folio_batch.h> +#include <linux/fserror.h> #include "f2fs.h" #include "node.h" @@ -760,6 +761,7 @@ void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task) /* Avoid f2fs_commit_super in irq context */ f2fs_handle_error(sbi, ERROR_FAIL_DECOMPRESSION); + fserror_report_file_metadata(dic->inode, ret, GFP_NOFS); goto out_release; } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8d4f1e75dee3e..d83a21998ec2d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -20,6 +20,7 @@ #include <linux/sched/signal.h> #include <linux/fiemap.h> #include <linux/iomap.h> +#include <linux/fserror.h> #include "f2fs.h" #include "node.h" @@ -377,9 +378,10 @@ static void f2fs_write_end_io(struct bio *bio) if (unlikely(bio->bi_status != BLK_STS_OK)) { mapping_set_error(folio->mapping, -EIO); - if (type == F2FS_WB_CP_DATA) + if (type == F2FS_WB_CP_DATA) { f2fs_stop_checkpoint(sbi, true, STOP_CP_REASON_WRITE_FAIL); + } } if (is_node_folio(folio)) { @@ -509,6 +511,8 @@ static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages) bio->bi_private = sbi; bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, fio->type, fio->temp); + bio->bi_write_stream = f2fs_io_type_to_write_stream(bdev, fio->type, + fio->temp); } iostat_alloc_and_bind_ctx(sbi, bio, NULL); @@ -943,6 +947,35 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, } } +void f2fs_submit_all_merged_ipu_writes(struct f2fs_sb_info *sbi) +{ + struct bio_entry *be, *tmp; + struct f2fs_bio_info *io; + enum temp_type temp; + + for (temp = HOT; temp < NR_TEMP_TYPE; temp++) { + LIST_HEAD(list); + + io = sbi->write_io[DATA] + temp; + + /* A lockless list_empty() check is safe here: any bios from + * other kworkers that we miss will be submitted by those + * kworkers accordingly. + */ + if (list_empty(&io->bio_list)) + continue; + + f2fs_down_write(&io->bio_list_lock); + list_splice_init(&io->bio_list, &list); + f2fs_up_write(&io->bio_list_lock); + + list_for_each_entry_safe(be, tmp, &list, list) { + f2fs_submit_write_bio(sbi, be->bio, DATA); + del_bio_entry(be); + } + } +} + int f2fs_merge_page_bio(struct f2fs_io_info *fio) { struct bio *bio = *fio->bio; @@ -1748,6 +1781,7 @@ next_block: err = -EFSCORRUPTED; f2fs_handle_error(sbi, ERROR_CORRUPTED_CLUSTER); + fserror_report_file_metadata(inode, err, GFP_NOFS); goto sync_out; } @@ -2495,7 +2529,7 @@ static int f2fs_read_data_large_folio(struct inode *inode, unsigned nrpages; struct f2fs_folio_state *ffs; int ret = 0; - bool folio_in_bio; + bool folio_in_bio = false; if (!IS_IMMUTABLE(inode) || f2fs_compressed_file(inode)) { if (folio) @@ -2611,18 +2645,17 @@ submit_and_realloc: } trace_f2fs_read_folio(folio, DATA); err_out: - if (!folio_in_bio) { + if (!folio_in_bio) folio_end_read(folio, !ret); - if (ret) - return ret; - } + if (ret) + goto out; if (rac) { folio = readahead_folio(rac); goto next_folio; } out: f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA); - if (ret) { + if (ret && folio_in_bio) { /* Wait bios and clear uptodate. */ folio_lock(folio); folio_clear_uptodate(folio); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 38802ee2e40de..b1697194c3c4d 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -11,6 +11,7 @@ #include <linux/filelock.h> #include <linux/sched/signal.h> #include <linux/unicode.h> +#include <linux/fserror.h> #include "f2fs.h" #include "node.h" #include "acl.h" @@ -1020,6 +1021,7 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, set_sbi_flag(sbi, SBI_NEED_FSCK); err = -EFSCORRUPTED; f2fs_handle_error(sbi, ERROR_CORRUPTED_DIRENT); + fserror_report_file_metadata(d->inode, err, GFP_NOFS); goto out; } diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index d2e006420f040..61f6b97143663 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -119,10 +119,9 @@ static bool __may_extent_tree(struct inode *inode, enum extent_type type) if (!__init_may_extent_tree(inode, type)) return false; - if (is_inode_flag_set(inode, FI_NO_EXTENT)) - return false; - if (type == EX_READ) { + if (is_inode_flag_set(inode, FI_NO_EXTENT)) + return false; if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) && !f2fs_sb_has_readonly(F2FS_I_SB(inode))) return false; @@ -645,14 +644,10 @@ static unsigned int __destroy_extent_node(struct inode *inode, while (atomic_read(&et->node_cnt)) { write_lock(&et->lock); - if (!is_inode_flag_set(inode, FI_NO_EXTENT)) - set_inode_flag(inode, FI_NO_EXTENT); node_cnt += __free_extent_tree(sbi, et, nr_shrink); write_unlock(&et->lock); } - f2fs_bug_on(sbi, atomic_read(&et->node_cnt)); - return node_cnt; } @@ -691,12 +686,12 @@ static void __update_extent_tree_range(struct inode *inode, write_lock(&et->lock); - if (is_inode_flag_set(inode, FI_NO_EXTENT)) { - write_unlock(&et->lock); - return; - } - if (type == EX_READ) { + if (is_inode_flag_set(inode, FI_NO_EXTENT)) { + write_unlock(&et->lock); + return; + } + prev = et->largest; dei.len = 0; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 91f506e7c9cfb..9f24287de4c31 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -96,6 +96,15 @@ extern const char *f2fs_fault_name[FAULT_MAX]; #define DEFAULT_FAILURE_RETRY_COUNT 1 #endif +enum { + REPORT_FAULT_NEED_FSCK, + REPORT_FAULT_STOP_CP, + REPORT_FAULT_MAX, +}; + +void f2fs_fault_report(struct super_block *sb, unsigned int err_code, + const char *func, unsigned int data); + /* * For mount options */ @@ -1980,6 +1989,7 @@ struct f2fs_sb_info { unsigned char stop_reason[MAX_STOP_REASON]; /* stop reason */ spinlock_t error_lock; /* protect errors/stop_reason array */ bool error_dirty; /* errors of sb is dirty */ + bool stop_reason_dirty; /* stop reason of sb is dirty */ /* For reclaimed segs statistics per each GC mode */ unsigned int gc_segment_mode; /* GC state for reclaimed segments */ @@ -2125,12 +2135,12 @@ static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type) { unsigned long now = jiffies; - sbi->last_time[type] = now; + WRITE_ONCE(sbi->last_time[type], now); /* DISCARD_TIME and GC_TIME are based on REQ_TIME */ if (type == REQ_TIME) { - sbi->last_time[DISCARD_TIME] = now; - sbi->last_time[GC_TIME] = now; + WRITE_ONCE(sbi->last_time[DISCARD_TIME], now); + WRITE_ONCE(sbi->last_time[GC_TIME], now); } } @@ -2138,7 +2148,7 @@ static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type) { unsigned long interval = sbi->interval_time[type] * HZ; - return time_after(jiffies, sbi->last_time[type] + interval); + return time_after(jiffies, READ_ONCE(sbi->last_time[type]) + interval); } static inline unsigned int f2fs_time_to_wait(struct f2fs_sb_info *sbi, @@ -2148,7 +2158,7 @@ static inline unsigned int f2fs_time_to_wait(struct f2fs_sb_info *sbi, unsigned int wait_ms = 0; long delta; - delta = (sbi->last_time[type] + interval) - jiffies; + delta = (READ_ONCE(sbi->last_time[type]) + interval) - jiffies; if (delta > 0) wait_ms = jiffies_to_msecs(delta); @@ -2279,11 +2289,18 @@ static inline bool is_sbi_flag_set(struct f2fs_sb_info *sbi, unsigned int type) return test_bit(type, &sbi->s_flag); } -static inline void set_sbi_flag(struct f2fs_sb_info *sbi, unsigned int type) +static inline void __set_sbi_flag(struct f2fs_sb_info *sbi, unsigned int type) { set_bit(type, &sbi->s_flag); } +#define set_sbi_flag(sbi, type) \ +do { \ + __set_sbi_flag(sbi, type); \ + if ((type) == SBI_NEED_FSCK) \ + f2fs_fault_report(sbi->sb, REPORT_FAULT_NEED_FSCK, __func__, __LINE__); \ +} while (0) + static inline void clear_sbi_flag(struct f2fs_sb_info *sbi, unsigned int type) { clear_bit(type, &sbi->s_flag); @@ -4061,6 +4078,8 @@ void f2fs_destroy_segment_manager_caches(void); int f2fs_rw_hint_to_seg_type(struct f2fs_sb_info *sbi, enum rw_hint hint); enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi, enum page_type type, enum temp_type temp); +u8 f2fs_io_type_to_write_stream(struct block_device *bdev, + enum page_type type, enum temp_type temp); unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi); unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi, unsigned int segno); @@ -4153,6 +4172,7 @@ void f2fs_submit_merged_write_folio(struct f2fs_sb_info *sbi, struct folio *folio, enum page_type type); void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, struct bio **bio, struct folio *folio); +void f2fs_submit_all_merged_ipu_writes(struct f2fs_sb_info *sbi); void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi); int f2fs_submit_page_bio(struct f2fs_io_info *fio); int f2fs_merge_page_bio(struct f2fs_io_info *fio); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index fb12c5c9affda..6edf0105dbc8f 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -4784,6 +4784,30 @@ static bool f2fs_should_use_dio(struct inode *inode, struct kiocb *iocb, return true; } +#ifdef CONFIG_F2FS_IOSTAT +static void f2fs_dio_end_bio(struct bio *bio) +{ + struct bio_iostat_ctx *iostat_ctx = bio->bi_private; + void *orig_bi_private = iostat_ctx->post_read_ctx; + + iostat_update_and_unbind_ctx(bio); + bio->bi_private = orig_bi_private; + iomap_dio_bio_end_io(bio); +} + +static void f2fs_dio_iostat_start(struct f2fs_sb_info *sbi, struct bio *bio) +{ + void *bi_private = bio->bi_private; + + iostat_alloc_and_bind_ctx(sbi, bio, bi_private); + iostat_update_submit_ctx(bio, DATA); + bio->bi_end_io = f2fs_dio_end_bio; +} +#else +static inline void f2fs_dio_iostat_start(struct f2fs_sb_info *sbi, + struct bio *bio) {} +#endif + static int f2fs_dio_read_end_io(struct kiocb *iocb, ssize_t size, int error, unsigned int flags) { @@ -4796,8 +4820,18 @@ static int f2fs_dio_read_end_io(struct kiocb *iocb, ssize_t size, int error, return 0; } +static void f2fs_dio_read_submit_io(const struct iomap_iter *iter, + struct bio *bio, loff_t file_offset) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(iter->inode); + + f2fs_dio_iostat_start(sbi, bio); + blk_crypto_submit_bio(bio); +} + static const struct iomap_dio_ops f2fs_iomap_dio_read_ops = { .end_io = f2fs_dio_read_end_io, + .submit_io = f2fs_dio_read_submit_io, }; static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) @@ -5076,6 +5110,9 @@ static void f2fs_dio_write_submit_io(const struct iomap_iter *iter, enum temp_type temp = f2fs_get_segment_temp(sbi, type); bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, DATA, temp); + bio->bi_write_stream = + f2fs_io_type_to_write_stream(bio->bi_bdev, DATA, temp); + f2fs_dio_iostat_start(sbi, bio); blk_crypto_submit_bio(bio); } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index ba93010924c06..69e0a867219d1 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1220,8 +1220,8 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, static int ra_data_block(struct inode *inode, pgoff_t index) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct address_space *mapping = f2fs_is_cow_file(inode) ? - F2FS_I(inode)->atomic_inode->i_mapping : inode->i_mapping; + struct address_space *mapping = inode->i_mapping; + struct inode *atomic_inode = NULL; struct dnode_of_data dn; struct folio *folio, *efolio; struct f2fs_io_info fio = { @@ -1236,9 +1236,22 @@ static int ra_data_block(struct inode *inode, pgoff_t index) }; int err = 0; + f2fs_down_read(&F2FS_I(inode)->i_sem); + if (f2fs_is_cow_file(inode)) { + atomic_inode = igrab(F2FS_I(inode)->atomic_inode); + if (!atomic_inode) { + f2fs_up_read(&F2FS_I(inode)->i_sem); + return -EBUSY; + } + mapping = atomic_inode->i_mapping; + } + f2fs_up_read(&F2FS_I(inode)->i_sem); + folio = f2fs_grab_cache_folio(mapping, index, true); - if (IS_ERR(folio)) - return PTR_ERR(folio); + if (IS_ERR(folio)) { + err = PTR_ERR(folio); + goto out_iput; + } if (f2fs_lookup_read_extent_cache_block(inode, index, &dn.data_blkaddr)) { @@ -1299,11 +1312,16 @@ got_it: f2fs_update_iostat(sbi, inode, FS_DATA_READ_IO, F2FS_BLKSIZE); f2fs_update_iostat(sbi, NULL, FS_GDATA_READ_IO, F2FS_BLKSIZE); + if (atomic_inode) + iput(atomic_inode); return 0; put_encrypted_page: f2fs_put_page(fio.encrypted_page, true); put_folio: f2fs_folio_put(folio, true); +out_iput: + if (atomic_inode) + iput(atomic_inode); return err; } @@ -1314,8 +1332,8 @@ put_folio: static int move_data_block(struct inode *inode, block_t bidx, int gc_type, unsigned int segno, int off) { - struct address_space *mapping = f2fs_is_cow_file(inode) ? - F2FS_I(inode)->atomic_inode->i_mapping : inode->i_mapping; + struct address_space *mapping = inode->i_mapping; + struct inode *atomic_inode = NULL; struct f2fs_io_info fio = { .sbi = F2FS_I_SB(inode), .ino = inode->i_ino, @@ -1337,10 +1355,23 @@ static int move_data_block(struct inode *inode, block_t bidx, (fio.sbi->gc_mode != GC_URGENT_HIGH) ? CURSEG_ALL_DATA_ATGC : CURSEG_COLD_DATA; + f2fs_down_read(&F2FS_I(inode)->i_sem); + if (f2fs_is_cow_file(inode)) { + atomic_inode = igrab(F2FS_I(inode)->atomic_inode); + if (!atomic_inode) { + f2fs_up_read(&F2FS_I(inode)->i_sem); + return -EBUSY; + } + mapping = atomic_inode->i_mapping; + } + f2fs_up_read(&F2FS_I(inode)->i_sem); + /* do not read out */ folio = f2fs_grab_cache_folio(mapping, bidx, false); - if (IS_ERR(folio)) - return PTR_ERR(folio); + if (IS_ERR(folio)) { + err = PTR_ERR(folio); + goto out_iput; + } if (!check_valid_map(F2FS_I_SB(inode), segno, off)) { err = -ENOENT; @@ -1473,6 +1504,9 @@ out: folio_unlock(folio); folio_end_dropbehind(folio); folio_put(folio); +out_iput: + if (atomic_inode) + iput(atomic_inode); return err; } @@ -1754,9 +1788,8 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int end_segno = start_segno + SEGS_PER_SEC(sbi); unsigned int sec_end_segno; int seg_freed = 0, migrated = 0; - unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ? - SUM_TYPE_DATA : SUM_TYPE_NODE; - unsigned char data_type = (type == SUM_TYPE_DATA) ? DATA : NODE; + unsigned char type; + unsigned char data_type; int submitted = 0, sum_blk_cnt; if (__is_large_section(sbi)) { @@ -1855,6 +1888,12 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, migrated >= sbi->migration_granularity) continue; + if (migrated == 0) { + type = IS_DATASEG(get_seg_entry(sbi, cur_segno)->type) ? + SUM_TYPE_DATA : SUM_TYPE_NODE; + data_type = (type == SUM_TYPE_DATA) ? DATA : NODE; + } + sum = SUM_BLK_PAGE_ADDR(sbi, sum_folio, cur_segno); if (type != GET_SUM_TYPE(sum_footer(sbi, sum))) { f2fs_err(sbi, "Inconsistent segment (%u) type " diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 7aabfc9b43cb8..099f720897016 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -9,6 +9,7 @@ #include <linux/fs.h> #include <linux/f2fs_fs.h> #include <linux/fiemap.h> +#include <linux/fserror.h> #include "f2fs.h" #include "node.h" @@ -179,6 +180,7 @@ int f2fs_convert_inline_folio(struct dnode_of_data *dn, struct folio *folio) f2fs_warn(fio.sbi, "%s: corrupted inline inode ino=%llu, i_addr[0]:0x%x, run fsck to fix.", __func__, dn->inode->i_ino, dn->data_blkaddr); f2fs_handle_error(fio.sbi, ERROR_INVALID_BLKADDR); + fserror_report_file_metadata(dn->inode, -EFSCORRUPTED, GFP_NOFS); return -EFSCORRUPTED; } @@ -435,6 +437,7 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct folio *ifolio, __func__, dir->i_ino, dn.data_blkaddr); f2fs_handle_error(F2FS_F_SB(folio), ERROR_INVALID_BLKADDR); err = -EFSCORRUPTED; + fserror_report_file_metadata(dn.inode, err, GFP_NOFS); goto out; } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index c6dcda447882f..25f30b8eadc53 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -11,6 +11,7 @@ #include <linux/sched/mm.h> #include <linux/lz4.h> #include <linux/zstd.h> +#include <linux/fserror.h> #include "f2fs.h" #include "node.h" @@ -480,6 +481,7 @@ static int do_read_inode(struct inode *inode) f2fs_folio_put(node_folio, true); set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE); + fserror_report_file_metadata(inode, -EFSCORRUPTED, GFP_NOFS); return -EFSCORRUPTED; } @@ -541,6 +543,7 @@ static int do_read_inode(struct inode *inode) if (!sanity_check_extent_cache(inode, node_folio)) { f2fs_folio_put(node_folio, true); f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE); + fserror_report_file_metadata(inode, -EFSCORRUPTED, GFP_NOFS); return -EFSCORRUPTED; } @@ -561,8 +564,13 @@ static int do_read_inode(struct inode *inode) static bool is_meta_ino(struct f2fs_sb_info *sbi, unsigned int ino) { - return ino == F2FS_NODE_INO(sbi) || ino == F2FS_META_INO(sbi) || - ino == F2FS_COMPRESS_INO(sbi); + if (ino == F2FS_NODE_INO(sbi) || ino == F2FS_META_INO(sbi)) + return true; +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (test_opt(sbi, COMPRESS_CACHE) && ino == F2FS_COMPRESS_INO(sbi)) + return true; +#endif + return false; } struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) @@ -583,6 +591,7 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) trace_f2fs_iget_exit(inode, ret); iput(inode); f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE); + fserror_report_file_metadata(inode, ret, GFP_NOFS); return ERR_PTR(ret); } @@ -787,6 +796,7 @@ retry: if (err == -ENOMEM || ++count <= DEFAULT_RETRY_IO_COUNT) goto retry; stop_checkpoint: + fserror_report_file_metadata(inode, -EFSCORRUPTED, GFP_NOFS); f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_UPDATE_INODE); return; } @@ -858,10 +868,15 @@ void f2fs_evict_inode(struct inode *inode) f2fs_abort_atomic_write(inode, true); if (fi->cow_inode && f2fs_is_cow_file(fi->cow_inode)) { - clear_inode_flag(fi->cow_inode, FI_COW_FILE); - F2FS_I(fi->cow_inode)->atomic_inode = NULL; - iput(fi->cow_inode); + struct inode *cow_inode = fi->cow_inode; + + f2fs_down_write(&F2FS_I(cow_inode)->i_sem); + clear_inode_flag(cow_inode, FI_COW_FILE); + F2FS_I(cow_inode)->atomic_inode = NULL; fi->cow_inode = NULL; + f2fs_up_write(&F2FS_I(cow_inode)->i_sem); + + iput(cow_inode); } trace_f2fs_evict_inode(inode); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 4e5bd9e4cfc32..cd5a394f61114 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -12,6 +12,7 @@ #include <linux/blkdev.h> #include <linux/folio_batch.h> #include <linux/swap.h> +#include <linux/fserror.h> #include "f2fs.h" #include "node.h" @@ -72,7 +73,11 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type) sizeof(struct free_nid)) >> PAGE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); } else if (type == NAT_ENTRIES) { - mem_size = (nm_i->nat_cnt[TOTAL_NAT] * + /* + * nat_cnt[] is heuristic accounting. Sample it locklessly here + * to avoid taking nat_tree_lock in the balance path. + */ + mem_size = (data_race(READ_ONCE(nm_i->nat_cnt[TOTAL_NAT])) * sizeof(struct nat_entry)) >> PAGE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); if (excess_cached_nats(sbi)) @@ -1265,6 +1270,8 @@ skip_partial: if (err == -ENOENT) { set_sbi_flag(F2FS_F_SB(folio), SBI_NEED_FSCK); f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); + fserror_report_file_metadata(dn.inode, -EFSCORRUPTED, + GFP_NOFS); f2fs_err_ratelimited(sbi, "truncate node fail, ino:%llu, nid:%u, " "offset[0]:%d, offset[1]:%d, nofs:%d", @@ -1556,6 +1563,8 @@ out_err: next_blkaddr_of_node(folio)); f2fs_handle_error(sbi, ERROR_INCONSISTENT_FOOTER); + fserror_report_file_metadata(folio->mapping->host, + -EFSCORRUPTED, in_irq ? GFP_NOWAIT : GFP_NOFS); return -EFSCORRUPTED; } @@ -1778,6 +1787,7 @@ static bool __write_node_folio(struct folio *folio, bool atomic, bool do_fsync, if (f2fs_sanity_check_node_footer(sbi, folio, nid, NODE_TYPE_REGULAR, false)) { + fserror_report_metadata(sbi->sb, -EFSCORRUPTED, GFP_NOFS); f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_CORRUPTED_NID); goto redirty_out; } @@ -1875,7 +1885,7 @@ int f2fs_write_single_node_folio(struct folio *node_folio, int sync_mode, } if (!__write_node_folio(node_folio, false, false, NULL, - &wbc, false, FS_GC_NODE_IO, NULL)) + &wbc, false, io_type, NULL)) err = -EAGAIN; goto release_folio; out_folio: @@ -2703,6 +2713,8 @@ retry: spin_unlock(&nm_i->nid_list_lock); f2fs_err(sbi, "Corrupted nid %u in free_nid_list", i->nid); + fserror_report_metadata(sbi->sb, -EFSCORRUPTED, + GFP_NOFS); f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_CORRUPTED_NID); return false; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index bcf2034e42639..5e114f3520998 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -129,13 +129,17 @@ static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne, static inline bool excess_dirty_nats(struct f2fs_sb_info *sbi) { - return NM_I(sbi)->nat_cnt[DIRTY_NAT] >= NM_I(sbi)->max_nid * + /* nat_cnt[] is heuristic accounting sampled locklessly here. */ + return data_race(READ_ONCE(NM_I(sbi)->nat_cnt[DIRTY_NAT])) >= + NM_I(sbi)->max_nid * NM_I(sbi)->dirty_nats_ratio / 100; } static inline bool excess_cached_nats(struct f2fs_sb_info *sbi) { - return NM_I(sbi)->nat_cnt[TOTAL_NAT] >= DEF_NAT_CACHE_THRESHOLD; + /* nat_cnt[] is heuristic accounting sampled locklessly here. */ + return data_race(READ_ONCE(NM_I(sbi)->nat_cnt[TOTAL_NAT])) >= + DEF_NAT_CACHE_THRESHOLD; } enum mem_type { diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 3d3dacec94825..89af8407b6673 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -9,6 +9,7 @@ #include <linux/fs.h> #include <linux/f2fs_fs.h> #include <linux/sched/mm.h> +#include <linux/fserror.h> #include "f2fs.h" #include "node.h" #include "segment.h" @@ -679,6 +680,7 @@ retry_dn: ofs_of_node(folio)); err = -EFSCORRUPTED; f2fs_handle_error(sbi, ERROR_INCONSISTENT_FOOTER); + fserror_report_file_metadata(dn.inode, err, GFP_NOFS); goto err; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 788f8b0502492..1ef4edb770784 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -17,6 +17,7 @@ #include <linux/freezer.h> #include <linux/sched/signal.h> #include <linux/random.h> +#include <linux/fserror.h> #include "f2fs.h" #include "segment.h" @@ -444,6 +445,13 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) if (has_enough_free_secs(sbi, 0, 0)) return; + /* + * Submit all cached OPU/IPU DATA bios before triggering + * foreground GC to avoid potential deadlocks. + */ + f2fs_submit_merged_write(sbi, DATA); + f2fs_submit_all_merged_ipu_writes(sbi); + if (test_opt(sbi, GC_MERGE) && sbi->gc_thread && sbi->gc_thread->f2fs_gc_task) { DEFINE_WAIT(wait); @@ -462,6 +470,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) .should_migrate_blocks = false, .err_gc_skipped = false, .nr_free_secs = 1 }; + f2fs_down_write_trace(&sbi->gc_lock, &gc_control.lc); stat_inc_gc_call_count(sbi, FOREGROUND); f2fs_gc(sbi, &gc_control); @@ -2896,6 +2905,7 @@ got_it: /* set it as dirty segment in free segmap */ if (test_bit(segno, free_i->free_segmap)) { ret = -EFSCORRUPTED; + fserror_report_metadata(sbi->sb, -EFSCORRUPTED, GFP_NOFS); f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_CORRUPTED_FREE_BITMAP); goto out_unlock; } @@ -3636,6 +3646,19 @@ enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi, } } +u8 f2fs_io_type_to_write_stream(struct block_device *bdev, + enum page_type type, enum temp_type temp) +{ + unsigned short nr = bdev_max_write_streams(bdev); + + if (type != DATA || !nr) + return 0; + if (nr < NR_TEMP_TYPE) + return temp == COLD ? nr : HOT + 1; + + return temp + 1; +} + static int __get_segment_type_2(struct f2fs_io_info *fio) { if (fio->type == DATA) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index ccf806b676f53..87f816f010679 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -29,6 +29,7 @@ #include <linux/lz4.h> #include <linux/ctype.h> #include <linux/fs_parser.h> +#include <linux/fserror.h> #include "f2fs.h" #include "node.h" @@ -2632,6 +2633,9 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) /* check if we need more GC first */ unusable = f2fs_get_unusable_blocks(sbi); + + f2fs_info(sbi, "%s starts, unusable: %u", __func__, unusable); + if (!f2fs_disable_cp_again(sbi, unusable)) goto skip_gc; @@ -2639,6 +2643,8 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) sbi->gc_mode = GC_URGENT_HIGH; + f2fs_info(sbi, "%s: run f2fs_gc() to migrate blocks", __func__); + while (!f2fs_time_over(sbi, DISABLE_TIME)) { struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO, @@ -2659,6 +2665,12 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) break; } + f2fs_info(sbi, "%s: call sync_filesystem() to persist meta: %lld, node: %lld, data: %lld", + __func__, + get_pages(sbi, F2FS_DIRTY_META), + get_pages(sbi, F2FS_DIRTY_NODES), + get_pages(sbi, F2FS_DIRTY_DATA)); + ret = sync_filesystem(sbi->sb); if (ret || err) { err = ret ? ret : err; @@ -2672,6 +2684,12 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) } skip_gc: + f2fs_info(sbi, "%s: call f2fs_write_checkpoint(), meta: %lld, node: %lld, data: %lld", + __func__, + get_pages(sbi, F2FS_DIRTY_META), + get_pages(sbi, F2FS_DIRTY_NODES), + get_pages(sbi, F2FS_DIRTY_DATA)); + f2fs_down_write_trace(&sbi->gc_lock, &lc); cpc.reason = CP_PAUSE; set_sbi_flag(sbi, SBI_CP_DISABLED); @@ -2689,7 +2707,7 @@ out_unlock: restore_flag: sbi->gc_mode = gc_mode; sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */ - f2fs_info(sbi, "f2fs_disable_checkpoint() finish, err:%d", err); + f2fs_info(sbi, "%s finishes, err:%d", __func__, err); return err; } @@ -4608,6 +4626,7 @@ static void save_stop_reason(struct f2fs_sb_info *sbi, unsigned char reason) spin_lock_irqsave(&sbi->error_lock, flags); if (sbi->stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0)) sbi->stop_reason[reason]++; + sbi->stop_reason_dirty = true; spin_unlock_irqrestore(&sbi->error_lock, flags); } @@ -4615,6 +4634,7 @@ static void f2fs_record_stop_reason(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); unsigned long flags; + bool report_shutdown = false; int err; f2fs_down_write(&sbi->sb_lock); @@ -4626,6 +4646,10 @@ static void f2fs_record_stop_reason(struct f2fs_sb_info *sbi) sbi->error_dirty = false; } memcpy(raw_super->s_stop_reason, sbi->stop_reason, MAX_STOP_REASON); + if (sbi->stop_reason_dirty) { + report_shutdown = true; + sbi->stop_reason_dirty = false; + } spin_unlock_irqrestore(&sbi->error_lock, flags); err = f2fs_commit_super(sbi, false); @@ -4635,6 +4659,9 @@ static void f2fs_record_stop_reason(struct f2fs_sb_info *sbi) f2fs_err_ratelimited(sbi, "f2fs_commit_super fails to record stop_reason, err:%d", err); + + if (report_shutdown) + fserror_report_shutdown(sbi->sb, GFP_NOFS); } void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag) @@ -4649,6 +4676,27 @@ void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag) spin_unlock_irqrestore(&sbi->error_lock, flags); } +static void f2fs_report_fserror(struct f2fs_sb_info *sbi, unsigned char error) +{ + switch (error) { + case ERROR_INVALID_BLKADDR: + case ERROR_CORRUPTED_INODE: + case ERROR_INCONSISTENT_SUMMARY: + case ERROR_INCONSISTENT_SUM_TYPE: + case ERROR_CORRUPTED_JOURNAL: + case ERROR_INCONSISTENT_NODE_COUNT: + case ERROR_INCONSISTENT_BLOCK_COUNT: + case ERROR_INVALID_CURSEG: + case ERROR_INCONSISTENT_SIT: + case ERROR_INVALID_NODE_REFERENCE: + case ERROR_INCONSISTENT_NAT: + fserror_report_metadata(sbi->sb, -EFSCORRUPTED, GFP_NOFS); + break; + default: + return; + } +} + void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error) { f2fs_save_errors(sbi, error); @@ -4658,6 +4706,8 @@ void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error) if (!test_bit(error, (unsigned long *)sbi->errors)) return; schedule_work(&sbi->s_error_work); + + f2fs_report_fserror(sbi, error); } static bool system_going_down(void) @@ -4724,9 +4774,18 @@ static void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, */ } +void f2fs_fault_report(struct super_block *sb, unsigned int err_code, + const char *func, unsigned int data) +{ + trace_f2fs_fault_report(sb, err_code, func, data); +} + void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io, unsigned char reason) { + if (reason != STOP_CP_REASON_SHUTDOWN) + f2fs_fault_report(sbi->sb, REPORT_FAULT_STOP_CP, __func__, reason); + f2fs_build_fault_attr(sbi, 0, 0, FAULT_ALL); if (!end_io) f2fs_flush_merged_writes(sbi); @@ -5107,6 +5166,13 @@ try_onemore: goto free_page_array_cache; } + /* + * Initialize ino entry info early so f2fs_drop_inode -> + * f2fs_exist_written_data can safely take im->ino_lock if mount + * fails after this point and triggers iput on cleanup. + */ + f2fs_init_ino_entry_info(sbi); + err = f2fs_get_valid_checkpoint(sbi); if (err) { f2fs_err(sbi, "Failed to get valid F2FS checkpoint"); @@ -5151,8 +5217,6 @@ try_onemore: f2fs_init_extent_cache_info(sbi); - f2fs_init_ino_entry_info(sbi); - f2fs_init_fsync_node_info(sbi); /* setup checkpoint request control and start checkpoint issue thread */ diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 352e96ad5c3a5..665687244c939 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -1399,6 +1399,7 @@ F2FS_FEATURE_RO_ATTR(pin_file); F2FS_FEATURE_RO_ATTR(linear_lookup); #endif F2FS_FEATURE_RO_ATTR(packed_ssa); +F2FS_FEATURE_RO_ATTR(fserror); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -1566,6 +1567,7 @@ static struct attribute *f2fs_feat_attrs[] = { BASE_ATTR_LIST(linear_lookup), #endif BASE_ATTR_LIST(packed_ssa), + BASE_ATTR_LIST(fserror), NULL, }; ATTRIBUTE_GROUPS(f2fs_feat); diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c index 92ebcc19cab09..39f4825154452 100644 --- a/fs/f2fs/verity.c +++ b/fs/f2fs/verity.c @@ -25,6 +25,7 @@ */ #include <linux/f2fs_fs.h> +#include <linux/fserror.h> #include "f2fs.h" #include "xattr.h" @@ -243,6 +244,7 @@ static int f2fs_get_verity_descriptor(struct inode *inode, void *buf, f2fs_warn(F2FS_I_SB(inode), "invalid verity xattr"); f2fs_handle_error(F2FS_I_SB(inode), ERROR_CORRUPTED_VERITY_XATTR); + fserror_report_file_metadata(inode, -EFSCORRUPTED, GFP_NOFS); return -EFSCORRUPTED; } if (buf_size) { diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 610d5810074dc..24cef7e1f56a5 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -19,6 +19,7 @@ #include <linux/f2fs_fs.h> #include <linux/security.h> #include <linux/posix_acl_xattr.h> +#include <linux/fserror.h> #include "f2fs.h" #include "xattr.h" #include "segment.h" @@ -371,6 +372,7 @@ static int lookup_all_xattrs(struct inode *inode, struct folio *ifolio, err = -ENODATA; f2fs_handle_error(F2FS_I_SB(inode), ERROR_CORRUPTED_XATTR); + fserror_report_file_metadata(inode, err, GFP_NOFS); goto out; } check: @@ -590,6 +592,8 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); f2fs_handle_error(F2FS_I_SB(inode), ERROR_CORRUPTED_XATTR); + fserror_report_file_metadata(inode, + -EFSCORRUPTED, GFP_NOFS); break; } @@ -677,6 +681,7 @@ retry: error = -EFSCORRUPTED; f2fs_handle_error(F2FS_I_SB(inode), ERROR_CORRUPTED_XATTR); + fserror_report_file_metadata(inode, error, GFP_NOFS); goto exit; } @@ -705,6 +710,7 @@ retry: error = -EFSCORRUPTED; f2fs_handle_error(F2FS_I_SB(inode), ERROR_CORRUPTED_XATTR); + fserror_report_file_metadata(inode, error, GFP_NOFS); goto exit; } last = XATTR_NEXT_ENTRY(last); |
