about | summary | refs | log | tree | commit | diff | stats
path: root/fs
diff options
author: Mark Brown <broonie@kernel.org>, 2026-05-29 12:59:20 +0100
committer: Mark Brown <broonie@kernel.org>, 2026-05-29 12:59:20 +0100
commit: 26da87fd412077a278884c397ef65fd33dabaf58 (patch)
tree: d85478dda73e54670dc0ca34a98e216b3b1d5552 /fs
parent: c2c3ead1e71425082397ef2e887a5ae2d2a47cdf (diff)
parent: c0b65f6129c7fbb526e921dd60261650f1b2bef9 (diff)
download: linux-next-history-26da87fd412077a278884c397ef65fd33dabaf58.tar.gz
Merge branch 'dev' of https://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git
Diffstat (limited to 'fs')
-rw-r--r-- fs/f2fs/compress.c | 2
-rw-r--r-- fs/f2fs/data.c | 47
-rw-r--r-- fs/f2fs/dir.c | 2
-rw-r--r-- fs/f2fs/extent_cache.c | 19
-rw-r--r-- fs/f2fs/f2fs.h | 32
-rw-r--r-- fs/f2fs/file.c | 37
-rw-r--r-- fs/f2fs/gc.c | 61
-rw-r--r-- fs/f2fs/inline.c | 3
-rw-r--r-- fs/f2fs/inode.c | 25
-rw-r--r-- fs/f2fs/node.c | 16
-rw-r--r-- fs/f2fs/node.h | 8
-rw-r--r-- fs/f2fs/recovery.c | 2
-rw-r--r-- fs/f2fs/segment.c | 23
-rw-r--r-- fs/f2fs/super.c | 70
-rw-r--r-- fs/f2fs/sysfs.c | 2
-rw-r--r-- fs/f2fs/verity.c | 2
-rw-r--r-- fs/f2fs/xattr.c | 6
17 files changed, 309 insertions, 48 deletions
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 881e76158b967..caf522d667d61 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -14,6 +14,7 @@
#include <linux/lz4.h>
#include <linux/zstd.h>
#include <linux/folio_batch.h>
+#include <linux/fserror.h>
#include "f2fs.h"
#include "node.h"
@@ -760,6 +761,7 @@ void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task)
/* Avoid f2fs_commit_super in irq context */
f2fs_handle_error(sbi, ERROR_FAIL_DECOMPRESSION);
+ fserror_report_file_metadata(dic->inode, ret, GFP_NOFS);
goto out_release;
}
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 8d4f1e75dee3e..d83a21998ec2d 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -20,6 +20,7 @@
#include <linux/sched/signal.h>
#include <linux/fiemap.h>
#include <linux/iomap.h>
+#include <linux/fserror.h>
#include "f2fs.h"
#include "node.h"
@@ -377,9 +378,10 @@ static void f2fs_write_end_io(struct bio *bio)
if (unlikely(bio->bi_status != BLK_STS_OK)) {
mapping_set_error(folio->mapping, -EIO);
- if (type == F2FS_WB_CP_DATA)
+ if (type == F2FS_WB_CP_DATA) {
f2fs_stop_checkpoint(sbi, true,
STOP_CP_REASON_WRITE_FAIL);
+ }
}
if (is_node_folio(folio)) {
@@ -509,6 +511,8 @@ static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
bio->bi_private = sbi;
bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi,
fio->type, fio->temp);
+ bio->bi_write_stream = f2fs_io_type_to_write_stream(bdev, fio->type,
+ fio->temp);
}
iostat_alloc_and_bind_ctx(sbi, bio, NULL);
@@ -943,6 +947,35 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
}
}
+void f2fs_submit_all_merged_ipu_writes(struct f2fs_sb_info *sbi)
+{
+ struct bio_entry *be, *tmp;
+ struct f2fs_bio_info *io;
+ enum temp_type temp;
+
+ for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
+ LIST_HEAD(list);
+
+ io = sbi->write_io[DATA] + temp;
+
+ /* A lockless list_empty() check is safe here: any bios from
+ * other kworkers that we miss will be submitted by those
+ * kworkers accordingly.
+ */
+ if (list_empty(&io->bio_list))
+ continue;
+
+ f2fs_down_write(&io->bio_list_lock);
+ list_splice_init(&io->bio_list, &list);
+ f2fs_up_write(&io->bio_list_lock);
+
+ list_for_each_entry_safe(be, tmp, &list, list) {
+ f2fs_submit_write_bio(sbi, be->bio, DATA);
+ del_bio_entry(be);
+ }
+ }
+}
+
int f2fs_merge_page_bio(struct f2fs_io_info *fio)
{
struct bio *bio = *fio->bio;
@@ -1748,6 +1781,7 @@ next_block:
err = -EFSCORRUPTED;
f2fs_handle_error(sbi,
ERROR_CORRUPTED_CLUSTER);
+ fserror_report_file_metadata(inode, err, GFP_NOFS);
goto sync_out;
}
@@ -2495,7 +2529,7 @@ static int f2fs_read_data_large_folio(struct inode *inode,
unsigned nrpages;
struct f2fs_folio_state *ffs;
int ret = 0;
- bool folio_in_bio;
+ bool folio_in_bio = false;
if (!IS_IMMUTABLE(inode) || f2fs_compressed_file(inode)) {
if (folio)
@@ -2611,18 +2645,17 @@ submit_and_realloc:
}
trace_f2fs_read_folio(folio, DATA);
err_out:
- if (!folio_in_bio) {
+ if (!folio_in_bio)
folio_end_read(folio, !ret);
- if (ret)
- return ret;
- }
+ if (ret)
+ goto out;
if (rac) {
folio = readahead_folio(rac);
goto next_folio;
}
out:
f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
- if (ret) {
+ if (ret && folio_in_bio) {
/* Wait bios and clear uptodate. */
folio_lock(folio);
folio_clear_uptodate(folio);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 38802ee2e40de..b1697194c3c4d 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -11,6 +11,7 @@
#include <linux/filelock.h>
#include <linux/sched/signal.h>
#include <linux/unicode.h>
+#include <linux/fserror.h>
#include "f2fs.h"
#include "node.h"
#include "acl.h"
@@ -1020,6 +1021,7 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
set_sbi_flag(sbi, SBI_NEED_FSCK);
err = -EFSCORRUPTED;
f2fs_handle_error(sbi, ERROR_CORRUPTED_DIRENT);
+ fserror_report_file_metadata(d->inode, err, GFP_NOFS);
goto out;
}
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index d2e006420f040..61f6b97143663 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -119,10 +119,9 @@ static bool __may_extent_tree(struct inode *inode, enum extent_type type)
if (!__init_may_extent_tree(inode, type))
return false;
- if (is_inode_flag_set(inode, FI_NO_EXTENT))
- return false;
-
if (type == EX_READ) {
+ if (is_inode_flag_set(inode, FI_NO_EXTENT))
+ return false;
if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) &&
!f2fs_sb_has_readonly(F2FS_I_SB(inode)))
return false;
@@ -645,14 +644,10 @@ static unsigned int __destroy_extent_node(struct inode *inode,
while (atomic_read(&et->node_cnt)) {
write_lock(&et->lock);
- if (!is_inode_flag_set(inode, FI_NO_EXTENT))
- set_inode_flag(inode, FI_NO_EXTENT);
node_cnt += __free_extent_tree(sbi, et, nr_shrink);
write_unlock(&et->lock);
}
- f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
-
return node_cnt;
}
@@ -691,12 +686,12 @@ static void __update_extent_tree_range(struct inode *inode,
write_lock(&et->lock);
- if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
- write_unlock(&et->lock);
- return;
- }
-
if (type == EX_READ) {
+ if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
+ write_unlock(&et->lock);
+ return;
+ }
+
prev = et->largest;
dei.len = 0;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 91f506e7c9cfb..9f24287de4c31 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -96,6 +96,15 @@ extern const char *f2fs_fault_name[FAULT_MAX];
#define DEFAULT_FAILURE_RETRY_COUNT 1
#endif
+enum {
+ REPORT_FAULT_NEED_FSCK,
+ REPORT_FAULT_STOP_CP,
+ REPORT_FAULT_MAX,
+};
+
+void f2fs_fault_report(struct super_block *sb, unsigned int err_code,
+ const char *func, unsigned int data);
+
/*
* For mount options
*/
@@ -1980,6 +1989,7 @@ struct f2fs_sb_info {
unsigned char stop_reason[MAX_STOP_REASON]; /* stop reason */
spinlock_t error_lock; /* protect errors/stop_reason array */
bool error_dirty; /* errors of sb is dirty */
+ bool stop_reason_dirty; /* stop reason of sb is dirty */
/* For reclaimed segs statistics per each GC mode */
unsigned int gc_segment_mode; /* GC state for reclaimed segments */
@@ -2125,12 +2135,12 @@ static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type)
{
unsigned long now = jiffies;
- sbi->last_time[type] = now;
+ WRITE_ONCE(sbi->last_time[type], now);
/* DISCARD_TIME and GC_TIME are based on REQ_TIME */
if (type == REQ_TIME) {
- sbi->last_time[DISCARD_TIME] = now;
- sbi->last_time[GC_TIME] = now;
+ WRITE_ONCE(sbi->last_time[DISCARD_TIME], now);
+ WRITE_ONCE(sbi->last_time[GC_TIME], now);
}
}
@@ -2138,7 +2148,7 @@ static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type)
{
unsigned long interval = sbi->interval_time[type] * HZ;
- return time_after(jiffies, sbi->last_time[type] + interval);
+ return time_after(jiffies, READ_ONCE(sbi->last_time[type]) + interval);
}
static inline unsigned int f2fs_time_to_wait(struct f2fs_sb_info *sbi,
@@ -2148,7 +2158,7 @@ static inline unsigned int f2fs_time_to_wait(struct f2fs_sb_info *sbi,
unsigned int wait_ms = 0;
long delta;
- delta = (sbi->last_time[type] + interval) - jiffies;
+ delta = (READ_ONCE(sbi->last_time[type]) + interval) - jiffies;
if (delta > 0)
wait_ms = jiffies_to_msecs(delta);
@@ -2279,11 +2289,18 @@ static inline bool is_sbi_flag_set(struct f2fs_sb_info *sbi, unsigned int type)
return test_bit(type, &sbi->s_flag);
}
-static inline void set_sbi_flag(struct f2fs_sb_info *sbi, unsigned int type)
+static inline void __set_sbi_flag(struct f2fs_sb_info *sbi, unsigned int type)
{
set_bit(type, &sbi->s_flag);
}
+#define set_sbi_flag(sbi, type) \
+do { \
+ __set_sbi_flag(sbi, type); \
+ if ((type) == SBI_NEED_FSCK) \
+ f2fs_fault_report(sbi->sb, REPORT_FAULT_NEED_FSCK, __func__, __LINE__); \
+} while (0)
+
static inline void clear_sbi_flag(struct f2fs_sb_info *sbi, unsigned int type)
{
clear_bit(type, &sbi->s_flag);
@@ -4061,6 +4078,8 @@ void f2fs_destroy_segment_manager_caches(void);
int f2fs_rw_hint_to_seg_type(struct f2fs_sb_info *sbi, enum rw_hint hint);
enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
enum page_type type, enum temp_type temp);
+u8 f2fs_io_type_to_write_stream(struct block_device *bdev,
+ enum page_type type, enum temp_type temp);
unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi);
unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
unsigned int segno);
@@ -4153,6 +4172,7 @@ void f2fs_submit_merged_write_folio(struct f2fs_sb_info *sbi,
struct folio *folio, enum page_type type);
void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
struct bio **bio, struct folio *folio);
+void f2fs_submit_all_merged_ipu_writes(struct f2fs_sb_info *sbi);
void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi);
int f2fs_submit_page_bio(struct f2fs_io_info *fio);
int f2fs_merge_page_bio(struct f2fs_io_info *fio);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index fb12c5c9affda..6edf0105dbc8f 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -4784,6 +4784,30 @@ static bool f2fs_should_use_dio(struct inode *inode, struct kiocb *iocb,
return true;
}
+#ifdef CONFIG_F2FS_IOSTAT
+static void f2fs_dio_end_bio(struct bio *bio)
+{
+ struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
+ void *orig_bi_private = iostat_ctx->post_read_ctx;
+
+ iostat_update_and_unbind_ctx(bio);
+ bio->bi_private = orig_bi_private;
+ iomap_dio_bio_end_io(bio);
+}
+
+static void f2fs_dio_iostat_start(struct f2fs_sb_info *sbi, struct bio *bio)
+{
+ void *bi_private = bio->bi_private;
+
+ iostat_alloc_and_bind_ctx(sbi, bio, bi_private);
+ iostat_update_submit_ctx(bio, DATA);
+ bio->bi_end_io = f2fs_dio_end_bio;
+}
+#else
+static inline void f2fs_dio_iostat_start(struct f2fs_sb_info *sbi,
+ struct bio *bio) {}
+#endif
+
static int f2fs_dio_read_end_io(struct kiocb *iocb, ssize_t size, int error,
unsigned int flags)
{
@@ -4796,8 +4820,18 @@ static int f2fs_dio_read_end_io(struct kiocb *iocb, ssize_t size, int error,
return 0;
}
+static void f2fs_dio_read_submit_io(const struct iomap_iter *iter,
+ struct bio *bio, loff_t file_offset)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(iter->inode);
+
+ f2fs_dio_iostat_start(sbi, bio);
+ blk_crypto_submit_bio(bio);
+}
+
static const struct iomap_dio_ops f2fs_iomap_dio_read_ops = {
.end_io = f2fs_dio_read_end_io,
+ .submit_io = f2fs_dio_read_submit_io,
};
static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
@@ -5076,6 +5110,9 @@ static void f2fs_dio_write_submit_io(const struct iomap_iter *iter,
enum temp_type temp = f2fs_get_segment_temp(sbi, type);
bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, DATA, temp);
+ bio->bi_write_stream =
+ f2fs_io_type_to_write_stream(bio->bi_bdev, DATA, temp);
+ f2fs_dio_iostat_start(sbi, bio);
blk_crypto_submit_bio(bio);
}
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index ba93010924c06..69e0a867219d1 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -1220,8 +1220,8 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
static int ra_data_block(struct inode *inode, pgoff_t index)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct address_space *mapping = f2fs_is_cow_file(inode) ?
- F2FS_I(inode)->atomic_inode->i_mapping : inode->i_mapping;
+ struct address_space *mapping = inode->i_mapping;
+ struct inode *atomic_inode = NULL;
struct dnode_of_data dn;
struct folio *folio, *efolio;
struct f2fs_io_info fio = {
@@ -1236,9 +1236,22 @@ static int ra_data_block(struct inode *inode, pgoff_t index)
};
int err = 0;
+ f2fs_down_read(&F2FS_I(inode)->i_sem);
+ if (f2fs_is_cow_file(inode)) {
+ atomic_inode = igrab(F2FS_I(inode)->atomic_inode);
+ if (!atomic_inode) {
+ f2fs_up_read(&F2FS_I(inode)->i_sem);
+ return -EBUSY;
+ }
+ mapping = atomic_inode->i_mapping;
+ }
+ f2fs_up_read(&F2FS_I(inode)->i_sem);
+
folio = f2fs_grab_cache_folio(mapping, index, true);
- if (IS_ERR(folio))
- return PTR_ERR(folio);
+ if (IS_ERR(folio)) {
+ err = PTR_ERR(folio);
+ goto out_iput;
+ }
if (f2fs_lookup_read_extent_cache_block(inode, index,
&dn.data_blkaddr)) {
@@ -1299,11 +1312,16 @@ got_it:
f2fs_update_iostat(sbi, inode, FS_DATA_READ_IO, F2FS_BLKSIZE);
f2fs_update_iostat(sbi, NULL, FS_GDATA_READ_IO, F2FS_BLKSIZE);
+ if (atomic_inode)
+ iput(atomic_inode);
return 0;
put_encrypted_page:
f2fs_put_page(fio.encrypted_page, true);
put_folio:
f2fs_folio_put(folio, true);
+out_iput:
+ if (atomic_inode)
+ iput(atomic_inode);
return err;
}
@@ -1314,8 +1332,8 @@ put_folio:
static int move_data_block(struct inode *inode, block_t bidx,
int gc_type, unsigned int segno, int off)
{
- struct address_space *mapping = f2fs_is_cow_file(inode) ?
- F2FS_I(inode)->atomic_inode->i_mapping : inode->i_mapping;
+ struct address_space *mapping = inode->i_mapping;
+ struct inode *atomic_inode = NULL;
struct f2fs_io_info fio = {
.sbi = F2FS_I_SB(inode),
.ino = inode->i_ino,
@@ -1337,10 +1355,23 @@ static int move_data_block(struct inode *inode, block_t bidx,
(fio.sbi->gc_mode != GC_URGENT_HIGH) ?
CURSEG_ALL_DATA_ATGC : CURSEG_COLD_DATA;
+ f2fs_down_read(&F2FS_I(inode)->i_sem);
+ if (f2fs_is_cow_file(inode)) {
+ atomic_inode = igrab(F2FS_I(inode)->atomic_inode);
+ if (!atomic_inode) {
+ f2fs_up_read(&F2FS_I(inode)->i_sem);
+ return -EBUSY;
+ }
+ mapping = atomic_inode->i_mapping;
+ }
+ f2fs_up_read(&F2FS_I(inode)->i_sem);
+
/* do not read out */
folio = f2fs_grab_cache_folio(mapping, bidx, false);
- if (IS_ERR(folio))
- return PTR_ERR(folio);
+ if (IS_ERR(folio)) {
+ err = PTR_ERR(folio);
+ goto out_iput;
+ }
if (!check_valid_map(F2FS_I_SB(inode), segno, off)) {
err = -ENOENT;
@@ -1473,6 +1504,9 @@ out:
folio_unlock(folio);
folio_end_dropbehind(folio);
folio_put(folio);
+out_iput:
+ if (atomic_inode)
+ iput(atomic_inode);
return err;
}
@@ -1754,9 +1788,8 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
unsigned int end_segno = start_segno + SEGS_PER_SEC(sbi);
unsigned int sec_end_segno;
int seg_freed = 0, migrated = 0;
- unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ?
- SUM_TYPE_DATA : SUM_TYPE_NODE;
- unsigned char data_type = (type == SUM_TYPE_DATA) ? DATA : NODE;
+ unsigned char type;
+ unsigned char data_type;
int submitted = 0, sum_blk_cnt;
if (__is_large_section(sbi)) {
@@ -1855,6 +1888,12 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
migrated >= sbi->migration_granularity)
continue;
+ if (migrated == 0) {
+ type = IS_DATASEG(get_seg_entry(sbi, cur_segno)->type) ?
+ SUM_TYPE_DATA : SUM_TYPE_NODE;
+ data_type = (type == SUM_TYPE_DATA) ? DATA : NODE;
+ }
+
sum = SUM_BLK_PAGE_ADDR(sbi, sum_folio, cur_segno);
if (type != GET_SUM_TYPE(sum_footer(sbi, sum))) {
f2fs_err(sbi, "Inconsistent segment (%u) type "
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 7aabfc9b43cb8..099f720897016 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -9,6 +9,7 @@
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/fiemap.h>
+#include <linux/fserror.h>
#include "f2fs.h"
#include "node.h"
@@ -179,6 +180,7 @@ int f2fs_convert_inline_folio(struct dnode_of_data *dn, struct folio *folio)
f2fs_warn(fio.sbi, "%s: corrupted inline inode ino=%llu, i_addr[0]:0x%x, run fsck to fix.",
__func__, dn->inode->i_ino, dn->data_blkaddr);
f2fs_handle_error(fio.sbi, ERROR_INVALID_BLKADDR);
+ fserror_report_file_metadata(dn->inode, -EFSCORRUPTED, GFP_NOFS);
return -EFSCORRUPTED;
}
@@ -435,6 +437,7 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct folio *ifolio,
__func__, dir->i_ino, dn.data_blkaddr);
f2fs_handle_error(F2FS_F_SB(folio), ERROR_INVALID_BLKADDR);
err = -EFSCORRUPTED;
+ fserror_report_file_metadata(dn.inode, err, GFP_NOFS);
goto out;
}
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index c6dcda447882f..25f30b8eadc53 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -11,6 +11,7 @@
#include <linux/sched/mm.h>
#include <linux/lz4.h>
#include <linux/zstd.h>
+#include <linux/fserror.h>
#include "f2fs.h"
#include "node.h"
@@ -480,6 +481,7 @@ static int do_read_inode(struct inode *inode)
f2fs_folio_put(node_folio, true);
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE);
+ fserror_report_file_metadata(inode, -EFSCORRUPTED, GFP_NOFS);
return -EFSCORRUPTED;
}
@@ -541,6 +543,7 @@ static int do_read_inode(struct inode *inode)
if (!sanity_check_extent_cache(inode, node_folio)) {
f2fs_folio_put(node_folio, true);
f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE);
+ fserror_report_file_metadata(inode, -EFSCORRUPTED, GFP_NOFS);
return -EFSCORRUPTED;
}
@@ -561,8 +564,13 @@ static int do_read_inode(struct inode *inode)
static bool is_meta_ino(struct f2fs_sb_info *sbi, unsigned int ino)
{
- return ino == F2FS_NODE_INO(sbi) || ino == F2FS_META_INO(sbi) ||
- ino == F2FS_COMPRESS_INO(sbi);
+ if (ino == F2FS_NODE_INO(sbi) || ino == F2FS_META_INO(sbi))
+ return true;
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ if (test_opt(sbi, COMPRESS_CACHE) && ino == F2FS_COMPRESS_INO(sbi))
+ return true;
+#endif
+ return false;
}
struct inode *f2fs_iget(struct super_block *sb, unsigned long ino)
@@ -583,6 +591,7 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino)
trace_f2fs_iget_exit(inode, ret);
iput(inode);
f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE);
+ fserror_report_file_metadata(inode, ret, GFP_NOFS);
return ERR_PTR(ret);
}
@@ -787,6 +796,7 @@ retry:
if (err == -ENOMEM || ++count <= DEFAULT_RETRY_IO_COUNT)
goto retry;
stop_checkpoint:
+ fserror_report_file_metadata(inode, -EFSCORRUPTED, GFP_NOFS);
f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_UPDATE_INODE);
return;
}
@@ -858,10 +868,15 @@ void f2fs_evict_inode(struct inode *inode)
f2fs_abort_atomic_write(inode, true);
if (fi->cow_inode && f2fs_is_cow_file(fi->cow_inode)) {
- clear_inode_flag(fi->cow_inode, FI_COW_FILE);
- F2FS_I(fi->cow_inode)->atomic_inode = NULL;
- iput(fi->cow_inode);
+ struct inode *cow_inode = fi->cow_inode;
+
+ f2fs_down_write(&F2FS_I(cow_inode)->i_sem);
+ clear_inode_flag(cow_inode, FI_COW_FILE);
+ F2FS_I(cow_inode)->atomic_inode = NULL;
fi->cow_inode = NULL;
+ f2fs_up_write(&F2FS_I(cow_inode)->i_sem);
+
+ iput(cow_inode);
}
trace_f2fs_evict_inode(inode);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 4e5bd9e4cfc32..cd5a394f61114 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -12,6 +12,7 @@
#include <linux/blkdev.h>
#include <linux/folio_batch.h>
#include <linux/swap.h>
+#include <linux/fserror.h>
#include "f2fs.h"
#include "node.h"
@@ -72,7 +73,11 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type)
sizeof(struct free_nid)) >> PAGE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
} else if (type == NAT_ENTRIES) {
- mem_size = (nm_i->nat_cnt[TOTAL_NAT] *
+ /*
+ * nat_cnt[] is heuristic accounting. Sample it locklessly here
+ * to avoid taking nat_tree_lock in the balance path.
+ */
+ mem_size = (data_race(READ_ONCE(nm_i->nat_cnt[TOTAL_NAT])) *
sizeof(struct nat_entry)) >> PAGE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
if (excess_cached_nats(sbi))
@@ -1265,6 +1270,8 @@ skip_partial:
if (err == -ENOENT) {
set_sbi_flag(F2FS_F_SB(folio), SBI_NEED_FSCK);
f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
+ fserror_report_file_metadata(dn.inode, -EFSCORRUPTED,
+ GFP_NOFS);
f2fs_err_ratelimited(sbi,
"truncate node fail, ino:%llu, nid:%u, "
"offset[0]:%d, offset[1]:%d, nofs:%d",
@@ -1556,6 +1563,8 @@ out_err:
next_blkaddr_of_node(folio));
f2fs_handle_error(sbi, ERROR_INCONSISTENT_FOOTER);
+ fserror_report_file_metadata(folio->mapping->host,
+ -EFSCORRUPTED, in_irq ? GFP_NOWAIT : GFP_NOFS);
return -EFSCORRUPTED;
}
@@ -1778,6 +1787,7 @@ static bool __write_node_folio(struct folio *folio, bool atomic, bool do_fsync,
if (f2fs_sanity_check_node_footer(sbi, folio, nid,
NODE_TYPE_REGULAR, false)) {
+ fserror_report_metadata(sbi->sb, -EFSCORRUPTED, GFP_NOFS);
f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_CORRUPTED_NID);
goto redirty_out;
}
@@ -1875,7 +1885,7 @@ int f2fs_write_single_node_folio(struct folio *node_folio, int sync_mode,
}
if (!__write_node_folio(node_folio, false, false, NULL,
- &wbc, false, FS_GC_NODE_IO, NULL))
+ &wbc, false, io_type, NULL))
err = -EAGAIN;
goto release_folio;
out_folio:
@@ -2703,6 +2713,8 @@ retry:
spin_unlock(&nm_i->nid_list_lock);
f2fs_err(sbi, "Corrupted nid %u in free_nid_list",
i->nid);
+ fserror_report_metadata(sbi->sb, -EFSCORRUPTED,
+ GFP_NOFS);
f2fs_stop_checkpoint(sbi, false,
STOP_CP_REASON_CORRUPTED_NID);
return false;
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index bcf2034e42639..5e114f3520998 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -129,13 +129,17 @@ static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne,
static inline bool excess_dirty_nats(struct f2fs_sb_info *sbi)
{
- return NM_I(sbi)->nat_cnt[DIRTY_NAT] >= NM_I(sbi)->max_nid *
+ /* nat_cnt[] is heuristic accounting sampled locklessly here. */
+ return data_race(READ_ONCE(NM_I(sbi)->nat_cnt[DIRTY_NAT])) >=
+ NM_I(sbi)->max_nid *
NM_I(sbi)->dirty_nats_ratio / 100;
}
static inline bool excess_cached_nats(struct f2fs_sb_info *sbi)
{
- return NM_I(sbi)->nat_cnt[TOTAL_NAT] >= DEF_NAT_CACHE_THRESHOLD;
+ /* nat_cnt[] is heuristic accounting sampled locklessly here. */
+ return data_race(READ_ONCE(NM_I(sbi)->nat_cnt[TOTAL_NAT])) >=
+ DEF_NAT_CACHE_THRESHOLD;
}
enum mem_type {
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 3d3dacec94825..89af8407b6673 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -9,6 +9,7 @@
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/sched/mm.h>
+#include <linux/fserror.h>
#include "f2fs.h"
#include "node.h"
#include "segment.h"
@@ -679,6 +680,7 @@ retry_dn:
ofs_of_node(folio));
err = -EFSCORRUPTED;
f2fs_handle_error(sbi, ERROR_INCONSISTENT_FOOTER);
+ fserror_report_file_metadata(dn.inode, err, GFP_NOFS);
goto err;
}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 788f8b0502492..1ef4edb770784 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -17,6 +17,7 @@
#include <linux/freezer.h>
#include <linux/sched/signal.h>
#include <linux/random.h>
+#include <linux/fserror.h>
#include "f2fs.h"
#include "segment.h"
@@ -444,6 +445,13 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
if (has_enough_free_secs(sbi, 0, 0))
return;
+ /*
+ * Submit all cached OPU/IPU DATA bios before triggering
+ * foreground GC to avoid potential deadlocks.
+ */
+ f2fs_submit_merged_write(sbi, DATA);
+ f2fs_submit_all_merged_ipu_writes(sbi);
+
if (test_opt(sbi, GC_MERGE) && sbi->gc_thread &&
sbi->gc_thread->f2fs_gc_task) {
DEFINE_WAIT(wait);
@@ -462,6 +470,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
.should_migrate_blocks = false,
.err_gc_skipped = false,
.nr_free_secs = 1 };
+
f2fs_down_write_trace(&sbi->gc_lock, &gc_control.lc);
stat_inc_gc_call_count(sbi, FOREGROUND);
f2fs_gc(sbi, &gc_control);
@@ -2896,6 +2905,7 @@ got_it:
/* set it as dirty segment in free segmap */
if (test_bit(segno, free_i->free_segmap)) {
ret = -EFSCORRUPTED;
+ fserror_report_metadata(sbi->sb, -EFSCORRUPTED, GFP_NOFS);
f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_CORRUPTED_FREE_BITMAP);
goto out_unlock;
}
@@ -3636,6 +3646,19 @@ enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
}
}
+u8 f2fs_io_type_to_write_stream(struct block_device *bdev,
+ enum page_type type, enum temp_type temp)
+{
+ unsigned short nr = bdev_max_write_streams(bdev);
+
+ if (type != DATA || !nr)
+ return 0;
+ if (nr < NR_TEMP_TYPE)
+ return temp == COLD ? nr : HOT + 1;
+
+ return temp + 1;
+}
+
static int __get_segment_type_2(struct f2fs_io_info *fio)
{
if (fio->type == DATA)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index ccf806b676f53..87f816f010679 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -29,6 +29,7 @@
#include <linux/lz4.h>
#include <linux/ctype.h>
#include <linux/fs_parser.h>
+#include <linux/fserror.h>
#include "f2fs.h"
#include "node.h"
@@ -2632,6 +2633,9 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
/* check if we need more GC first */
unusable = f2fs_get_unusable_blocks(sbi);
+
+ f2fs_info(sbi, "%s starts, unusable: %u", __func__, unusable);
+
if (!f2fs_disable_cp_again(sbi, unusable))
goto skip_gc;
@@ -2639,6 +2643,8 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
sbi->gc_mode = GC_URGENT_HIGH;
+ f2fs_info(sbi, "%s: run f2fs_gc() to migrate blocks", __func__);
+
while (!f2fs_time_over(sbi, DISABLE_TIME)) {
struct f2fs_gc_control gc_control = {
.victim_segno = NULL_SEGNO,
@@ -2659,6 +2665,12 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
break;
}
+ f2fs_info(sbi, "%s: call sync_filesystem() to persist meta: %lld, node: %lld, data: %lld",
+ __func__,
+ get_pages(sbi, F2FS_DIRTY_META),
+ get_pages(sbi, F2FS_DIRTY_NODES),
+ get_pages(sbi, F2FS_DIRTY_DATA));
+
ret = sync_filesystem(sbi->sb);
if (ret || err) {
err = ret ? ret : err;
@@ -2672,6 +2684,12 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
}
skip_gc:
+ f2fs_info(sbi, "%s: call f2fs_write_checkpoint(), meta: %lld, node: %lld, data: %lld",
+ __func__,
+ get_pages(sbi, F2FS_DIRTY_META),
+ get_pages(sbi, F2FS_DIRTY_NODES),
+ get_pages(sbi, F2FS_DIRTY_DATA));
+
f2fs_down_write_trace(&sbi->gc_lock, &lc);
cpc.reason = CP_PAUSE;
set_sbi_flag(sbi, SBI_CP_DISABLED);
@@ -2689,7 +2707,7 @@ out_unlock:
restore_flag:
sbi->gc_mode = gc_mode;
sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */
- f2fs_info(sbi, "f2fs_disable_checkpoint() finish, err:%d", err);
+ f2fs_info(sbi, "%s finishes, err:%d", __func__, err);
return err;
}
@@ -4608,6 +4626,7 @@ static void save_stop_reason(struct f2fs_sb_info *sbi, unsigned char reason)
spin_lock_irqsave(&sbi->error_lock, flags);
if (sbi->stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0))
sbi->stop_reason[reason]++;
+ sbi->stop_reason_dirty = true;
spin_unlock_irqrestore(&sbi->error_lock, flags);
}
@@ -4615,6 +4634,7 @@ static void f2fs_record_stop_reason(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
unsigned long flags;
+ bool report_shutdown = false;
int err;
f2fs_down_write(&sbi->sb_lock);
@@ -4626,6 +4646,10 @@ static void f2fs_record_stop_reason(struct f2fs_sb_info *sbi)
sbi->error_dirty = false;
}
memcpy(raw_super->s_stop_reason, sbi->stop_reason, MAX_STOP_REASON);
+ if (sbi->stop_reason_dirty) {
+ report_shutdown = true;
+ sbi->stop_reason_dirty = false;
+ }
spin_unlock_irqrestore(&sbi->error_lock, flags);
err = f2fs_commit_super(sbi, false);
@@ -4635,6 +4659,9 @@ static void f2fs_record_stop_reason(struct f2fs_sb_info *sbi)
f2fs_err_ratelimited(sbi,
"f2fs_commit_super fails to record stop_reason, err:%d",
err);
+
+ if (report_shutdown)
+ fserror_report_shutdown(sbi->sb, GFP_NOFS);
}
void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag)
@@ -4649,6 +4676,27 @@ void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag)
spin_unlock_irqrestore(&sbi->error_lock, flags);
}
+static void f2fs_report_fserror(struct f2fs_sb_info *sbi, unsigned char error)
+{
+ switch (error) {
+ case ERROR_INVALID_BLKADDR:
+ case ERROR_CORRUPTED_INODE:
+ case ERROR_INCONSISTENT_SUMMARY:
+ case ERROR_INCONSISTENT_SUM_TYPE:
+ case ERROR_CORRUPTED_JOURNAL:
+ case ERROR_INCONSISTENT_NODE_COUNT:
+ case ERROR_INCONSISTENT_BLOCK_COUNT:
+ case ERROR_INVALID_CURSEG:
+ case ERROR_INCONSISTENT_SIT:
+ case ERROR_INVALID_NODE_REFERENCE:
+ case ERROR_INCONSISTENT_NAT:
+ fserror_report_metadata(sbi->sb, -EFSCORRUPTED, GFP_NOFS);
+ break;
+ default:
+ return;
+ }
+}
+
void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error)
{
f2fs_save_errors(sbi, error);
@@ -4658,6 +4706,8 @@ void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error)
if (!test_bit(error, (unsigned long *)sbi->errors))
return;
schedule_work(&sbi->s_error_work);
+
+ f2fs_report_fserror(sbi, error);
}
static bool system_going_down(void)
@@ -4724,9 +4774,18 @@ static void f2fs_handle_critical_error(struct f2fs_sb_info *sbi,
*/
}
+void f2fs_fault_report(struct super_block *sb, unsigned int err_code,
+ const char *func, unsigned int data)
+{
+ trace_f2fs_fault_report(sb, err_code, func, data);
+}
+
void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io,
unsigned char reason)
{
+ if (reason != STOP_CP_REASON_SHUTDOWN)
+ f2fs_fault_report(sbi->sb, REPORT_FAULT_STOP_CP, __func__, reason);
+
f2fs_build_fault_attr(sbi, 0, 0, FAULT_ALL);
if (!end_io)
f2fs_flush_merged_writes(sbi);
@@ -5107,6 +5166,13 @@ try_onemore:
goto free_page_array_cache;
}
+ /*
+ * Initialize ino entry info early so f2fs_drop_inode ->
+ * f2fs_exist_written_data can safely take im->ino_lock if mount
+ * fails after this point and triggers iput on cleanup.
+ */
+ f2fs_init_ino_entry_info(sbi);
+
err = f2fs_get_valid_checkpoint(sbi);
if (err) {
f2fs_err(sbi, "Failed to get valid F2FS checkpoint");
@@ -5151,8 +5217,6 @@ try_onemore:
f2fs_init_extent_cache_info(sbi);
- f2fs_init_ino_entry_info(sbi);
-
f2fs_init_fsync_node_info(sbi);
/* setup checkpoint request control and start checkpoint issue thread */
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 352e96ad5c3a5..665687244c939 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -1399,6 +1399,7 @@ F2FS_FEATURE_RO_ATTR(pin_file);
F2FS_FEATURE_RO_ATTR(linear_lookup);
#endif
F2FS_FEATURE_RO_ATTR(packed_ssa);
+F2FS_FEATURE_RO_ATTR(fserror);
#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
static struct attribute *f2fs_attrs[] = {
@@ -1566,6 +1567,7 @@ static struct attribute *f2fs_feat_attrs[] = {
BASE_ATTR_LIST(linear_lookup),
#endif
BASE_ATTR_LIST(packed_ssa),
+ BASE_ATTR_LIST(fserror),
NULL,
};
ATTRIBUTE_GROUPS(f2fs_feat);
diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c
index 92ebcc19cab09..39f4825154452 100644
--- a/fs/f2fs/verity.c
+++ b/fs/f2fs/verity.c
@@ -25,6 +25,7 @@
*/
#include <linux/f2fs_fs.h>
+#include <linux/fserror.h>
#include "f2fs.h"
#include "xattr.h"
@@ -243,6 +244,7 @@ static int f2fs_get_verity_descriptor(struct inode *inode, void *buf,
f2fs_warn(F2FS_I_SB(inode), "invalid verity xattr");
f2fs_handle_error(F2FS_I_SB(inode),
ERROR_CORRUPTED_VERITY_XATTR);
+ fserror_report_file_metadata(inode, -EFSCORRUPTED, GFP_NOFS);
return -EFSCORRUPTED;
}
if (buf_size) {
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 610d5810074dc..24cef7e1f56a5 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -19,6 +19,7 @@
#include <linux/f2fs_fs.h>
#include <linux/security.h>
#include <linux/posix_acl_xattr.h>
+#include <linux/fserror.h>
#include "f2fs.h"
#include "xattr.h"
#include "segment.h"
@@ -371,6 +372,7 @@ static int lookup_all_xattrs(struct inode *inode, struct folio *ifolio,
err = -ENODATA;
f2fs_handle_error(F2FS_I_SB(inode),
ERROR_CORRUPTED_XATTR);
+ fserror_report_file_metadata(inode, err, GFP_NOFS);
goto out;
}
check:
@@ -590,6 +592,8 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
f2fs_handle_error(F2FS_I_SB(inode),
ERROR_CORRUPTED_XATTR);
+ fserror_report_file_metadata(inode,
+ -EFSCORRUPTED, GFP_NOFS);
break;
}
@@ -677,6 +681,7 @@ retry:
error = -EFSCORRUPTED;
f2fs_handle_error(F2FS_I_SB(inode),
ERROR_CORRUPTED_XATTR);
+ fserror_report_file_metadata(inode, error, GFP_NOFS);
goto exit;
}
@@ -705,6 +710,7 @@ retry:
error = -EFSCORRUPTED;
f2fs_handle_error(F2FS_I_SB(inode),
ERROR_CORRUPTED_XATTR);
+ fserror_report_file_metadata(inode, error, GFP_NOFS);
goto exit;
}
last = XATTR_NEXT_ENTRY(last);