Merge branch 'dev' of https://git.kernel.org/pub/scm/linux/kernel/git/linkinjeon/exfat.git

author: Mark Brown <broonie@kernel.org> 2026-05-29 12:27:15 +0100
committer: Mark Brown <broonie@kernel.org> 2026-05-29 12:27:15 +0100
commit: f945eab143d9c5a10b1ec5d1d1d743df59dfcc9e (patch)
tree: 0dd9076182b1dc0bbae8813d02afea74a29e457e /fs
parent: fa872176b6ab36906ebb0e476122295f1446cf1f (diff)
parent: 8cf22f18dc8bed9d3924336b312684edd44e43f1 (diff)
download: linux-next-history-f945eab143d9c5a10b1ec5d1d1d743df59dfcc9e.tar.gz
16 files changed, 757 insertions, 509 deletions
diff --git a/fs/exfat/Kconfig b/fs/exfat/Kconfig
index cbeca8e44d9b3..1fcb10c8d7bc9 100644
--- a/fs/exfat/Kconfig
+++ b/fs/exfat/Kconfig
@@ -4,7 +4,7 @@ config EXFAT_FS
 	tristate "exFAT filesystem support"
 	select BUFFER_HEAD
 	select NLS
-	select LEGACY_DIRECT_IO
+	select FS_IOMAP
 	help
 	  This allows you to mount devices formatted with the exFAT file system.
 	  exFAT is typically used on SD-Cards or USB sticks.
diff --git a/fs/exfat/Makefile b/fs/exfat/Makefile
index ed51926a49717..e06bf85870ae7 100644
--- a/fs/exfat/Makefile
+++ b/fs/exfat/Makefile
@@ -5,4 +5,4 @@
 obj-$(CONFIG_EXFAT_FS) += exfat.o
 
 exfat-y	:= inode.o namei.o dir.o super.o fatent.o cache.o nls.o misc.o \
-	   file.o balloc.o
+	   file.o balloc.o iomap.o
diff --git a/fs/exfat/balloc.c b/fs/exfat/balloc.c
index 625f2f14d4fe0..e66ebf899778c 100644
--- a/fs/exfat/balloc.c
+++ b/fs/exfat/balloc.c
@@ -112,7 +112,7 @@ static int exfat_allocate_bitmap(struct super_block *sb,
 	}
 
 	if (exfat_test_bitmap_range(sb, sbi->map_clu,
-		EXFAT_B_TO_CLU_ROUND_UP(map_size, sbi)) == false)
+		exfat_bytes_to_cluster_round_up(sbi, map_size)) == false)
 		goto err_out;
 
 	return 0;
diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c
index ac008ccaa97de..8b8f6bc0c233c 100644
--- a/fs/exfat/dir.c
+++ b/fs/exfat/dir.c
@@ -76,7 +76,7 @@ static int exfat_readdir(struct inode *inode, loff_t *cpos, struct exfat_dir_ent
 	struct super_block *sb = inode->i_sb;
 	struct exfat_sb_info *sbi = EXFAT_SB(sb);
 	struct exfat_inode_info *ei = EXFAT_I(inode);
-	unsigned int dentry = EXFAT_B_TO_DEN(*cpos) & 0xFFFFFFFF;
+	unsigned int dentry = exfat_bytes_to_dentries(*cpos) & 0xFFFFFFFF;
 	struct buffer_head *bh;
 
 	/* check if the given file ID is opened */
@@ -84,13 +84,13 @@ static int exfat_readdir(struct inode *inode, loff_t *cpos, struct exfat_dir_ent
 		return -EPERM;
 
 	exfat_chain_set(&dir, ei->start_clu,
-		EXFAT_B_TO_CLU(i_size_read(inode), sbi), ei->flags);
+		exfat_bytes_to_cluster(sbi, i_size_read(inode)), ei->flags);
 
 	dentries_per_clu = sbi->dentries_per_clu;
-	max_dentries = (unsigned int)min_t(u64, MAX_EXFAT_DENTRIES,
-				(u64)EXFAT_CLU_TO_DEN(sbi->num_clusters, sbi));
+	max_dentries = min(MAX_EXFAT_DENTRIES,
+			exfat_cluster_to_dentries(sbi, sbi->num_clusters));
 
-	clu_offset = EXFAT_DEN_TO_CLU(dentry, sbi);
+	clu_offset = exfat_dentries_to_cluster(sbi, dentry);
 	exfat_chain_dup(&clu, &dir);
 
 	if (clu.flags == ALLOC_FAT_CHAIN) {
@@ -147,10 +147,10 @@ static int exfat_readdir(struct inode *inode, loff_t *cpos, struct exfat_dir_ent
 			dir_entry->dir = clu;
 			brelse(bh);
 
-			ei->hint_bmap.off = EXFAT_DEN_TO_CLU(dentry, sbi);
+			ei->hint_bmap.off = exfat_dentries_to_cluster(sbi, dentry);
 			ei->hint_bmap.clu = clu.dir;
 
-			*cpos = EXFAT_DEN_TO_B(dentry + 1 + num_ext);
+			*cpos = exfat_dentries_to_bytes(dentry + 1 + num_ext);
 			return 0;
 		}
 
@@ -160,7 +160,7 @@ static int exfat_readdir(struct inode *inode, loff_t *cpos, struct exfat_dir_ent
 
 out:
 	dir_entry->namebuf.lfn[0] = '\0';
-	*cpos = EXFAT_DEN_TO_B(dentry);
+	*cpos = exfat_dentries_to_bytes(dentry);
 	return 0;
 }
 
@@ -295,7 +295,7 @@ int exfat_alloc_new_dir(struct inode *inode, struct exfat_chain *clu)
 
 	exfat_chain_set(clu, EXFAT_EOF_CLUSTER, 0, ALLOC_NO_FAT_CHAIN);
 
-	ret = exfat_alloc_cluster(inode, 1, clu, IS_DIRSYNC(inode));
+	ret = exfat_alloc_cluster(inode, 1, clu, IS_DIRSYNC(inode), false);
 	if (ret)
 		return ret;
 
@@ -465,7 +465,7 @@ static void exfat_free_benign_secondary_clusters(struct inode *inode,
 		return;
 
 	exfat_chain_set(&dir, start_clu,
-			EXFAT_B_TO_CLU_ROUND_UP(size, EXFAT_SB(sb)),
+			exfat_bytes_to_cluster_round_up(EXFAT_SB(sb), size),
 			flags);
 	exfat_free_cluster(inode, &dir);
 }
@@ -556,10 +556,11 @@ static int exfat_find_location(struct super_block *sb, struct exfat_chain *p_dir
 	unsigned int off, clu = 0;
 	struct exfat_sb_info *sbi = EXFAT_SB(sb);
 
-	off = EXFAT_DEN_TO_B(entry);
+	off = exfat_dentries_to_bytes(entry);
 
 	clu = p_dir->dir;
-	ret = exfat_cluster_walk(sb, &clu, EXFAT_B_TO_CLU(off, sbi), p_dir->flags);
+	ret = exfat_cluster_walk(sb, &clu, exfat_bytes_to_cluster(sbi, off),
+			p_dir->flags);
 	if (ret)
 		return ret;
 
@@ -567,7 +568,7 @@ static int exfat_find_location(struct super_block *sb, struct exfat_chain *p_dir
 		exfat_fs_error(sb,
 			"unexpected early break in cluster chain (clu : %u, len : %d)",
 			p_dir->dir,
-			EXFAT_B_TO_CLU(off, sbi));
+			exfat_bytes_to_cluster(sbi, off));
 		return -EIO;
 	}
 
@@ -577,13 +578,13 @@ static int exfat_find_location(struct super_block *sb, struct exfat_chain *p_dir
 	}
 
 	/* byte offset in cluster */
-	off = EXFAT_CLU_OFFSET(off, sbi);
+	off = exfat_cluster_offset(sbi, off);
 
 	/* byte offset in sector    */
-	*offset = EXFAT_BLK_OFFSET(off, sb);
+	*offset = exfat_block_offset(sb, off);
 
 	/* sector offset in cluster */
-	*sector = EXFAT_B_TO_BLK(off, sb);
+	*sector = exfat_bytes_to_block(sb, off);
 	*sector += exfat_cluster_to_sector(sbi, clu);
 	return 0;
 }
@@ -593,7 +594,7 @@ struct exfat_dentry *exfat_get_dentry(struct super_block *sb,
 {
 	struct exfat_sb_info *sbi = EXFAT_SB(sb);
 	unsigned int sect_per_clus = sbi->sect_per_clus;
-	unsigned int dentries_per_page = EXFAT_B_TO_DEN(PAGE_SIZE);
+	unsigned int dentries_per_page = exfat_bytes_to_dentries(PAGE_SIZE);
 	int off;
 	sector_t sec;
 
@@ -672,8 +673,8 @@ struct exfat_dentry *exfat_get_dentry_cached(
 	struct exfat_entry_set_cache *es, int num)
 {
 	int off = es->start_off + num * DENTRY_SIZE;
-	struct buffer_head *bh = es->bh[EXFAT_B_TO_BLK(off, es->sb)];
-	char *p = bh->b_data + EXFAT_BLK_OFFSET(off, es->sb);
+	struct buffer_head *bh = es->bh[exfat_bytes_to_block(es->sb, off)];
+	char *p = bh->b_data + exfat_block_offset(es->sb, off);
 
 	return (struct exfat_dentry *)p;
 }
@@ -741,7 +742,7 @@ static int __exfat_get_dentry_set(struct exfat_entry_set_cache *es,
 
 	es->num_entries = num_entries;
 
-	num_bh = EXFAT_B_TO_BLK_ROUND_UP(off + num_entries * DENTRY_SIZE, sb);
+	num_bh = exfat_bytes_to_block_round_up(sb, off + num_entries * DENTRY_SIZE);
 	if (num_bh > ARRAY_SIZE(es->__bh)) {
 		es->bh = kmalloc_objs(*es->bh, num_bh, GFP_NOFS);
 		if (!es->bh) {
@@ -830,7 +831,7 @@ static int exfat_validate_empty_dentry_set(struct exfat_entry_set_cache *es)
 
 err_used_follow_unused:
 	off = es->start_off + (i << DENTRY_SIZE_BITS);
-	bh = es->bh[EXFAT_B_TO_BLK(off, es->sb)];
+	bh = es->bh[exfat_bytes_to_block(es->sb, off)];
 
 	exfat_fs_error(es->sb,
 		"in sector %lld, dentry %d should be unused, but 0x%x",
@@ -839,7 +840,8 @@ err_used_follow_unused:
 	return -EIO;
 
 count_skip_entries:
-	es->num_entries = EXFAT_B_TO_DEN(EXFAT_BLK_TO_B(es->num_bh, es->sb) - es->start_off);
+	es->num_entries =
+		exfat_bytes_to_dentries(exfat_block_to_bytes(es->sb, es->num_bh) - es->start_off);
 	for (; i < es->num_entries; i++) {
 		ep = exfat_get_dentry_cached(es, i);
 		if (IS_EXFAT_DELETED(ep->type))
@@ -892,7 +894,7 @@ static inline void exfat_set_empty_hint(struct exfat_inode_info *ei,
 {
 	if (ei->hint_femp.eidx == EXFAT_HINT_NONE ||
 	    ei->hint_femp.eidx > dentry) {
-		int total_entries = EXFAT_B_TO_DEN(i_size_read(&ei->vfs_inode));
+		int total_entries = exfat_bytes_to_dentries(i_size_read(&ei->vfs_inode));
 
 		if (candi_empty->count == 0) {
 			candi_empty->cur = *clu;
@@ -1027,12 +1029,12 @@ rewind:
 				continue;
 			}
 
-			brelse(bh);
 			if (entry_type == TYPE_EXTEND) {
 				unsigned short entry_uniname[16], unichar;
 
 				if (step != DIRENT_STEP_NAME ||
 				    name_len >= MAX_NAME_LENGTH) {
+					brelse(bh);
 					step = DIRENT_STEP_FILE;
 					continue;
 				}
@@ -1043,6 +1045,7 @@ rewind:
 					uniname += EXFAT_FILE_NAME_LEN;
 
 				len = exfat_extract_uni_name(ep, entry_uniname);
+				brelse(bh);
 				name_len += len;
 
 				unichar = *(uniname+len);
@@ -1061,6 +1064,7 @@ rewind:
 				continue;
 			}
 
+			brelse(bh);
 			if (entry_type &
 					(TYPE_CRITICAL_SEC | TYPE_BENIGN_SEC)) {
 				if (step == DIRENT_STEP_SECD) {
@@ -1215,7 +1219,7 @@ static int exfat_get_volume_label_dentry(struct super_block *sb,
 			es->bh = es->__bh;
 			es->bh[0] = bh;
 			es->num_bh = 1;
-			es->start_off = EXFAT_DEN_TO_B(i) % sb->s_blocksize;
+			es->start_off = exfat_dentries_to_bytes(i) % sb->s_blocksize;
 
 			return 0;
 		}
diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index 89ef5368277f8..174728904dc10 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -12,6 +12,7 @@
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
 #include <uapi/linux/exfat.h>
+#include <linux/buffer_head.h>
 
 #define EXFAT_ROOT_INO		1
 
@@ -85,38 +86,6 @@ enum {
 	<< (PAGE_SHIFT - (sb)->s_blocksize_bits))
 
 /*
- * helpers for cluster size to byte conversion.
- */
-#define EXFAT_CLU_TO_B(b, sbi)		((b) << (sbi)->cluster_size_bits)
-#define EXFAT_B_TO_CLU(b, sbi)		((b) >> (sbi)->cluster_size_bits)
-#define EXFAT_B_TO_CLU_ROUND_UP(b, sbi)	\
-	(((b - 1) >> (sbi)->cluster_size_bits) + 1)
-#define EXFAT_CLU_OFFSET(off, sbi)	((off) & ((sbi)->cluster_size - 1))
-
-/*
- * helpers for block size to byte conversion.
- */
-#define EXFAT_BLK_TO_B(b, sb)		((b) << (sb)->s_blocksize_bits)
-#define EXFAT_B_TO_BLK(b, sb)		((b) >> (sb)->s_blocksize_bits)
-#define EXFAT_B_TO_BLK_ROUND_UP(b, sb)	\
-	(((b - 1) >> (sb)->s_blocksize_bits) + 1)
-#define EXFAT_BLK_OFFSET(off, sb)	((off) & ((sb)->s_blocksize - 1))
-
-/*
- * helpers for block size to dentry size conversion.
- */
-#define EXFAT_B_TO_DEN(b)		((b) >> DENTRY_SIZE_BITS)
-#define EXFAT_DEN_TO_B(b)		((b) << DENTRY_SIZE_BITS)
-
-/*
- * helpers for cluster size to dentry size conversion.
- */
-#define EXFAT_CLU_TO_DEN(clu, sbi)	\
-	((clu) << ((sbi)->cluster_size_bits - DENTRY_SIZE_BITS))
-#define EXFAT_DEN_TO_CLU(dentry, sbi)	\
-	((dentry) >> ((sbi)->cluster_size_bits - DENTRY_SIZE_BITS))
-
-/*
  * helpers for fat entry.
  */
 #define FAT_ENT_SIZE (4)
@@ -149,7 +118,7 @@ enum {
  * The 608 bytes are in 3 sectors at most (even 512 Byte sector).
  */
 #define DIR_CACHE_SIZE		\
-	(DIV_ROUND_UP(EXFAT_DEN_TO_B(ES_MAX_ENTRY_NUM), SECTOR_SIZE) + 1)
+	(DIV_ROUND_UP(ES_MAX_ENTRY_NUM << DENTRY_SIZE_BITS, SECTOR_SIZE) + 1)
 
 /* Superblock flags */
 #define EXFAT_FLAGS_SHUTDOWN	1
@@ -259,6 +228,7 @@ struct exfat_sb_info {
 	unsigned long long FAT1_start_sector; /* FAT1 start sector */
 	unsigned long long FAT2_start_sector; /* FAT2 start sector */
 	unsigned long long data_start_sector; /* data area start sector */
+	unsigned long long data_start_bytes;
 	unsigned int num_FAT_sectors; /* num of FAT sectors */
 	unsigned int root_dir; /* root dir cluster */
 	unsigned int dentries_per_clu; /* num of dentries per cluster */
@@ -324,6 +294,8 @@ struct exfat_inode_info {
 	/* on-disk position of directory entry or 0 */
 	loff_t i_pos;
 	loff_t valid_size;
+	/* page-aligned size that has been zeroed out for mmap */
+	loff_t zeroed_size;
 	/* hash by i_location */
 	struct hlist_node i_hash_fat;
 	/* protect bmap against truncate */
@@ -432,6 +404,115 @@ static inline loff_t exfat_ondisk_size(const struct inode *inode)
 	return ((loff_t)inode->i_blocks) << 9;
 }
 
+/*
+ * Byte position of cluster @clus: its index relative to
+ * EXFAT_RESERVED_CLUSTERS times the cluster size, plus data_start_bytes.
+ */
+static inline loff_t exfat_cluster_to_phys_bytes(struct exfat_sb_info *sbi,
+		unsigned int clus)
+{
+	return ((loff_t)(clus - EXFAT_RESERVED_CLUSTERS) << sbi->cluster_size_bits) +
+		sbi->data_start_bytes;
+}
+
+/*
+ * helpers for cluster size to byte conversion.
+ */
+static inline loff_t exfat_cluster_to_bytes(struct exfat_sb_info *sbi,
+		u32 nr_clusters)
+{
+	return (loff_t)nr_clusters << sbi->cluster_size_bits;
+}
+
+/* Number of 512-byte units (as counted by i_blocks) in @nr_clusters. */
+static inline blkcnt_t exfat_cluster_to_sectors(struct exfat_sb_info *sbi,
+		u32 nr_clusters)
+{
+	return (blkcnt_t)nr_clusters << (sbi->cluster_size_bits - 9);
+}
+
+static inline u32 exfat_bytes_to_cluster(struct exfat_sb_info *sbi, loff_t size)
+{
+	return (u32)(size >> sbi->cluster_size_bits);
+}
+
+/* Rounds up to whole clusters; a non-positive @size yields 0. */
+static inline u32 exfat_bytes_to_cluster_round_up(struct exfat_sb_info *sbi,
+		loff_t size)
+{
+	if (size <= 0)
+		return 0;
+	return (u32)((size - 1) >> sbi->cluster_size_bits) + 1;
+}
+
+static inline u32 exfat_cluster_offset(struct exfat_sb_info *sbi, loff_t off)
+{
+	return off & (sbi->cluster_size - 1);
+}
+
+/*
+ * helpers for block size to byte conversion.
+ */
+static inline loff_t exfat_block_to_bytes(struct super_block *sb,
+		sector_t block)
+{
+	return (loff_t)block << sb->s_blocksize_bits;
+}
+
+static inline sector_t exfat_bytes_to_block(struct super_block *sb, loff_t size)
+{
+	return (sector_t)(size >> sb->s_blocksize_bits);
+}
+
+/* Rounds up to whole blocks; a non-positive @size yields 0. */
+static inline sector_t exfat_bytes_to_block_round_up(struct super_block *sb,
+		loff_t size)
+{
+	if (size <= 0)
+		return 0;
+	return (sector_t)(((size - 1) >> sb->s_blocksize_bits) + 1);
+}
+
+static inline u32 exfat_block_offset(struct super_block *sb, loff_t off)
+{
+	return (u32)(off & (sb->s_blocksize - 1));
+}
+
+/*
+ * helpers for block size to dentry size conversion.
+ */
+static inline u32 exfat_bytes_to_dentries(loff_t b)
+{
+	return (u32)(b >> DENTRY_SIZE_BITS);
+}
+
+static inline u32 exfat_dentries_to_bytes(u32 dentry)
+{
+	return dentry << DENTRY_SIZE_BITS;
+}
+
+/*
+ * helpers for cluster size to dentry size conversion.
+ */
+static inline u32 exfat_cluster_to_dentries(struct exfat_sb_info *sbi,
+		u32 nr_clusters)
+{
+	/*
+	 * Shift in 64 bits and saturate: a 32-bit shift wraps for large
+	 * cluster counts and could yield a bogus, too-small dentry count.
+	 */
+	u64 dentries = (u64)nr_clusters <<
+		(sbi->cluster_size_bits - DENTRY_SIZE_BITS);
+
+	return min_t(u64, dentries, U32_MAX);
+}
+
+static inline u32 exfat_dentries_to_cluster(struct exfat_sb_info *sbi,
+		u32 dentry)
+{
+	return dentry >> (sbi->cluster_size_bits - DENTRY_SIZE_BITS);
+}
+
 /* super.c */
 int exfat_set_volume_dirty(struct super_block *sb);
 int exfat_clear_volume_dirty(struct super_block *sb);
@@ -441,7 +508,7 @@ int exfat_clear_volume_dirty(struct super_block *sb);
 	exfat_cluster_walk(sb, (pclu), 1, ALLOC_FAT_CHAIN)
 
 int exfat_alloc_cluster(struct inode *inode, unsigned int num_alloc,
-		struct exfat_chain *p_chain, bool sync_bmap);
+		struct exfat_chain *p_chain, bool sync_bmap, bool contig);
 int exfat_free_cluster(struct inode *inode, struct exfat_chain *p_chain);
 int exfat_ent_get(struct super_block *sb, unsigned int loc,
 		unsigned int *content, struct buffer_head **last);
@@ -490,7 +557,6 @@ int exfat_trim_fs(struct inode *inode, struct fstrim_range *range);
 /* file.c */
 extern const struct file_operations exfat_file_operations;
 int __exfat_truncate(struct inode *inode);
-void exfat_truncate(struct inode *inode);
 int exfat_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
 		  struct iattr *attr);
 int exfat_getattr(struct mnt_idmap *idmap, const struct path *path,
@@ -584,7 +650,9 @@ struct inode *exfat_iget(struct super_block *sb, loff_t i_pos);
 int __exfat_write_inode(struct inode *inode, int sync);
 int exfat_write_inode(struct inode *inode, struct writeback_control *wbc);
 void exfat_evict_inode(struct inode *inode);
-int exfat_block_truncate_page(struct inode *inode, loff_t from);
+int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
+		unsigned int *clu, unsigned int *count, int create,
+		bool *balloc);
 
 /* exfat/nls.c */
 unsigned short exfat_toupper(struct super_block *sb, unsigned short a);
diff --git a/fs/exfat/fatent.c b/fs/exfat/fatent.c
index dce0955e689aa..a8b11e2ce43f1 100644
--- a/fs/exfat/fatent.c
+++ b/fs/exfat/fatent.c
@@ -412,14 +412,14 @@ int exfat_zeroed_cluster(struct inode *dir, unsigned int clu)
 
 	if (IS_DIRSYNC(dir))
 		return sync_blockdev_range(sb->s_bdev,
-				EXFAT_BLK_TO_B(blknr, sb),
-				EXFAT_BLK_TO_B(last_blknr, sb) - 1);
+				exfat_block_to_bytes(sb, blknr),
+				exfat_block_to_bytes(sb, last_blknr) - 1);
 
 	return 0;
 }
 
 int exfat_alloc_cluster(struct inode *inode, unsigned int num_alloc,
-		struct exfat_chain *p_chain, bool sync_bmap)
+		struct exfat_chain *p_chain, bool sync_bmap, bool contig)
 {
 	int ret = -ENOSPC;
 	unsigned int total_cnt;
@@ -470,14 +470,20 @@ int exfat_alloc_cluster(struct inode *inode, unsigned int num_alloc,
 
 	while ((new_clu = exfat_find_free_bitmap(sb, hint_clu)) !=
 	       EXFAT_EOF_CLUSTER) {
-		if (new_clu != hint_clu &&
-		    p_chain->flags == ALLOC_NO_FAT_CHAIN) {
-			if (exfat_chain_cont_cluster(sb, p_chain->dir,
-					p_chain->size)) {
-				ret = -EIO;
-				goto free_cluster;
+		if (new_clu != hint_clu) {
+			if (p_chain->flags == ALLOC_NO_FAT_CHAIN) {
+				if (exfat_chain_cont_cluster(sb, p_chain->dir,
+							     p_chain->size)) {
+					ret = -EIO;
+					goto free_cluster;
+				}
+				p_chain->flags = ALLOC_FAT_CHAIN;
+			}
+
+			if (contig && p_chain->size > 0) {
+				hint_clu = last_clu;
+				goto done;
 			}
-			p_chain->flags = ALLOC_FAT_CHAIN;
 		}
 
 		/* update allocation bitmap */
@@ -507,9 +513,9 @@ int exfat_alloc_cluster(struct inode *inode, unsigned int num_alloc,
 		last_clu = new_clu;
 
 		if (p_chain->size == num_alloc) {
+done:
 			sbi->clu_srch_ptr = hint_clu;
-			sbi->used_clusters += num_alloc;
-
+			sbi->used_clusters += p_chain->size;
 			mutex_unlock(&sbi->bitmap_lock);
 			return 0;
 		}
diff --git a/fs/exfat/file.c b/fs/exfat/file.c
index 354bdcfe4abcd..e6e58584f567a 100644
--- a/fs/exfat/file.c
+++ b/fs/exfat/file.c
@@ -14,9 +14,11 @@
 #include <linux/writeback.h>
 #include <linux/filelock.h>
 #include <linux/falloc.h>
+#include <linux/iomap.h>
 
 #include "exfat_raw.h"
 #include "exfat_fs.h"
+#include "iomap.h"
 
 static int exfat_cont_expand(struct inode *inode, loff_t size)
 {
@@ -26,16 +28,17 @@ static int exfat_cont_expand(struct inode *inode, loff_t size)
 	struct super_block *sb = inode->i_sb;
 	struct exfat_sb_info *sbi = EXFAT_SB(sb);
 	struct exfat_chain clu;
+	loff_t oldsize = i_size_read(inode);
 
-	truncate_pagecache(inode, i_size_read(inode));
+	truncate_pagecache(inode, oldsize);
 
 	ret = inode_newsize_ok(inode, size);
 	if (ret)
 		return ret;
 
-	num_clusters = EXFAT_B_TO_CLU(exfat_ondisk_size(inode), sbi);
+	num_clusters = exfat_bytes_to_cluster(sbi, exfat_ondisk_size(inode));
 	/* integer overflow is already checked in inode_newsize_ok(). */
-	new_num_clusters = EXFAT_B_TO_CLU_ROUND_UP(size, sbi);
+	new_num_clusters = exfat_bytes_to_cluster_round_up(sbi, size);
 
 	if (new_num_clusters == num_clusters)
 		goto out;
@@ -56,7 +59,7 @@ static int exfat_cont_expand(struct inode *inode, loff_t size)
 	clu.flags = ei->flags;
 
 	ret = exfat_alloc_cluster(inode, new_num_clusters - num_clusters,
-			&clu, inode_needs_sync(inode));
+			&clu, inode_needs_sync(inode), false);
 	if (ret)
 		return ret;
 
@@ -78,6 +81,13 @@ out:
 	inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
 	/* Expanded range not zeroed, do not update valid_size */
 	i_size_write(inode, size);
+	/*
+	 * When extending file size, call truncate_pagecache() first,
+	 * then update i_size, and call pagecache_isize_extended()
+	 * to ensure the straddling folio is properly marked RO so
+	 * page_mkwrite() is called and post-EOF area is zeroed.
+	 */
+	pagecache_isize_extended(inode, oldsize, inode->i_size);
 
 	inode->i_blocks = round_up(size, sbi->cluster_size) >> 9;
 	mark_inode_dirty(inode);
@@ -200,8 +210,8 @@ int __exfat_truncate(struct inode *inode)
 
 	exfat_set_volume_dirty(sb);
 
-	num_clusters_new = EXFAT_B_TO_CLU_ROUND_UP(i_size_read(inode), sbi);
-	num_clusters_phys = EXFAT_B_TO_CLU(exfat_ondisk_size(inode), sbi);
+	num_clusters_new = exfat_bytes_to_cluster_round_up(sbi, i_size_read(inode));
+	num_clusters_phys = exfat_bytes_to_cluster(sbi, exfat_ondisk_size(inode));
 
 	exfat_chain_set(&clu, ei->start_clu, num_clusters_phys, ei->flags);
 
@@ -236,7 +246,7 @@ int __exfat_truncate(struct inode *inode)
 	}
 
 	if (i_size_read(inode) < ei->valid_size)
-		ei->valid_size = i_size_read(inode);
+		ei->valid_size = ei->zeroed_size = i_size_read(inode);
 
 	if (ei->type == TYPE_FILE)
 		ei->attr |= EXFAT_ATTR_ARCHIVE;
@@ -282,7 +292,7 @@ int __exfat_truncate(struct inode *inode)
 	return 0;
 }
 
-void exfat_truncate(struct inode *inode)
+static void exfat_truncate(struct inode *inode)
 {
 	struct super_block *sb = inode->i_sb;
 	struct exfat_sb_info *sbi = EXFAT_SB(sb);
@@ -383,10 +393,12 @@ int exfat_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
 	exfat_truncate_inode_atime(inode);
 
 	if (attr->ia_valid & ATTR_SIZE) {
-		error = exfat_block_truncate_page(inode, attr->ia_size);
-		if (error)
-			goto out;
-
+		/*
+		 * Wait for any in-flight DIO to finish before truncating to
+		 * prevent a concurrent DIO from writing to clusters that are
+		 * about to be freed.
+		 */
+		inode_dio_wait(inode);
 		down_write(&EXFAT_I(inode)->truncate_lock);
 		truncate_setsize(inode, attr->ia_size);
 
@@ -631,42 +643,86 @@ int exfat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
 
 static int exfat_extend_valid_size(struct inode *inode, loff_t new_valid_size)
 {
-	int err;
-	loff_t pos;
 	struct exfat_inode_info *ei = EXFAT_I(inode);
-	struct address_space *mapping = inode->i_mapping;
-	const struct address_space_operations *ops = mapping->a_ops;
+	loff_t old_valid_size = ei->valid_size;
+	int ret = 0;
 
-	pos = ei->valid_size;
-	while (pos < new_valid_size) {
-		u32 len;
-		struct folio *folio;
-		unsigned long off;
+	if (old_valid_size < new_valid_size) {
+		if (i_size_read(inode) < new_valid_size) {
+			i_size_write(inode, new_valid_size);
+			mark_inode_dirty(inode);
+		}
 
-		len = PAGE_SIZE - (pos & (PAGE_SIZE - 1));
-		if (pos + len > new_valid_size)
-			len = new_valid_size - pos;
+		ret = iomap_zero_range(inode, old_valid_size,
+				new_valid_size - old_valid_size, NULL,
+				&exfat_write_iomap_ops, NULL, NULL);
+		if (ret) {
+			truncate_setsize(inode, old_valid_size);
+			exfat_truncate(inode);
+		}
+	}
 
-		err = ops->write_begin(NULL, mapping, pos, len, &folio, NULL);
-		if (err)
-			goto out;
+	return ret;
+}
 
-		off = offset_in_folio(folio, pos);
-		folio_zero_new_buffers(folio, off, off + len);
+/*
+ * Finish a direct write through the page cache when iomap_dio_rw() could not
+ * write everything directly.  The range that was written is then flushed to
+ * disk and dropped from the page cache.
+ */
+static ssize_t exfat_fallback_buffered_write(struct kiocb *iocb,
+		struct iov_iter *from)
+{
+	loff_t offset = iocb->ki_pos, end;
+	ssize_t written;
+	int ret;
 
-		err = ops->write_end(NULL, mapping, pos, len, len, folio, NULL);
-		if (err < 0)
-			goto out;
-		pos += len;
+	iocb->ki_flags &= ~IOCB_DIRECT;
 
-		balance_dirty_pages_ratelimited(mapping);
-		cond_resched();
-	}
+	written = iomap_file_buffered_write(iocb, from, &exfat_write_iomap_ops,
+			NULL, NULL);
+	if (written <= 0)
+		return written;
 
-	return 0;
+	/* iomap_file_buffered_write() has already advanced iocb->ki_pos. */
+	end = offset + written - 1;
+	ret = filemap_write_and_wait_range(iocb->ki_filp->f_mapping,
+			offset, end);
+	if (ret)
+		return -EIO;
 
-out:
-	return err;
+	invalidate_mapping_pages(iocb->ki_filp->f_mapping,
+			offset >> PAGE_SHIFT,
+			end >> PAGE_SHIFT);
+
+	return written;
+}
+
+/*
+ * Direct write via iomap; whatever iomap_dio_rw() leaves unwritten (including
+ * an -ENOTBLK refusal) is completed by the buffered fallback.
+ */
+static ssize_t exfat_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+	ssize_t ret;
+
+	ret = iomap_dio_rw(iocb, from, &exfat_write_iomap_ops,
+			&exfat_write_dio_ops, 0, NULL, 0);
+	if (ret == -ENOTBLK)
+		ret = 0;
+	else if (ret < 0)
+		return ret;
+
+	if (iov_iter_count(from)) {
+		ssize_t written;
+
+		written = exfat_fallback_buffered_write(iocb, from);
+		if (written < 0)
+			return written;
+		ret += written;
+	}
+
+	return ret;
 }
 
 static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter)
@@ -677,6 +723,7 @@ static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter)
 	struct exfat_inode_info *ei = EXFAT_I(inode);
 	loff_t pos = iocb->ki_pos;
 	loff_t valid_size;
+	int err;
 
 	if (unlikely(exfat_forced_shutdown(inode->i_sb)))
 		return -EIO;
@@ -692,14 +739,10 @@ static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter)
 	if (ret <= 0)
 		goto unlock;
 
-	if (iocb->ki_flags & IOCB_DIRECT) {
-		unsigned long align = pos | iov_iter_alignment(iter);
-
-		if (!IS_ALIGNED(align, i_blocksize(inode)) &&
-		    !IS_ALIGNED(align, bdev_logical_block_size(inode->i_sb->s_bdev))) {
-			ret = -EINVAL;
-			goto unlock;
-		}
+	err = file_modified(iocb->ki_filp);
+	if (err) {
+		ret = err;
+		goto unlock;
 	}
 
 	if (pos > valid_size) {
@@ -713,7 +756,11 @@ static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter)
 			goto unlock;
 	}
 
-	ret = __generic_file_write_iter(iocb, iter);
+	if (iocb->ki_flags & IOCB_DIRECT)
+		ret = exfat_dio_write_iter(iocb, iter);
+	else
+		ret = iomap_file_buffered_write(iocb, iter,
+				&exfat_write_iomap_ops, NULL, NULL);
 	if (ret < 0)
 		goto unlock;
 
@@ -740,37 +787,78 @@ unlock:
 static ssize_t exfat_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 {
 	struct inode *inode = file_inode(iocb->ki_filp);
+	ssize_t ret;
 
 	if (unlikely(exfat_forced_shutdown(inode->i_sb)))
 		return -EIO;
 
-	return generic_file_read_iter(iocb, iter);
+	inode_lock_shared(inode);
+
+	if (iocb->ki_flags & IOCB_DIRECT) {
+		file_accessed(iocb->ki_filp);
+		ret = iomap_dio_rw(iocb, iter, &exfat_iomap_ops, NULL, 0,
+				NULL, 0);
+	} else {
+		ret = generic_file_read_iter(iocb, iter);
+	}
+
+	inode_unlock_shared(inode);
+
+	return ret;
 }
 
 static vm_fault_t exfat_page_mkwrite(struct vm_fault *vmf)
 {
-	int err;
 	struct inode *inode = file_inode(vmf->vma->vm_file);
 	struct exfat_inode_info *ei = EXFAT_I(inode);
-	loff_t new_valid_size;
+	vm_fault_t ret;
+	loff_t new_valid_size, mmap_valid_size;
 
 	if (!inode_trylock(inode))
 		return VM_FAULT_RETRY;
 
-	new_valid_size = ((loff_t)vmf->pgoff + 1) << PAGE_SHIFT;
-	new_valid_size = min(new_valid_size, i_size_read(inode));
+	mmap_valid_size = ((loff_t)vmf->pgoff + 1) << PAGE_SHIFT;
+	new_valid_size = min(mmap_valid_size, i_size_read(inode));
 
 	if (ei->valid_size < new_valid_size) {
-		err = exfat_extend_valid_size(inode, new_valid_size);
-		if (err < 0) {
-			inode_unlock(inode);
-			return vmf_fs_error(err);
+		if (ei->zeroed_size < mmap_valid_size) {
+			int err;
+
+			/*
+			 * Only zero the range that hasn't been zeroed yet for
+			 * this mmap write path. zeroed_size tracks the largest
+			 * page-aligned offset that has already been zeroed.
+			 *
+			 * This prevents unnecessarily zeroing out the entire
+			 * tail page on every page fault when userspace writes
+			 * data byte-by-byte through mmap (after a small
+			 * fallocate). It fixes data corruption in the tail page
+			 * while preserving the existing valid_size semantics.
+			 */
+			err = iomap_zero_range(inode, ei->zeroed_size,
+					mmap_valid_size - ei->zeroed_size, NULL,
+					&exfat_iomap_ops, NULL, NULL);
+			if (err < 0) {
+				inode_unlock(inode);
+				return vmf_fs_error(err);
+			}
+			ei->zeroed_size = mmap_valid_size;
 		}
+
+		ei->valid_size = new_valid_size;
+		mark_inode_dirty(inode);
 	}
 
+	sb_start_pagefault(inode->i_sb);
+	file_update_time(vmf->vma->vm_file);
+
+	filemap_invalidate_lock_shared(inode->i_mapping);
+	ret = iomap_page_mkwrite(vmf, &exfat_write_iomap_ops, NULL);
+	filemap_invalidate_unlock_shared(inode->i_mapping);
+	sb_end_pagefault(inode->i_sb);
 	inode_unlock(inode);
 
-	return filemap_page_mkwrite(vmf);
+	return ret;
 }
 
 static const struct vm_operations_struct exfat_file_vm_ops = {
@@ -786,6 +874,21 @@ static int exfat_file_mmap_prepare(struct vm_area_desc *desc)
 	if (unlikely(exfat_forced_shutdown(file_inode(desc->file)->i_sb)))
 		return -EIO;
 
+	if (vma_desc_test_all(desc, VMA_SHARED_BIT, VMA_MAYWRITE_BIT)) {
+		struct inode *inode = file_inode(file);
+		loff_t from, to;
+		int err;
+
+		from = ((loff_t)desc->pgoff << PAGE_SHIFT);
+		to = min_t(loff_t, i_size_read(inode),
+				from + vma_desc_size(desc));
+		if (EXFAT_I(inode)->valid_size < to) {
+			err = exfat_extend_valid_size(inode, to);
+			if (err)
+				return err;
+		}
+	}
+
 	file_accessed(file);
 	desc->vm_ops = &exfat_file_vm_ops;
 	return 0;
@@ -800,8 +903,48 @@ static ssize_t exfat_splice_read(struct file *in, loff_t *ppos,
 	return filemap_splice_read(in, ppos, pipe, len, flags);
 }
 
+static int exfat_file_open(struct inode *inode, struct file *filp)
+{
+	int err;
+
+	if (unlikely(exfat_forced_shutdown(inode->i_sb)))
+		return -EIO;
+
+	err = generic_file_open(inode, filp);
+	if (err)
+		return err;
+
+	filp->f_mode |= FMODE_CAN_ODIRECT;
+
+	return 0;
+}
+
+static loff_t exfat_file_llseek(struct file *file, loff_t offset, int whence)
+{
+	struct inode *inode = file->f_mapping->host;
+
+	switch (whence) {
+	case SEEK_HOLE:
+		inode_lock_shared(inode);
+		offset = iomap_seek_hole(inode, offset, &exfat_iomap_ops);
+		inode_unlock_shared(inode);
+		break;
+	case SEEK_DATA:
+		inode_lock_shared(inode);
+		offset = iomap_seek_data(inode, offset, &exfat_iomap_ops);
+		inode_unlock_shared(inode);
+		break;
+	default:
+		return generic_file_llseek(file, offset, whence);
+	}
+	if (offset < 0)
+		return offset;
+	return vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
+}
+
 const struct file_operations exfat_file_operations = {
-	.llseek		= generic_file_llseek,
+	.open		= exfat_file_open,
+	.llseek		= exfat_file_llseek,
 	.read_iter	= exfat_file_read_iter,
 	.write_iter	= exfat_file_write_iter,
 	.unlocked_ioctl = exfat_ioctl,
diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
index 1ea4c740fef9e..8e8d94319c3c2 100644
--- a/fs/exfat/inode.c
+++ b/fs/exfat/inode.c
@@ -13,13 +13,16 @@
 #include <linux/uio.h>
 #include <linux/random.h>
 #include <linux/iversion.h>
+#include <linux/iomap.h>
 
 #include "exfat_raw.h"
 #include "exfat_fs.h"
+#include "iomap.h"
 
 int __exfat_write_inode(struct inode *inode, int sync)
 {
 	unsigned long long on_disk_size;
+	unsigned long long on_disk_valid_size;
 	struct exfat_dentry *ep, *ep2;
 	struct exfat_entry_set_cache es;
 	struct super_block *sb = inode->i_sb;
@@ -69,22 +72,30 @@ int __exfat_write_inode(struct inode *inode, int sync)
 			     &ep->dentry.file.access_date,
 			     NULL);
 
-	/* File size should be zero if there is no cluster allocated */
-	on_disk_size = i_size_read(inode);
+	/*
+	 * During a DIO write, valid_size is updated eagerly in iomap_end (so
+	 * that concurrent buffered reads see IOMAP_MAPPED) while i_size is
+	 * updated asynchronously in end_io.  The FAT chain was already
+	 * extended to cover ceil(valid_size/cluster_size) clusters.  Use the
+	 * maximum so the on-disk size field always covers the FAT chain,
+	 * preventing fsck from reporting "more clusters are allocated".
+	 */
+	on_disk_size = max_t(unsigned long long, i_size_read(inode),
+			ei->valid_size);
 
 	if (ei->start_clu == EXFAT_EOF_CLUSTER)
 		on_disk_size = 0;
-
-	ep2->dentry.stream.size = cpu_to_le64(on_disk_size);
 	/*
-	 * mmap write does not use exfat_write_end(), valid_size may be
-	 * extended to the sector-aligned length in exfat_get_block().
-	 * So we need to fixup valid_size to the writren length.
+	 * valid_size on disk must reflect only confirmed data (up to i_size)
+	 * and must not exceed on_disk_size.
 	 */
-	if (on_disk_size < ei->valid_size)
-		ep2->dentry.stream.valid_size = ep2->dentry.stream.size;
-	else
-		ep2->dentry.stream.valid_size = cpu_to_le64(ei->valid_size);
+	on_disk_valid_size = min_t(unsigned long long, ei->valid_size,
+			i_size_read(inode));
+	if (ei->start_clu == EXFAT_EOF_CLUSTER)
+		on_disk_valid_size = 0;
+
+	ep2->dentry.stream.size = cpu_to_le64(on_disk_size);
+	ep2->dentry.stream.valid_size = cpu_to_le64(on_disk_valid_size);
 
 	if (on_disk_size) {
 		ep2->dentry.stream.flags = ei->flags;
@@ -123,8 +134,9 @@ void exfat_sync_inode(struct inode *inode)
  * Output: errcode, cluster number
  * *clu = (~0), if it's unable to allocate a new cluster
  */
-static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
-		unsigned int *clu, unsigned int *count, int create)
+int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
+		unsigned int *clu, unsigned int *count, int create,
+		bool *balloc)
 {
 	int ret;
 	unsigned int last_clu;
@@ -135,10 +147,10 @@ static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
 	unsigned int local_clu_offset = clu_offset;
 	unsigned int num_to_be_allocated = 0, num_clusters;
 
-	num_clusters = EXFAT_B_TO_CLU(exfat_ondisk_size(inode), sbi);
-
-	if (clu_offset >= num_clusters)
-		num_to_be_allocated = clu_offset - num_clusters + 1;
+	num_clusters = exfat_bytes_to_cluster(sbi, exfat_ondisk_size(inode));
+	if (clu_offset > num_clusters ||
+	    *count > num_clusters - clu_offset)
+		num_to_be_allocated = clu_offset + *count - num_clusters;
 
 	if (!create && (num_to_be_allocated > 0)) {
 		*clu = EXFAT_EOF_CLUSTER;
@@ -181,7 +193,7 @@ static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
 		}
 
 		ret = exfat_alloc_cluster(inode, num_to_be_allocated, &new_clu,
-				inode_needs_sync(inode));
+				inode_needs_sync(inode), true);
 		if (ret)
 			return ret;
 
@@ -215,19 +227,11 @@ static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
 		}
 
 		*clu = new_clu.dir;
+		*count = new_clu.size;
 
-		inode->i_blocks += EXFAT_CLU_TO_B(num_to_be_allocated, sbi) >> 9;
-
-		/*
-		 * Move *clu pointer along FAT chains (hole care) because the
-		 * caller of this function expect *clu to be the last cluster.
-		 * This only works when num_to_be_allocated >= 2,
-		 * *clu = (the first cluster of the allocated chain) =>
-		 * (the last cluster of ...)
-		 */
-		if (exfat_cluster_walk(sb, clu, num_to_be_allocated - 1, ei->flags))
-			return -EIO;
-		*count = 1;
+		inode->i_blocks += exfat_cluster_to_sectors(sbi, new_clu.size);
+		if (balloc)
+			*balloc = true;
 	}
 
 	/* hint information */
@@ -237,154 +241,15 @@ static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
 	return 0;
 }
 
-static int exfat_get_block(struct inode *inode, sector_t iblock,
-		struct buffer_head *bh_result, int create)
-{
-	struct exfat_inode_info *ei = EXFAT_I(inode);
-	struct super_block *sb = inode->i_sb;
-	struct exfat_sb_info *sbi = EXFAT_SB(sb);
-	unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits;
-	int err = 0;
-	unsigned long mapped_blocks = 0;
-	unsigned int cluster, sec_offset, count;
-	sector_t last_block;
-	sector_t phys = 0;
-	sector_t valid_blks;
-	loff_t i_size;
-
-	mutex_lock(&sbi->s_lock);
-	i_size = i_size_read(inode);
-	last_block = EXFAT_B_TO_BLK_ROUND_UP(i_size, sb);
-	if (iblock >= last_block && !create)
-		goto done;
-
-	/* Is this block already allocated? */
-	count = EXFAT_B_TO_CLU_ROUND_UP(bh_result->b_size, sbi);
-	err = exfat_map_cluster(inode, iblock >> sbi->sect_per_clus_bits,
-			&cluster, &count, create);
-	if (err) {
-		if (err != -ENOSPC)
-			exfat_fs_error_ratelimit(sb,
-				"failed to bmap (inode : %p iblock : %llu, err : %d)",
-				inode, (unsigned long long)iblock, err);
-		goto unlock_ret;
-	}
-
-	if (cluster == EXFAT_EOF_CLUSTER)
-		goto done;
-
-	/* sector offset in cluster */
-	sec_offset = iblock & (sbi->sect_per_clus - 1);
-
-	phys = exfat_cluster_to_sector(sbi, cluster) + sec_offset;
-	mapped_blocks = ((unsigned long)count << sbi->sect_per_clus_bits) - sec_offset;
-	max_blocks = min(mapped_blocks, max_blocks);
-
-	map_bh(bh_result, sb, phys);
-	if (buffer_delay(bh_result))
-		clear_buffer_delay(bh_result);
-
-	/*
-	 * In most cases, we just need to set bh_result to mapped, unmapped
-	 * or new status as follows:
-	 *  1. i_size == valid_size
-	 *  2. write case (create == 1)
-	 *  3. direct_read (!bh_result->b_folio)
-	 *     -> the unwritten part will be zeroed in exfat_direct_IO()
-	 *
-	 * Otherwise, in the case of buffered read, it is necessary to take
-	 * care the last nested block if valid_size is not equal to i_size.
-	 */
-	if (i_size == ei->valid_size || create || !bh_result->b_folio)
-		valid_blks = EXFAT_B_TO_BLK_ROUND_UP(ei->valid_size, sb);
-	else
-		valid_blks = EXFAT_B_TO_BLK(ei->valid_size, sb);
-
-	/* The range has been fully written, map it */
-	if (iblock + max_blocks < valid_blks)
-		goto done;
-
-	/* The range has been partially written, map the written part */
-	if (iblock < valid_blks) {
-		max_blocks = valid_blks - iblock;
-		goto done;
-	}
-
-	/* The area has not been written, map and mark as new for create case */
-	if (create) {
-		set_buffer_new(bh_result);
-		ei->valid_size = EXFAT_BLK_TO_B(iblock + max_blocks, sb);
-		mark_inode_dirty(inode);
-		goto done;
-	}
-
-	/*
-	 * The area has just one block partially written.
-	 * In that case, we should read and fill the unwritten part of
-	 * a block with zero.
-	 */
-	if (bh_result->b_folio && iblock == valid_blks &&
-	    (ei->valid_size & (sb->s_blocksize - 1))) {
-		loff_t size, pos;
-		void *addr;
-
-		max_blocks = 1;
-
-		/*
-		 * No buffer_head is allocated.
-		 * (1) bmap: It's enough to set blocknr without I/O.
-		 * (2) read: The unwritten part should be filled with zero.
-		 *           If a folio does not have any buffers,
-		 *           let's returns -EAGAIN to fallback to
-		 *           block_read_full_folio() for per-bh IO.
-		 */
-		if (!folio_buffers(bh_result->b_folio)) {
-			err = -EAGAIN;
-			goto done;
-		}
-
-		pos = EXFAT_BLK_TO_B(iblock, sb);
-		size = ei->valid_size - pos;
-		addr = folio_address(bh_result->b_folio) +
-			offset_in_folio(bh_result->b_folio, pos);
-
-		/* Check if bh->b_data points to proper addr in folio */
-		if (bh_result->b_data != addr) {
-			exfat_fs_error_ratelimit(sb,
-					"b_data(%p) != folio_addr(%p)",
-					bh_result->b_data, addr);
-			err = -EINVAL;
-			goto done;
-		}
-
-		/* Read a block */
-		err = bh_read(bh_result, 0);
-		if (err < 0)
-			goto done;
-
-		/* Zero unwritten part of a block */
-		memset(bh_result->b_data + size, 0, bh_result->b_size - size);
-		err = 0;
-		goto done;
-	}
-
-	/*
-	 * The area has not been written, clear mapped for read/bmap cases.
-	 * If so, it will be filled with zero without reading from disk.
-	 */
-	clear_buffer_mapped(bh_result);
-done:
-	bh_result->b_size = EXFAT_BLK_TO_B(max_blocks, sb);
-	if (err < 0)
-		clear_buffer_mapped(bh_result);
-unlock_ret:
-	mutex_unlock(&sbi->s_lock);
-	return err;
-}
-
 static int exfat_read_folio(struct file *file, struct folio *folio)
 {
-	return mpage_read_folio(folio, exfat_get_block);
+	struct iomap_read_folio_ctx ctx = {
+		.cur_folio = folio,
+		.ops = &exfat_iomap_bio_read_ops,
+	};
+
+	iomap_read_folio(&exfat_iomap_ops, &ctx, NULL);
+	return 0;
 }
 
 static void exfat_readahead(struct readahead_control *rac)
@@ -393,6 +258,10 @@ static void exfat_readahead(struct readahead_control *rac)
 	struct inode *inode = mapping->host;
 	struct exfat_inode_info *ei = EXFAT_I(inode);
 	loff_t pos = readahead_pos(rac);
+	struct iomap_read_folio_ctx ctx = {
+		.ops = &exfat_iomap_bio_read_ops,
+		.rac = rac,
+	};
 
 	/* Range cross valid_size, read it page by page. */
 	if (ei->valid_size < i_size_read(inode) &&
@@ -400,115 +269,22 @@ static void exfat_readahead(struct readahead_control *rac)
 	    ei->valid_size < pos + readahead_length(rac))
 		return;
 
-	mpage_readahead(rac, exfat_get_block);
+	iomap_readahead(&exfat_iomap_ops, &ctx, NULL);
 }
 
 static int exfat_writepages(struct address_space *mapping,
 		struct writeback_control *wbc)
 {
-	if (unlikely(exfat_forced_shutdown(mapping->host->i_sb)))
-		return -EIO;
-
-	return mpage_writepages(mapping, wbc, exfat_get_block);
-}
-
-static void exfat_write_failed(struct address_space *mapping, loff_t to)
-{
-	struct inode *inode = mapping->host;
-
-	if (to > i_size_read(inode)) {
-		truncate_pagecache(inode, i_size_read(inode));
-		inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
-		exfat_truncate(inode);
-	}
-}
-
-static int exfat_write_begin(const struct kiocb *iocb,
-			     struct address_space *mapping,
-			     loff_t pos, unsigned int len,
-			     struct folio **foliop, void **fsdata)
-{
-	int ret;
+	struct iomap_writepage_ctx wpc = {
+		.inode		= mapping->host,
+		.wbc		= wbc,
+		.ops		= &exfat_writeback_ops,
+	};
 
 	if (unlikely(exfat_forced_shutdown(mapping->host->i_sb)))
 		return -EIO;
 
-	ret = block_write_begin(mapping, pos, len, foliop, exfat_get_block);
-
-	if (ret < 0)
-		exfat_write_failed(mapping, pos+len);
-
-	return ret;
-}
-
-static int exfat_write_end(const struct kiocb *iocb,
-			   struct address_space *mapping,
-			   loff_t pos, unsigned int len, unsigned int copied,
-			   struct folio *folio, void *fsdata)
-{
-	struct inode *inode = mapping->host;
-	struct exfat_inode_info *ei = EXFAT_I(inode);
-	int err;
-
-	err = generic_write_end(iocb, mapping, pos, len, copied, folio, fsdata);
-	if (err < len)
-		exfat_write_failed(mapping, pos+len);
-
-	if (!(err < 0) && pos + err > ei->valid_size) {
-		ei->valid_size = pos + err;
-		mark_inode_dirty(inode);
-	}
-
-	if (!(err < 0) && !(ei->attr & EXFAT_ATTR_ARCHIVE)) {
-		inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
-		ei->attr |= EXFAT_ATTR_ARCHIVE;
-		mark_inode_dirty(inode);
-	}
-
-	return err;
-}
-
-static ssize_t exfat_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
-{
-	struct address_space *mapping = iocb->ki_filp->f_mapping;
-	struct inode *inode = mapping->host;
-	struct exfat_inode_info *ei = EXFAT_I(inode);
-	loff_t pos = iocb->ki_pos;
-	loff_t size = pos + iov_iter_count(iter);
-	int rw = iov_iter_rw(iter);
-	ssize_t ret;
-
-	/*
-	 * Need to use the DIO_LOCKING for avoiding the race
-	 * condition of exfat_get_block() and ->truncate().
-	 */
-	ret = blockdev_direct_IO(iocb, inode, iter, exfat_get_block);
-	if (ret < 0) {
-		if (rw == WRITE && ret != -EIOCBQUEUED)
-			exfat_write_failed(mapping, size);
-
-		return ret;
-	}
-
-	size = pos + ret;
-
-	if (rw == WRITE) {
-		/*
-		 * If the block had been partially written before this write,
-		 * ->valid_size will not be updated in exfat_get_block(),
-		 * update it here.
-		 */
-		if (ei->valid_size < size) {
-			ei->valid_size = size;
-			mark_inode_dirty(inode);
-		}
-	} else if (pos < ei->valid_size && ei->valid_size < size) {
-		/* zero the unwritten part in the partially written block */
-		iov_iter_revert(iter, size - ei->valid_size);
-		iov_iter_zero(size - ei->valid_size, iter);
-	}
-
-	return ret;
+	return iomap_writepages(&wpc);
 }
 
 static sector_t exfat_aop_bmap(struct address_space *mapping, sector_t block)
@@ -517,34 +293,22 @@ static sector_t exfat_aop_bmap(struct address_space *mapping, sector_t block)
 
 	/* exfat_get_cluster() assumes the requested blocknr isn't truncated. */
 	down_read(&EXFAT_I(mapping->host)->truncate_lock);
-	blocknr = generic_block_bmap(mapping, block, exfat_get_block);
+	blocknr = iomap_bmap(mapping, block, &exfat_iomap_ops);
 	up_read(&EXFAT_I(mapping->host)->truncate_lock);
 	return blocknr;
 }
 
-/*
- * exfat_block_truncate_page() zeroes out a mapping from file offset `from'
- * up to the end of the block which corresponds to `from'.
- * This is required during truncate to physically zeroout the tail end
- * of that block so it doesn't yield old data if the file is later grown.
- * Also, avoid causing failure from fsx for cases of "data past EOF"
- */
-int exfat_block_truncate_page(struct inode *inode, loff_t from)
-{
-	return block_truncate_page(inode->i_mapping, from, exfat_get_block);
-}
-
 static const struct address_space_operations exfat_aops = {
-	.dirty_folio	= block_dirty_folio,
-	.invalidate_folio = block_invalidate_folio,
-	.read_folio	= exfat_read_folio,
-	.readahead	= exfat_readahead,
-	.writepages	= exfat_writepages,
-	.write_begin	= exfat_write_begin,
-	.write_end	= exfat_write_end,
-	.direct_IO	= exfat_direct_IO,
-	.bmap		= exfat_aop_bmap,
-	.migrate_folio	= buffer_migrate_folio,
+	.read_folio		= exfat_read_folio,
+	.readahead		= exfat_readahead,
+	.writepages		= exfat_writepages,
+	.dirty_folio		= iomap_dirty_folio,
+	.bmap			= exfat_aop_bmap,
+	.migrate_folio		= filemap_migrate_folio,
+	.is_partially_uptodate	= iomap_is_partially_uptodate,
+	.error_remove_folio	= generic_error_remove_folio,
+	.release_folio		= iomap_release_folio,
+	.invalidate_folio	= iomap_invalidate_folio,
 };
 
 static inline unsigned long exfat_hash(loff_t i_pos)
@@ -608,6 +372,7 @@ static int exfat_fill_inode(struct inode *inode, struct exfat_dir_entry *info)
 	ei->flags = info->flags;
 	ei->type = info->type;
 	ei->valid_size = info->valid_size;
+	ei->zeroed_size = info->valid_size;
 
 	ei->version = 0;
 	ei->hint_stat.eidx = 0;
diff --git a/fs/exfat/iomap.c b/fs/exfat/iomap.c
new file mode 100644
index 0000000000000..3ac1eebe997f5
--- /dev/null
+++ b/fs/exfat/iomap.c
@@ -0,0 +1,265 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * iomap callback functions
+ *
+ * Copyright (C) 2026 Namjae Jeon <linkinjeon@kernel.org>
+ */
+
+#include <linux/iomap.h>
+#include <linux/pagemap.h>
+
+#include "exfat_raw.h"
+#include "exfat_fs.h"
+#include "iomap.h"
+
+/*
+ * exfat_file_write_dio_end_io - Direct I/O write completion handler
+ *
+ * Updates i_size if the write extended the file. Called from the dio layer
+ * after I/O completion.
+ */
+static int exfat_file_write_dio_end_io(struct kiocb *iocb, ssize_t size,
+		int error, unsigned int flags)
+{
+	struct inode *inode = file_inode(iocb->ki_filp);
+
+	if (error)
+		return error;
+
+	if (size && i_size_read(inode) < iocb->ki_pos + size) {
+		i_size_write(inode, iocb->ki_pos + size);
+		mark_inode_dirty(inode);
+	}
+
+	return 0;
+}
+
+const struct iomap_dio_ops exfat_write_dio_ops = {
+	.end_io		= exfat_file_write_dio_end_io,
+};
+
+static int __exfat_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+		unsigned int flags, struct iomap *iomap, bool may_alloc)
+{
+	struct super_block *sb = inode->i_sb;
+	struct exfat_sb_info *sbi = EXFAT_SB(sb);
+	struct exfat_inode_info *ei = EXFAT_I(inode);
+	unsigned int cluster, num_clusters;
+	loff_t cluster_offset, cluster_length;
+	int err;
+	bool balloc = false;
+
+	if (!may_alloc) {
+		/* Completely beyond EOF. Treat as hole */
+		if (i_size_read(inode) <= offset) {
+			iomap->type = IOMAP_HOLE;
+			iomap->addr = IOMAP_NULL_ADDR;
+			iomap->offset = offset;
+			iomap->length = length;
+			return 0;
+		}
+
+		/* Clamp length if the requested range goes beyond i_size */
+		if (offset + length > i_size_read(inode))
+			length = round_up(i_size_read(inode),
+					  i_blocksize(inode)) - offset;
+	}
+
+	num_clusters = exfat_bytes_to_cluster_round_up(sbi,
+			offset + length) - exfat_bytes_to_cluster(sbi, offset);
+
+	mutex_lock(&sbi->s_lock);
+	iomap->bdev = inode->i_sb->s_bdev;
+	iomap->offset = offset;
+
+	err = exfat_map_cluster(inode, exfat_bytes_to_cluster(sbi, offset),
+			&cluster, &num_clusters, may_alloc, &balloc);
+	if (err)
+		goto out;
+
+	cluster_offset = exfat_cluster_offset(sbi, offset);
+	cluster_length = exfat_cluster_to_bytes(sbi, num_clusters);
+
+	iomap->length = min_t(loff_t, length, cluster_length - cluster_offset);
+	iomap->addr = exfat_cluster_to_phys_bytes(sbi, cluster) + cluster_offset;
+	iomap->type = IOMAP_MAPPED;
+	iomap->flags = IOMAP_F_MERGED;
+
+	if (may_alloc || flags & IOMAP_ZERO) {
+		if (balloc)
+			iomap->flags |= IOMAP_F_NEW;
+		else if (iomap->offset + iomap->length >= ei->valid_size) {
+			/*
+			 * This is a write that starts at or extends beyond
+			 * the current valid_size. The region between the old
+			 * valid_size and the end of this write needs to be
+			 * zeroed in the page cache to prevent stale data
+			 * exposure (see IOMAP_F_ZERO_TAIL handling in
+			 * __iomap_write_begin()).
+			 */
+			iomap->flags |= IOMAP_F_ZERO_TAIL;
+		}
+	} else {
+		/*
+		 * valid_size is tracked in byte granularity and
+		 * marks the exact boundary between valid data and
+		 * holes (or unwritten space).
+		 *
+		 * When IOMAP_REPORT is set (used by lseek(SEEK_HOLE)
+		 * and SEEK_DATA), we return IOMAP_HOLE. This allows
+		 * iomap_seek_hole_iter() to directly return the
+		 * precise byte position.
+		 *
+		 * For normal I/O paths (without IOMAP_REPORT) we
+		 * return IOMAP_UNWRITTEN so the write path can
+		 * distinguish it from a real hole.
+		 */
+		if (offset >= ei->valid_size) {
+			iomap->type = flags & IOMAP_REPORT ?
+				IOMAP_HOLE : IOMAP_UNWRITTEN;
+		} else if (offset + iomap->length > ei->valid_size) {
+			if (flags & IOMAP_REPORT) {
+				/*
+				 * For SEEK_HOLE/SEEK_DATA, clip the length
+				 * to the exact byte boundary (valid_size).
+				 * This ensures the caller gets the precise
+				 * hole position in byte units.
+				 */
+				iomap->length = ei->valid_size - iomap->offset;
+			} else
+				iomap->length = round_up(ei->valid_size,
+							 i_blocksize(inode)) -
+								iomap->offset;
+		}
+	}
+
+	iomap->flags |= IOMAP_F_MERGED;
+out:
+	mutex_unlock(&sbi->s_lock);
+	return err;
+}
+
+static int exfat_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+		unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
+{
+	return __exfat_iomap_begin(inode, offset, length, flags, iomap, false);
+}
+
+static int exfat_write_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+		unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
+{
+	return __exfat_iomap_begin(inode, offset, length, flags, iomap, true);
+}
+
+const struct iomap_ops exfat_iomap_ops = {
+	.iomap_begin = exfat_iomap_begin,
+};
+
+/*
+ * exfat_write_iomap_end - Update the state after write
+ *
+ * Extends ->valid_size to cover the newly written range.
+ * Marks the inode dirty if metadata was changed.
+ */
+static int exfat_write_iomap_end(struct inode *inode, loff_t pos, loff_t length,
+		ssize_t written, unsigned int flags, struct iomap *iomap)
+{
+	struct exfat_inode_info *ei = EXFAT_I(inode);
+	bool dirtied = false;
+	loff_t end;
+
+	if (!written)
+		return 0;
+
+	end = pos + written;
+
+	if (ei->valid_size < end) {
+		ei->valid_size = end;
+		if (ei->zeroed_size < end)
+			ei->zeroed_size = end;
+		dirtied = true;
+	}
+
+	if (dirtied || iomap->flags & IOMAP_F_SIZE_CHANGED)
+		mark_inode_dirty(inode);
+
+	return written;
+}
+
+const struct iomap_ops exfat_write_iomap_ops = {
+	.iomap_begin	= exfat_write_iomap_begin,
+	.iomap_end	= exfat_write_iomap_end,
+};
+
+/*
+ * exfat_writeback_range - Map folio during writeback
+ *
+ * Called for each folio during writeback. If @offset falls outside the
+ * cached wpc->iomap, the range is remapped via __exfat_iomap_begin().
+ */
+static ssize_t exfat_writeback_range(struct iomap_writepage_ctx *wpc,
+		struct folio *folio, u64 offset, unsigned int len, u64 end_pos)
+{
+	if (offset < wpc->iomap.offset ||
+	    offset >= wpc->iomap.offset + wpc->iomap.length) {
+		int error;
+
+		error = __exfat_iomap_begin(wpc->inode, offset, len,
+				0, &wpc->iomap, false);
+		if (error)
+			return error;
+	}
+
+	return iomap_add_to_ioend(wpc, folio, offset, end_pos, len);
+}
+
+const struct iomap_writeback_ops exfat_writeback_ops = {
+	.writeback_range	= exfat_writeback_range,
+	.writeback_submit	= iomap_ioend_writeback_submit,
+};
+
+/**
+ * exfat_iomap_read_end_io - iomap read bio completion handler for exFAT
+ * @bio: bio that has completed reading
+ *
+ * exfat_iomap_begin() rounds up MAPPED extents to the block boundary of
+ * valid_size. This ensures that any subsequent blocks are treated as
+ * IOMAP_UNWRITTEN, but it also causes the "straddle block" containing
+ * valid_size to be read from disk. The disk data beyond valid_size in
+ * this block is stale and must be zeroed to prevent data leakage.
+ */
+static void exfat_iomap_read_end_io(struct bio *bio)
+{
+	int error = blk_status_to_errno(bio->bi_status);
+	struct folio_iter iter;
+
+	bio_for_each_folio_all(iter, bio) {
+		struct folio *folio = iter.folio;
+		struct exfat_inode_info *ei = EXFAT_I(folio->mapping->host);
+		s64 valid_size;
+		loff_t pos = folio_pos(folio);
+
+		valid_size = ei->valid_size;
+		if (pos + iter.offset < valid_size &&
+		    pos + iter.offset + iter.length > valid_size)
+			folio_zero_segment(folio, offset_in_folio(folio, valid_size),
+					   iter.offset + iter.length);
+
+		iomap_finish_folio_read(folio, iter.offset, iter.length, error);
+	}
+	bio_put(bio);
+}
+
+static void exfat_iomap_bio_submit_read(const struct iomap_iter *iter,
+		struct iomap_read_folio_ctx *ctx)
+{
+	struct bio *bio = ctx->read_ctx;
+
+	bio->bi_end_io = exfat_iomap_read_end_io;
+	submit_bio(bio);
+}
+
+const struct iomap_read_ops exfat_iomap_bio_read_ops = {
+	.read_folio_range	= iomap_bio_read_folio_range,
+	.submit_read		= exfat_iomap_bio_submit_read,
+};
diff --git a/fs/exfat/iomap.h b/fs/exfat/iomap.h
new file mode 100644
index 0000000000000..830388f386f4e
--- /dev/null
+++ b/fs/exfat/iomap.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2026 Namjae Jeon <linkinjeon@kernel.org>
+ */
+
+#ifndef _LINUX_EXFAT_IOMAP_H
+#define _LINUX_EXFAT_IOMAP_H
+
+extern const struct iomap_dio_ops exfat_write_dio_ops;
+extern const struct iomap_ops exfat_iomap_ops;
+extern const struct iomap_ops exfat_write_iomap_ops;
+extern const struct iomap_writeback_ops exfat_writeback_ops;
+extern const struct iomap_read_ops exfat_iomap_bio_read_ops;
+
+#endif /* _LINUX_EXFAT_IOMAP_H */
diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c
index 2c5636634b4a4..e69da0f5a111d 100644
--- a/fs/exfat/namei.c
+++ b/fs/exfat/namei.c
@@ -208,7 +208,7 @@ static int exfat_search_empty_slot(struct super_block *sb,
 	int dentries_per_clu;
 	struct exfat_chain clu;
 	struct exfat_sb_info *sbi = EXFAT_SB(sb);
-	int total_entries = EXFAT_CLU_TO_DEN(p_dir->size, sbi);
+	unsigned int total_entries = exfat_cluster_to_dentries(sbi, p_dir->size);
 
 	dentries_per_clu = sbi->dentries_per_clu;
 
@@ -266,7 +266,7 @@ static int exfat_search_empty_slot(struct super_block *sb,
 
 static int exfat_check_max_dentries(struct inode *inode)
 {
-	if (EXFAT_B_TO_DEN(i_size_read(inode)) >= MAX_EXFAT_DENTRIES) {
+	if (exfat_bytes_to_dentries(i_size_read(inode)) >= MAX_EXFAT_DENTRIES) {
 		/*
 		 * exFAT spec allows a dir to grow up to 8388608(256MB)
 		 * dentries
@@ -314,7 +314,8 @@ int exfat_find_empty_entry(struct inode *inode,
 	}
 
 	exfat_chain_set(p_dir, ei->start_clu,
-			EXFAT_B_TO_CLU(i_size_read(inode), sbi), ei->flags);
+			exfat_bytes_to_cluster(sbi, i_size_read(inode)),
+			ei->flags);
 
 	while ((dentry = exfat_search_empty_slot(sb, &hint_femp, p_dir,
 					num_entries, es)) < 0) {
@@ -340,7 +341,7 @@ int exfat_find_empty_entry(struct inode *inode,
 		}
 
 		/* allocate a cluster */
-		ret = exfat_alloc_cluster(inode, 1, &clu, IS_DIRSYNC(inode));
+		ret = exfat_alloc_cluster(inode, 1, &clu, IS_DIRSYNC(inode), false);
 		if (ret)
 			return ret;
 
@@ -375,7 +376,7 @@ int exfat_find_empty_entry(struct inode *inode,
 
 		hint_femp.cur.size++;
 		p_dir->size++;
-		size = EXFAT_CLU_TO_B(p_dir->size, sbi);
+		size = exfat_cluster_to_bytes(sbi, p_dir->size);
 
 		/* directory inode should be updated in here */
 		i_size_write(inode, size);
@@ -604,7 +605,7 @@ static int exfat_find(struct inode *dir, const struct qstr *qname,
 		return ret;
 
 	exfat_chain_set(&cdir, ei->start_clu,
-		EXFAT_B_TO_CLU(i_size_read(dir), sbi), ei->flags);
+		exfat_bytes_to_cluster(sbi, i_size_read(dir)), ei->flags);
 
 	/* check the validation of hint_stat and initialize it if required */
 	if (ei->version != (inode_peek_iversion_raw(dir) & 0xffffffff)) {
@@ -681,7 +682,7 @@ static int exfat_find(struct inode *dir, const struct qstr *qname,
 		return -EIO;
 	}
 
-	if (unlikely(EXFAT_B_TO_CLU_ROUND_UP(info->size, sbi) > sbi->used_clusters)) {
+	if (unlikely(exfat_bytes_to_cluster_round_up(sbi, info->size) > sbi->used_clusters)) {
 		exfat_fs_error(sb, "data size is invalid(%lld)", info->size);
 		return -EIO;
 	}
@@ -695,7 +696,8 @@ static int exfat_find(struct inode *dir, const struct qstr *qname,
 
 	if (info->type == TYPE_DIR) {
 		exfat_chain_set(&cdir, info->start_clu,
-				EXFAT_B_TO_CLU(info->size, sbi), info->flags);
+				exfat_bytes_to_cluster(sbi, info->size),
+				info->flags);
 		count = exfat_count_dir_entries(sb, &cdir);
 		if (count < 0)
 			return -EIO;
@@ -705,71 +707,44 @@ static int exfat_find(struct inode *dir, const struct qstr *qname,
 	return 0;
 }
 
-static int exfat_d_anon_disconn(struct dentry *dentry)
-{
-	return IS_ROOT(dentry) && (dentry->d_flags & DCACHE_DISCONNECTED);
-}
-
 static struct dentry *exfat_lookup(struct inode *dir, struct dentry *dentry,
 		unsigned int flags)
 {
 	struct super_block *sb = dir->i_sb;
-	struct inode *inode;
+	struct inode *inode = NULL;
 	struct dentry *alias;
 	struct exfat_dir_entry info;
 	int err;
 	loff_t i_pos;
-	mode_t i_mode;
 
 	mutex_lock(&EXFAT_SB(sb)->s_lock);
 	err = exfat_find(dir, &dentry->d_name, &info);
 	if (err) {
-		if (err == -ENOENT) {
-			inode = NULL;
-			goto out;
-		}
-		goto unlock;
+		if (unlikely(err != -ENOENT))
+			inode = ERR_PTR(err);
+		goto out;
 	}
 
 	i_pos = exfat_make_i_pos(&info);
 	inode = exfat_build_inode(sb, &info, i_pos);
-	err = PTR_ERR_OR_ZERO(inode);
-	if (err)
-		goto unlock;
+	if (IS_ERR(inode) || S_ISDIR(inode->i_mode))
+		goto out;
 
-	i_mode = inode->i_mode;
 	alias = d_find_alias(inode);
 
 	/*
 	 * Checking "alias->d_parent == dentry->d_parent" to make sure
 	 * FS is not corrupted (especially double linked dir).
 	 */
-	if (alias && alias->d_parent == dentry->d_parent &&
-			!exfat_d_anon_disconn(alias)) {
-
+	if (alias && alias->d_parent == dentry->d_parent) {
 		/*
-		 * Unhashed alias is able to exist because of revalidate()
-		 * called by lookup_fast. You can easily make this status
-		 * by calling create and lookup concurrently
-		 * In such case, we reuse an alias instead of new dentry
+		 * This inode has a hashed alias dentry with a different
+		 * name. This means the user did ->lookup() by
+		 * another name (longname vs 8.3 alias of it) in the past.
+		 *
+		 * Switch to the new one for reasons of locality if possible.
 		 */
-		if (d_unhashed(alias)) {
-			WARN_ON(alias->d_name.hash_len !=
-				dentry->d_name.hash_len);
-			exfat_info(sb, "rehashed a dentry(%p) in read lookup",
-				   alias);
-			d_drop(dentry);
-			d_rehash(alias);
-		} else if (!S_ISDIR(i_mode)) {
-			/*
-			 * This inode has non anonymous-DCACHE_DISCONNECTED
-			 * dentry. This means, the user did ->lookup() by an
-			 * another name (longname vs 8.3 alias of it) in past.
-			 *
-			 * Switch to new one for reason of locality if possible.
-			 */
-			d_move(alias, dentry);
-		}
+		d_move(alias, dentry);
 		iput(inode);
 		mutex_unlock(&EXFAT_SB(sb)->s_lock);
 		return alias;
@@ -781,9 +756,6 @@ out:
 		exfat_d_version_set(dentry, inode_query_iversion(dir));
 
 	return d_splice_alias(inode, dentry);
-unlock:
-	mutex_unlock(&EXFAT_SB(sb)->s_lock);
-	return ERR_PTR(err);
 }
 
 /* remove an entry, BUT don't truncate */
@@ -951,7 +923,7 @@ static int exfat_rmdir(struct inode *dir, struct dentry *dentry)
 	}
 
 	exfat_chain_set(&clu_to_free, ei->start_clu,
-		EXFAT_B_TO_CLU_ROUND_UP(i_size_read(inode), sbi), ei->flags);
+		exfat_bytes_to_cluster_round_up(sbi, i_size_read(inode)), ei->flags);
 
 	err = exfat_check_dir_empty(sb, &clu_to_free);
 	if (err) {
@@ -1158,8 +1130,8 @@ static int __exfat_rename(struct inode *old_parent_inode,
 
 			new_clu.dir = new_ei->start_clu;
 			new_clu.size =
-				EXFAT_B_TO_CLU_ROUND_UP(i_size_read(new_inode),
-				sbi);
+				exfat_bytes_to_cluster_round_up(sbi,
+						i_size_read(new_inode));
 			new_clu.flags = new_ei->flags;
 
 			ret = exfat_check_dir_empty(sb, &new_clu);
@@ -1203,8 +1175,8 @@ static int __exfat_rename(struct inode *old_parent_inode,
 			struct exfat_chain new_clu_to_free;
 
 			exfat_chain_set(&new_clu_to_free, new_ei->start_clu,
-				EXFAT_B_TO_CLU_ROUND_UP(i_size_read(new_inode),
-				sbi), new_ei->flags);
+				exfat_bytes_to_cluster_round_up(sbi, i_size_read(new_inode)),
+				new_ei->flags);
 
 			if (exfat_free_cluster(new_inode, &new_clu_to_free)) {
 				/* just set I/O error only */
diff --git a/fs/exfat/nls.c b/fs/exfat/nls.c
index 57db08a5271cf..055447edcf9a6 100644
--- a/fs/exfat/nls.c
+++ b/fs/exfat/nls.c
@@ -769,13 +769,18 @@ int exfat_create_upcase_table(struct super_block *sb)
 
 			tbl_clu  = le32_to_cpu(ep->dentry.upcase.start_clu);
 			tbl_size = le64_to_cpu(ep->dentry.upcase.size);
-
-			sector = exfat_cluster_to_sector(sbi, tbl_clu);
-			num_sectors = ((tbl_size - 1) >> blksize_bits) + 1;
-			ret = exfat_load_upcase_table(sb, sector, num_sectors,
-				le32_to_cpu(ep->dentry.upcase.checksum));
-
+			if (tbl_size) {
+				sector = exfat_cluster_to_sector(sbi, tbl_clu);
+				num_sectors = ((tbl_size - 1) >> blksize_bits) + 1;
+				ret = exfat_load_upcase_table(sb, sector, num_sectors,
+					le32_to_cpu(ep->dentry.upcase.checksum));
+			} else {
+				exfat_fs_error(sb,
+					       "bad upcase table size (0 bytes). Please run fsck");
+				ret = -EINVAL;
+			}
 			brelse(bh);
+
 			if (ret && ret != -EIO) {
 				/* free memory from exfat_load_upcase_table call */
 				exfat_free_upcase_table(sbi);
@@ -790,6 +795,8 @@ int exfat_create_upcase_table(struct super_block *sb)
 			return -EIO;
 	}
 
+	exfat_fs_error(sb, "no upcase table entry. Please run fsck");
+
 load_default:
 	/* load default upcase table */
 	return exfat_load_default_upcase_table(sb);
diff --git a/fs/exfat/super.c b/fs/exfat/super.c
index 95d87e2d7717f..388db271c6bf5 100644
--- a/fs/exfat/super.c
+++ b/fs/exfat/super.c
@@ -369,7 +369,7 @@ static int exfat_read_root(struct inode *inode, struct exfat_chain *root_clu)
 	ei->hint_stat.clu = sbi->root_dir;
 	ei->hint_femp.eidx = EXFAT_HINT_NONE;
 
-	i_size_write(inode, EXFAT_CLU_TO_B(root_clu->size, sbi));
+	i_size_write(inode, exfat_cluster_to_bytes(sbi, root_clu->size));
 
 	num_subdirs = exfat_count_dir_entries(sb, root_clu);
 	if (num_subdirs < 0)
@@ -499,6 +499,7 @@ static int exfat_read_boot_sector(struct super_block *sb)
 	if (p_boot->num_fats == 2)
 		sbi->FAT2_start_sector += sbi->num_FAT_sectors;
 	sbi->data_start_sector = le32_to_cpu(p_boot->clu_offset);
+	sbi->data_start_bytes = sbi->data_start_sector << p_boot->sect_size_bits;
 	sbi->num_sectors = le64_to_cpu(p_boot->vol_length);
 	/* because the cluster index starts with 2 */
 	sbi->num_clusters = le32_to_cpu(p_boot->clu_count) +
@@ -538,7 +539,7 @@ static int exfat_read_boot_sector(struct super_block *sb)
 	 * machines.
 	 */
 	sb->s_maxbytes = min(MAX_LFS_FILESIZE,
-			     EXFAT_CLU_TO_B((loff_t)EXFAT_MAX_NUM_CLUSTER, sbi));
+			     exfat_cluster_to_bytes(sbi, (loff_t)EXFAT_MAX_NUM_CLUSTER));
 
 	/* check logical sector size */
 	if (exfat_calibrate_blocksize(sb, 1 << p_boot->sect_size_bits))
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index d7b648421a70f..d6451c4208d49 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -836,6 +836,7 @@ static int __iomap_write_begin(const struct iomap_iter *iter,
 				return -EIO;
 			folio_zero_segments(folio, poff, from, to, poff + plen);
 		} else {
+			const struct iomap *iomap = iomap_iter_srcmap(iter);
 			int status;
 
 			if (iter->flags & IOMAP_NOWAIT)
@@ -853,6 +854,9 @@ static int __iomap_write_begin(const struct iomap_iter *iter,
 						  len, status, GFP_NOFS);
 			if (status)
 				return status;
+
+			if (iomap->flags & IOMAP_F_ZERO_TAIL)
+				folio_zero_segment(folio, to, poff + plen);
 		}
 		iomap_set_range_uptodate(folio, poff, plen);
 	} while ((block_start += plen) < block_end);
@@ -1058,7 +1062,6 @@ static bool iomap_write_end_inline(const struct iomap_iter *iter,
 	void *addr;
 
 	WARN_ON_ONCE(!folio_test_uptodate(folio));
-	BUG_ON(!iomap_inline_data_valid(iomap));
 
 	if (WARN_ON_ONCE(!iomap->inline_data))
 		return false;
@@ -1543,6 +1546,8 @@ static int iomap_zero_iter(struct iomap_iter *iter, bool *did_zero,
 		size_t offset;
 		bool ret;
 
+		balance_dirty_pages_ratelimited(iter->inode->i_mapping);
+
 		bytes = min_t(u64, SIZE_MAX, bytes);
 		status = iomap_write_begin(iter, write_ops, &folio, &offset,
 				&bytes);
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index b36ee619cdcdd..b485e3b191daf 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -69,7 +69,7 @@ static void iomap_dio_submit_bio(const struct iomap_iter *iter,
 
 	/* Sync dio can't be polled reliably */
 	if ((iocb->ki_flags & IOCB_HIPRI) && !is_sync_kiocb(iocb)) {
-		bio_set_polled(bio, iocb);
+		bio->bi_opf |= REQ_POLLED;
 		WRITE_ONCE(iocb->private, bio);
 	}
 
@@ -603,9 +603,6 @@ static int iomap_dio_inline_iter(struct iomap_iter *iomi, struct iomap_dio *dio)
 	if (WARN_ON_ONCE(!inline_data))
 		return -EIO;
 
-	if (WARN_ON_ONCE(!iomap_inline_data_valid(iomap)))
-		return -EIO;
-
 	if (dio->flags & IOMAP_DIO_WRITE) {
 		loff_t size = iomi->inode->i_size;
 
diff --git a/fs/iomap/iter.c b/fs/iomap/iter.c
index c04796f6e57fa..e4a29829591a7 100644
--- a/fs/iomap/iter.c
+++ b/fs/iomap/iter.c
@@ -6,17 +6,13 @@
 #include <linux/iomap.h>
 #include "trace.h"
 
-static inline void iomap_iter_reset_iomap(struct iomap_iter *iter)
+static inline void iomap_iter_clean_fbatch(struct iomap_iter *iter)
 {
 	if (iter->iomap.flags & IOMAP_F_FOLIO_BATCH) {
 		folio_batch_release(iter->fbatch);
 		folio_batch_reinit(iter->fbatch);
 		iter->iomap.flags &= ~IOMAP_F_FOLIO_BATCH;
 	}
-
-	iter->status = 0;
-	memset(&iter->iomap, 0, sizeof(iter->iomap));
-	memset(&iter->srcmap, 0, sizeof(iter->srcmap));
 }
 
 /* Advance the current iterator position and decrement the remaining length */
@@ -102,10 +98,14 @@ int iomap_iter(struct iomap_iter *iter, const struct iomap_ops *ops)
 		ret = 0;
 	else
 		ret = 1;
-	iomap_iter_reset_iomap(iter);
+	iomap_iter_clean_fbatch(iter);
+	iter->status = 0;
 	if (ret <= 0)
 		return ret;
 
+	memset(&iter->iomap, 0, sizeof(iter->iomap));
+	memset(&iter->srcmap, 0, sizeof(iter->srcmap));
+
 begin:
 	ret = ops->iomap_begin(iter->inode, iter->pos, iter->len, iter->flags,
 			       &iter->iomap, &iter->srcmap);
author	Mark Brown <broonie@kernel.org>	2026-05-29 12:27:15 +0100
committer	Mark Brown <broonie@kernel.org>	2026-05-29 12:27:15 +0100
commit	f945eab143d9c5a10b1ec5d1d1d743df59dfcc9e (patch)
tree	0dd9076182b1dc0bbae8813d02afea74a29e457e /fs
parent	fa872176b6ab36906ebb0e476122295f1446cf1f (diff)
parent	8cf22f18dc8bed9d3924336b312684edd44e43f1 (diff)
download	linux-next-history-f945eab143d9c5a10b1ec5d1d1d743df59dfcc9e.tar.gz