aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
authorMark Brown <broonie@kernel.org>2026-05-29 12:27:15 +0100
committerMark Brown <broonie@kernel.org>2026-05-29 12:27:15 +0100
commitf945eab143d9c5a10b1ec5d1d1d743df59dfcc9e (patch)
tree0dd9076182b1dc0bbae8813d02afea74a29e457e /fs
parentfa872176b6ab36906ebb0e476122295f1446cf1f (diff)
parent8cf22f18dc8bed9d3924336b312684edd44e43f1 (diff)
downloadlinux-next-history-f945eab143d9c5a10b1ec5d1d1d743df59dfcc9e.tar.gz
Merge branch 'dev' of https://git.kernel.org/pub/scm/linux/kernel/git/linkinjeon/exfat.git
Diffstat (limited to 'fs')
-rw-r--r--fs/exfat/Kconfig2
-rw-r--r--fs/exfat/Makefile2
-rw-r--r--fs/exfat/balloc.c2
-rw-r--r--fs/exfat/dir.c54
-rw-r--r--fs/exfat/exfat_fs.h140
-rw-r--r--fs/exfat/fatent.c30
-rw-r--r--fs/exfat/file.c261
-rw-r--r--fs/exfat/inode.c363
-rw-r--r--fs/exfat/iomap.c265
-rw-r--r--fs/exfat/iomap.h15
-rw-r--r--fs/exfat/namei.c84
-rw-r--r--fs/exfat/nls.c19
-rw-r--r--fs/exfat/super.c5
-rw-r--r--fs/iomap/buffered-io.c7
-rw-r--r--fs/iomap/direct-io.c5
-rw-r--r--fs/iomap/iter.c12
16 files changed, 757 insertions, 509 deletions
diff --git a/fs/exfat/Kconfig b/fs/exfat/Kconfig
index cbeca8e44d9b3..1fcb10c8d7bc9 100644
--- a/fs/exfat/Kconfig
+++ b/fs/exfat/Kconfig
@@ -4,7 +4,7 @@ config EXFAT_FS
tristate "exFAT filesystem support"
select BUFFER_HEAD
select NLS
- select LEGACY_DIRECT_IO
+ select FS_IOMAP
help
This allows you to mount devices formatted with the exFAT file system.
exFAT is typically used on SD-Cards or USB sticks.
diff --git a/fs/exfat/Makefile b/fs/exfat/Makefile
index ed51926a49717..e06bf85870ae7 100644
--- a/fs/exfat/Makefile
+++ b/fs/exfat/Makefile
@@ -5,4 +5,4 @@
obj-$(CONFIG_EXFAT_FS) += exfat.o
exfat-y := inode.o namei.o dir.o super.o fatent.o cache.o nls.o misc.o \
- file.o balloc.o
+ file.o balloc.o iomap.o
diff --git a/fs/exfat/balloc.c b/fs/exfat/balloc.c
index 625f2f14d4fe0..e66ebf899778c 100644
--- a/fs/exfat/balloc.c
+++ b/fs/exfat/balloc.c
@@ -112,7 +112,7 @@ static int exfat_allocate_bitmap(struct super_block *sb,
}
if (exfat_test_bitmap_range(sb, sbi->map_clu,
- EXFAT_B_TO_CLU_ROUND_UP(map_size, sbi)) == false)
+ exfat_bytes_to_cluster_round_up(sbi, map_size)) == false)
goto err_out;
return 0;
diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c
index ac008ccaa97de..8b8f6bc0c233c 100644
--- a/fs/exfat/dir.c
+++ b/fs/exfat/dir.c
@@ -76,7 +76,7 @@ static int exfat_readdir(struct inode *inode, loff_t *cpos, struct exfat_dir_ent
struct super_block *sb = inode->i_sb;
struct exfat_sb_info *sbi = EXFAT_SB(sb);
struct exfat_inode_info *ei = EXFAT_I(inode);
- unsigned int dentry = EXFAT_B_TO_DEN(*cpos) & 0xFFFFFFFF;
+ unsigned int dentry = exfat_bytes_to_dentries(*cpos) & 0xFFFFFFFF;
struct buffer_head *bh;
/* check if the given file ID is opened */
@@ -84,13 +84,13 @@ static int exfat_readdir(struct inode *inode, loff_t *cpos, struct exfat_dir_ent
return -EPERM;
exfat_chain_set(&dir, ei->start_clu,
- EXFAT_B_TO_CLU(i_size_read(inode), sbi), ei->flags);
+ exfat_bytes_to_cluster(sbi, i_size_read(inode)), ei->flags);
dentries_per_clu = sbi->dentries_per_clu;
- max_dentries = (unsigned int)min_t(u64, MAX_EXFAT_DENTRIES,
- (u64)EXFAT_CLU_TO_DEN(sbi->num_clusters, sbi));
+ max_dentries = min(MAX_EXFAT_DENTRIES,
+ exfat_cluster_to_dentries(sbi, sbi->num_clusters));
- clu_offset = EXFAT_DEN_TO_CLU(dentry, sbi);
+ clu_offset = exfat_dentries_to_cluster(sbi, dentry);
exfat_chain_dup(&clu, &dir);
if (clu.flags == ALLOC_FAT_CHAIN) {
@@ -147,10 +147,10 @@ static int exfat_readdir(struct inode *inode, loff_t *cpos, struct exfat_dir_ent
dir_entry->dir = clu;
brelse(bh);
- ei->hint_bmap.off = EXFAT_DEN_TO_CLU(dentry, sbi);
+ ei->hint_bmap.off = exfat_dentries_to_cluster(sbi, dentry);
ei->hint_bmap.clu = clu.dir;
- *cpos = EXFAT_DEN_TO_B(dentry + 1 + num_ext);
+ *cpos = exfat_dentries_to_bytes(dentry + 1 + num_ext);
return 0;
}
@@ -160,7 +160,7 @@ static int exfat_readdir(struct inode *inode, loff_t *cpos, struct exfat_dir_ent
out:
dir_entry->namebuf.lfn[0] = '\0';
- *cpos = EXFAT_DEN_TO_B(dentry);
+ *cpos = exfat_dentries_to_bytes(dentry);
return 0;
}
@@ -295,7 +295,7 @@ int exfat_alloc_new_dir(struct inode *inode, struct exfat_chain *clu)
exfat_chain_set(clu, EXFAT_EOF_CLUSTER, 0, ALLOC_NO_FAT_CHAIN);
- ret = exfat_alloc_cluster(inode, 1, clu, IS_DIRSYNC(inode));
+ ret = exfat_alloc_cluster(inode, 1, clu, IS_DIRSYNC(inode), false);
if (ret)
return ret;
@@ -465,7 +465,7 @@ static void exfat_free_benign_secondary_clusters(struct inode *inode,
return;
exfat_chain_set(&dir, start_clu,
- EXFAT_B_TO_CLU_ROUND_UP(size, EXFAT_SB(sb)),
+ exfat_bytes_to_cluster_round_up(EXFAT_SB(sb), size),
flags);
exfat_free_cluster(inode, &dir);
}
@@ -556,10 +556,11 @@ static int exfat_find_location(struct super_block *sb, struct exfat_chain *p_dir
unsigned int off, clu = 0;
struct exfat_sb_info *sbi = EXFAT_SB(sb);
- off = EXFAT_DEN_TO_B(entry);
+ off = exfat_dentries_to_bytes(entry);
clu = p_dir->dir;
- ret = exfat_cluster_walk(sb, &clu, EXFAT_B_TO_CLU(off, sbi), p_dir->flags);
+ ret = exfat_cluster_walk(sb, &clu, exfat_bytes_to_cluster(sbi, off),
+ p_dir->flags);
if (ret)
return ret;
@@ -567,7 +568,7 @@ static int exfat_find_location(struct super_block *sb, struct exfat_chain *p_dir
exfat_fs_error(sb,
"unexpected early break in cluster chain (clu : %u, len : %d)",
p_dir->dir,
- EXFAT_B_TO_CLU(off, sbi));
+ exfat_bytes_to_cluster(sbi, off));
return -EIO;
}
@@ -577,13 +578,13 @@ static int exfat_find_location(struct super_block *sb, struct exfat_chain *p_dir
}
/* byte offset in cluster */
- off = EXFAT_CLU_OFFSET(off, sbi);
+ off = exfat_cluster_offset(sbi, off);
/* byte offset in sector */
- *offset = EXFAT_BLK_OFFSET(off, sb);
+ *offset = exfat_block_offset(sb, off);
/* sector offset in cluster */
- *sector = EXFAT_B_TO_BLK(off, sb);
+ *sector = exfat_bytes_to_block(sb, off);
*sector += exfat_cluster_to_sector(sbi, clu);
return 0;
}
@@ -593,7 +594,7 @@ struct exfat_dentry *exfat_get_dentry(struct super_block *sb,
{
struct exfat_sb_info *sbi = EXFAT_SB(sb);
unsigned int sect_per_clus = sbi->sect_per_clus;
- unsigned int dentries_per_page = EXFAT_B_TO_DEN(PAGE_SIZE);
+ unsigned int dentries_per_page = exfat_bytes_to_dentries(PAGE_SIZE);
int off;
sector_t sec;
@@ -672,8 +673,8 @@ struct exfat_dentry *exfat_get_dentry_cached(
struct exfat_entry_set_cache *es, int num)
{
int off = es->start_off + num * DENTRY_SIZE;
- struct buffer_head *bh = es->bh[EXFAT_B_TO_BLK(off, es->sb)];
- char *p = bh->b_data + EXFAT_BLK_OFFSET(off, es->sb);
+ struct buffer_head *bh = es->bh[exfat_bytes_to_block(es->sb, off)];
+ char *p = bh->b_data + exfat_block_offset(es->sb, off);
return (struct exfat_dentry *)p;
}
@@ -741,7 +742,7 @@ static int __exfat_get_dentry_set(struct exfat_entry_set_cache *es,
es->num_entries = num_entries;
- num_bh = EXFAT_B_TO_BLK_ROUND_UP(off + num_entries * DENTRY_SIZE, sb);
+ num_bh = exfat_bytes_to_block_round_up(sb, off + num_entries * DENTRY_SIZE);
if (num_bh > ARRAY_SIZE(es->__bh)) {
es->bh = kmalloc_objs(*es->bh, num_bh, GFP_NOFS);
if (!es->bh) {
@@ -830,7 +831,7 @@ static int exfat_validate_empty_dentry_set(struct exfat_entry_set_cache *es)
err_used_follow_unused:
off = es->start_off + (i << DENTRY_SIZE_BITS);
- bh = es->bh[EXFAT_B_TO_BLK(off, es->sb)];
+ bh = es->bh[exfat_bytes_to_block(es->sb, off)];
exfat_fs_error(es->sb,
"in sector %lld, dentry %d should be unused, but 0x%x",
@@ -839,7 +840,8 @@ err_used_follow_unused:
return -EIO;
count_skip_entries:
- es->num_entries = EXFAT_B_TO_DEN(EXFAT_BLK_TO_B(es->num_bh, es->sb) - es->start_off);
+ es->num_entries =
+ exfat_bytes_to_dentries(exfat_block_to_bytes(es->sb, es->num_bh) - es->start_off);
for (; i < es->num_entries; i++) {
ep = exfat_get_dentry_cached(es, i);
if (IS_EXFAT_DELETED(ep->type))
@@ -892,7 +894,7 @@ static inline void exfat_set_empty_hint(struct exfat_inode_info *ei,
{
if (ei->hint_femp.eidx == EXFAT_HINT_NONE ||
ei->hint_femp.eidx > dentry) {
- int total_entries = EXFAT_B_TO_DEN(i_size_read(&ei->vfs_inode));
+ int total_entries = exfat_bytes_to_dentries(i_size_read(&ei->vfs_inode));
if (candi_empty->count == 0) {
candi_empty->cur = *clu;
@@ -1027,12 +1029,12 @@ rewind:
continue;
}
- brelse(bh);
if (entry_type == TYPE_EXTEND) {
unsigned short entry_uniname[16], unichar;
if (step != DIRENT_STEP_NAME ||
name_len >= MAX_NAME_LENGTH) {
+ brelse(bh);
step = DIRENT_STEP_FILE;
continue;
}
@@ -1043,6 +1045,7 @@ rewind:
uniname += EXFAT_FILE_NAME_LEN;
len = exfat_extract_uni_name(ep, entry_uniname);
+ brelse(bh);
name_len += len;
unichar = *(uniname+len);
@@ -1061,6 +1064,7 @@ rewind:
continue;
}
+ brelse(bh);
if (entry_type &
(TYPE_CRITICAL_SEC | TYPE_BENIGN_SEC)) {
if (step == DIRENT_STEP_SECD) {
@@ -1215,7 +1219,7 @@ static int exfat_get_volume_label_dentry(struct super_block *sb,
es->bh = es->__bh;
es->bh[0] = bh;
es->num_bh = 1;
- es->start_off = EXFAT_DEN_TO_B(i) % sb->s_blocksize;
+ es->start_off = exfat_dentries_to_bytes(i) % sb->s_blocksize;
return 0;
}
diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index 89ef5368277f8..174728904dc10 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -12,6 +12,7 @@
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <uapi/linux/exfat.h>
+#include <linux/buffer_head.h>
#define EXFAT_ROOT_INO 1
@@ -85,38 +86,6 @@ enum {
<< (PAGE_SHIFT - (sb)->s_blocksize_bits))
/*
- * helpers for cluster size to byte conversion.
- */
-#define EXFAT_CLU_TO_B(b, sbi) ((b) << (sbi)->cluster_size_bits)
-#define EXFAT_B_TO_CLU(b, sbi) ((b) >> (sbi)->cluster_size_bits)
-#define EXFAT_B_TO_CLU_ROUND_UP(b, sbi) \
- (((b - 1) >> (sbi)->cluster_size_bits) + 1)
-#define EXFAT_CLU_OFFSET(off, sbi) ((off) & ((sbi)->cluster_size - 1))
-
-/*
- * helpers for block size to byte conversion.
- */
-#define EXFAT_BLK_TO_B(b, sb) ((b) << (sb)->s_blocksize_bits)
-#define EXFAT_B_TO_BLK(b, sb) ((b) >> (sb)->s_blocksize_bits)
-#define EXFAT_B_TO_BLK_ROUND_UP(b, sb) \
- (((b - 1) >> (sb)->s_blocksize_bits) + 1)
-#define EXFAT_BLK_OFFSET(off, sb) ((off) & ((sb)->s_blocksize - 1))
-
-/*
- * helpers for block size to dentry size conversion.
- */
-#define EXFAT_B_TO_DEN(b) ((b) >> DENTRY_SIZE_BITS)
-#define EXFAT_DEN_TO_B(b) ((b) << DENTRY_SIZE_BITS)
-
-/*
- * helpers for cluster size to dentry size conversion.
- */
-#define EXFAT_CLU_TO_DEN(clu, sbi) \
- ((clu) << ((sbi)->cluster_size_bits - DENTRY_SIZE_BITS))
-#define EXFAT_DEN_TO_CLU(dentry, sbi) \
- ((dentry) >> ((sbi)->cluster_size_bits - DENTRY_SIZE_BITS))
-
-/*
* helpers for fat entry.
*/
#define FAT_ENT_SIZE (4)
@@ -149,7 +118,7 @@ enum {
* The 608 bytes are in 3 sectors at most (even 512 Byte sector).
*/
#define DIR_CACHE_SIZE \
- (DIV_ROUND_UP(EXFAT_DEN_TO_B(ES_MAX_ENTRY_NUM), SECTOR_SIZE) + 1)
+ (DIV_ROUND_UP(ES_MAX_ENTRY_NUM << DENTRY_SIZE_BITS, SECTOR_SIZE) + 1)
/* Superblock flags */
#define EXFAT_FLAGS_SHUTDOWN 1
@@ -259,6 +228,7 @@ struct exfat_sb_info {
unsigned long long FAT1_start_sector; /* FAT1 start sector */
unsigned long long FAT2_start_sector; /* FAT2 start sector */
unsigned long long data_start_sector; /* data area start sector */
+ unsigned long long data_start_bytes;
unsigned int num_FAT_sectors; /* num of FAT sectors */
unsigned int root_dir; /* root dir cluster */
unsigned int dentries_per_clu; /* num of dentries per cluster */
@@ -324,6 +294,8 @@ struct exfat_inode_info {
/* on-disk position of directory entry or 0 */
loff_t i_pos;
loff_t valid_size;
+ /* page-aligned size that has been zeroed out for mmap */
+ loff_t zeroed_size;
/* hash by i_location */
struct hlist_node i_hash_fat;
/* protect bmap against truncate */
@@ -432,6 +404,101 @@ static inline loff_t exfat_ondisk_size(const struct inode *inode)
return ((loff_t)inode->i_blocks) << 9;
}
+/*
+ * Translate a cluster number into a byte position: the cluster index is
+ * rebased by EXFAT_RESERVED_CLUSTERS, scaled by the cluster size and then
+ * offset by sbi->data_start_bytes.
+ */
+static inline loff_t exfat_cluster_to_phys_bytes(struct exfat_sb_info *sbi,
+		unsigned int clus)
+{
+	loff_t rel_bytes = (loff_t)(clus - EXFAT_RESERVED_CLUSTERS);
+
+	rel_bytes <<= sbi->cluster_size_bits;
+	return rel_bytes + sbi->data_start_bytes;
+}
+
+/*
+ * helpers for cluster size to byte conversion.
+ */
+/* Size in bytes of @nr_clusters clusters, computed in loff_t width. */
+static inline loff_t exfat_cluster_to_bytes(struct exfat_sb_info *sbi,
+		u32 nr_clusters)
+{
+	loff_t bytes = nr_clusters;
+
+	return bytes << sbi->cluster_size_bits;
+}
+
+/* Count of 512-byte (1 << 9) units covered by @nr_clusters clusters. */
+static inline blkcnt_t exfat_cluster_to_sectors(struct exfat_sb_info *sbi,
+		u32 nr_clusters)
+{
+	blkcnt_t sectors = nr_clusters;
+
+	return sectors << (sbi->cluster_size_bits - 9);
+}
+
+/* Number of whole clusters contained in @size bytes (rounds down). */
+static inline u32 exfat_bytes_to_cluster(struct exfat_sb_info *sbi, loff_t size)
+{
+	u32 nr_clusters = size >> sbi->cluster_size_bits;
+
+	return nr_clusters;
+}
+
+/*
+ * Number of clusters needed to hold @size bytes (rounds up).
+ * A @size of zero or less yields 0.
+ */
+static inline u32 exfat_bytes_to_cluster_round_up(struct exfat_sb_info *sbi,
+		loff_t size)
+{
+	if (size > 0)
+		return (u32)((size - 1) >> sbi->cluster_size_bits) + 1;
+
+	return 0;
+}
+
+/* Byte offset of @off inside its cluster (@off masked by cluster_size - 1). */
+static inline u32 exfat_cluster_offset(struct exfat_sb_info *sbi, loff_t off)
+{
+	u32 in_cluster = off & (sbi->cluster_size - 1);
+
+	return in_cluster;
+}
+
+/*
+ * helpers for block size to byte conversion.
+ */
+/* Byte position of filesystem block @block, using sb->s_blocksize_bits. */
+static inline loff_t exfat_block_to_bytes(struct super_block *sb,
+		sector_t block)
+{
+	loff_t bytes = (loff_t)block;
+
+	return bytes << sb->s_blocksize_bits;
+}
+
+/* Number of whole filesystem blocks contained in @size bytes (rounds down). */
+static inline sector_t exfat_bytes_to_block(struct super_block *sb, loff_t size)
+{
+	sector_t nr_blocks = (sector_t)(size >> sb->s_blocksize_bits);
+
+	return nr_blocks;
+}
+
+/*
+ * Number of filesystem blocks needed to hold @size bytes (rounds up).
+ * A @size of zero or less yields 0.
+ */
+static inline sector_t exfat_bytes_to_block_round_up(struct super_block *sb,
+		loff_t size)
+{
+	if (size > 0)
+		return (sector_t)(((size - 1) >> sb->s_blocksize_bits) + 1);
+
+	return 0;
+}
+
+/* Byte offset of @off inside its block (@off masked by s_blocksize - 1). */
+static inline u32 exfat_block_offset(struct super_block *sb, loff_t off)
+{
+	u32 in_block = (u32)(off & (sb->s_blocksize - 1));
+
+	return in_block;
+}
+
+/*
+ * helpers for block size to dentry size conversion.
+ */
+/* Number of whole directory entries contained in @b bytes. */
+static inline u32 exfat_bytes_to_dentries(loff_t b)
+{
+	u32 nr_dentries = (u32)(b >> DENTRY_SIZE_BITS);
+
+	return nr_dentries;
+}
+
+/* Size in bytes of @dentry directory entries; the result is 32 bits wide. */
+static inline u32 exfat_dentries_to_bytes(u32 dentry)
+{
+	dentry <<= DENTRY_SIZE_BITS;
+	return dentry;
+}
+
+/*
+ * helpers for cluster size to dentry size conversion.
+ */
+/*
+ * Number of directory entries that fit in @nr_clusters clusters.
+ *
+ * The product is computed in 64 bits and saturated at U32_MAX.  A plain
+ * 32-bit shift wraps (possibly down to 0) once the volume has enough large
+ * clusters, and callers that clamp against this value, such as the
+ * max_dentries computation in exfat_readdir(), would then end up with a
+ * limit that is far too small.
+ */
+static inline u32 exfat_cluster_to_dentries(struct exfat_sb_info *sbi,
+		u32 nr_clusters)
+{
+	u64 dentries = (u64)nr_clusters <<
+		(sbi->cluster_size_bits - DENTRY_SIZE_BITS);
+
+	if (dentries > U32_MAX)
+		return U32_MAX;
+
+	return (u32)dentries;
+}
+
+/* Index of the cluster, counted from the chain start, holding entry @dentry. */
+static inline u32 exfat_dentries_to_cluster(struct exfat_sb_info *sbi,
+		u32 dentry)
+{
+	dentry >>= (sbi->cluster_size_bits - DENTRY_SIZE_BITS);
+	return dentry;
+}
+
/* super.c */
int exfat_set_volume_dirty(struct super_block *sb);
int exfat_clear_volume_dirty(struct super_block *sb);
@@ -441,7 +508,7 @@ int exfat_clear_volume_dirty(struct super_block *sb);
exfat_cluster_walk(sb, (pclu), 1, ALLOC_FAT_CHAIN)
int exfat_alloc_cluster(struct inode *inode, unsigned int num_alloc,
- struct exfat_chain *p_chain, bool sync_bmap);
+ struct exfat_chain *p_chain, bool sync_bmap, bool contig);
int exfat_free_cluster(struct inode *inode, struct exfat_chain *p_chain);
int exfat_ent_get(struct super_block *sb, unsigned int loc,
unsigned int *content, struct buffer_head **last);
@@ -490,7 +557,6 @@ int exfat_trim_fs(struct inode *inode, struct fstrim_range *range);
/* file.c */
extern const struct file_operations exfat_file_operations;
int __exfat_truncate(struct inode *inode);
-void exfat_truncate(struct inode *inode);
int exfat_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr);
int exfat_getattr(struct mnt_idmap *idmap, const struct path *path,
@@ -584,7 +650,9 @@ struct inode *exfat_iget(struct super_block *sb, loff_t i_pos);
int __exfat_write_inode(struct inode *inode, int sync);
int exfat_write_inode(struct inode *inode, struct writeback_control *wbc);
void exfat_evict_inode(struct inode *inode);
-int exfat_block_truncate_page(struct inode *inode, loff_t from);
+int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
+ unsigned int *clu, unsigned int *count, int create,
+ bool *balloc);
/* exfat/nls.c */
unsigned short exfat_toupper(struct super_block *sb, unsigned short a);
diff --git a/fs/exfat/fatent.c b/fs/exfat/fatent.c
index dce0955e689aa..a8b11e2ce43f1 100644
--- a/fs/exfat/fatent.c
+++ b/fs/exfat/fatent.c
@@ -412,14 +412,14 @@ int exfat_zeroed_cluster(struct inode *dir, unsigned int clu)
if (IS_DIRSYNC(dir))
return sync_blockdev_range(sb->s_bdev,
- EXFAT_BLK_TO_B(blknr, sb),
- EXFAT_BLK_TO_B(last_blknr, sb) - 1);
+ exfat_block_to_bytes(sb, blknr),
+ exfat_block_to_bytes(sb, last_blknr) - 1);
return 0;
}
int exfat_alloc_cluster(struct inode *inode, unsigned int num_alloc,
- struct exfat_chain *p_chain, bool sync_bmap)
+ struct exfat_chain *p_chain, bool sync_bmap, bool contig)
{
int ret = -ENOSPC;
unsigned int total_cnt;
@@ -470,14 +470,20 @@ int exfat_alloc_cluster(struct inode *inode, unsigned int num_alloc,
while ((new_clu = exfat_find_free_bitmap(sb, hint_clu)) !=
EXFAT_EOF_CLUSTER) {
- if (new_clu != hint_clu &&
- p_chain->flags == ALLOC_NO_FAT_CHAIN) {
- if (exfat_chain_cont_cluster(sb, p_chain->dir,
- p_chain->size)) {
- ret = -EIO;
- goto free_cluster;
+ if (new_clu != hint_clu) {
+ if (p_chain->flags == ALLOC_NO_FAT_CHAIN) {
+ if (exfat_chain_cont_cluster(sb, p_chain->dir,
+ p_chain->size)) {
+ ret = -EIO;
+ goto free_cluster;
+ }
+ p_chain->flags = ALLOC_FAT_CHAIN;
+ }
+
+ if (contig && p_chain->size > 0) {
+ hint_clu = last_clu;
+ goto done;
}
- p_chain->flags = ALLOC_FAT_CHAIN;
}
/* update allocation bitmap */
@@ -507,9 +513,9 @@ int exfat_alloc_cluster(struct inode *inode, unsigned int num_alloc,
last_clu = new_clu;
if (p_chain->size == num_alloc) {
+done:
sbi->clu_srch_ptr = hint_clu;
- sbi->used_clusters += num_alloc;
-
+ sbi->used_clusters += p_chain->size;
mutex_unlock(&sbi->bitmap_lock);
return 0;
}
diff --git a/fs/exfat/file.c b/fs/exfat/file.c
index 354bdcfe4abcd..e6e58584f567a 100644
--- a/fs/exfat/file.c
+++ b/fs/exfat/file.c
@@ -14,9 +14,11 @@
#include <linux/writeback.h>
#include <linux/filelock.h>
#include <linux/falloc.h>
+#include <linux/iomap.h>
#include "exfat_raw.h"
#include "exfat_fs.h"
+#include "iomap.h"
static int exfat_cont_expand(struct inode *inode, loff_t size)
{
@@ -26,16 +28,17 @@ static int exfat_cont_expand(struct inode *inode, loff_t size)
struct super_block *sb = inode->i_sb;
struct exfat_sb_info *sbi = EXFAT_SB(sb);
struct exfat_chain clu;
+ loff_t oldsize = i_size_read(inode);
- truncate_pagecache(inode, i_size_read(inode));
+ truncate_pagecache(inode, oldsize);
ret = inode_newsize_ok(inode, size);
if (ret)
return ret;
- num_clusters = EXFAT_B_TO_CLU(exfat_ondisk_size(inode), sbi);
+ num_clusters = exfat_bytes_to_cluster(sbi, exfat_ondisk_size(inode));
/* integer overflow is already checked in inode_newsize_ok(). */
- new_num_clusters = EXFAT_B_TO_CLU_ROUND_UP(size, sbi);
+ new_num_clusters = exfat_bytes_to_cluster_round_up(sbi, size);
if (new_num_clusters == num_clusters)
goto out;
@@ -56,7 +59,7 @@ static int exfat_cont_expand(struct inode *inode, loff_t size)
clu.flags = ei->flags;
ret = exfat_alloc_cluster(inode, new_num_clusters - num_clusters,
- &clu, inode_needs_sync(inode));
+ &clu, inode_needs_sync(inode), false);
if (ret)
return ret;
@@ -78,6 +81,13 @@ out:
inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
/* Expanded range not zeroed, do not update valid_size */
i_size_write(inode, size);
+ /*
+ * When extending file size, call truncate_pagecache() first,
+ * then update i_size, and call pagecache_isize_extended()
+ * to ensure the straddling folio is properly marked RO so
+ * page_mkwrite() is called and post-EOF area is zeroed.
+ */
+ pagecache_isize_extended(inode, oldsize, inode->i_size);
inode->i_blocks = round_up(size, sbi->cluster_size) >> 9;
mark_inode_dirty(inode);
@@ -200,8 +210,8 @@ int __exfat_truncate(struct inode *inode)
exfat_set_volume_dirty(sb);
- num_clusters_new = EXFAT_B_TO_CLU_ROUND_UP(i_size_read(inode), sbi);
- num_clusters_phys = EXFAT_B_TO_CLU(exfat_ondisk_size(inode), sbi);
+ num_clusters_new = exfat_bytes_to_cluster_round_up(sbi, i_size_read(inode));
+ num_clusters_phys = exfat_bytes_to_cluster(sbi, exfat_ondisk_size(inode));
exfat_chain_set(&clu, ei->start_clu, num_clusters_phys, ei->flags);
@@ -236,7 +246,7 @@ int __exfat_truncate(struct inode *inode)
}
if (i_size_read(inode) < ei->valid_size)
- ei->valid_size = i_size_read(inode);
+ ei->valid_size = ei->zeroed_size = i_size_read(inode);
if (ei->type == TYPE_FILE)
ei->attr |= EXFAT_ATTR_ARCHIVE;
@@ -282,7 +292,7 @@ int __exfat_truncate(struct inode *inode)
return 0;
}
-void exfat_truncate(struct inode *inode)
+static void exfat_truncate(struct inode *inode)
{
struct super_block *sb = inode->i_sb;
struct exfat_sb_info *sbi = EXFAT_SB(sb);
@@ -383,10 +393,12 @@ int exfat_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
exfat_truncate_inode_atime(inode);
if (attr->ia_valid & ATTR_SIZE) {
- error = exfat_block_truncate_page(inode, attr->ia_size);
- if (error)
- goto out;
-
+ /*
+ * Wait for any in-flight DIO to finish before truncating to
+ * prevent a concurrent DIO from writing to clusters that are
+ * about to be freed.
+ */
+ inode_dio_wait(inode);
down_write(&EXFAT_I(inode)->truncate_lock);
truncate_setsize(inode, attr->ia_size);
@@ -631,42 +643,76 @@ int exfat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
+/*
+ * Zero the range between the current ei->valid_size and @new_valid_size.
+ *
+ * Nothing is done when valid_size is already at or beyond @new_valid_size.
+ * Otherwise i_size is first raised to @new_valid_size if it is smaller, and
+ * the gap is zeroed with iomap_zero_range() using exfat_write_iomap_ops.
+ * If zeroing fails, the inode is truncated back to the old valid_size.
+ *
+ * NOTE(review): ei->valid_size is not assigned here; presumably the iomap
+ * ops advance it -- confirm against iomap.c.
+ *
+ * Returns 0 on success or the error from iomap_zero_range().
+ */
static int exfat_extend_valid_size(struct inode *inode, loff_t new_valid_size)
{
- int err;
- loff_t pos;
struct exfat_inode_info *ei = EXFAT_I(inode);
- struct address_space *mapping = inode->i_mapping;
- const struct address_space_operations *ops = mapping->a_ops;
+ loff_t old_valid_size = ei->valid_size;
+ int ret = 0;
- pos = ei->valid_size;
- while (pos < new_valid_size) {
- u32 len;
- struct folio *folio;
- unsigned long off;
+ if (old_valid_size < new_valid_size) {
+ /* Grow i_size first so the zeroed range lies within the file. */
+ if (i_size_read(inode) < new_valid_size) {
+ i_size_write(inode, new_valid_size);
+ mark_inode_dirty(inode);
+ }
- len = PAGE_SIZE - (pos & (PAGE_SIZE - 1));
- if (pos + len > new_valid_size)
- len = new_valid_size - pos;
+ ret = iomap_zero_range(inode, old_valid_size,
+ new_valid_size - old_valid_size, NULL,
+ &exfat_write_iomap_ops, NULL, NULL);
+ if (ret) {
+ /* Roll back: shrink to the old valid_size and truncate. */
+ truncate_setsize(inode, old_valid_size);
+ exfat_truncate(inode);
+ }
+ }
- err = ops->write_begin(NULL, mapping, pos, len, &folio, NULL);
- if (err)
- goto out;
+ return ret;
+}
- off = offset_in_folio(folio, pos);
- folio_zero_new_buffers(folio, off, off + len);
+/*
+ * Complete a direct write through the page cache.
+ *
+ * Clears IOCB_DIRECT, writes what is left in @from with
+ * iomap_file_buffered_write(), then writes back and invalidates the
+ * page-cache range that was just written.
+ *
+ * Returns the number of bytes written, the negative error from the
+ * buffered write, or -EIO when writeback of the range fails.
+ */
+static ssize_t exfat_fallback_buffered_write(struct kiocb *iocb,
+		struct iov_iter *from)
+{
+	loff_t offset = iocb->ki_pos, end;
+	ssize_t written;
+	int ret;
- err = ops->write_end(NULL, mapping, pos, len, len, folio, NULL);
- if (err < 0)
- goto out;
- pos += len;
+	iocb->ki_flags &= ~IOCB_DIRECT;
- balance_dirty_pages_ratelimited(mapping);
- cond_resched();
- }
+	written = iomap_file_buffered_write(iocb, from, &exfat_write_iomap_ops,
+					    NULL, NULL);
+	if (written < 0)
+		return written;
- return 0;
+	/*
+	 * iomap_file_buffered_write() has already advanced iocb->ki_pos by
+	 * @written, so the end of the range must be derived from the saved
+	 * start offset; using ki_pos here would double the range.
+	 */
+	end = offset + written - 1;
+	ret = filemap_write_and_wait_range(iocb->ki_filp->f_mapping,
+					   offset, end);
+	if (ret)
+		return -EIO;
-out:
- return err;
+	invalidate_mapping_pages(iocb->ki_filp->f_mapping,
+				 offset >> PAGE_SHIFT,
+				 end >> PAGE_SHIFT);
+
+	return written;
+}
+
+/*
+ * Direct-I/O write path.  Whatever iomap_dio_rw() leaves unwritten in @from
+ * (including the whole request when it returns -ENOTBLK) is handed to
+ * exfat_fallback_buffered_write().  Returns the total number of bytes
+ * written, or a negative error from either stage.
+ */
+static ssize_t exfat_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+	ssize_t done, buffered;
+
+	done = iomap_dio_rw(iocb, from, &exfat_write_iomap_ops,
+			    &exfat_write_dio_ops, 0, NULL, 0);
+	if (done < 0) {
+		if (done != -ENOTBLK)
+			return done;
+		done = 0;
+	}
+
+	if (!iov_iter_count(from))
+		return done;
+
+	buffered = exfat_fallback_buffered_write(iocb, from);
+	if (buffered < 0)
+		return buffered;
+
+	return done + buffered;
+}
static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter)
@@ -677,6 +723,7 @@ static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter)
struct exfat_inode_info *ei = EXFAT_I(inode);
loff_t pos = iocb->ki_pos;
loff_t valid_size;
+ int err;
if (unlikely(exfat_forced_shutdown(inode->i_sb)))
return -EIO;
@@ -692,14 +739,10 @@ static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter)
if (ret <= 0)
goto unlock;
- if (iocb->ki_flags & IOCB_DIRECT) {
- unsigned long align = pos | iov_iter_alignment(iter);
-
- if (!IS_ALIGNED(align, i_blocksize(inode)) &&
- !IS_ALIGNED(align, bdev_logical_block_size(inode->i_sb->s_bdev))) {
- ret = -EINVAL;
- goto unlock;
- }
+ err = file_modified(iocb->ki_filp);
+ if (err) {
+ ret = err;
+ goto unlock;
}
if (pos > valid_size) {
@@ -713,7 +756,11 @@ static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter)
goto unlock;
}
- ret = __generic_file_write_iter(iocb, iter);
+ if (iocb->ki_flags & IOCB_DIRECT)
+ ret = exfat_dio_write_iter(iocb, iter);
+ else
+ ret = iomap_file_buffered_write(iocb, iter,
+ &exfat_write_iomap_ops, NULL, NULL);
if (ret < 0)
goto unlock;
@@ -740,37 +787,78 @@ unlock:
+/*
+ * read_iter handler.  Fails with -EIO after a forced shutdown.  Otherwise
+ * the read runs under the shared inode lock: IOCB_DIRECT requests go through
+ * iomap_dio_rw() with exfat_iomap_ops, everything else through
+ * generic_file_read_iter().  Returns whatever the chosen path returns.
+ */
static ssize_t exfat_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
struct inode *inode = file_inode(iocb->ki_filp);
+ ssize_t ret;
if (unlikely(exfat_forced_shutdown(inode->i_sb)))
return -EIO;
- return generic_file_read_iter(iocb, iter);
+ inode_lock_shared(inode);
+
+ if (iocb->ki_flags & IOCB_DIRECT) {
+ /* The direct path updates atime itself, before issuing the I/O. */
+ file_accessed(iocb->ki_filp);
+ ret = iomap_dio_rw(iocb, iter, &exfat_iomap_ops, NULL, 0,
+ NULL, 0);
+ } else {
+ ret = generic_file_read_iter(iocb, iter);
+ }
+
+ inode_unlock_shared(inode);
+
+ return ret;
}
+/*
+ * page_mkwrite handler for shared writable mappings.
+ *
+ * Returns VM_FAULT_RETRY when the inode lock cannot be taken with
+ * inode_trylock().  With the lock held, valid_size is advanced to the end of
+ * the faulting page (capped at i_size); any part of that range not yet
+ * covered by ei->zeroed_size is zeroed first.  The fault is then completed
+ * by iomap_page_mkwrite() under the shared invalidate lock.
+ */
static vm_fault_t exfat_page_mkwrite(struct vm_fault *vmf)
{
- int err;
struct inode *inode = file_inode(vmf->vma->vm_file);
struct exfat_inode_info *ei = EXFAT_I(inode);
- loff_t new_valid_size;
+ vm_fault_t ret;
+ loff_t new_valid_size, mmap_valid_size;
if (!inode_trylock(inode))
return VM_FAULT_RETRY;
- new_valid_size = ((loff_t)vmf->pgoff + 1) << PAGE_SHIFT;
- new_valid_size = min(new_valid_size, i_size_read(inode));
+ /* End of the faulting page, and the same value capped at i_size. */
+ mmap_valid_size = ((loff_t)vmf->pgoff + 1) << PAGE_SHIFT;
+ new_valid_size = min(mmap_valid_size, i_size_read(inode));
if (ei->valid_size < new_valid_size) {
- err = exfat_extend_valid_size(inode, new_valid_size);
- if (err < 0) {
- inode_unlock(inode);
- return vmf_fs_error(err);
+ if (ei->zeroed_size < mmap_valid_size) {
+ int err;
+
+ /*
+ * Only zero the range that hasn't been zeroed yet for
+ * this mmap write path. zeroed_size tracks the largest
+ * page-aligned offset that has already been zeroed.
+ *
+ * This prevents unnecessarily zeroing out the entire
+ * tail page on every page fault when userspace writes
+ * data byte-by-byte through mmap (after a small
+ * fallocate). It fixes data corruption in the tail page
+ * while preserving the existing valid_size semantics.
+ */
+ err = iomap_zero_range(inode, ei->zeroed_size,
+ mmap_valid_size - ei->zeroed_size, NULL,
+ &exfat_iomap_ops, NULL, NULL);
+ if (err < 0) {
+ inode_unlock(inode);
+ return vmf_fs_error(err);
+ }
+ ei->zeroed_size = mmap_valid_size;
}
+
+ ei->valid_size = new_valid_size;
+ mark_inode_dirty(inode);
}
+ /*
+ * NOTE(review): the zeroing and mark_inode_dirty() above run before
+ * sb_start_pagefault(); confirm that modifying the inode ahead of the
+ * freeze protection is intended.
+ */
+ sb_start_pagefault(inode->i_sb);
+ file_update_time(vmf->vma->vm_file);
+
+ filemap_invalidate_lock_shared(inode->i_mapping);
+ ret = iomap_page_mkwrite(vmf, &exfat_write_iomap_ops, NULL);
+ filemap_invalidate_unlock_shared(inode->i_mapping);
+ sb_end_pagefault(inode->i_sb);
inode_unlock(inode);
- return filemap_page_mkwrite(vmf);
+ return ret;
}
static const struct vm_operations_struct exfat_file_vm_ops = {
@@ -786,6 +874,21 @@ static int exfat_file_mmap_prepare(struct vm_area_desc *desc)
if (unlikely(exfat_forced_shutdown(file_inode(desc->file)->i_sb)))
return -EIO;
+ if (vma_desc_test_all(desc, VMA_SHARED_BIT, VMA_MAYWRITE_BIT)) {
+ struct inode *inode = file_inode(file);
+ loff_t from, to;
+ int err;
+
+ from = ((loff_t)desc->pgoff << PAGE_SHIFT);
+ to = min_t(loff_t, i_size_read(inode),
+ from + vma_desc_size(desc));
+ if (EXFAT_I(inode)->valid_size < to) {
+ err = exfat_extend_valid_size(inode, to);
+ if (err)
+ return err;
+ }
+ }
+
file_accessed(file);
desc->vm_ops = &exfat_file_vm_ops;
return 0;
@@ -800,8 +903,48 @@ static ssize_t exfat_splice_read(struct file *in, loff_t *ppos,
return filemap_splice_read(in, ppos, pipe, len, flags);
}
+/*
+ * open handler: refuses with -EIO after a forced shutdown, otherwise defers
+ * to generic_file_open() and, on success, sets FMODE_CAN_ODIRECT on @filp.
+ */
+static int exfat_file_open(struct inode *inode, struct file *filp)
+{
+	int ret;
+
+	if (unlikely(exfat_forced_shutdown(inode->i_sb)))
+		return -EIO;
+
+	ret = generic_file_open(inode, filp);
+	if (!ret)
+		filp->f_mode |= FMODE_CAN_ODIRECT;
+
+	return ret;
+}
+
+/*
+ * llseek handler.  SEEK_HOLE and SEEK_DATA are resolved through iomap with
+ * exfat_iomap_ops under the shared inode lock, and the result is applied
+ * with vfs_setpos().  Every other @whence goes to generic_file_llseek().
+ */
+static loff_t exfat_file_llseek(struct file *file, loff_t offset, int whence)
+{
+	struct inode *inode = file->f_mapping->host;
+
+	if (whence != SEEK_HOLE && whence != SEEK_DATA)
+		return generic_file_llseek(file, offset, whence);
+
+	inode_lock_shared(inode);
+	if (whence == SEEK_HOLE)
+		offset = iomap_seek_hole(inode, offset, &exfat_iomap_ops);
+	else
+		offset = iomap_seek_data(inode, offset, &exfat_iomap_ops);
+	inode_unlock_shared(inode);
+
+	if (offset < 0)
+		return offset;
+
+	return vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
+}
+
const struct file_operations exfat_file_operations = {
- .llseek = generic_file_llseek,
+ .open = exfat_file_open,
+ .llseek = exfat_file_llseek,
.read_iter = exfat_file_read_iter,
.write_iter = exfat_file_write_iter,
.unlocked_ioctl = exfat_ioctl,
diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
index 1ea4c740fef9e..8e8d94319c3c2 100644
--- a/fs/exfat/inode.c
+++ b/fs/exfat/inode.c
@@ -13,13 +13,16 @@
#include <linux/uio.h>
#include <linux/random.h>
#include <linux/iversion.h>
+#include <linux/iomap.h>
#include "exfat_raw.h"
#include "exfat_fs.h"
+#include "iomap.h"
int __exfat_write_inode(struct inode *inode, int sync)
{
unsigned long long on_disk_size;
+ unsigned long long on_disk_valid_size;
struct exfat_dentry *ep, *ep2;
struct exfat_entry_set_cache es;
struct super_block *sb = inode->i_sb;
@@ -69,22 +72,30 @@ int __exfat_write_inode(struct inode *inode, int sync)
&ep->dentry.file.access_date,
NULL);
- /* File size should be zero if there is no cluster allocated */
- on_disk_size = i_size_read(inode);
+ /*
+ * During a DIO write, valid_size is updated eagerly in iomap_end (so
+ * that concurrent buffered reads see IOMAP_MAPPED) while i_size is
+ * updated asynchronously in end_io. The FAT chain was already
+ * extended to cover ceil(valid_size/cluster_size) clusters. Use the
+ * maximum so the on-disk size field always covers the FAT chain,
+ * preventing fsck from reporting "more clusters are allocated".
+ */
+ on_disk_size = max_t(unsigned long long, i_size_read(inode),
+ ei->valid_size);
if (ei->start_clu == EXFAT_EOF_CLUSTER)
on_disk_size = 0;
-
- ep2->dentry.stream.size = cpu_to_le64(on_disk_size);
/*
- * mmap write does not use exfat_write_end(), valid_size may be
- * extended to the sector-aligned length in exfat_get_block().
- * So we need to fixup valid_size to the writren length.
+ * valid_size on disk must reflect only confirmed data (up to i_size)
+ * and must not exceed on_disk_size.
*/
- if (on_disk_size < ei->valid_size)
- ep2->dentry.stream.valid_size = ep2->dentry.stream.size;
- else
- ep2->dentry.stream.valid_size = cpu_to_le64(ei->valid_size);
+ on_disk_valid_size = min_t(unsigned long long, ei->valid_size,
+ i_size_read(inode));
+ if (ei->start_clu == EXFAT_EOF_CLUSTER)
+ on_disk_valid_size = 0;
+
+ ep2->dentry.stream.size = cpu_to_le64(on_disk_size);
+ ep2->dentry.stream.valid_size = cpu_to_le64(on_disk_valid_size);
if (on_disk_size) {
ep2->dentry.stream.flags = ei->flags;
@@ -123,8 +134,9 @@ void exfat_sync_inode(struct inode *inode)
* Output: errcode, cluster number
* *clu = (~0), if it's unable to allocate a new cluster
*/
-static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
- unsigned int *clu, unsigned int *count, int create)
+int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
+ unsigned int *clu, unsigned int *count, int create,
+ bool *balloc)
{
int ret;
unsigned int last_clu;
@@ -135,10 +147,10 @@ static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
unsigned int local_clu_offset = clu_offset;
unsigned int num_to_be_allocated = 0, num_clusters;
- num_clusters = EXFAT_B_TO_CLU(exfat_ondisk_size(inode), sbi);
-
- if (clu_offset >= num_clusters)
- num_to_be_allocated = clu_offset - num_clusters + 1;
+ num_clusters = exfat_bytes_to_cluster(sbi, exfat_ondisk_size(inode));
+ if (clu_offset > num_clusters ||
+ *count > num_clusters - clu_offset)
+ num_to_be_allocated = clu_offset + *count - num_clusters;
if (!create && (num_to_be_allocated > 0)) {
*clu = EXFAT_EOF_CLUSTER;
@@ -181,7 +193,7 @@ static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
}
ret = exfat_alloc_cluster(inode, num_to_be_allocated, &new_clu,
- inode_needs_sync(inode));
+ inode_needs_sync(inode), true);
if (ret)
return ret;
@@ -215,19 +227,11 @@ static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
}
*clu = new_clu.dir;
+ *count = new_clu.size;
- inode->i_blocks += EXFAT_CLU_TO_B(num_to_be_allocated, sbi) >> 9;
-
- /*
- * Move *clu pointer along FAT chains (hole care) because the
- * caller of this function expect *clu to be the last cluster.
- * This only works when num_to_be_allocated >= 2,
- * *clu = (the first cluster of the allocated chain) =>
- * (the last cluster of ...)
- */
- if (exfat_cluster_walk(sb, clu, num_to_be_allocated - 1, ei->flags))
- return -EIO;
- *count = 1;
+ inode->i_blocks += exfat_cluster_to_sectors(sbi, new_clu.size);
+ if (balloc)
+ *balloc = true;
}
/* hint information */
@@ -237,154 +241,15 @@ static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
return 0;
}
-static int exfat_get_block(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create)
-{
- struct exfat_inode_info *ei = EXFAT_I(inode);
- struct super_block *sb = inode->i_sb;
- struct exfat_sb_info *sbi = EXFAT_SB(sb);
- unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits;
- int err = 0;
- unsigned long mapped_blocks = 0;
- unsigned int cluster, sec_offset, count;
- sector_t last_block;
- sector_t phys = 0;
- sector_t valid_blks;
- loff_t i_size;
-
- mutex_lock(&sbi->s_lock);
- i_size = i_size_read(inode);
- last_block = EXFAT_B_TO_BLK_ROUND_UP(i_size, sb);
- if (iblock >= last_block && !create)
- goto done;
-
- /* Is this block already allocated? */
- count = EXFAT_B_TO_CLU_ROUND_UP(bh_result->b_size, sbi);
- err = exfat_map_cluster(inode, iblock >> sbi->sect_per_clus_bits,
- &cluster, &count, create);
- if (err) {
- if (err != -ENOSPC)
- exfat_fs_error_ratelimit(sb,
- "failed to bmap (inode : %p iblock : %llu, err : %d)",
- inode, (unsigned long long)iblock, err);
- goto unlock_ret;
- }
-
- if (cluster == EXFAT_EOF_CLUSTER)
- goto done;
-
- /* sector offset in cluster */
- sec_offset = iblock & (sbi->sect_per_clus - 1);
-
- phys = exfat_cluster_to_sector(sbi, cluster) + sec_offset;
- mapped_blocks = ((unsigned long)count << sbi->sect_per_clus_bits) - sec_offset;
- max_blocks = min(mapped_blocks, max_blocks);
-
- map_bh(bh_result, sb, phys);
- if (buffer_delay(bh_result))
- clear_buffer_delay(bh_result);
-
- /*
- * In most cases, we just need to set bh_result to mapped, unmapped
- * or new status as follows:
- * 1. i_size == valid_size
- * 2. write case (create == 1)
- * 3. direct_read (!bh_result->b_folio)
- * -> the unwritten part will be zeroed in exfat_direct_IO()
- *
- * Otherwise, in the case of buffered read, it is necessary to take
- * care the last nested block if valid_size is not equal to i_size.
- */
- if (i_size == ei->valid_size || create || !bh_result->b_folio)
- valid_blks = EXFAT_B_TO_BLK_ROUND_UP(ei->valid_size, sb);
- else
- valid_blks = EXFAT_B_TO_BLK(ei->valid_size, sb);
-
- /* The range has been fully written, map it */
- if (iblock + max_blocks < valid_blks)
- goto done;
-
- /* The range has been partially written, map the written part */
- if (iblock < valid_blks) {
- max_blocks = valid_blks - iblock;
- goto done;
- }
-
- /* The area has not been written, map and mark as new for create case */
- if (create) {
- set_buffer_new(bh_result);
- ei->valid_size = EXFAT_BLK_TO_B(iblock + max_blocks, sb);
- mark_inode_dirty(inode);
- goto done;
- }
-
- /*
- * The area has just one block partially written.
- * In that case, we should read and fill the unwritten part of
- * a block with zero.
- */
- if (bh_result->b_folio && iblock == valid_blks &&
- (ei->valid_size & (sb->s_blocksize - 1))) {
- loff_t size, pos;
- void *addr;
-
- max_blocks = 1;
-
- /*
- * No buffer_head is allocated.
- * (1) bmap: It's enough to set blocknr without I/O.
- * (2) read: The unwritten part should be filled with zero.
- * If a folio does not have any buffers,
- * let's returns -EAGAIN to fallback to
- * block_read_full_folio() for per-bh IO.
- */
- if (!folio_buffers(bh_result->b_folio)) {
- err = -EAGAIN;
- goto done;
- }
-
- pos = EXFAT_BLK_TO_B(iblock, sb);
- size = ei->valid_size - pos;
- addr = folio_address(bh_result->b_folio) +
- offset_in_folio(bh_result->b_folio, pos);
-
- /* Check if bh->b_data points to proper addr in folio */
- if (bh_result->b_data != addr) {
- exfat_fs_error_ratelimit(sb,
- "b_data(%p) != folio_addr(%p)",
- bh_result->b_data, addr);
- err = -EINVAL;
- goto done;
- }
-
- /* Read a block */
- err = bh_read(bh_result, 0);
- if (err < 0)
- goto done;
-
- /* Zero unwritten part of a block */
- memset(bh_result->b_data + size, 0, bh_result->b_size - size);
- err = 0;
- goto done;
- }
-
- /*
- * The area has not been written, clear mapped for read/bmap cases.
- * If so, it will be filled with zero without reading from disk.
- */
- clear_buffer_mapped(bh_result);
-done:
- bh_result->b_size = EXFAT_BLK_TO_B(max_blocks, sb);
- if (err < 0)
- clear_buffer_mapped(bh_result);
-unlock_ret:
- mutex_unlock(&sbi->s_lock);
- return err;
-}
-
static int exfat_read_folio(struct file *file, struct folio *folio)
{
- return mpage_read_folio(folio, exfat_get_block);
+ struct iomap_read_folio_ctx ctx = {
+ .cur_folio = folio,
+ .ops = &exfat_iomap_bio_read_ops,
+ };
+
+ iomap_read_folio(&exfat_iomap_ops, &ctx, NULL);
+ return 0;
}
static void exfat_readahead(struct readahead_control *rac)
@@ -393,6 +258,10 @@ static void exfat_readahead(struct readahead_control *rac)
struct inode *inode = mapping->host;
struct exfat_inode_info *ei = EXFAT_I(inode);
loff_t pos = readahead_pos(rac);
+ struct iomap_read_folio_ctx ctx = {
+ .ops = &exfat_iomap_bio_read_ops,
+ .rac = rac,
+ };
/* Range cross valid_size, read it page by page. */
if (ei->valid_size < i_size_read(inode) &&
@@ -400,115 +269,22 @@ static void exfat_readahead(struct readahead_control *rac)
ei->valid_size < pos + readahead_length(rac))
return;
- mpage_readahead(rac, exfat_get_block);
+ iomap_readahead(&exfat_iomap_ops, &ctx, NULL);
}
static int exfat_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
- if (unlikely(exfat_forced_shutdown(mapping->host->i_sb)))
- return -EIO;
-
- return mpage_writepages(mapping, wbc, exfat_get_block);
-}
-
-static void exfat_write_failed(struct address_space *mapping, loff_t to)
-{
- struct inode *inode = mapping->host;
-
- if (to > i_size_read(inode)) {
- truncate_pagecache(inode, i_size_read(inode));
- inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
- exfat_truncate(inode);
- }
-}
-
-static int exfat_write_begin(const struct kiocb *iocb,
- struct address_space *mapping,
- loff_t pos, unsigned int len,
- struct folio **foliop, void **fsdata)
-{
- int ret;
+ struct iomap_writepage_ctx wpc = {
+ .inode = mapping->host,
+ .wbc = wbc,
+ .ops = &exfat_writeback_ops,
+ };
if (unlikely(exfat_forced_shutdown(mapping->host->i_sb)))
return -EIO;
- ret = block_write_begin(mapping, pos, len, foliop, exfat_get_block);
-
- if (ret < 0)
- exfat_write_failed(mapping, pos+len);
-
- return ret;
-}
-
-static int exfat_write_end(const struct kiocb *iocb,
- struct address_space *mapping,
- loff_t pos, unsigned int len, unsigned int copied,
- struct folio *folio, void *fsdata)
-{
- struct inode *inode = mapping->host;
- struct exfat_inode_info *ei = EXFAT_I(inode);
- int err;
-
- err = generic_write_end(iocb, mapping, pos, len, copied, folio, fsdata);
- if (err < len)
- exfat_write_failed(mapping, pos+len);
-
- if (!(err < 0) && pos + err > ei->valid_size) {
- ei->valid_size = pos + err;
- mark_inode_dirty(inode);
- }
-
- if (!(err < 0) && !(ei->attr & EXFAT_ATTR_ARCHIVE)) {
- inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
- ei->attr |= EXFAT_ATTR_ARCHIVE;
- mark_inode_dirty(inode);
- }
-
- return err;
-}
-
-static ssize_t exfat_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
-{
- struct address_space *mapping = iocb->ki_filp->f_mapping;
- struct inode *inode = mapping->host;
- struct exfat_inode_info *ei = EXFAT_I(inode);
- loff_t pos = iocb->ki_pos;
- loff_t size = pos + iov_iter_count(iter);
- int rw = iov_iter_rw(iter);
- ssize_t ret;
-
- /*
- * Need to use the DIO_LOCKING for avoiding the race
- * condition of exfat_get_block() and ->truncate().
- */
- ret = blockdev_direct_IO(iocb, inode, iter, exfat_get_block);
- if (ret < 0) {
- if (rw == WRITE && ret != -EIOCBQUEUED)
- exfat_write_failed(mapping, size);
-
- return ret;
- }
-
- size = pos + ret;
-
- if (rw == WRITE) {
- /*
- * If the block had been partially written before this write,
- * ->valid_size will not be updated in exfat_get_block(),
- * update it here.
- */
- if (ei->valid_size < size) {
- ei->valid_size = size;
- mark_inode_dirty(inode);
- }
- } else if (pos < ei->valid_size && ei->valid_size < size) {
- /* zero the unwritten part in the partially written block */
- iov_iter_revert(iter, size - ei->valid_size);
- iov_iter_zero(size - ei->valid_size, iter);
- }
-
- return ret;
+ return iomap_writepages(&wpc);
}
static sector_t exfat_aop_bmap(struct address_space *mapping, sector_t block)
@@ -517,34 +293,22 @@ static sector_t exfat_aop_bmap(struct address_space *mapping, sector_t block)
/* exfat_get_cluster() assumes the requested blocknr isn't truncated. */
down_read(&EXFAT_I(mapping->host)->truncate_lock);
- blocknr = generic_block_bmap(mapping, block, exfat_get_block);
+ blocknr = iomap_bmap(mapping, block, &exfat_iomap_ops);
up_read(&EXFAT_I(mapping->host)->truncate_lock);
return blocknr;
}
-/*
- * exfat_block_truncate_page() zeroes out a mapping from file offset `from'
- * up to the end of the block which corresponds to `from'.
- * This is required during truncate to physically zeroout the tail end
- * of that block so it doesn't yield old data if the file is later grown.
- * Also, avoid causing failure from fsx for cases of "data past EOF"
- */
-int exfat_block_truncate_page(struct inode *inode, loff_t from)
-{
- return block_truncate_page(inode->i_mapping, from, exfat_get_block);
-}
-
static const struct address_space_operations exfat_aops = {
- .dirty_folio = block_dirty_folio,
- .invalidate_folio = block_invalidate_folio,
- .read_folio = exfat_read_folio,
- .readahead = exfat_readahead,
- .writepages = exfat_writepages,
- .write_begin = exfat_write_begin,
- .write_end = exfat_write_end,
- .direct_IO = exfat_direct_IO,
- .bmap = exfat_aop_bmap,
- .migrate_folio = buffer_migrate_folio,
+ .read_folio = exfat_read_folio,
+ .readahead = exfat_readahead,
+ .writepages = exfat_writepages,
+ .dirty_folio = iomap_dirty_folio,
+ .bmap = exfat_aop_bmap,
+ .migrate_folio = filemap_migrate_folio,
+ .is_partially_uptodate = iomap_is_partially_uptodate,
+ .error_remove_folio = generic_error_remove_folio,
+ .release_folio = iomap_release_folio,
+ .invalidate_folio = iomap_invalidate_folio,
};
static inline unsigned long exfat_hash(loff_t i_pos)
@@ -608,6 +372,7 @@ static int exfat_fill_inode(struct inode *inode, struct exfat_dir_entry *info)
ei->flags = info->flags;
ei->type = info->type;
ei->valid_size = info->valid_size;
+ ei->zeroed_size = info->valid_size;
ei->version = 0;
ei->hint_stat.eidx = 0;
diff --git a/fs/exfat/iomap.c b/fs/exfat/iomap.c
new file mode 100644
index 0000000000000..3ac1eebe997f5
--- /dev/null
+++ b/fs/exfat/iomap.c
@@ -0,0 +1,265 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * iomap callback functions
+ *
+ * Copyright (C) 2026 Namjae Jeon <linkinjeon@kernel.org>
+ */
+
+#include <linux/iomap.h>
+#include <linux/pagemap.h>
+
+#include "exfat_raw.h"
+#include "exfat_fs.h"
+#include "iomap.h"
+
+/*
+ * exfat_file_write_dio_end_io - Direct I/O write completion handler
+ *
+ * Updates i_size if the write extended the file. Called from the dio layer
+ * after I/O completion.
+ */
+static int exfat_file_write_dio_end_io(struct kiocb *iocb, ssize_t size,
+ int error, unsigned int flags)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+
+ if (error)
+ return error;
+
+ if (size && i_size_read(inode) < iocb->ki_pos + size) {
+ i_size_write(inode, iocb->ki_pos + size);
+ mark_inode_dirty(inode);
+ }
+
+ return 0;
+}
+
+const struct iomap_dio_ops exfat_write_dio_ops = {
+ .end_io = exfat_file_write_dio_end_io,
+};
+
+static int __exfat_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+ unsigned int flags, struct iomap *iomap, bool may_alloc)
+{
+ struct super_block *sb = inode->i_sb;
+ struct exfat_sb_info *sbi = EXFAT_SB(sb);
+ struct exfat_inode_info *ei = EXFAT_I(inode);
+ unsigned int cluster, num_clusters;
+ loff_t cluster_offset, cluster_length;
+ int err;
+ bool balloc = false;
+
+ if (!may_alloc) {
+ /* Completely beyond EOF. Treat as hole */
+ if (i_size_read(inode) <= offset) {
+ iomap->type = IOMAP_HOLE;
+ iomap->addr = IOMAP_NULL_ADDR;
+ iomap->offset = offset;
+ iomap->length = length;
+ return 0;
+ }
+
+ /* Clamp length if the requested range goes beyond i_size */
+ if (offset + length > i_size_read(inode))
+ length = round_up(i_size_read(inode),
+ i_blocksize(inode)) - offset;
+ }
+
+ num_clusters = exfat_bytes_to_cluster_round_up(sbi,
+ offset + length) - exfat_bytes_to_cluster(sbi, offset);
+
+ mutex_lock(&sbi->s_lock);
+ iomap->bdev = inode->i_sb->s_bdev;
+ iomap->offset = offset;
+
+ err = exfat_map_cluster(inode, exfat_bytes_to_cluster(sbi, offset),
+ &cluster, &num_clusters, may_alloc, &balloc);
+ if (err)
+ goto out;
+
+ cluster_offset = exfat_cluster_offset(sbi, offset);
+ cluster_length = exfat_cluster_to_bytes(sbi, num_clusters);
+
+ iomap->length = min_t(loff_t, length, cluster_length - cluster_offset);
+ iomap->addr = exfat_cluster_to_phys_bytes(sbi, cluster) + cluster_offset;
+ iomap->type = IOMAP_MAPPED;
+ iomap->flags = IOMAP_F_MERGED;
+
+ if (may_alloc || flags & IOMAP_ZERO) {
+ if (balloc)
+ iomap->flags |= IOMAP_F_NEW;
+ else if (iomap->offset + iomap->length >= ei->valid_size) {
+ /*
+ * This is a write whose mapping ends at or beyond
+ * the current valid_size. The region between the old
+ * valid_size and the end of this write needs to be
+ * zeroed in the page cache to prevent stale data
+ * exposure (see IOMAP_F_ZERO_TAIL handling in
+ * __iomap_write_begin()).
+ */
+ iomap->flags |= IOMAP_F_ZERO_TAIL;
+ }
+ } else {
+ /*
+ * valid_size is tracked in byte granularity and
+ * marks the exact boundary between valid data and
+ * holes (or unwritten space).
+ *
+ * When IOMAP_REPORT is set (used by lseek(SEEK_HOLE)
+ * and SEEK_DATA), we return IOMAP_HOLE. This allows
+ * iomap_seek_hole_iter() to directly return the
+ * precise byte position.
+ *
+ * For normal I/O paths (without IOMAP_REPORT) we
+ * return IOMAP_UNWRITTEN so the write path can
+ * distinguish it from a real hole.
+ */
+ if (offset >= ei->valid_size) {
+ iomap->type = flags & IOMAP_REPORT ?
+ IOMAP_HOLE : IOMAP_UNWRITTEN;
+ } else if (offset + iomap->length > ei->valid_size) {
+ if (flags & IOMAP_REPORT) {
+ /*
+ * For SEEK_HOLE/SEEK_DATA, clip the length
+ * to the exact byte boundary (valid_size).
+ * This ensures the caller gets the precise
+ * hole position in byte units.
+ */
+ iomap->length = ei->valid_size - iomap->offset;
+ } else
+ iomap->length = round_up(ei->valid_size,
+ i_blocksize(inode)) -
+ iomap->offset;
+ }
+ }
+
+ iomap->flags |= IOMAP_F_MERGED;
+out:
+ mutex_unlock(&sbi->s_lock);
+ return err;
+}
+
+static int exfat_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+ unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
+{
+ return __exfat_iomap_begin(inode, offset, length, flags, iomap, false);
+}
+
+static int exfat_write_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+ unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
+{
+ return __exfat_iomap_begin(inode, offset, length, flags, iomap, true);
+}
+
+const struct iomap_ops exfat_iomap_ops = {
+ .iomap_begin = exfat_iomap_begin,
+};
+
+/*
+ * exfat_write_iomap_end - Update the state after write
+ *
+ * Extends ->valid_size to cover the newly written range.
+ * Marks the inode dirty if metadata was changed.
+ */
+static int exfat_write_iomap_end(struct inode *inode, loff_t pos, loff_t length,
+ ssize_t written, unsigned int flags, struct iomap *iomap)
+{
+ struct exfat_inode_info *ei = EXFAT_I(inode);
+ bool dirtied = false;
+ loff_t end;
+
+ if (!written)
+ return 0;
+
+ end = pos + written;
+
+ if (ei->valid_size < end) {
+ ei->valid_size = end;
+ if (ei->zeroed_size < end)
+ ei->zeroed_size = end;
+ dirtied = true;
+ }
+
+ if (dirtied || iomap->flags & IOMAP_F_SIZE_CHANGED)
+ mark_inode_dirty(inode);
+
+ return written;
+}
+
+const struct iomap_ops exfat_write_iomap_ops = {
+ .iomap_begin = exfat_write_iomap_begin,
+ .iomap_end = exfat_write_iomap_end,
+};
+
+/*
+ * exfat_writeback_range - Map folio during writeback
+ *
+ * Called for each folio during writeback. If the folio falls outside the
+ * current iomap, remaps by calling __exfat_iomap_begin() (no allocation).
+ */
+static ssize_t exfat_writeback_range(struct iomap_writepage_ctx *wpc,
+ struct folio *folio, u64 offset, unsigned int len, u64 end_pos)
+{
+ if (offset < wpc->iomap.offset ||
+ offset >= wpc->iomap.offset + wpc->iomap.length) {
+ int error;
+
+ error = __exfat_iomap_begin(wpc->inode, offset, len,
+ 0, &wpc->iomap, false);
+ if (error)
+ return error;
+ }
+
+ return iomap_add_to_ioend(wpc, folio, offset, end_pos, len);
+}
+
+const struct iomap_writeback_ops exfat_writeback_ops = {
+ .writeback_range = exfat_writeback_range,
+ .writeback_submit = iomap_ioend_writeback_submit,
+};
+
+/**
+ * exfat_iomap_read_end_io - iomap read bio completion handler for exFAT
+ * @bio: bio that has completed reading
+ *
+ * exfat_iomap_begin() rounds up MAPPED extents to the block boundary of
+ * valid_size. This ensures that any subsequent blocks are treated as
+ * IOMAP_UNWRITTEN, but it also causes the "straddle block" containing
+ * valid_size to be read from disk. The disk data beyond valid_size in
+ * this block is stale and must be zeroed to prevent data leakage.
+ */
+static void exfat_iomap_read_end_io(struct bio *bio)
+{
+ int error = blk_status_to_errno(bio->bi_status);
+ struct folio_iter iter;
+
+ bio_for_each_folio_all(iter, bio) {
+ struct folio *folio = iter.folio;
+ struct exfat_inode_info *ei = EXFAT_I(folio->mapping->host);
+ s64 valid_size;
+ loff_t pos = folio_pos(folio);
+
+ valid_size = ei->valid_size;
+ if (pos + iter.offset < valid_size &&
+ pos + iter.offset + iter.length > valid_size)
+ folio_zero_segment(folio, offset_in_folio(folio, valid_size),
+ iter.offset + iter.length);
+
+ iomap_finish_folio_read(folio, iter.offset, iter.length, error);
+ }
+ bio_put(bio);
+}
+
+static void exfat_iomap_bio_submit_read(const struct iomap_iter *iter,
+ struct iomap_read_folio_ctx *ctx)
+{
+ struct bio *bio = ctx->read_ctx;
+
+ bio->bi_end_io = exfat_iomap_read_end_io;
+ submit_bio(bio);
+}
+
+const struct iomap_read_ops exfat_iomap_bio_read_ops = {
+ .read_folio_range = iomap_bio_read_folio_range,
+ .submit_read = exfat_iomap_bio_submit_read,
+};
diff --git a/fs/exfat/iomap.h b/fs/exfat/iomap.h
new file mode 100644
index 0000000000000..830388f386f4e
--- /dev/null
+++ b/fs/exfat/iomap.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2026 Namjae Jeon <linkinjeon@kernel.org>
+ */
+
+#ifndef _LINUX_EXFAT_IOMAP_H
+#define _LINUX_EXFAT_IOMAP_H
+
+extern const struct iomap_dio_ops exfat_write_dio_ops;
+extern const struct iomap_ops exfat_iomap_ops;
+extern const struct iomap_ops exfat_write_iomap_ops;
+extern const struct iomap_writeback_ops exfat_writeback_ops;
+extern const struct iomap_read_ops exfat_iomap_bio_read_ops;
+
+#endif /* _LINUX_EXFAT_IOMAP_H */
diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c
index 2c5636634b4a4..e69da0f5a111d 100644
--- a/fs/exfat/namei.c
+++ b/fs/exfat/namei.c
@@ -208,7 +208,7 @@ static int exfat_search_empty_slot(struct super_block *sb,
int dentries_per_clu;
struct exfat_chain clu;
struct exfat_sb_info *sbi = EXFAT_SB(sb);
- int total_entries = EXFAT_CLU_TO_DEN(p_dir->size, sbi);
+ unsigned int total_entries = exfat_cluster_to_dentries(sbi, p_dir->size);
dentries_per_clu = sbi->dentries_per_clu;
@@ -266,7 +266,7 @@ static int exfat_search_empty_slot(struct super_block *sb,
static int exfat_check_max_dentries(struct inode *inode)
{
- if (EXFAT_B_TO_DEN(i_size_read(inode)) >= MAX_EXFAT_DENTRIES) {
+ if (exfat_bytes_to_dentries(i_size_read(inode)) >= MAX_EXFAT_DENTRIES) {
/*
* exFAT spec allows a dir to grow up to 8388608(256MB)
* dentries
@@ -314,7 +314,8 @@ int exfat_find_empty_entry(struct inode *inode,
}
exfat_chain_set(p_dir, ei->start_clu,
- EXFAT_B_TO_CLU(i_size_read(inode), sbi), ei->flags);
+ exfat_bytes_to_cluster(sbi, i_size_read(inode)),
+ ei->flags);
while ((dentry = exfat_search_empty_slot(sb, &hint_femp, p_dir,
num_entries, es)) < 0) {
@@ -340,7 +341,7 @@ int exfat_find_empty_entry(struct inode *inode,
}
/* allocate a cluster */
- ret = exfat_alloc_cluster(inode, 1, &clu, IS_DIRSYNC(inode));
+ ret = exfat_alloc_cluster(inode, 1, &clu, IS_DIRSYNC(inode), false);
if (ret)
return ret;
@@ -375,7 +376,7 @@ int exfat_find_empty_entry(struct inode *inode,
hint_femp.cur.size++;
p_dir->size++;
- size = EXFAT_CLU_TO_B(p_dir->size, sbi);
+ size = exfat_cluster_to_bytes(sbi, p_dir->size);
/* directory inode should be updated in here */
i_size_write(inode, size);
@@ -604,7 +605,7 @@ static int exfat_find(struct inode *dir, const struct qstr *qname,
return ret;
exfat_chain_set(&cdir, ei->start_clu,
- EXFAT_B_TO_CLU(i_size_read(dir), sbi), ei->flags);
+ exfat_bytes_to_cluster(sbi, i_size_read(dir)), ei->flags);
/* check the validation of hint_stat and initialize it if required */
if (ei->version != (inode_peek_iversion_raw(dir) & 0xffffffff)) {
@@ -681,7 +682,7 @@ static int exfat_find(struct inode *dir, const struct qstr *qname,
return -EIO;
}
- if (unlikely(EXFAT_B_TO_CLU_ROUND_UP(info->size, sbi) > sbi->used_clusters)) {
+ if (unlikely(exfat_bytes_to_cluster_round_up(sbi, info->size) > sbi->used_clusters)) {
exfat_fs_error(sb, "data size is invalid(%lld)", info->size);
return -EIO;
}
@@ -695,7 +696,8 @@ static int exfat_find(struct inode *dir, const struct qstr *qname,
if (info->type == TYPE_DIR) {
exfat_chain_set(&cdir, info->start_clu,
- EXFAT_B_TO_CLU(info->size, sbi), info->flags);
+ exfat_bytes_to_cluster(sbi, info->size),
+ info->flags);
count = exfat_count_dir_entries(sb, &cdir);
if (count < 0)
return -EIO;
@@ -705,71 +707,44 @@ static int exfat_find(struct inode *dir, const struct qstr *qname,
return 0;
}
-static int exfat_d_anon_disconn(struct dentry *dentry)
-{
- return IS_ROOT(dentry) && (dentry->d_flags & DCACHE_DISCONNECTED);
-}
-
static struct dentry *exfat_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
struct super_block *sb = dir->i_sb;
- struct inode *inode;
+ struct inode *inode = NULL;
struct dentry *alias;
struct exfat_dir_entry info;
int err;
loff_t i_pos;
- mode_t i_mode;
mutex_lock(&EXFAT_SB(sb)->s_lock);
err = exfat_find(dir, &dentry->d_name, &info);
if (err) {
- if (err == -ENOENT) {
- inode = NULL;
- goto out;
- }
- goto unlock;
+ if (unlikely(err != -ENOENT))
+ inode = ERR_PTR(err);
+ goto out;
}
i_pos = exfat_make_i_pos(&info);
inode = exfat_build_inode(sb, &info, i_pos);
- err = PTR_ERR_OR_ZERO(inode);
- if (err)
- goto unlock;
+ if (IS_ERR(inode) || S_ISDIR(inode->i_mode))
+ goto out;
- i_mode = inode->i_mode;
alias = d_find_alias(inode);
/*
* Checking "alias->d_parent == dentry->d_parent" to make sure
* FS is not corrupted (especially double linked dir).
*/
- if (alias && alias->d_parent == dentry->d_parent &&
- !exfat_d_anon_disconn(alias)) {
-
+ if (alias && alias->d_parent == dentry->d_parent) {
/*
- * Unhashed alias is able to exist because of revalidate()
- * called by lookup_fast. You can easily make this status
- * by calling create and lookup concurrently
- * In such case, we reuse an alias instead of new dentry
+ * This inode has a hashed alias dentry with a different
+ * name. This means the user did a ->lookup() by
+ * another name (longname vs 8.3 alias of it) in the past.
+ *
+ * Switch to new one for reason of locality if possible.
*/
- if (d_unhashed(alias)) {
- WARN_ON(alias->d_name.hash_len !=
- dentry->d_name.hash_len);
- exfat_info(sb, "rehashed a dentry(%p) in read lookup",
- alias);
- d_drop(dentry);
- d_rehash(alias);
- } else if (!S_ISDIR(i_mode)) {
- /*
- * This inode has non anonymous-DCACHE_DISCONNECTED
- * dentry. This means, the user did ->lookup() by an
- * another name (longname vs 8.3 alias of it) in past.
- *
- * Switch to new one for reason of locality if possible.
- */
- d_move(alias, dentry);
- }
+ d_move(alias, dentry);
iput(inode);
mutex_unlock(&EXFAT_SB(sb)->s_lock);
return alias;
@@ -781,9 +756,6 @@ out:
exfat_d_version_set(dentry, inode_query_iversion(dir));
return d_splice_alias(inode, dentry);
-unlock:
- mutex_unlock(&EXFAT_SB(sb)->s_lock);
- return ERR_PTR(err);
}
/* remove an entry, BUT don't truncate */
@@ -951,7 +923,7 @@ static int exfat_rmdir(struct inode *dir, struct dentry *dentry)
}
exfat_chain_set(&clu_to_free, ei->start_clu,
- EXFAT_B_TO_CLU_ROUND_UP(i_size_read(inode), sbi), ei->flags);
+ exfat_bytes_to_cluster_round_up(sbi, i_size_read(inode)), ei->flags);
err = exfat_check_dir_empty(sb, &clu_to_free);
if (err) {
@@ -1158,8 +1130,8 @@ static int __exfat_rename(struct inode *old_parent_inode,
new_clu.dir = new_ei->start_clu;
new_clu.size =
- EXFAT_B_TO_CLU_ROUND_UP(i_size_read(new_inode),
- sbi);
+ exfat_bytes_to_cluster_round_up(sbi,
+ i_size_read(new_inode));
new_clu.flags = new_ei->flags;
ret = exfat_check_dir_empty(sb, &new_clu);
@@ -1203,8 +1175,8 @@ static int __exfat_rename(struct inode *old_parent_inode,
struct exfat_chain new_clu_to_free;
exfat_chain_set(&new_clu_to_free, new_ei->start_clu,
- EXFAT_B_TO_CLU_ROUND_UP(i_size_read(new_inode),
- sbi), new_ei->flags);
+ exfat_bytes_to_cluster_round_up(sbi, i_size_read(new_inode)),
+ new_ei->flags);
if (exfat_free_cluster(new_inode, &new_clu_to_free)) {
/* just set I/O error only */
diff --git a/fs/exfat/nls.c b/fs/exfat/nls.c
index 57db08a5271cf..055447edcf9a6 100644
--- a/fs/exfat/nls.c
+++ b/fs/exfat/nls.c
@@ -769,13 +769,18 @@ int exfat_create_upcase_table(struct super_block *sb)
tbl_clu = le32_to_cpu(ep->dentry.upcase.start_clu);
tbl_size = le64_to_cpu(ep->dentry.upcase.size);
-
- sector = exfat_cluster_to_sector(sbi, tbl_clu);
- num_sectors = ((tbl_size - 1) >> blksize_bits) + 1;
- ret = exfat_load_upcase_table(sb, sector, num_sectors,
- le32_to_cpu(ep->dentry.upcase.checksum));
-
+ if (tbl_size) {
+ sector = exfat_cluster_to_sector(sbi, tbl_clu);
+ num_sectors = ((tbl_size - 1) >> blksize_bits) + 1;
+ ret = exfat_load_upcase_table(sb, sector, num_sectors,
+ le32_to_cpu(ep->dentry.upcase.checksum));
+ } else {
+ exfat_fs_error(sb,
+ "bad upcase table size (0 bytes). Please run fsck");
+ ret = -EINVAL;
+ }
brelse(bh);
+
if (ret && ret != -EIO) {
/* free memory from exfat_load_upcase_table call */
exfat_free_upcase_table(sbi);
@@ -790,6 +795,8 @@ int exfat_create_upcase_table(struct super_block *sb)
return -EIO;
}
+ exfat_fs_error(sb, "no upcase table entry. Please run fsck");
+
load_default:
/* load default upcase table */
return exfat_load_default_upcase_table(sb);
diff --git a/fs/exfat/super.c b/fs/exfat/super.c
index 95d87e2d7717f..388db271c6bf5 100644
--- a/fs/exfat/super.c
+++ b/fs/exfat/super.c
@@ -369,7 +369,7 @@ static int exfat_read_root(struct inode *inode, struct exfat_chain *root_clu)
ei->hint_stat.clu = sbi->root_dir;
ei->hint_femp.eidx = EXFAT_HINT_NONE;
- i_size_write(inode, EXFAT_CLU_TO_B(root_clu->size, sbi));
+ i_size_write(inode, exfat_cluster_to_bytes(sbi, root_clu->size));
num_subdirs = exfat_count_dir_entries(sb, root_clu);
if (num_subdirs < 0)
@@ -499,6 +499,7 @@ static int exfat_read_boot_sector(struct super_block *sb)
if (p_boot->num_fats == 2)
sbi->FAT2_start_sector += sbi->num_FAT_sectors;
sbi->data_start_sector = le32_to_cpu(p_boot->clu_offset);
+ sbi->data_start_bytes = sbi->data_start_sector << p_boot->sect_size_bits;
sbi->num_sectors = le64_to_cpu(p_boot->vol_length);
/* because the cluster index starts with 2 */
sbi->num_clusters = le32_to_cpu(p_boot->clu_count) +
@@ -538,7 +539,7 @@ static int exfat_read_boot_sector(struct super_block *sb)
* machines.
*/
sb->s_maxbytes = min(MAX_LFS_FILESIZE,
- EXFAT_CLU_TO_B((loff_t)EXFAT_MAX_NUM_CLUSTER, sbi));
+ exfat_cluster_to_bytes(sbi, (loff_t)EXFAT_MAX_NUM_CLUSTER));
/* check logical sector size */
if (exfat_calibrate_blocksize(sb, 1 << p_boot->sect_size_bits))
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index d7b648421a70f..d6451c4208d49 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -836,6 +836,7 @@ static int __iomap_write_begin(const struct iomap_iter *iter,
return -EIO;
folio_zero_segments(folio, poff, from, to, poff + plen);
} else {
+ const struct iomap *iomap = iomap_iter_srcmap(iter);
int status;
if (iter->flags & IOMAP_NOWAIT)
@@ -853,6 +854,9 @@ static int __iomap_write_begin(const struct iomap_iter *iter,
len, status, GFP_NOFS);
if (status)
return status;
+
+ if (iomap->flags & IOMAP_F_ZERO_TAIL)
+ folio_zero_segment(folio, to, poff + plen);
}
iomap_set_range_uptodate(folio, poff, plen);
} while ((block_start += plen) < block_end);
@@ -1058,7 +1062,6 @@ static bool iomap_write_end_inline(const struct iomap_iter *iter,
void *addr;
WARN_ON_ONCE(!folio_test_uptodate(folio));
- BUG_ON(!iomap_inline_data_valid(iomap));
if (WARN_ON_ONCE(!iomap->inline_data))
return false;
@@ -1543,6 +1546,8 @@ static int iomap_zero_iter(struct iomap_iter *iter, bool *did_zero,
size_t offset;
bool ret;
+ balance_dirty_pages_ratelimited(iter->inode->i_mapping);
+
bytes = min_t(u64, SIZE_MAX, bytes);
status = iomap_write_begin(iter, write_ops, &folio, &offset,
&bytes);
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index b36ee619cdcdd..b485e3b191daf 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -69,7 +69,7 @@ static void iomap_dio_submit_bio(const struct iomap_iter *iter,
/* Sync dio can't be polled reliably */
if ((iocb->ki_flags & IOCB_HIPRI) && !is_sync_kiocb(iocb)) {
- bio_set_polled(bio, iocb);
+ bio->bi_opf |= REQ_POLLED;
WRITE_ONCE(iocb->private, bio);
}
@@ -603,9 +603,6 @@ static int iomap_dio_inline_iter(struct iomap_iter *iomi, struct iomap_dio *dio)
if (WARN_ON_ONCE(!inline_data))
return -EIO;
- if (WARN_ON_ONCE(!iomap_inline_data_valid(iomap)))
- return -EIO;
-
if (dio->flags & IOMAP_DIO_WRITE) {
loff_t size = iomi->inode->i_size;
diff --git a/fs/iomap/iter.c b/fs/iomap/iter.c
index c04796f6e57fa..e4a29829591a7 100644
--- a/fs/iomap/iter.c
+++ b/fs/iomap/iter.c
@@ -6,17 +6,13 @@
#include <linux/iomap.h>
#include "trace.h"
-static inline void iomap_iter_reset_iomap(struct iomap_iter *iter)
+static inline void iomap_iter_clean_fbatch(struct iomap_iter *iter)
{
if (iter->iomap.flags & IOMAP_F_FOLIO_BATCH) {
folio_batch_release(iter->fbatch);
folio_batch_reinit(iter->fbatch);
iter->iomap.flags &= ~IOMAP_F_FOLIO_BATCH;
}
-
- iter->status = 0;
- memset(&iter->iomap, 0, sizeof(iter->iomap));
- memset(&iter->srcmap, 0, sizeof(iter->srcmap));
}
/* Advance the current iterator position and decrement the remaining length */
@@ -102,10 +98,14 @@ int iomap_iter(struct iomap_iter *iter, const struct iomap_ops *ops)
ret = 0;
else
ret = 1;
- iomap_iter_reset_iomap(iter);
+ iomap_iter_clean_fbatch(iter);
+ iter->status = 0;
if (ret <= 0)
return ret;
+ memset(&iter->iomap, 0, sizeof(iter->iomap));
+ memset(&iter->srcmap, 0, sizeof(iter->srcmap));
+
begin:
ret = ops->iomap_begin(iter->inode, iter->pos, iter->len, iter->flags,
&iter->iomap, &iter->srcmap);