fs/exfat/iomap.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265

// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * iomap callback functions
 *
 * Copyright (C) 2026 Namjae Jeon <linkinjeon@kernel.org>
 */

#include <linux/iomap.h>
#include <linux/pagemap.h>

#include "exfat_raw.h"
#include "exfat_fs.h"
#include "iomap.h"

/*
 * exfat_file_write_dio_end_io - completion hook for direct I/O writes
 * @iocb:  kiocb describing the write; ->ki_pos is used as the start offset
 * @size:  number of bytes the direct I/O transferred
 * @error: completion status of the I/O (0 on success)
 * @flags: completion flags from the dio layer (not used here)
 *
 * If the write finished without error and ended past the current i_size,
 * grow i_size to the end of the written range and mark the inode dirty.
 *
 * Return: @error when it is non-zero, otherwise 0.
 */
static int exfat_file_write_dio_end_io(struct kiocb *iocb, ssize_t size,
		int error, unsigned int flags)
{
	struct inode *inode;
	loff_t write_end;

	if (error)
		return error;

	/* Nothing was transferred, so the file size cannot have changed. */
	if (!size)
		return 0;

	inode = file_inode(iocb->ki_filp);
	write_end = iocb->ki_pos + size;

	if (i_size_read(inode) < write_end) {
		i_size_write(inode, write_end);
		mark_inode_dirty(inode);
	}

	return 0;
}

/*
 * Direct I/O callbacks for writes. ->end_io grows i_size after a write that
 * extended the file has completed without error.
 */
const struct iomap_dio_ops exfat_write_dio_ops = {
	.end_io		= exfat_file_write_dio_end_io,
};

/*
 * __exfat_iomap_begin - map a byte range of a file for iomap
 * @inode:     inode being mapped
 * @offset:    first byte of the requested range
 * @length:    number of bytes requested
 * @flags:     IOMAP_* flags of the operation (IOMAP_ZERO and IOMAP_REPORT
 *             are examined here)
 * @iomap:     mapping to fill in
 * @may_alloc: passed on to exfat_map_cluster(); when false the request is
 *             additionally limited by i_size
 *
 * The cluster lookup and the valid_size based adjustments run under
 * sbi->s_lock.
 *
 * Return: 0 on success, otherwise the error from exfat_map_cluster().
 */
static int __exfat_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
		unsigned int flags, struct iomap *iomap, bool may_alloc)
{
	struct super_block *sb = inode->i_sb;
	struct exfat_sb_info *sbi = EXFAT_SB(sb);
	struct exfat_inode_info *ei = EXFAT_I(inode);
	unsigned int first_clu, nr_clus;
	loff_t in_clu_off, mapped_bytes;
	bool allocated = false;
	int ret;

	if (!may_alloc) {
		/* A range starting at or past EOF is reported as a hole. */
		if (offset >= i_size_read(inode)) {
			iomap->offset = offset;
			iomap->length = length;
			iomap->addr = IOMAP_NULL_ADDR;
			iomap->type = IOMAP_HOLE;
			return 0;
		}

		/*
		 * A range crossing EOF is trimmed so that it ends at i_size
		 * rounded up to the inode block size.
		 */
		if (i_size_read(inode) < offset + length)
			length = round_up(i_size_read(inode),
					  i_blocksize(inode)) - offset;
	}

	/* Number of clusters touched by [offset, offset + length). */
	nr_clus = exfat_bytes_to_cluster_round_up(sbi, offset + length) -
		  exfat_bytes_to_cluster(sbi, offset);

	mutex_lock(&sbi->s_lock);
	iomap->bdev = sb->s_bdev;
	iomap->offset = offset;

	ret = exfat_map_cluster(inode, exfat_bytes_to_cluster(sbi, offset),
				&first_clu, &nr_clus, may_alloc, &allocated);
	if (ret)
		goto unlock;

	in_clu_off = exfat_cluster_offset(sbi, offset);
	mapped_bytes = exfat_cluster_to_bytes(sbi, nr_clus);

	/*
	 * Start from a plain mapped extent; the mapping cannot be longer
	 * than what remains of the cluster run after the in-cluster offset.
	 */
	iomap->type = IOMAP_MAPPED;
	iomap->flags = IOMAP_F_MERGED;
	iomap->addr = exfat_cluster_to_phys_bytes(sbi, first_clu) + in_clu_off;
	iomap->length = min_t(loff_t, length, mapped_bytes - in_clu_off);

	if (may_alloc || (flags & IOMAP_ZERO)) {
		if (allocated) {
			iomap->flags |= IOMAP_F_NEW;
		} else if (iomap->offset + iomap->length >= ei->valid_size) {
			/*
			 * The mapping reaches or passes valid_size. Flag it
			 * so that the part between the old valid_size and the
			 * end of the write gets zeroed in the page cache
			 * instead of exposing stale data (see the
			 * IOMAP_F_ZERO_TAIL handling in
			 * __iomap_write_begin()).
			 */
			iomap->flags |= IOMAP_F_ZERO_TAIL;
		}
		goto unlock;
	}

	/*
	 * valid_size has byte granularity and is the exact boundary between
	 * valid data and holes (or unwritten space).
	 */
	if (offset >= ei->valid_size) {
		/*
		 * With IOMAP_REPORT (lseek(SEEK_HOLE)/SEEK_DATA) the range
		 * is reported as IOMAP_HOLE so iomap_seek_hole_iter() can
		 * return the precise byte position. Other I/O paths get
		 * IOMAP_UNWRITTEN so it can be told apart from a real hole.
		 */
		if (flags & IOMAP_REPORT)
			iomap->type = IOMAP_HOLE;
		else
			iomap->type = IOMAP_UNWRITTEN;
	} else if (offset + iomap->length > ei->valid_size) {
		/*
		 * The mapping straddles valid_size. For IOMAP_REPORT cut it
		 * at the exact byte, otherwise at valid_size rounded up to
		 * the inode block size.
		 */
		if (flags & IOMAP_REPORT)
			iomap->length = ei->valid_size - iomap->offset;
		else
			iomap->length = round_up(ei->valid_size,
						 i_blocksize(inode)) -
					iomap->offset;
	}

unlock:
	mutex_unlock(&sbi->s_lock);
	return ret;
}

/*
 * exfat_iomap_begin - ->iomap_begin for the non-allocating ops table
 *
 * Forwards to __exfat_iomap_begin() with may_alloc = false. @srcmap is not
 * used.
 */
static int exfat_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
		unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
{
	return __exfat_iomap_begin(inode, offset, length, flags, iomap, false);
}

/*
 * exfat_write_iomap_begin - ->iomap_begin for the write ops table
 *
 * Forwards to __exfat_iomap_begin() with may_alloc = true. @srcmap is not
 * used.
 */
static int exfat_write_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
		unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
{
	return __exfat_iomap_begin(inode, offset, length, flags, iomap, true);
}

/*
 * Mapping callbacks that only look up existing clusters (may_alloc = false).
 * No ->iomap_end is provided.
 */
const struct iomap_ops exfat_iomap_ops = {
	.iomap_begin = exfat_iomap_begin,
};

/*
 * exfat_write_iomap_end - Update the inode state after a write
 * @inode:   inode that was written
 * @pos:     start offset of the range handed to the write
 * @length:  length of that range (not used here)
 * @written: number of bytes actually written, 0 if nothing was written
 * @flags:   IOMAP_* flags of the operation (not used here)
 * @iomap:   mapping that was used; its flags are checked for
 *           IOMAP_F_SIZE_CHANGED
 *
 * Extends ->valid_size (and ->zeroed_size, if it lags behind) to cover the
 * newly written range, and marks the inode dirty when either that happened
 * or the mapping carries IOMAP_F_SIZE_CHANGED.
 *
 * Return: always 0. The byte count must not be returned here: the callback
 * returns int, so a large ssize_t count could be narrowed to a negative
 * value and be mistaken for an error code.
 */
static int exfat_write_iomap_end(struct inode *inode, loff_t pos, loff_t length,
		ssize_t written, unsigned int flags, struct iomap *iomap)
{
	struct exfat_inode_info *ei = EXFAT_I(inode);
	bool dirtied = false;
	loff_t end;

	if (!written)
		return 0;

	end = pos + written;

	if (ei->valid_size < end) {
		ei->valid_size = end;
		/* zeroed_size is never left behind valid_size. */
		if (ei->zeroed_size < end)
			ei->zeroed_size = end;
		dirtied = true;
	}

	if (dirtied || (iomap->flags & IOMAP_F_SIZE_CHANGED))
		mark_inode_dirty(inode);

	return 0;
}

/*
 * Mapping callbacks for writes: ->iomap_begin maps with may_alloc = true and
 * ->iomap_end extends valid_size over the written range.
 */
const struct iomap_ops exfat_write_iomap_ops = {
	.iomap_begin	= exfat_write_iomap_begin,
	.iomap_end	= exfat_write_iomap_end,
};

/*
 * exfat_writeback_range - map and queue a folio range for writeback
 * @wpc:     writeback context holding the cached mapping and the inode
 * @folio:   folio being written back
 * @offset:  file offset of the range
 * @len:     length of the range in bytes
 * @end_pos: end position passed through to iomap_add_to_ioend()
 *
 * If @offset is not covered by the mapping cached in @wpc, a fresh mapping
 * is obtained from __exfat_iomap_begin() with no flags and may_alloc = false.
 * The range is then handed to iomap_add_to_ioend().
 *
 * Return: the result of iomap_add_to_ioend(), or the mapping error.
 */
static ssize_t exfat_writeback_range(struct iomap_writepage_ctx *wpc,
		struct folio *folio, u64 offset, unsigned int len, u64 end_pos)
{
	u64 map_start = wpc->iomap.offset;
	u64 map_end = wpc->iomap.offset + wpc->iomap.length;
	bool covered = offset >= map_start && offset < map_end;

	if (!covered) {
		int ret = __exfat_iomap_begin(wpc->inode, offset, len,
					      0, &wpc->iomap, false);

		if (ret)
			return ret;
	}

	return iomap_add_to_ioend(wpc, folio, offset, end_pos, len);
}

/*
 * Writeback callbacks: ranges are mapped by exfat_writeback_range() and
 * submission is left to the generic iomap_ioend_writeback_submit().
 */
const struct iomap_writeback_ops exfat_writeback_ops = {
	.writeback_range	= exfat_writeback_range,
	.writeback_submit	= iomap_ioend_writeback_submit,
};

/**
 * exfat_iomap_read_end_io - iomap read bio completion handler for exFAT
 * @bio: bio that has completed reading
 *
 * exfat_iomap_begin() rounds up MAPPED extents to the block boundary of
 * valid_size. This ensures that any subsequent blocks are treated as
 * IOMAP_UNWRITTEN, but it also causes the "straddle block" containing
 * valid_size to be read from disk. The disk data beyond valid_size in
 * this block is stale and must be zeroed to prevent data leakage.
 */
static void exfat_iomap_read_end_io(struct bio *bio)
{
	int error = blk_status_to_errno(bio->bi_status);
	struct folio_iter iter;

	bio_for_each_folio_all(iter, bio) {
		struct folio *folio = iter.folio;
		struct exfat_inode_info *ei = EXFAT_I(folio->mapping->host);
		s64 valid_size;
		loff_t pos = folio_pos(folio);

		valid_size = ei->valid_size;
		if (pos + iter.offset < valid_size &&
		    pos + iter.offset + iter.length > valid_size)
			folio_zero_segment(folio, offset_in_folio(folio, valid_size),
					   iter.offset + iter.length);

		iomap_finish_folio_read(folio, iter.offset, iter.length, error);
	}
	bio_put(bio);
}

/*
 * exfat_iomap_bio_submit_read - submit the read bio built by iomap
 * @iter: iomap iterator of the read (not used here)
 * @ctx:  read context; ->read_ctx holds the bio to submit
 *
 * Installs exfat_iomap_read_end_io() as the completion handler and then
 * submits the bio.
 */
static void exfat_iomap_bio_submit_read(const struct iomap_iter *iter,
		struct iomap_read_folio_ctx *ctx)
{
	struct bio *read_bio = ctx->read_ctx;

	read_bio->bi_end_io = exfat_iomap_read_end_io;
	submit_bio(read_bio);
}

/*
 * Read callbacks: folio ranges are added with the generic
 * iomap_bio_read_folio_range(), while submission goes through
 * exfat_iomap_bio_submit_read() so the exFAT completion handler is used.
 */
const struct iomap_read_ops exfat_iomap_bio_read_ops = {
	.read_folio_range	= iomap_bio_read_folio_range,
	.submit_read		= exfat_iomap_bio_submit_read,
};