diff options
| author | Carlos Maiolino <cem@kernel.org> | 2025-01-13 14:54:33 +0100 |
|---|---|---|
| committer | Carlos Maiolino <cem@kernel.org> | 2025-01-13 14:54:33 +0100 |
| commit | 8a092f440e0326a74a008566fef794fc891bdeda (patch) | |
| tree | 3f8911072d1f942212f71a309d38ef789dd92efe /fs/xfs | |
| parent | 9a2ce7254c1ee252c9c7aa2d7d71b45ee31c862a (diff) | |
| parent | 05290bd5c6236b8ad659157edb36bd2d38f46d3e (diff) | |
| download | ath-8a092f440e0326a74a008566fef794fc891bdeda.tar.gz | |
Merge tag 'reserve-rt-metadata-space_2024-12-23' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into for-next
xfs: enable in-core block reservation for rt metadata [v6.2 03/14]
In preparation for adding reverse mapping and refcounting to the
realtime device, enhance the metadir code to reserve free space for
btree shape changes as delayed allocation blocks.
This enables us to pre-allocate space for the rmap and refcount btrees
in the same manner as we do for the data device counterparts, which is
how we avoid ENOSPC failures when space is low but we've already
committed to a COW operation.
This has been running on the djcloud for months with no problems. Enjoy!
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
Diffstat (limited to 'fs/xfs')
| -rw-r--r-- | fs/xfs/libxfs/xfs_ag_resv.c | 3 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_attr.c | 4 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_bmap.c | 4 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_errortag.h | 4 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_metadir.c | 4 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_metafile.c | 205 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_metafile.h | 11 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_types.h | 7 | ||||
| -rw-r--r-- | fs/xfs/scrub/tempfile.c | 1 | ||||
| -rw-r--r-- | fs/xfs/xfs_dquot.h | 3 | ||||
| -rw-r--r-- | fs/xfs/xfs_error.c | 3 | ||||
| -rw-r--r-- | fs/xfs/xfs_exchrange.c | 3 | ||||
| -rw-r--r-- | fs/xfs/xfs_fsops.c | 17 | ||||
| -rw-r--r-- | fs/xfs/xfs_inode.h | 16 | ||||
| -rw-r--r-- | fs/xfs/xfs_mount.c | 10 | ||||
| -rw-r--r-- | fs/xfs/xfs_mount.h | 1 | ||||
| -rw-r--r-- | fs/xfs/xfs_qm.c | 2 | ||||
| -rw-r--r-- | fs/xfs/xfs_quota.h | 5 | ||||
| -rw-r--r-- | fs/xfs/xfs_rtalloc.c | 21 | ||||
| -rw-r--r-- | fs/xfs/xfs_rtalloc.h | 5 | ||||
| -rw-r--r-- | fs/xfs/xfs_trace.h | 45 | ||||
| -rw-r--r-- | fs/xfs/xfs_trans.c | 4 | ||||
| -rw-r--r-- | fs/xfs/xfs_trans_dquot.c | 8 |
23 files changed, 367 insertions, 19 deletions
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c index f5d853089019f..fb79215a509d2 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.c +++ b/fs/xfs/libxfs/xfs_ag_resv.c @@ -114,6 +114,7 @@ xfs_ag_resv_needed( case XFS_AG_RESV_RMAPBT: len -= xfs_perag_resv(pag, type)->ar_reserved; break; + case XFS_AG_RESV_METAFILE: case XFS_AG_RESV_NONE: /* empty */ break; @@ -347,6 +348,7 @@ xfs_ag_resv_alloc_extent( switch (type) { case XFS_AG_RESV_AGFL: + case XFS_AG_RESV_METAFILE: return; case XFS_AG_RESV_METADATA: case XFS_AG_RESV_RMAPBT: @@ -389,6 +391,7 @@ xfs_ag_resv_free_extent( switch (type) { case XFS_AG_RESV_AGFL: + case XFS_AG_RESV_METAFILE: return; case XFS_AG_RESV_METADATA: case XFS_AG_RESV_RMAPBT: diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 17875ad865f5d..8c04acd30d489 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -1004,9 +1004,7 @@ xfs_attr_add_fork( unsigned int blks; /* space reservation */ int error; /* error return value */ - if (xfs_is_metadir_inode(ip)) - ASSERT(XFS_IS_DQDETACHED(ip)); - else + if (!xfs_is_metadir_inode(ip)) ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); blks = XFS_ADDAFORK_SPACE_RES(mp); diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 0842577755f7b..02323936cc9b2 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -1042,9 +1042,7 @@ xfs_bmap_add_attrfork( int error; /* error return value */ xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); - if (xfs_is_metadir_inode(ip)) - ASSERT(XFS_IS_DQDETACHED(ip)); - else + if (!xfs_is_metadir_inode(ip)) ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); ASSERT(!xfs_inode_has_attr_fork(ip)); diff --git a/fs/xfs/libxfs/xfs_errortag.h b/fs/xfs/libxfs/xfs_errortag.h index 7002d7676a788..a53c5d40e084d 100644 --- a/fs/xfs/libxfs/xfs_errortag.h +++ b/fs/xfs/libxfs/xfs_errortag.h @@ -64,7 +64,8 @@ #define XFS_ERRTAG_WB_DELAY_MS 42 #define XFS_ERRTAG_WRITE_DELAY_MS 43 #define XFS_ERRTAG_EXCHMAPS_FINISH_ONE 44 -#define XFS_ERRTAG_MAX 45 +#define XFS_ERRTAG_METAFILE_RESV_CRITICAL 45 +#define XFS_ERRTAG_MAX 46 /* * Random factors for above tags, 1 means always, 2 means 1/2 time, etc. @@ -113,5 +114,6 @@ #define XFS_RANDOM_WB_DELAY_MS 3000 #define XFS_RANDOM_WRITE_DELAY_MS 3000 #define XFS_RANDOM_EXCHMAPS_FINISH_ONE 1 +#define XFS_RANDOM_METAFILE_RESV_CRITICAL 4 #endif /* __XFS_ERRORTAG_H_ */ diff --git a/fs/xfs/libxfs/xfs_metadir.c b/fs/xfs/libxfs/xfs_metadir.c index bae7377c0f228..178e89711cb7c 100644 --- a/fs/xfs/libxfs/xfs_metadir.c +++ b/fs/xfs/libxfs/xfs_metadir.c @@ -29,6 +29,10 @@ #include "xfs_dir2_priv.h" #include "xfs_parent.h" #include "xfs_health.h" +#include "xfs_errortag.h" +#include "xfs_error.h" +#include "xfs_btree.h" +#include "xfs_alloc.h" /* * Metadata Directory Tree diff --git a/fs/xfs/libxfs/xfs_metafile.c b/fs/xfs/libxfs/xfs_metafile.c index adeb25d1a444c..e151663cc9efd 100644 --- a/fs/xfs/libxfs/xfs_metafile.c +++ b/fs/xfs/libxfs/xfs_metafile.c @@ -17,6 +17,10 @@ #include "xfs_metafile.h" #include "xfs_trace.h" #include "xfs_inode.h" +#include "xfs_quota.h" +#include "xfs_errortag.h" +#include "xfs_error.h" +#include "xfs_alloc.h" /* Set up an inode to be recognized as a metadata directory inode. */ void @@ -50,3 +54,204 @@ xfs_metafile_clear_iflag( ip->i_diflags2 &= ~XFS_DIFLAG2_METADATA; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); } + +/* + * Is the amount of space that could be allocated towards a given metadata + * file at or beneath a certain threshold? + */ +static inline bool +xfs_metafile_resv_can_cover( + struct xfs_inode *ip, + int64_t rhs) +{ + /* + * The amount of space that can be allocated to this metadata file is + * the remaining reservation for the particular metadata file + the + * global free block count. Take care of the first case to avoid + * touching the per-cpu counter. + */ + if (ip->i_delayed_blks >= rhs) + return true; + + /* + * There aren't enough blocks left in the inode's reservation, but it + * isn't critical unless there also isn't enough free space. + */ + return __percpu_counter_compare(&ip->i_mount->m_fdblocks, + rhs - ip->i_delayed_blks, 2048) >= 0; +} + +/* + * Is this metadata file critically low on blocks? For now we'll define that + * as the number of blocks we can get our hands on being less than 10% of what + * we reserved or less than some arbitrary number (maximum btree height). + */ +bool +xfs_metafile_resv_critical( + struct xfs_inode *ip) +{ + uint64_t asked_low_water; + + if (!ip) + return false; + + ASSERT(xfs_is_metadir_inode(ip)); + trace_xfs_metafile_resv_critical(ip, 0); + + if (!xfs_metafile_resv_can_cover(ip, ip->i_mount->m_rtbtree_maxlevels)) + return true; + + asked_low_water = div_u64(ip->i_meta_resv_asked, 10); + if (!xfs_metafile_resv_can_cover(ip, asked_low_water)) + return true; + + return XFS_TEST_ERROR(false, ip->i_mount, + XFS_ERRTAG_METAFILE_RESV_CRITICAL); +} + +/* Allocate a block from the metadata file's reservation. */ +void +xfs_metafile_resv_alloc_space( + struct xfs_inode *ip, + struct xfs_alloc_arg *args) +{ + int64_t len = args->len; + + ASSERT(xfs_is_metadir_inode(ip)); + ASSERT(args->resv == XFS_AG_RESV_METAFILE); + + trace_xfs_metafile_resv_alloc_space(ip, args->len); + + /* + * Allocate the blocks from the metadata inode's block reservation + * and update the ondisk sb counter. + */ + if (ip->i_delayed_blks > 0) { + int64_t from_resv; + + from_resv = min_t(int64_t, len, ip->i_delayed_blks); + ip->i_delayed_blks -= from_resv; + xfs_mod_delalloc(ip, 0, -from_resv); + xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_RES_FDBLOCKS, + -from_resv); + len -= from_resv; + } + + /* + * Any allocation in excess of the reservation requires in-core and + * on-disk fdblocks updates. If we can grab @len blocks from the + * in-core fdblocks then all we need to do is update the on-disk + * superblock; if not, then try to steal some from the transaction's + * block reservation. Overruns are only expected for rmap btrees. + */ + if (len) { + unsigned int field; + int error; + + error = xfs_dec_fdblocks(ip->i_mount, len, true); + if (error) + field = XFS_TRANS_SB_FDBLOCKS; + else + field = XFS_TRANS_SB_RES_FDBLOCKS; + + xfs_trans_mod_sb(args->tp, field, -len); + } + + ip->i_nblocks += args->len; + xfs_trans_log_inode(args->tp, ip, XFS_ILOG_CORE); +} + +/* Free a block to the metadata file's reservation. */ +void +xfs_metafile_resv_free_space( + struct xfs_inode *ip, + struct xfs_trans *tp, + xfs_filblks_t len) +{ + int64_t to_resv; + + ASSERT(xfs_is_metadir_inode(ip)); + trace_xfs_metafile_resv_free_space(ip, len); + + ip->i_nblocks -= len; + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + + /* + * Add the freed blocks back into the inode's delalloc reservation + * until it reaches the maximum size. Update the ondisk fdblocks only. + */ + to_resv = ip->i_meta_resv_asked - (ip->i_nblocks + ip->i_delayed_blks); + if (to_resv > 0) { + to_resv = min_t(int64_t, to_resv, len); + ip->i_delayed_blks += to_resv; + xfs_mod_delalloc(ip, 0, to_resv); + xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FDBLOCKS, to_resv); + len -= to_resv; + } + + /* + * Everything else goes back to the filesystem, so update the in-core + * and on-disk counters. + */ + if (len) + xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, len); +} + +/* Release a metadata file's space reservation. */ +void +xfs_metafile_resv_free( + struct xfs_inode *ip) +{ + /* Non-btree metadata inodes don't need space reservations. */ + if (!ip || !ip->i_meta_resv_asked) + return; + + ASSERT(xfs_is_metadir_inode(ip)); + trace_xfs_metafile_resv_free(ip, 0); + + if (ip->i_delayed_blks) { + xfs_mod_delalloc(ip, 0, -ip->i_delayed_blks); + xfs_add_fdblocks(ip->i_mount, ip->i_delayed_blks); + ip->i_delayed_blks = 0; + } + ip->i_meta_resv_asked = 0; +} + +/* Set up a metadata file's space reservation. */ +int +xfs_metafile_resv_init( + struct xfs_inode *ip, + xfs_filblks_t ask) +{ + xfs_filblks_t hidden_space; + xfs_filblks_t used; + int error; + + if (!ip || ip->i_meta_resv_asked > 0) + return 0; + + ASSERT(xfs_is_metadir_inode(ip)); + + /* + * Space taken by all other metadata btrees are accounted on-disk as + * used space. We therefore only hide the space that is reserved but + * not used by the trees. + */ + used = ip->i_nblocks; + if (used > ask) + ask = used; + hidden_space = ask - used; + + error = xfs_dec_fdblocks(ip->i_mount, hidden_space, true); + if (error) { + trace_xfs_metafile_resv_init_error(ip, error, _RET_IP_); + return error; + } + + xfs_mod_delalloc(ip, 0, hidden_space); + ip->i_delayed_blks = hidden_space; + ip->i_meta_resv_asked = ask; + + trace_xfs_metafile_resv_init(ip, ask); + return 0; +} diff --git a/fs/xfs/libxfs/xfs_metafile.h b/fs/xfs/libxfs/xfs_metafile.h index acec400123db0..8d8f08a6071c2 100644 --- a/fs/xfs/libxfs/xfs_metafile.h +++ b/fs/xfs/libxfs/xfs_metafile.h @@ -21,6 +21,17 @@ void xfs_metafile_set_iflag(struct xfs_trans *tp, struct xfs_inode *ip, enum xfs_metafile_type metafile_type); void xfs_metafile_clear_iflag(struct xfs_trans *tp, struct xfs_inode *ip); +/* Space reservations for metadata inodes. */ +struct xfs_alloc_arg; + +bool xfs_metafile_resv_critical(struct xfs_inode *ip); +void xfs_metafile_resv_alloc_space(struct xfs_inode *ip, + struct xfs_alloc_arg *args); +void xfs_metafile_resv_free_space(struct xfs_inode *ip, struct xfs_trans *tp, + xfs_filblks_t len); +void xfs_metafile_resv_free(struct xfs_inode *ip); +int xfs_metafile_resv_init(struct xfs_inode *ip, xfs_filblks_t ask); + /* Code specific to kernel/userspace; must be provided externally. */ int xfs_trans_metafile_iget(struct xfs_trans *tp, xfs_ino_t ino, diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index bf33c2b1e43e5..ca2401c1facda 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -202,6 +202,13 @@ enum xfs_ag_resv_type { * altering fdblocks. If you think you need this you're wrong. */ XFS_AG_RESV_IGNORE, + + /* + * This allocation activity is being done on behalf of a metadata file. + * These files maintain their own permanent space reservations and are + * required to adjust fdblocks using the xfs_metafile_resv_* helpers. + */ + XFS_AG_RESV_METAFILE, }; /* Results of scanning a btree keyspace to check occupancy. */ diff --git a/fs/xfs/scrub/tempfile.c b/fs/xfs/scrub/tempfile.c index 2d7ca7e1bbca0..4ebb5f8459e8f 100644 --- a/fs/xfs/scrub/tempfile.c +++ b/fs/xfs/scrub/tempfile.c @@ -749,6 +749,7 @@ xrep_tempexch_reserve_quota( * or the two inodes have the same dquots. */ if (!XFS_IS_QUOTA_ON(tp->t_mountp) || req->ip1 == req->ip2 || + xfs_is_metadir_inode(req->ip1) || (req->ip1->i_udquot == req->ip2->i_udquot && req->ip1->i_gdquot == req->ip2->i_gdquot && req->ip1->i_pdquot == req->ip2->i_pdquot)) diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h index c617bac75361b..61217adf5ba55 100644 --- a/fs/xfs/xfs_dquot.h +++ b/fs/xfs/xfs_dquot.h @@ -160,6 +160,9 @@ static inline struct xfs_dquot *xfs_inode_dquot( struct xfs_inode *ip, xfs_dqtype_t type) { + if (xfs_is_metadir_inode(ip)) + return NULL; + switch (type) { case XFS_DQTYPE_USER: return ip->i_udquot; diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index 78cdc5064a8c4..dbd87e1376943 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -63,6 +63,7 @@ static unsigned int xfs_errortag_random_default[] = { XFS_RANDOM_WB_DELAY_MS, XFS_RANDOM_WRITE_DELAY_MS, XFS_RANDOM_EXCHMAPS_FINISH_ONE, + XFS_RANDOM_METAFILE_RESV_CRITICAL, }; struct xfs_errortag_attr { @@ -181,6 +182,7 @@ XFS_ERRORTAG_ATTR_RW(attr_leaf_to_node, XFS_ERRTAG_ATTR_LEAF_TO_NODE); XFS_ERRORTAG_ATTR_RW(wb_delay_ms, XFS_ERRTAG_WB_DELAY_MS); XFS_ERRORTAG_ATTR_RW(write_delay_ms, XFS_ERRTAG_WRITE_DELAY_MS); XFS_ERRORTAG_ATTR_RW(exchmaps_finish_one, XFS_ERRTAG_EXCHMAPS_FINISH_ONE); +XFS_ERRORTAG_ATTR_RW(metafile_resv_crit, XFS_ERRTAG_METAFILE_RESV_CRITICAL); static struct attribute *xfs_errortag_attrs[] = { XFS_ERRORTAG_ATTR_LIST(noerror), @@ -227,6 +229,7 @@ static struct attribute *xfs_errortag_attrs[] = { XFS_ERRORTAG_ATTR_LIST(wb_delay_ms), XFS_ERRORTAG_ATTR_LIST(write_delay_ms), XFS_ERRORTAG_ATTR_LIST(exchmaps_finish_one), + XFS_ERRORTAG_ATTR_LIST(metafile_resv_crit), NULL, }; ATTRIBUTE_GROUPS(xfs_errortag); diff --git a/fs/xfs/xfs_exchrange.c b/fs/xfs/xfs_exchrange.c index 265c424498933..f340a2015c4c7 100644 --- a/fs/xfs/xfs_exchrange.c +++ b/fs/xfs/xfs_exchrange.c @@ -119,6 +119,9 @@ xfs_exchrange_reserve_quota( int ip1_error = 0; int error; + ASSERT(!xfs_is_metadir_inode(req->ip1)); + ASSERT(!xfs_is_metadir_inode(req->ip2)); + /* * Don't bother with a quota reservation if we're not enforcing them * or the two inodes have the same dquots. diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 28dde215c8995..e1145107d8cbd 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -21,6 +21,7 @@ #include "xfs_ag.h" #include "xfs_ag_resv.h" #include "xfs_trace.h" +#include "xfs_rtalloc.h" /* * Write new AG headers to disk. Non-transactional, but need to be @@ -541,6 +542,19 @@ xfs_fs_reserve_ag_blocks( xfs_warn(mp, "Error %d reserving per-AG metadata reserve pool.", error); xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); + return error; + } + + if (xfs_has_realtime(mp)) { + err2 = xfs_rt_resv_init(mp); + if (err2 && err2 != -ENOSPC) { + xfs_warn(mp, + "Error %d reserving realtime metadata reserve pool.", err2); + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); + } + + if (err2 && !error) + error = err2; } return error; @@ -555,6 +569,9 @@ xfs_fs_unreserve_ag_blocks( { struct xfs_perag *pag = NULL; + if (xfs_has_realtime(mp)) + xfs_rt_resv_free(mp); + while ((pag = xfs_perag_next(mp, pag))) xfs_ag_resv_free(pag); } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 1648dc5a80688..c08093a65352e 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -25,9 +25,19 @@ struct xfs_dquot; typedef struct xfs_inode { /* Inode linking and identification information. */ struct xfs_mount *i_mount; /* fs mount struct ptr */ - struct xfs_dquot *i_udquot; /* user dquot */ - struct xfs_dquot *i_gdquot; /* group dquot */ - struct xfs_dquot *i_pdquot; /* project dquot */ + union { + struct { + struct xfs_dquot *i_udquot; /* user dquot */ + struct xfs_dquot *i_gdquot; /* group dquot */ + struct xfs_dquot *i_pdquot; /* project dquot */ + }; + + /* + * Space that has been set aside to accomodate expansions of a + * metadata btree rooted in this file. + */ + uint64_t i_meta_resv_asked; + }; /* Inode location stuff */ xfs_ino_t i_ino; /* inode number (agno/agino)*/ diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 5918f433dba75..97137126b16f5 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -650,6 +650,15 @@ xfs_agbtree_compute_maxlevels( mp->m_agbtree_maxlevels = max(levels, mp->m_refc_maxlevels); } +/* Compute maximum possible height for realtime btree types for this fs. */ +static inline void +xfs_rtbtree_compute_maxlevels( + struct xfs_mount *mp) +{ + /* This will be filled in later. */ + mp->m_rtbtree_maxlevels = 0; +} + /* * This function does the following on an initial mount of a file system: * - reads the superblock from disk and init the mount struct @@ -721,6 +730,7 @@ xfs_mountfs( xfs_refcountbt_compute_maxlevels(mp); xfs_agbtree_compute_maxlevels(mp); + xfs_rtbtree_compute_maxlevels(mp); /* * Check if sb_agblocks is aligned at stripe boundary. If sb_agblocks diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index db9dade7d22a1..ddb9d19a3a3d5 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -165,6 +165,7 @@ typedef struct xfs_mount { uint m_rmap_maxlevels; /* max rmap btree levels */ uint m_refc_maxlevels; /* max refcount btree level */ unsigned int m_agbtree_maxlevels; /* max level of all AG btrees */ + unsigned int m_rtbtree_maxlevels; /* max level of all rt btrees */ xfs_extlen_t m_ag_prealloc_blocks; /* reserved ag blocks */ uint m_alloc_set_aside; /* space we can't use */ uint m_ag_max_usable; /* max space per AG */ diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index dc8b1010d4d33..3abab5fb593e3 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -428,6 +428,8 @@ void xfs_qm_dqdetach( xfs_inode_t *ip) { + if (xfs_is_metadir_inode(ip)) + return; if (!(ip->i_udquot || ip->i_gdquot || ip->i_pdquot)) return; diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index d7565462af3dc..105e6eb576201 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h @@ -29,11 +29,6 @@ struct xfs_buf; (XFS_IS_GQUOTA_ON(mp) && (ip)->i_gdquot == NULL) || \ (XFS_IS_PQUOTA_ON(mp) && (ip)->i_pdquot == NULL)) -#define XFS_IS_DQDETACHED(ip) \ - ((ip)->i_udquot == NULL && \ - (ip)->i_gdquot == NULL && \ - (ip)->i_pdquot == NULL) - #define XFS_QM_NEED_QUOTACHECK(mp) \ ((XFS_IS_UQUOTA_ON(mp) && \ (mp->m_sb.sb_qflags & XFS_UQUOTA_CHKD) == 0) || \ diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index fcfa6e0eb3ad2..5128c5ad72f5d 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -1344,6 +1344,12 @@ xfs_growfs_rt( if (!error) error = error2; + + /* Reset the rt metadata btree space reservations. */ + xfs_rt_resv_free(mp); + error2 = xfs_rt_resv_init(mp); + if (error2 && error2 != -ENOSPC) + error = error2; } out_unlock: @@ -1487,6 +1493,21 @@ xfs_rtalloc_reinit_frextents( return 0; } +/* Free space reservations for rt metadata inodes. */ +void +xfs_rt_resv_free( + struct xfs_mount *mp) +{ +} + +/* Reserve space for rt metadata inodes' space expansion. */ +int +xfs_rt_resv_init( + struct xfs_mount *mp) +{ + return 0; +} + /* * Read in the bmbt of an rt metadata inode so that we never have to load them * at runtime. This enables the use of shared ILOCKs for rtbitmap scans. Use diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h index 8e2a07b8174b7..d87523e6a5500 100644 --- a/fs/xfs/xfs_rtalloc.h +++ b/fs/xfs/xfs_rtalloc.h @@ -34,6 +34,9 @@ int /* error */ xfs_rtmount_inodes( struct xfs_mount *mp); /* file system mount structure */ +void xfs_rt_resv_free(struct xfs_mount *mp); +int xfs_rt_resv_init(struct xfs_mount *mp); + /* * Grow the realtime area of the filesystem. */ @@ -60,6 +63,8 @@ xfs_rtmount_init( } # define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (-ENOSYS)) # define xfs_rtunmount_inodes(m) +# define xfs_rt_resv_free(mp) ((void)0) +# define xfs_rt_resv_init(mp) (0) #endif /* CONFIG_XFS_RT */ #endif /* __XFS_RTALLOC_H__ */ diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 7b16cdd72e9dd..cbda663fe6e81 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -5574,6 +5574,51 @@ DEFINE_EVENT(xfs_metadir_class, name, \ TP_ARGS(dp, name, ino)) DEFINE_METADIR_EVENT(xfs_metadir_lookup); +/* metadata inode space reservations */ + +DECLARE_EVENT_CLASS(xfs_metafile_resv_class, + TP_PROTO(struct xfs_inode *ip, xfs_filblks_t len), + TP_ARGS(ip, len), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(unsigned long long, freeblks) + __field(unsigned long long, reserved) + __field(unsigned long long, asked) + __field(unsigned long long, used) + __field(unsigned long long, len) + ), + TP_fast_assign( + struct xfs_mount *mp = ip->i_mount; + + __entry->dev = mp->m_super->s_dev; + __entry->ino = ip->i_ino; + __entry->freeblks = percpu_counter_sum(&mp->m_fdblocks); + __entry->reserved = ip->i_delayed_blks; + __entry->asked = ip->i_meta_resv_asked; + __entry->used = ip->i_nblocks; + __entry->len = len; + ), + TP_printk("dev %d:%d ino 0x%llx freeblks %llu resv %llu ask %llu used %llu len %llu", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->freeblks, + __entry->reserved, + __entry->asked, + __entry->used, + __entry->len) +) +#define DEFINE_METAFILE_RESV_EVENT(name) \ +DEFINE_EVENT(xfs_metafile_resv_class, name, \ + TP_PROTO(struct xfs_inode *ip, xfs_filblks_t len), \ + TP_ARGS(ip, len)) +DEFINE_METAFILE_RESV_EVENT(xfs_metafile_resv_init); +DEFINE_METAFILE_RESV_EVENT(xfs_metafile_resv_free); +DEFINE_METAFILE_RESV_EVENT(xfs_metafile_resv_alloc_space); +DEFINE_METAFILE_RESV_EVENT(xfs_metafile_resv_free_space); +DEFINE_METAFILE_RESV_EVENT(xfs_metafile_resv_critical); +DEFINE_INODE_ERROR_EVENT(xfs_metafile_resv_init_error); + #endif /* _TRACE_XFS_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 4cd25717c9d13..f53f82456288e 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -1266,6 +1266,9 @@ retry: xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + if (xfs_is_metadir_inode(ip)) + goto out; + error = xfs_qm_dqattach_locked(ip, false); if (error) { /* Caller should have allocated the dquots! */ @@ -1334,6 +1337,7 @@ retry: goto out_cancel; } +out: *tpp = tp; return 0; diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index 713b6d243e563..765456bf34285 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -156,7 +156,8 @@ xfs_trans_mod_ino_dquot( unsigned int field, int64_t delta) { - ASSERT(!xfs_is_metadir_inode(ip) || XFS_IS_DQDETACHED(ip)); + if (xfs_is_metadir_inode(ip)) + return; xfs_trans_mod_dquot(tp, dqp, field, delta); @@ -246,11 +247,10 @@ xfs_trans_mod_dquot_byino( xfs_mount_t *mp = tp->t_mountp; if (!XFS_IS_QUOTA_ON(mp) || - xfs_is_quota_inode(&mp->m_sb, ip->i_ino)) + xfs_is_quota_inode(&mp->m_sb, ip->i_ino) || + xfs_is_metadir_inode(ip)) return; - ASSERT(!xfs_is_metadir_inode(ip) || XFS_IS_DQDETACHED(ip)); - if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot) xfs_trans_mod_ino_dquot(tp, ip, ip->i_udquot, field, delta); if (XFS_IS_GQUOTA_ON(mp) && ip->i_gdquot) |
