From e3f705d0c461689fcfd36e405c61df39d9a83e0b Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 13 Jul 2022 20:58:28 -0500 Subject: [PATCH 1/4] mkfs: terminate getsubopt arrays properly Having not drank any (or maybe too much) coffee this morning, I typed: $ mkfs.xfs -d agcount=3 -d nrext64=0 Segmentation fault I traced this down to getsubopt walking off the end of the dopts.subopts array. The manpage says you're supposed to terminate the suboptions string array with a NULL entry, but the structure definition uses MAX_SUBOPTS/D_MAX_OPTS directly, which means there is no terminator. Explicitly terminate each suboption array with a NULL entry after making room for it. Signed-off-by: Darrick J. Wong [sandeen: explicitly add NULL terminators & clarify comment] Reviewed-by: Eric Sandeen Signed-off-by: Eric Sandeen Signed-off-by: Ferry Meng Reviewed-by: Joseph Qi --- mkfs/xfs_mkfs.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c index 36360ba7..37747698 100644 --- a/mkfs/xfs_mkfs.c +++ b/mkfs/xfs_mkfs.c @@ -122,8 +122,11 @@ enum { M_MAX_OPTS, }; -/* Just define the max options array size manually right now */ -#define MAX_SUBOPTS D_MAX_OPTS +/* + * Just define the max options array size manually to the largest + * enum right now, leaving room for a NULL terminator at the end + */ +#define MAX_SUBOPTS (D_MAX_OPTS + 1) #define SUBOPT_NEEDS_VAL (-1LL) #define MAX_CONFLICTS 8 @@ -224,6 +227,7 @@ static struct opt_params bopts = { .name = 'b', .subopts = { [B_SIZE] = "size", + [B_MAX_OPTS] = NULL, }, .subopt_params = { { .index = B_SIZE, @@ -255,6 +259,7 @@ static struct opt_params dopts = { [D_PROJINHERIT] = "projinherit", [D_EXTSZINHERIT] = "extszinherit", [D_COWEXTSIZE] = "cowextsize", + [D_MAX_OPTS] = NULL, }, .subopt_params = { { .index = D_AGCOUNT, @@ -384,6 +389,7 @@ static struct opt_params iopts = { [I_ATTR] = "attr", [I_PROJID32BIT] = "projid32bit", [I_SPINODES] = "sparse", + [I_MAX_OPTS] = NULL, }, .subopt_params = { { .index = I_ALIGN, @@ -449,6 +455,7 @@ static struct opt_params lopts = { [L_FILE] = "file", [L_NAME] = "name", [L_LAZYSBCNTR] = "lazy-count", + [L_MAX_OPTS] = NULL, }, .subopt_params = { { .index = L_AGNUM, @@ -540,6 +547,7 @@ static struct opt_params nopts = { [N_SIZE] = "size", [N_VERSION] = "version", [N_FTYPE] = "ftype", + [N_MAX_OPTS] = NULL, }, .subopt_params = { { .index = N_SIZE, @@ -574,6 +582,7 @@ static struct opt_params ropts = { [R_FILE] = "file", [R_NAME] = "name", [R_NOALIGN] = "noalign", + [R_MAX_OPTS] = NULL, }, .subopt_params = { { .index = R_EXTSIZE, @@ -620,6 +629,7 @@ static struct opt_params sopts = { .subopts = { [S_SIZE] = "size", [S_SECTSIZE] = "sectsize", + [S_MAX_OPTS] = NULL, }, .subopt_params = { { .index = S_SIZE, @@ -655,6 +665,7 @@ static struct opt_params mopts = { [M_REFLINK] = "reflink", [M_INOBTCNT] = "inobtcount", [M_BIGTIME] = "bigtime", + [M_MAX_OPTS] = NULL, }, .subopt_params = { { .index = M_CRC, -- Gitee From a4b3ccd82a079b63df1630482eaf26c0d4bdf4bc Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 13 Jul 2022 20:58:25 -0500 Subject: [PATCH 2/4] xfs_repair: ignore empty xattr leaf blocks As detailed in the commit: 5e572d1a xfs: empty xattr leaf header blocks are not corruption empty xattr leaf blocks can be the benign byproduct of the system going down during the multi-step process of adding a large xattr to a file that has no xattrs. If we find one at attr fork offset 0, we should clear it, but this isn't a corruption. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Eric Sandeen Signed-off-by: Ferry Meng Reviewed-by: Joseph Qi --- repair/attr_repair.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/repair/attr_repair.c b/repair/attr_repair.c index 5ad81c09..8cd9e2de 100644 --- a/repair/attr_repair.c +++ b/repair/attr_repair.c @@ -584,6 +584,26 @@ process_leaf_attr_block( firstb = mp->m_sb.sb_blocksize; stop = xfs_attr3_leaf_hdr_size(leaf); + /* + * Empty leaf blocks at offset zero can occur as a race between + * setxattr and the system going down, so we only take action if we're + * running in modify mode. See xfs_attr3_leaf_verify for details of + * how we've screwed this up many times. + */ + if (!leafhdr.count && da_bno == 0) { + if (no_modify) { + do_log( + _("would clear empty leaf attr block 0, inode %" PRIu64 "\n"), + ino); + return 0; + } + + do_warn( + _("will clear empty leaf attr block 0, inode %" PRIu64 "\n"), + ino); + return 1; + } + /* does the count look sorta valid? */ if (!leafhdr.count || leafhdr.count * sizeof(xfs_attr_leaf_entry_t) + stop > -- Gitee From 65e52d802372dc59d64c719997a7cc05782934e9 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 4 Apr 2022 11:29:34 -0700 Subject: [PATCH 3/4] mkfs: fix missing validation of -l size against maximum internal log size If a sysadmin specifies a log size explicitly, we don't actually check that against the maximum internal log size that we compute for the default log size computation. We're going to add more validation soon, so refactor the max internal log blocks into a common variable and add a check. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Ferry Meng Reviewed-by: Joseph Qi --- mkfs/xfs_mkfs.c | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c index 37747698..c9130805 100644 --- a/mkfs/xfs_mkfs.c +++ b/mkfs/xfs_mkfs.c @@ -3184,6 +3184,7 @@ calculate_log_size( { struct xfs_sb *sbp = &mp->m_sb; int min_logblocks; + int max_logblocks; /* absolute max for this AG */ struct xfs_mount mount; /* we need a temporary mount to calculate the minimum log size. */ @@ -3223,6 +3224,19 @@ _("external log device %lld too small, must be at least %lld blocks\n"), return; } + /* + * Make sure the log fits wholly within an AG + * + * XXX: If agf->freeblks ends up as 0 because the log uses all + * the free space, it causes the kernel all sorts of problems + * with per-ag reservations. Right now just back it off one + * block, but there's a whole can of worms here that needs to be + * opened to decide what is the valid maximum size of a log in + * an AG. + */ + max_logblocks = libxfs_alloc_ag_max_usable(mp) - 1; + + /* internal log - if no size specified, calculate automatically */ if (!cfg->logblocks) { if (cfg->dblocks < GIGABYTES(1, cfg->blocklog)) { @@ -3248,21 +3262,10 @@ _("external log device %lld too small, must be at least %lld blocks\n"), cfg->logblocks = cfg->logblocks >> cfg->blocklog; } - /* Ensure the chosen size meets minimum log size requirements */ + /* Ensure the chosen size fits within log size requirements */ cfg->logblocks = max(min_logblocks, cfg->logblocks); - /* - * Make sure the log fits wholly within an AG - * - * XXX: If agf->freeblks ends up as 0 because the log uses all - * the free space, it causes the kernel all sorts of problems - * with per-ag reservations. Right now just back it off one - * block, but there's a whole can of worms here that needs to be - * opened to decide what is the valid maximum size of a log in - * an AG. - */ - cfg->logblocks = min(cfg->logblocks, - libxfs_alloc_ag_max_usable(mp) - 1); + cfg->logblocks = min(cfg->logblocks, max_logblocks); /* and now clamp the size to the maximum supported size */ cfg->logblocks = min(cfg->logblocks, XFS_MAX_LOG_BLOCKS); @@ -3270,6 +3273,13 @@ _("external log device %lld too small, must be at least %lld blocks\n"), cfg->logblocks = XFS_MAX_LOG_BYTES >> cfg->blocklog; validate_log_size(cfg->logblocks, cfg->blocklog, min_logblocks); + } else if (cfg->logblocks > max_logblocks) { + /* check specified log size */ + fprintf(stderr, +_("internal log size %lld too large, must be less than %d\n"), + (long long)cfg->logblocks, + max_logblocks); + usage(); } if (cfg->logblocks > sbp->sb_agblocks - libxfs_prealloc_blocks(mp)) { -- Gitee From 92f6722af80cee37ddc5aca6955c5fd31ed81322 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 11 Mar 2022 17:32:02 -0800 Subject: [PATCH 4/4] mkfs: don't let internal logs bump the root dir inode chunk to AG 1 Currently, we don't let an internal log consume every last block in an AG. According to the comment, we're doing this to avoid tripping AGF verifiers if freeblks==0, but on a modern filesystem this isn't sufficient to avoid problems because we need to have enough space in the AG to allocate an aligned root inode chunk, if it should be the case that the log also ends up in AG 0: $ truncate -s 6366g /tmp/a ; mkfs.xfs -f /tmp/a -d agcount=3200 -l agnum=0 meta-data=/tmp/a isize=512 agcount=3200, agsize=521503 blks = sectsz=512 attr=2, projid32bit=1 = crc=1 finobt=1, sparse=1, rmapbt=0 = reflink=1 bigtime=0 inobtcount=0 data = bsize=4096 blocks=1668808704, imaxpct=5 = sunit=0 swidth=0 blks naming =version 2 bsize=4096 ascii-ci=0, ftype=1 log =internal log bsize=4096 blocks=521492, version=2 = sectsz=512 sunit=0 blks, lazy-count=1 realtime =none extsz=4096 blocks=0, rtextents=0 mkfs.xfs: root inode created in AG 1, not AG 0 Therefore, modify the maximum internal log size calculation to constrain the maximum internal log size so that the aligned inode chunk allocation will always succeed. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Ferry Meng Reviewed-by: Joseph Qi --- mkfs/xfs_mkfs.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c index c9130805..6d0109a8 100644 --- a/mkfs/xfs_mkfs.c +++ b/mkfs/xfs_mkfs.c @@ -3176,6 +3176,49 @@ validate_log_size(uint64_t logblocks, int blocklog, int min_logblocks) } } +static void +adjust_ag0_internal_logblocks( + struct mkfs_params *cfg, + struct xfs_mount *mp, + int min_logblocks, + int *max_logblocks) +{ + int backoff = 0; + int ichunk_blocks; + + /* + * mkfs will trip over the write verifiers if the log is allocated in + * AG 0 and consumes enough space that we cannot allocate a non-sparse + * inode chunk for the root directory. The inode allocator requires + * that the AG have enough free space for the chunk itself plus enough + * to fix up the freelist with aligned blocks if we need to fill the + * allocation from the AGFL. + */ + ichunk_blocks = XFS_INODES_PER_CHUNK * cfg->inodesize >> cfg->blocklog; + backoff = ichunk_blocks * 4; + + /* + * We try to align inode allocations to the data device stripe unit, + * so ensure there's enough space to perform an aligned allocation. + * The inode geometry structure isn't set up yet, so compute this by + * hand. + */ + backoff = max(backoff, cfg->dsunit * 2); + + *max_logblocks -= backoff; + + /* If the specified log size is too big, complain. */ + if (cli_opt_set(&lopts, L_SIZE) && cfg->logblocks > *max_logblocks) { + fprintf(stderr, +_("internal log size %lld too large, must be less than %d\n"), + (long long)cfg->logblocks, + *max_logblocks); + usage(); + } + + cfg->logblocks = min(cfg->logblocks, *max_logblocks); +} + static void calculate_log_size( struct mkfs_params *cfg, @@ -3301,6 +3344,10 @@ _("log ag number %lld too large, must be less than %lld\n"), } else cfg->logagno = (xfs_agnumber_t)(sbp->sb_agcount / 2); + if (cfg->logagno == 0) + adjust_ag0_internal_logblocks(cfg, mp, min_logblocks, + &max_logblocks); + cfg->logstart = XFS_AGB_TO_FSB(mp, cfg->logagno, libxfs_prealloc_blocks(mp)); -- Gitee