From e3f705d0c461689fcfd36e405c61df39d9a83e0b Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Wed, 13 Jul 2022 20:58:28 -0500
Subject: [PATCH 1/4] mkfs: terminate getsubopt arrays properly

Having not drank any (or maybe too much) coffee this morning, I typed:

$ mkfs.xfs -d agcount=3 -d nrext64=0
Segmentation fault

I traced this down to getsubopt walking off the end of the dopts.subopts
array.  The manpage says you're supposed to terminate the suboptions
string array with a NULL entry, but the structure definition uses
MAX_SUBOPTS/D_MAX_OPTS directly, which means there is no terminator.

Explicitly terminate each suboption array with a NULL entry after
making room for it.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
[sandeen: explicitly add NULL terminators & clarify comment]
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
Signed-off-by: Ferry Meng <mengferry@linux.alibaba.com>
Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
---
 mkfs/xfs_mkfs.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c
index 36360ba7..37747698 100644
--- a/mkfs/xfs_mkfs.c
+++ b/mkfs/xfs_mkfs.c
@@ -122,8 +122,11 @@ enum {
 	M_MAX_OPTS,
 };
 
-/* Just define the max options array size manually right now */
-#define MAX_SUBOPTS	D_MAX_OPTS
+/*
+ * Just define the max options array size manually to the largest
+ * enum right now, leaving room for a NULL terminator at the end
+ */
+#define MAX_SUBOPTS	(D_MAX_OPTS + 1)
 
 #define SUBOPT_NEEDS_VAL	(-1LL)
 #define MAX_CONFLICTS	8
@@ -224,6 +227,7 @@ static struct opt_params bopts = {
 	.name = 'b',
 	.subopts = {
 		[B_SIZE] = "size",
+		[B_MAX_OPTS] = NULL,
 	},
 	.subopt_params = {
 		{ .index = B_SIZE,
@@ -255,6 +259,7 @@ static struct opt_params dopts = {
 		[D_PROJINHERIT] = "projinherit",
 		[D_EXTSZINHERIT] = "extszinherit",
 		[D_COWEXTSIZE] = "cowextsize",
+		[D_MAX_OPTS] = NULL,
 	},
 	.subopt_params = {
 		{ .index = D_AGCOUNT,
@@ -384,6 +389,7 @@ static struct opt_params iopts = {
 		[I_ATTR] = "attr",
 		[I_PROJID32BIT] = "projid32bit",
 		[I_SPINODES] = "sparse",
+		[I_MAX_OPTS] = NULL,
 	},
 	.subopt_params = {
 		{ .index = I_ALIGN,
@@ -449,6 +455,7 @@ static struct opt_params lopts = {
 		[L_FILE] = "file",
 		[L_NAME] = "name",
 		[L_LAZYSBCNTR] = "lazy-count",
+		[L_MAX_OPTS] = NULL,
 	},
 	.subopt_params = {
 		{ .index = L_AGNUM,
@@ -540,6 +547,7 @@ static struct opt_params nopts = {
 		[N_SIZE] = "size",
 		[N_VERSION] = "version",
 		[N_FTYPE] = "ftype",
+		[N_MAX_OPTS] = NULL,
 	},
 	.subopt_params = {
 		{ .index = N_SIZE,
@@ -574,6 +582,7 @@ static struct opt_params ropts = {
 		[R_FILE] = "file",
 		[R_NAME] = "name",
 		[R_NOALIGN] = "noalign",
+		[R_MAX_OPTS] = NULL,
 	},
 	.subopt_params = {
 		{ .index = R_EXTSIZE,
@@ -620,6 +629,7 @@ static struct opt_params sopts = {
 	.subopts = {
 		[S_SIZE] = "size",
 		[S_SECTSIZE] = "sectsize",
+		[S_MAX_OPTS] = NULL,
 	},
 	.subopt_params = {
 		{ .index = S_SIZE,
@@ -655,6 +665,7 @@ static struct opt_params mopts = {
 		[M_REFLINK] = "reflink",
 		[M_INOBTCNT] = "inobtcount",
 		[M_BIGTIME] = "bigtime",
+		[M_MAX_OPTS] = NULL,
 	},
 	.subopt_params = {
 		{ .index = M_CRC,
-- 
Gitee


From a4b3ccd82a079b63df1630482eaf26c0d4bdf4bc Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Wed, 13 Jul 2022 20:58:25 -0500
Subject: [PATCH 2/4] xfs_repair: ignore empty xattr leaf blocks

As detailed in the commit:

5e572d1a xfs: empty xattr leaf header blocks are not corruption

empty xattr leaf blocks can be the benign byproduct of the system
going down during the multi-step process of adding a large xattr
to a file that has no xattrs.  If we find one at attr fork offset 0,
we should clear it, but this isn't a corruption.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
Signed-off-by: Ferry Meng <mengferry@linux.alibaba.com>
Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
---
 repair/attr_repair.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/repair/attr_repair.c b/repair/attr_repair.c
index 5ad81c09..8cd9e2de 100644
--- a/repair/attr_repair.c
+++ b/repair/attr_repair.c
@@ -584,6 +584,26 @@ process_leaf_attr_block(
 	firstb = mp->m_sb.sb_blocksize;
 	stop = xfs_attr3_leaf_hdr_size(leaf);
 
+	/*
+	 * Empty leaf blocks at offset zero can occur as a race between
+	 * setxattr and the system going down, so we only take action if we're
+	 * running in modify mode.  See xfs_attr3_leaf_verify for details of
+	 * how we've screwed this up many times.
+	 */
+	if (!leafhdr.count && da_bno == 0) {
+		if (no_modify) {
+			do_log(
+	_("would clear empty leaf attr block 0, inode %" PRIu64 "\n"),
+					ino);
+			return 0;
+		}
+
+		do_warn(
+	_("will clear empty leaf attr block 0, inode %" PRIu64 "\n"),
+				ino);
+		return 1;
+	}
+
 	/* does the count look sorta valid? */
 	if (!leafhdr.count ||
 	    leafhdr.count * sizeof(xfs_attr_leaf_entry_t) + stop >
-- 
Gitee


From 65e52d802372dc59d64c719997a7cc05782934e9 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Mon, 4 Apr 2022 11:29:34 -0700
Subject: [PATCH 3/4] mkfs: fix missing validation of -l size against maximum
 internal log size

If a sysadmin specifies a log size explicitly, we don't actually check
that against the maximum internal log size that we compute for the
default log size computation.  We're going to add more validation soon,
so refactor the max internal log blocks into a common variable and
add a check.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ferry Meng <mengferry@linux.alibaba.com>
Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
---
 mkfs/xfs_mkfs.c | 36 +++++++++++++++++++++++-------------
 1 file changed, 23 insertions(+), 13 deletions(-)

diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c
index 37747698..c9130805 100644
--- a/mkfs/xfs_mkfs.c
+++ b/mkfs/xfs_mkfs.c
@@ -3184,6 +3184,7 @@ calculate_log_size(
 {
 	struct xfs_sb		*sbp = &mp->m_sb;
 	int			min_logblocks;
+	int			max_logblocks;	/* absolute max for this AG */
 	struct xfs_mount	mount;
 
 	/* we need a temporary mount to calculate the minimum log size. */
@@ -3223,6 +3224,19 @@ _("external log device %lld too small, must be at least %lld blocks\n"),
 		return;
 	}
 
+	/*
+	 * Make sure the log fits wholly within an AG
+	 *
+	 * XXX: If agf->freeblks ends up as 0 because the log uses all
+	 * the free space, it causes the kernel all sorts of problems
+	 * with per-ag reservations. Right now just back it off one
+	 * block, but there's a whole can of worms here that needs to be
+	 * opened to decide what is the valid maximum size of a log in
+	 * an AG.
+	 */
+	max_logblocks = libxfs_alloc_ag_max_usable(mp) - 1;
+
+
 	/* internal log - if no size specified, calculate automatically */
 	if (!cfg->logblocks) {
 		if (cfg->dblocks < GIGABYTES(1, cfg->blocklog)) {
@@ -3248,21 +3262,10 @@ _("external log device %lld too small, must be at least %lld blocks\n"),
 			cfg->logblocks = cfg->logblocks >> cfg->blocklog;
 		}
 
-		/* Ensure the chosen size meets minimum log size requirements */
+		/* Ensure the chosen size fits within log size requirements */
 		cfg->logblocks = max(min_logblocks, cfg->logblocks);
 
-		/*
-		 * Make sure the log fits wholly within an AG
-		 *
-		 * XXX: If agf->freeblks ends up as 0 because the log uses all
-		 * the free space, it causes the kernel all sorts of problems
-		 * with per-ag reservations. Right now just back it off one
-		 * block, but there's a whole can of worms here that needs to be
-		 * opened to decide what is the valid maximum size of a log in
-		 * an AG.
-		 */
-		cfg->logblocks = min(cfg->logblocks,
-				     libxfs_alloc_ag_max_usable(mp) - 1);
+		cfg->logblocks = min(cfg->logblocks, max_logblocks);
 
 		/* and now clamp the size to the maximum supported size */
 		cfg->logblocks = min(cfg->logblocks, XFS_MAX_LOG_BLOCKS);
@@ -3270,6 +3273,13 @@ _("external log device %lld too small, must be at least %lld blocks\n"),
 			cfg->logblocks = XFS_MAX_LOG_BYTES >> cfg->blocklog;
 
 		validate_log_size(cfg->logblocks, cfg->blocklog, min_logblocks);
+	} else if (cfg->logblocks > max_logblocks) {
+		/* check specified log size */
+		fprintf(stderr,
+_("internal log size %lld too large, must be less than %d\n"),
+			(long long)cfg->logblocks,
+			max_logblocks);
+		usage();
 	}
 
 	if (cfg->logblocks > sbp->sb_agblocks - libxfs_prealloc_blocks(mp)) {
-- 
Gitee


From 92f6722af80cee37ddc5aca6955c5fd31ed81322 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Fri, 11 Mar 2022 17:32:02 -0800
Subject: [PATCH 4/4] mkfs: don't let internal logs bump the root dir inode
 chunk to AG 1

Currently, we don't let an internal log consume every last block in an
AG.  According to the comment, we're doing this to avoid tripping AGF
verifiers if freeblks==0, but on a modern filesystem this isn't
sufficient to avoid problems because we need to have enough space in the
AG to allocate an aligned root inode chunk, if it should be the case
that the log also ends up in AG 0:

$ truncate -s 6366g /tmp/a ; mkfs.xfs -f /tmp/a -d agcount=3200 -l agnum=0
meta-data=/tmp/a                 isize=512    agcount=3200, agsize=521503 blks
         =                       sectsz=512   attr=2, projid32bit=1
         =                       crc=1        finobt=1, sparse=1, rmapbt=0
         =                       reflink=1    bigtime=0 inobtcount=0
data     =                       bsize=4096   blocks=1668808704, imaxpct=5
         =                       sunit=0      swidth=0 blks
naming   =version 2              bsize=4096   ascii-ci=0, ftype=1
log      =internal log           bsize=4096   blocks=521492, version=2
         =                       sectsz=512   sunit=0 blks, lazy-count=1
realtime =none                   extsz=4096   blocks=0, rtextents=0
mkfs.xfs: root inode created in AG 1, not AG 0

Therefore, modify the maximum internal log size calculation to constrain
the maximum internal log size so that the aligned inode chunk allocation
will always succeed.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ferry Meng <mengferry@linux.alibaba.com>
Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
---
 mkfs/xfs_mkfs.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c
index c9130805..6d0109a8 100644
--- a/mkfs/xfs_mkfs.c
+++ b/mkfs/xfs_mkfs.c
@@ -3176,6 +3176,49 @@ validate_log_size(uint64_t logblocks, int blocklog, int min_logblocks)
 	}
 }
 
+static void
+adjust_ag0_internal_logblocks(
+	struct mkfs_params	*cfg,
+	struct xfs_mount	*mp,
+	int			min_logblocks,
+	int			*max_logblocks)
+{
+	int			backoff = 0;
+	int			ichunk_blocks;
+
+	/*
+	 * mkfs will trip over the write verifiers if the log is allocated in
+	 * AG 0 and consumes enough space that we cannot allocate a non-sparse
+	 * inode chunk for the root directory.  The inode allocator requires
+	 * that the AG have enough free space for the chunk itself plus enough
+	 * to fix up the freelist with aligned blocks if we need to fill the
+	 * allocation from the AGFL.
+	 */
+	ichunk_blocks = XFS_INODES_PER_CHUNK * cfg->inodesize >> cfg->blocklog;
+	backoff = ichunk_blocks * 4;
+
+	/*
+	 * We try to align inode allocations to the data device stripe unit,
+	 * so ensure there's enough space to perform an aligned allocation.
+	 * The inode geometry structure isn't set up yet, so compute this by
+	 * hand.
+	 */
+	backoff = max(backoff, cfg->dsunit * 2);
+
+	*max_logblocks -= backoff;
+
+	/* If the specified log size is too big, complain. */
+	if (cli_opt_set(&lopts, L_SIZE) && cfg->logblocks > *max_logblocks) {
+		fprintf(stderr,
+_("internal log size %lld too large, must be less than %d\n"),
+			(long long)cfg->logblocks,
+			*max_logblocks);
+		usage();
+	}
+
+	cfg->logblocks = min(cfg->logblocks, *max_logblocks);
+}
+
 static void
 calculate_log_size(
 	struct mkfs_params	*cfg,
@@ -3301,6 +3344,10 @@ _("log ag number %lld too large, must be less than %lld\n"),
 	} else
 		cfg->logagno = (xfs_agnumber_t)(sbp->sb_agcount / 2);
 
+	if (cfg->logagno == 0)
+		adjust_ag0_internal_logblocks(cfg, mp, min_logblocks,
+				&max_logblocks);
+
 	cfg->logstart = XFS_AGB_TO_FSB(mp, cfg->logagno,
 				       libxfs_prealloc_blocks(mp));
 
-- 
Gitee