diff --git a/0001-Coverity-fixes-resources-leaks.patch b/0001-Coverity-fixes-resources-leaks.patch new file mode 100644 index 0000000000000000000000000000000000000000..0d493053c47f74387462b2414c17ae1f8c9b20f3 --- /dev/null +++ b/0001-Coverity-fixes-resources-leaks.patch @@ -0,0 +1,91 @@ +From 8f54ce5b7eb0ca982803e270082e33f50897b9a6 Mon Sep 17 00:00:00 2001 +From: Nigel Croxon +Date: Mon, 4 Nov 2024 11:17:46 -0500 +Subject: [PATCH 01/39] Coverity fixes resources leaks + +Handle variable going out of scope leaks the handle. + +Signed-off-by: Nigel Croxon +--- + Assemble.c | 3 ++- + Incremental.c | 2 +- + bitmap.c | 7 +++++-- + 3 files changed, 8 insertions(+), 4 deletions(-) + +diff --git a/Assemble.c b/Assemble.c +index 37a530ee..f8099cd3 100644 +--- a/Assemble.c ++++ b/Assemble.c +@@ -753,6 +753,7 @@ static int load_devices(struct devs *devices, char *devmap, + tst->ss->free_super(tst); + free(tst); + *stp = st; ++ free(best); + return -1; + } + close(dfd); +@@ -834,7 +835,6 @@ static int load_devices(struct devs *devices, char *devmap, + inargv ? "the list" : + "the\n DEVICE list in mdadm.conf" + ); +- free(best); + *stp = st; + goto error; + } +@@ -857,6 +857,7 @@ error: + close(mdfd); + free(devices); + free(devmap); ++ free(best); + return -1; + + } +diff --git a/Incremental.c b/Incremental.c +index aa5db3bf..9b455a12 100644 +--- a/Incremental.c ++++ b/Incremental.c +@@ -282,7 +282,7 @@ int Incremental(struct mddev_dev *devlist, struct context *c, + * clustering resource agents + */ + if (info.array.state & (1 << MD_SB_CLUSTERED)) +- goto out; ++ goto out_unlock; + + /* Couldn't find an existing array, maybe make a new one */ + mdfd = create_mddev(match ? 
match->devname : NULL, name_to_use, trustworthy, +diff --git a/bitmap.c b/bitmap.c +index c62d18d4..3f8da63d 100644 +--- a/bitmap.c ++++ b/bitmap.c +@@ -260,8 +260,11 @@ int ExamineBitmap(char *filename, int brief, struct supertype *st) + return rv; + + info = bitmap_fd_read(fd, brief); +- if (!info) ++ if (!info) { ++ close_fd(&fd); ++ free(info); + return rv; ++ } + sb = &info->sb; + if (sb->magic != BITMAP_MAGIC) { + pr_err("This is an md array. To view a bitmap you need to examine\n"); +@@ -336,7 +339,6 @@ int ExamineBitmap(char *filename, int brief, struct supertype *st) + printf(" Cluster name : %-64s\n", sb->cluster_name); + for (i = 0; i < (int)sb->nodes; i++) { + st = NULL; +- free(info); + fd = bitmap_file_open(filename, &st, i, fd); + if (fd < 0) { + printf(" Unable to open bitmap file on node: %i\n", i); +@@ -347,6 +349,7 @@ int ExamineBitmap(char *filename, int brief, struct supertype *st) + printf(" Unable to read bitmap on node: %i\n", i); + continue; + } ++ free(sb); + sb = &info->sb; + if (sb->magic != BITMAP_MAGIC) + pr_err("invalid bitmap magic 0x%x, the bitmap file appears to be corrupted\n", sb->magic); +-- +2.41.0 + diff --git a/0013-mdadm-raid6check-add-xmalloc.h-to-raid6check.c.patch b/0013-mdadm-raid6check-add-xmalloc.h-to-raid6check.c.patch new file mode 100644 index 0000000000000000000000000000000000000000..d71864c943f166cf4eb49e211ef1eb37709fdb80 --- /dev/null +++ b/0013-mdadm-raid6check-add-xmalloc.h-to-raid6check.c.patch @@ -0,0 +1,32 @@ +From e0df6c4c984d564e9e40913727e916a6cd8f466e Mon Sep 17 00:00:00 2001 +From: Xiao Ni +Date: Fri, 17 Jan 2025 15:15:40 +0800 +Subject: [PATCH 13/39] mdadm/raid6check: add xmalloc.h to raid6check.c + +It reports building error: +raid6check.c:324:26: error: implicit declaration of function xmalloc + +Add xmalloc.h to raid6check.c file to fix this. 
+ +Signed-off-by: Xiao Ni +Link: https://lore.kernel.org/r/20250117071540.4094-1-xni@redhat.com +Signed-off-by: Song Liu +--- + raid6check.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/raid6check.c b/raid6check.c +index 99477761..95533f7d 100644 +--- a/raid6check.c ++++ b/raid6check.c +@@ -23,6 +23,7 @@ + */ + + #include "mdadm.h" ++#include "xmalloc.h" + #include + #include + +-- +2.41.0 + diff --git a/0014-mdopen-add-sbin-path-to-env-PATH-when-call-system-mo.patch b/0014-mdopen-add-sbin-path-to-env-PATH-when-call-system-mo.patch new file mode 100644 index 0000000000000000000000000000000000000000..6c538ab1e0a5ed44339b69d1c5488e5996693371 --- /dev/null +++ b/0014-mdopen-add-sbin-path-to-env-PATH-when-call-system-mo.patch @@ -0,0 +1,56 @@ +From b1ee932b89a16c881a3336f9fd728d46c1f8c65d Mon Sep 17 00:00:00 2001 +From: Coly Li +Date: Wed, 22 Jan 2025 23:18:59 +0800 +Subject: [PATCH 14/39] mdopen: add sbin path to env PATH when call + system("modprobe md_mod") + +During the boot process if mdadm is called in udev context, sbin paths +like /sbin, /usr/sbin, /usr/local/sbin normally not defined in PATH env +variable, calling system("modprobe md_mod") in create_named_array() may +fail with 'sh: modprobe: command not found' error message. + +We don't want to move modprobe binary into udev private directory, so +setting the PATH env is a more proper method to avoid the above issue. + +This patch sets PATH env variable with "/sbin:/usr/sbin:/usr/local/sbin" +before calling system("modprobe md_mod"). The change only takes effect +within the udev worker context, not seen by global udev environment. 
+ +Signed-off-by: Coly Li +Signed-off-by: Mariusz Tkaczyk +--- + mdopen.c | 18 ++++++++++++++++++ + 1 file changed, 18 insertions(+) + +diff --git a/mdopen.c b/mdopen.c +index 26f0c716..57252b64 100644 +--- a/mdopen.c ++++ b/mdopen.c +@@ -39,6 +39,24 @@ int create_named_array(char *devnm) + + fd = open(new_array_file, O_WRONLY); + if (fd < 0 && errno == ENOENT) { ++ char buf[PATH_MAX] = {0}; ++ char *env_ptr; ++ ++ env_ptr = getenv("PATH"); ++ /* ++ * When called by udev worker context, path of modprobe ++ * might not be in env PATH. Set sbin paths into PATH ++ * env to avoid potential failure when run modprobe here. ++ */ ++ if (env_ptr) ++ snprintf(buf, PATH_MAX - 1, "%s:%s", env_ptr, ++ "/sbin:/usr/sbin:/usr/local/sbin"); ++ else ++ snprintf(buf, PATH_MAX - 1, "%s", ++ "/sbin:/usr/sbin:/usr/local/sbin"); ++ ++ setenv("PATH", buf, 1); ++ + if (system("modprobe md_mod") == 0) + fd = open(new_array_file, O_WRONLY); + } +-- +2.41.0 + diff --git a/0016-mdadm-fix-grow-with-add-for-linear.patch b/0016-mdadm-fix-grow-with-add-for-linear.patch new file mode 100644 index 0000000000000000000000000000000000000000..e0ed44c586f5b4f9027b7356064f93d4cd9706ff --- /dev/null +++ b/0016-mdadm-fix-grow-with-add-for-linear.patch @@ -0,0 +1,36 @@ +From c09ae8417dc9e11da1d5bf2867c6498050c6ddb9 Mon Sep 17 00:00:00 2001 +From: Yu Kuai +Date: Fri, 27 Dec 2024 14:07:02 +0800 +Subject: [PATCH 16/39] mdadm: fix --grow with --add for linear + +For the case mdadm --grow with --add, the s.btype should not be +initialized yet, hence BitmapUnknown should be checked instead of +BitmapNone. + +Noted that this behaviour should only be supported by md-linear, which is +removed from kernel, however, it turns out md-linear is used widely +in home NAS and we're planning to reintroduce it soon. 
+ +Fixes: 581ba1341017 ("mdadm: remove bitmap file support") +Signed-off-by: Yu Kuai +Signed-off-by: Mariusz Tkaczyk +--- + mdadm.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/mdadm.c b/mdadm.c +index a72058b4..6200cd0e 100644 +--- a/mdadm.c ++++ b/mdadm.c +@@ -1619,7 +1619,7 @@ int main(int argc, char *argv[]) + if (devs_found > 1 && s.raiddisks == 0 && s.level == UnSet) { + /* must be '-a'. */ + if (s.size > 0 || s.chunk || +- s.layout_str || s.btype != BitmapNone) { ++ s.layout_str || s.btype != BitmapUnknown) { + pr_err("--add cannot be used with other geometry changes in --grow mode\n"); + rv = 1; + break; +-- +2.41.0 + diff --git a/0018-super-ddf-Prevent-crash-when-handling-DDF-metadata.patch b/0018-super-ddf-Prevent-crash-when-handling-DDF-metadata.patch new file mode 100644 index 0000000000000000000000000000000000000000..cc27630f0386e86980d6f6f63315b7680d6df02a --- /dev/null +++ b/0018-super-ddf-Prevent-crash-when-handling-DDF-metadata.patch @@ -0,0 +1,61 @@ +From 9e8b3b1492cff63dafb759382c74a479460f49e6 Mon Sep 17 00:00:00 2001 +From: lilinzhe +Date: Mon, 16 Dec 2024 12:00:02 +0800 +Subject: [PATCH 18/39] super-ddf: Prevent crash when handling DDF metadata + +A dummy function is defined because availability of ss->update_super is +not always verified. + +This fix addresses a crash reported when assembling a RAID array using +mdadm with DDF metadata. For more details, see the discussion at: +https://lore.kernel.org/all/ +CALHdMH30LuxR4tz9jP2ykDaDJtZ3P7L3LrZ+9e4Fq=Q6NwSM=Q@mail.gmail.com/ + +The discussion centers on an issue with mdadm where attempting to +assemble a RAID array caused a null pointer dereference. The problem +was traced to a missing update_super() function in super-ddf.c, which +led to a crash in Assemble.c. 
+ +Signed-off-by: lilinzhe +--- + super-ddf.c | 17 +++++++++++++++++ + 1 file changed, 17 insertions(+) + +diff --git a/super-ddf.c b/super-ddf.c +index 6cd099ab..a06ed435 100644 +--- a/super-ddf.c ++++ b/super-ddf.c +@@ -5195,6 +5195,21 @@ static void default_geometry_ddf(struct supertype *st, int *level, int *layout, + *layout = ddf_level_to_layout(*level); + } + ++static int update_super_ddf_dummy(struct supertype *st, struct mdinfo *info, ++ enum update_opt update, ++ char *devname, int verbose, ++ int uuid_set, char *homehost) ++{ ++ /* ++ * A dummy update_super function is required to ensure ++ * reliable handling of DDF metadata in mdadm. ++ * This implementation acts as a placeholder for cases ++ * where ss->update_super is not verified. ++ */ ++ dprintf("update_super is not implemented in DDF\n"); ++ return 0; ++} ++ + struct superswitch super_ddf = { + .examine_super = examine_super_ddf, + .brief_examine_super = brief_examine_super_ddf, +@@ -5213,6 +5228,8 @@ struct superswitch super_ddf = { + .uuid_from_super= uuid_from_super_ddf, + .getinfo_super = getinfo_super_ddf, + ++ .update_super = update_super_ddf_dummy, ++ + .avail_size = avail_size_ddf, + + .compare_super = compare_super_ddf, +-- +2.41.0 + diff --git a/0024-mdmon-imsm-fix-metadata-corruption-when-managing-new.patch b/0024-mdmon-imsm-fix-metadata-corruption-when-managing-new.patch new file mode 100644 index 0000000000000000000000000000000000000000..58fa3ceeb3debbf80f77dee2b155b33717bb09ef --- /dev/null +++ b/0024-mdmon-imsm-fix-metadata-corruption-when-managing-new.patch @@ -0,0 +1,122 @@ +From 7d29b3823c18a24d6efbb502f08638788f97e04b Mon Sep 17 00:00:00 2001 +From: Junxiao Bi +Date: Tue, 18 Feb 2025 10:48:31 -0800 +Subject: [PATCH 24/39] mdmon: imsm: fix metadata corruption when managing new + array + +When manager thread detects new array, it will invoke manage_new(). +For imsm array, it will further invoke imsm_open_new(). 
Since +commit bbab0940fa75("imsm: write bad block log on metadata sync"), +it preallocates bad block log when opening the array, that requires +increasing the mpb buffer size. +For that, imsm_open_new() invokes function imsm_update_metadata_locally(), +which first uses imsm_prepare_update() to allocate a larger mpb buffer +and store it at "mpb->next_buf", and then invoke imsm_process_update() +to copy the content from current mpb buffer "mpb->buf" to "mpb->next_buf", +and then free the current mpb buffer and set the new buffer as current. + +There is a small race window, when monitor thread is syncing metadata, +it gets current buffer pointer in imsm_sync_metadata()->write_super_imsm(), +but before flushing the buffer to disk, manager thread does above switching +buffer which frees current buffer, then monitor thread will run into +use-after-free issue and could cause on-disk metadata corruption. +If system keeps running, further metadata update could fix the corruption, +because after switching buffer, the new buffer will contain good metadata, +but if panic/power cycle happens while disk metadata is corrupted, +the system will run into bootup failure if array is used as root, +otherwise the array can not be assembled after boot if not used as root. + +This issue will not happen for imsm array with only one member array, +because the member array has not been opened yet, monitor thread will not +do any metadata updates. +This can happen for imsm array with at least two member arrays, in the +following two scenarios: +1. Restarting mdmon process with at least two member array +This will happen during system boot up or user restart mdmon after mdadm +upgrade +2. Adding new member array to an existing imsm array with at least one member +array. + +To fix this, delay the switching buffer operation to monitor thread. 
+ +Fixes: bbab0940fa75 ("imsm: write bad block log on metadata sync") +Signed-off-by: Junxiao Bi +--- + managemon.c | 10 ++++++++-- + super-intel.c | 14 +++++++++++--- + 2 files changed, 19 insertions(+), 5 deletions(-) + +diff --git a/managemon.c b/managemon.c +index d7981328..74b64bfc 100644 +--- a/managemon.c ++++ b/managemon.c +@@ -721,11 +721,12 @@ static void manage_new(struct mdstat_ent *mdstat, + * the monitor. + */ + ++ struct metadata_update *update = NULL; + struct active_array *new = NULL; + struct mdinfo *mdi = NULL, *di; +- int i, inst; +- int failed = 0; + char buf[SYSFS_MAX_BUF_SIZE]; ++ int failed = 0; ++ int i, inst; + + /* check if array is ready to be monitored */ + if (!mdstat->active || !mdstat->level) +@@ -824,9 +825,14 @@ static void manage_new(struct mdstat_ent *mdstat, + /* if everything checks out tell the metadata handler we want to + * manage this instance + */ ++ container->update_tail = &update; + if (!aa_ready(new) || container->ss->open_new(container, new, inst) < 0) { ++ container->update_tail = NULL; + goto error; + } else { ++ if (update) ++ queue_metadata_update(update); ++ container->update_tail = NULL; + replace_array(container, victim, new); + if (failed) { + new->check_degraded = 1; +diff --git a/super-intel.c b/super-intel.c +index cab84198..4988eef1 100644 +--- a/super-intel.c ++++ b/super-intel.c +@@ -8467,12 +8467,15 @@ static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev, + return failed; + } + ++static int imsm_prepare_update(struct supertype *st, ++ struct metadata_update *update); + static int imsm_open_new(struct supertype *c, struct active_array *a, + int inst) + { + struct intel_super *super = c->sb; + struct imsm_super *mpb = super->anchor; +- struct imsm_update_prealloc_bb_mem u; ++ struct imsm_update_prealloc_bb_mem *u; ++ struct metadata_update mu; + + if (inst >= mpb->num_raid_devs) { + pr_err("subarry index %d, out of range\n", inst); +@@ -8482,8 +8485,13 @@ static int 
imsm_open_new(struct supertype *c, struct active_array *a, + dprintf("imsm: open_new %d\n", inst); + a->info.container_member = inst; + +- u.type = update_prealloc_badblocks_mem; +- imsm_update_metadata_locally(c, &u, sizeof(u)); ++ u = xmalloc(sizeof(*u)); ++ u->type = update_prealloc_badblocks_mem; ++ mu.len = sizeof(*u); ++ mu.buf = (char *)u; ++ imsm_prepare_update(c, &mu); ++ if (c->update_tail) ++ append_metadata_update(c, u, sizeof(*u)); + + return 0; + } +-- +2.41.0 + diff --git a/0025-Regression-fix-156.patch b/0025-Regression-fix-156.patch new file mode 100644 index 0000000000000000000000000000000000000000..9c6dfe0d4a1a5a4b19f9542b7cb6d5bd43466cbc --- /dev/null +++ b/0025-Regression-fix-156.patch @@ -0,0 +1,217 @@ +From c2fbf66ba0243f499f78ed43fa1207a9bd9361b5 Mon Sep 17 00:00:00 2001 +From: XiaoNi87 +Date: Tue, 18 Mar 2025 08:18:04 +0800 +Subject: [PATCH 25/39] Regression fix (#156) + +Signed-off-by: Xiao Ni +--- + .github/tools/run_mdadm_tests.sh | 2 +- + test | 22 +++++++++++++++++++--- + tests/05r6tor0.broken | 15 +++++++++++++++ + tests/07revert-inplace.broken | 8 ++++++++ + tests/10ddf-create.broken | 5 ----- + tests/10ddf-fail-two-spares.broken | 5 ----- + tests/20raid5journal.broken | 17 +++++++++++++++++ + tests/env-ddf-template | 3 +-- + tests/skiptests | 9 +++++++++ + util.c | 2 +- + 10 files changed, 71 insertions(+), 17 deletions(-) + create mode 100644 tests/05r6tor0.broken + create mode 100644 tests/07revert-inplace.broken + delete mode 100644 tests/10ddf-create.broken + delete mode 100644 tests/10ddf-fail-two-spares.broken + create mode 100644 tests/20raid5journal.broken + create mode 100644 tests/skiptests + +diff --git a/.github/tools/run_mdadm_tests.sh b/.github/tools/run_mdadm_tests.sh +index 456874b5..22d89a8c 100755 +--- a/.github/tools/run_mdadm_tests.sh ++++ b/.github/tools/run_mdadm_tests.sh +@@ -11,7 +11,7 @@ sudo ./test setup + + #sudo ./test --tests=00createnames + +-sudo ./test --skip-broken --no-error --disable-integrity 
--disable-multipath --disable-linear --keep-going ++sudo ./test --skip-broken --no-error --disable-integrity --disable-multipath --disable-linear --keep-going --skip-bigcase + + ret=$? + sudo ./test cleanup +diff --git a/test b/test +index 88e44f18..7fa68177 100755 +--- a/test ++++ b/test +@@ -26,6 +26,10 @@ savelogs=0 + exitonerror=1 + ctrl_c_error=0 + skipbroken=0 ++skipbigcase=0 ++skipfile="skiptests" ++skipcheckfile=$testdir/$skipfile ++checkscript="" + loop=1 + prefix='[0-9][0-9]' + +@@ -192,6 +196,7 @@ do_help() { + --loop=N Run tests N times (0 to run forever) + --skip-broken Skip tests that are known to be broken + --skip-always-broken Skip tests that are known to always fail ++ --skip-bigcase Skip tests that need time than 200 seconds + --dev=loop|lvm|ram|disk Use loop devices (default), LVM, RAM or disk + --disks= Provide a bunch of physical devices for test + --volgroup=name LVM volume group for LVM test +@@ -295,6 +300,9 @@ parse_args() { + --skip-always-broken ) + skipbroken=always + ;; ++ --skip-bigcase ) ++ skipbigcase=all ++ ;; + --disable-multipath ) + unset MULTIPATH + ;; +@@ -369,9 +377,17 @@ main() { + else + for script in $testdir/$prefix $testdir/$prefix*[^~] + do +- case $script in *.broken) ;; +- *) +- do_test $script ++ checkscript="${script##*/}" ++ case $script in ++ *.broken) ++ ;; ++ *) ++ if grep -q "$checkscript" "$skipcheckfile"; then ++ if [ "$skipbigcase" == "all" ]; then ++ continue ++ fi ++ fi ++ do_test $script + esac + done + fi +diff --git a/tests/05r6tor0.broken b/tests/05r6tor0.broken +new file mode 100644 +index 00000000..930a0941 +--- /dev/null ++++ b/tests/05r6tor0.broken +@@ -0,0 +1,15 @@ ++Sometimes ++ +++++ pgrep -f 'mdadm --grow --continue' ++++ [[ '' != '' ]] ++++ break ++++ echo 100 ++++ echo 500 ++++ sleep 2 ++++ check raid5 ++++ case $1 in ++++ grep -sq 'active raid5 ' /proc/mdstat ++++ die 'active raid5 not found' ++++ echo -e '\n\tERROR: active raid5 not found \n' ++ ++ ERROR: active raid5 not found +diff --git 
a/tests/07revert-inplace.broken b/tests/07revert-inplace.broken +new file mode 100644 +index 00000000..73d98a04 +--- /dev/null ++++ b/tests/07revert-inplace.broken +@@ -0,0 +1,8 @@ ++always fails ++ ++Fails with errors: ++ ++ /usr/sbin/mdadm -A /dev/md0 --update=revert-reshape /dev/loop0 /dev/loop1 /dev/loop2 /dev/loop3 /dev/loop4 --backup-file=/tmp/md-backup ++++ rv=1 ++++ case $* in ++++ cat /var/tmp/stderr ++mdadm: failed to RUN_ARRAY /dev/md0: Invalid argument +diff --git a/tests/10ddf-create.broken b/tests/10ddf-create.broken +deleted file mode 100644 +index 0f7d25e5..00000000 +--- a/tests/10ddf-create.broken ++++ /dev/null +@@ -1,5 +0,0 @@ +-Fails due to segmentation fault at assemble. +- +-Too much effort to diagnose this now, marking as broken to make CI clear. +- ++ /usr/sbin/mdadm -A /dev/md/ddf0 /dev/loop8 /dev/loop9 /dev/loop10 /dev/loop11 /dev/loop12 +- ./test: line 76: 101955 Segmentation fault (core dumped) $mdadm "$@" 2> $targetdir/stderr +diff --git a/tests/10ddf-fail-two-spares.broken b/tests/10ddf-fail-two-spares.broken +deleted file mode 100644 +index eeea56d9..00000000 +--- a/tests/10ddf-fail-two-spares.broken ++++ /dev/null +@@ -1,5 +0,0 @@ +-fails infrequently +- +-Fails roughly 1 in 3 with error: +- +- ERROR: /dev/md/vol1 should be optimal in meta data +diff --git a/tests/20raid5journal.broken b/tests/20raid5journal.broken +new file mode 100644 +index 00000000..c7b214af +--- /dev/null ++++ b/tests/20raid5journal.broken +@@ -0,0 +1,17 @@ ++always fail ++ ++++ /usr/sbin/mdadm -I /dev/loop4 ++++ rv=0 ++++ case $* in ++++ cat /var/tmp/stderr ++mdadm: /dev/loop4 attached to /dev/md/0_0, which has been started. 
++++ return 0 ++++ check raid5 ++++ case $1 in ++++ grep -sq 'active raid5 ' /proc/mdstat ++++ die 'active raid5 not found' ++++ echo -e '\n\tERROR: active raid5 not found \n' ++ ++ ERROR: active raid5 not found ++ ++++ save_log fail +diff --git a/tests/env-ddf-template b/tests/env-ddf-template +index 4f4ad0f3..ebc0ebf3 100644 +--- a/tests/env-ddf-template ++++ b/tests/env-ddf-template +@@ -3,8 +3,7 @@ sha1_sum() { + } + + get_rootdev() { +- local part=$(grep ' / ' /proc/mounts | awk '{print $1}') +- local bd=/dev/$(lsblk -no PKNAME $part) ++ local bd=$(grep ' / ' /proc/mounts | awk '{print $1}') + [ -b $bd ] || exit 1 + echo $bd + } +diff --git a/tests/skiptests b/tests/skiptests +new file mode 100644 +index 00000000..fd0893f1 +--- /dev/null ++++ b/tests/skiptests +@@ -0,0 +1,9 @@ ++casename:seconds ++01raid6integ:1732 ++01replace:396 ++07layouts:836 ++11spare-migration:1140 ++12imsm-r0_2d-grow-r0_5d:218 ++13imsm-r0_r0_2d-grow-r0_r0_4d:218 ++13imsm-r0_r0_2d-grow-r0_r0_5d:246 ++19raid6check:268 +diff --git a/util.c b/util.c +index 8c45f0e1..9fe2d227 100644 +--- a/util.c ++++ b/util.c +@@ -2310,7 +2310,7 @@ mdadm_status_t continue_via_systemd(char *devnm, char *service_name, char *prefi + dprintf("Start %s service\n", service_name); + /* Simply return that service cannot be started */ + if (check_env("MDADM_NO_SYSTEMCTL")) +- return MDADM_STATUS_SUCCESS; ++ return MDADM_STATUS_ERROR; + + /* Fork in attempt to start services */ + switch (fork()) { +-- +2.41.0 + diff --git a/0026-super1-Clear-extra-flags-when-initializing-metadata.patch b/0026-super1-Clear-extra-flags-when-initializing-metadata.patch new file mode 100644 index 0000000000000000000000000000000000000000..6d04f57a95e534718db27ede3e5ded1446a8b2a7 --- /dev/null +++ b/0026-super1-Clear-extra-flags-when-initializing-metadata.patch @@ -0,0 +1,72 @@ +From 4e2e208c8d3e9ba0fae88136d7c4cd0292af73b0 Mon Sep 17 00:00:00 2001 +From: Wu Guanghao +Date: Tue, 11 Mar 2025 03:11:55 +0000 +Subject: [PATCH 26/39] super1: 
Clear extra flags when initializing metadata + +When adding a disk to a RAID1 array, the metadata is read from the +existing member disks for sync. However, only the bad_blocks flag is +copied, the bad_blocks records are not copied, so the bad_blocks +records are all zeros. The kernel function super_1_load() detects +bad_blocks flag and reads the bad_blocks record, then sets the bad +block using badblocks_set(). + +After the kernel commit 1726c7746783 (badblocks: improve badblocks_set() +for multiple ranges handling) if the length of a bad_blocks record is 0, +it will return a failure. Therefore the device addition will fail. + +So when adding a new disk, some flags cannot be synced and need to be cleared. + +Signed-off-by: Wu Guanghao +--- + super1.c | 3 +++ + tests/05r1-add-badblocks | 24 ++++++++++++++++++++++++ + 2 files changed, 27 insertions(+) + create mode 100644 tests/05r1-add-badblocks + +diff --git a/super1.c b/super1.c +index fe3c4c64..c828b682 100644 +--- a/super1.c ++++ b/super1.c +@@ -1971,6 +1971,9 @@ static int write_init_super1(struct supertype *st) + long bm_offset; + bool raid0_need_layout = false; + ++ /* Clear extra flags */ ++ sb->feature_map &= ~__cpu_to_le32(MD_FEATURE_BAD_BLOCKS | MD_FEATURE_REPLACEMENT); ++ + /* Since linux kernel v5.4, raid0 always has a layout */ + if (has_raid0_layout(sb) && get_linux_version() >= 5004000) + raid0_need_layout = true; +diff --git a/tests/05r1-add-badblocks b/tests/05r1-add-badblocks +new file mode 100644 +index 00000000..6192327a +--- /dev/null ++++ b/tests/05r1-add-badblocks +@@ -0,0 +1,24 @@ ++# ++# create a raid1 with a drive and set badblocks for the drive. ++# add a new drive does not cause an error. 
++# ++ ++# create raid1 ++mdadm -CR $md0 -l1 -n2 -e1.0 $dev1 missing ++testdev $md0 1 $mdsize1a 64 ++sleep 3 ++ ++# set badblocks for the drive ++dev1_name=$(basename $dev1) ++echo "100 100" > /sys/block/md0/md/dev-$dev1_name/bad_blocks ++echo "write_error" > /sys/block/md0/md/dev-$dev1_name/state ++ ++# write badblocks to metadata ++dd if=/dev/zero of=$md0 bs=512 count=200 oflag=direct ++ ++# re-add and recovery ++mdadm $md0 -a $dev2 ++check recovery ++ ++mdadm -S $md0 ++ +-- +2.41.0 + diff --git a/0027-imsm-Fix-RAID0-to-RAID10-migration.patch b/0027-imsm-Fix-RAID0-to-RAID10-migration.patch new file mode 100644 index 0000000000000000000000000000000000000000..a2991bfafdc1778bede5ac1d7a3995f2a159ea60 --- /dev/null +++ b/0027-imsm-Fix-RAID0-to-RAID10-migration.patch @@ -0,0 +1,74 @@ +From 127e38b59cbdf717d1569bcdc75b8d823d8485f3 Mon Sep 17 00:00:00 2001 +From: Blazej Kucman +Date: Mon, 31 Mar 2025 12:46:52 +0200 +Subject: [PATCH 27/39] imsm: Fix RAID0 to RAID10 migration + +Support for RAID10 with +4 disks in IMSM introduced an inconsistency +between the VROC UEFI driver and Linux IMSM. VROC UEFI does not +support RAID10 with +4 disks, therefore appropriate protections were +added to the mdadm IMSM code that results in skipping processing of +such RAID in the UEFI phase. Unfortunately the case of migration +RAID0 2 disks to RAID10 4 disks was omitted, this case requires +maintaining compatibility with the VROC UEFI driver because it is +supported. + +For RAID10 +4 disk the MPB_ATTRIB_RAID10_EXT attribute is set in the +metadata, thanks to which the UEFI driver does not process such RAID. +In the series adding support, a new metadata raid level value +IMSM_T_RAID10 was also introduced. It is not recognized by VROC UEFI. + +The issue is caused by the fact that in the case of the mentioned +migration, IMSM_T_RAID10 is entered into the metadata but attribute +MPB_ATTRIB_RAID10_EXT is not entered, which causes an attempt to +process such RAID in the UEFI phase. 
This situation results in +the platform hang during booting in UEFI phase, this also results in +data loss after failed and interrupted RAID processing in VROC UEFI. + +The above situation is result of the update_imsm_raid_level() +function, for the mentioned migration function is executed on a map +with a not yet updated number of disks. + +The fix is to explicitly handle migration in the function mentioned +above to maintain compatibility with VROC UEFI driver. + +Steps to reproduce: +mdadm -C /dev/md/imsm0 -e imsm -n 2 /dev/nvme[1,2]n1 -R +mdadm -C /dev/md/vol -l 0 -n 2 /dev/nvme[1,2]n1 --assume-clean -R +mdadm -a /dev/md127 /dev/nvme3n1 +mdadm -a /dev/md127 /dev/nvme4n1 +mdadm -G /dev/md126 -l 10 +reboot + +Fixes: 27550b13297a ("imsm: add support for literal RAID 10") +Signed-off-by: Blazej Kucman +--- + super-intel.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/super-intel.c b/super-intel.c +index 4988eef1..b7b030a2 100644 +--- a/super-intel.c ++++ b/super-intel.c +@@ -1327,6 +1327,19 @@ static void update_imsm_raid_level(struct imsm_map *map, int new_level) + return; + } + ++ /* ++ * RAID0 to RAID10 migration. ++ * Due to the compatibility with VROC UEFI must be maintained, this case must be handled ++ * separately, because the map does not have an updated number of disks. ++ */ ++ if (map->raid_level == IMSM_T_RAID0) { ++ if (map->num_members == 2) ++ map->raid_level = IMSM_T_RAID1; ++ else ++ map->raid_level = IMSM_T_RAID10; ++ return; ++ } ++ + if (map->num_members == 4) { + if (map->raid_level == IMSM_T_RAID10 || map->raid_level == IMSM_T_RAID1) + return; +-- +2.41.0 + diff --git a/md-auto-readd.rule b/md-auto-readd.rule new file mode 100644 index 0000000000000000000000000000000000000000..5ce29fb30556ac7969056eb15f2ca37ef835befd --- /dev/null +++ b/md-auto-readd.rule @@ -0,0 +1,27 @@ +# +# Enable/Disable - default is Disabled +# to disable this rule, GOTO="md_end" should be the first active command. 
+# to enable this rule, Comment out GOTO="md_end". +GOTO="md_end" + +# Required: MD arrays must have a bitmap for transient devices to +# be added back in the array. +# mdadm -CR /dev/md0 -l1 -n2 /dev/sd[ab] --bitmap=internal + +# Don't process any events if anaconda is running as anaconda brings up +# raid devices manually +ENV{ANACONDA}=="?*", GOTO="md_end" + +# Also don't process disks that are slated to be a multipath device +ENV{DM_MULTIPATH_DEVICE_PATH}=="1", GOTO="md_end" + +# We process add events on block devices (since they are ready as soon as +# they are added to the system) + +ACTION!="add", GOTO="md_end" +ENV{ID_FS_TYPE}!="linux_raid_member", GOTO="md_end" +SUBSYSTEM=="block", RUN{program}+="/usr/sbin/md-auto-readd.sh $devnode" + +# +# Land here to exit cleanly +LABEL="md_end" diff --git a/md-auto-readd.sh b/md-auto-readd.sh new file mode 100644 index 0000000000000000000000000000000000000000..f15c4827f06d858a78649cb2c9aded5c0a9b6b92 --- /dev/null +++ b/md-auto-readd.sh @@ -0,0 +1,17 @@ +#!/usr/bin/bash +MDADM=/sbin/mdadm +DEVNAME=$1 + +export $(${MDADM} --examine --export ${DEVNAME}) +if [ -z "${MD_UUID}" ]; then + exit 1 +fi + +UUID_LINK=$(readlink /dev/disk/by-id/md-uuid-${MD_UUID}) +MD_DEVNAME=${UUID_LINK##*/} +export $(${MDADM} --detail --export /dev/${MD_DEVNAME}) +if [ -z "${MD_METADATA}" ] ; then + exit 1 +fi + +${MDADM} --manage /dev/${MD_DEVNAME} --re-add ${DEVNAME} --verbose diff --git a/mdadm-4.3.tar.xz b/mdadm-4.3.tar.xz deleted file mode 100644 index de58ca08e0a212f03c304bb9522cb7901e3a97ab..0000000000000000000000000000000000000000 Binary files a/mdadm-4.3.tar.xz and /dev/null differ diff --git a/mdadm-4.4.tar.gz b/mdadm-4.4.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a03cf152c4edddf3e0aa6f9124d2a26d3adf2eb0 Binary files /dev/null and b/mdadm-4.4.tar.gz differ diff --git a/mdadm-check-posix-name-before-setting-name-and-devna.patch b/mdadm-check-posix-name-before-setting-name-and-devna.patch new file mode 
100644 index 0000000000000000000000000000000000000000..9d1678a43a02dbacf6c5b68bb1e5cbf1ad84a262 --- /dev/null +++ b/mdadm-check-posix-name-before-setting-name-and-devna.patch @@ -0,0 +1,88 @@ +From bcfe5a4220a0d49f703700033f7e5eaafdbf52ff Mon Sep 17 00:00:00 2001 +From: Xiao Ni +Date: Wed, 30 Apr 2025 08:51:49 -0400 +Subject: [PATCH 39/39] mdadm: check posix name before setting name and devname + +It's good to has limitations for name when creating an array. But the +arrays which were created before patch e2eb503 (mdadm: Follow POSIX +Portable Character Set) can't be assembled. So remove the POSIX check +for assemble mode. + +This can be reproduced: +* build mdadm without patch e2eb503 +* mdadm -CR /dev/md/node1:pv1 -l0 -n2 /dev/loop0 /dev/loop1 +* mdadm -Ss +* build with latest mdadm, and try to assemble it. +* mdadm -A /dev/md/node1:pv1 --name node1:pv1 + +Fixes: e2eb503 (mdadm: Follow POSIX Portable Character Set) +Signed-off-by: Xiao Ni +--- + config.c | 8 ++------ + mdadm.c | 12 ++++++++++++ + 2 files changed, 14 insertions(+), 6 deletions(-) + +diff --git a/config.c b/config.c +index 8a8ae5e4..8abdba44 100644 +--- a/config.c ++++ b/config.c +@@ -208,11 +208,6 @@ static mdadm_status_t ident_check_name(const char *name, const char *prop_name, + return MDADM_STATUS_ERROR; + } + +- if (!is_name_posix_compatible(name)) { +- ident_log(prop_name, name, "Not POSIX compatible", cmdline); +- return MDADM_STATUS_ERROR; +- } +- + return MDADM_STATUS_SUCCESS; + } + +@@ -512,7 +507,8 @@ void arrayline(char *line) + + for (w = dl_next(line); w != line; w = dl_next(w)) { + if (w[0] == '/' || strchr(w, '=') == NULL) { +- _ident_set_devname(&mis, w, false); ++ if (is_name_posix_compatible(basename(w))) ++ _ident_set_devname(&mis, w, false); + } else if (strncasecmp(w, "uuid=", 5) == 0) { + if (mis.uuid_set) + pr_err("only specify uuid once, %s ignored.\n", +diff --git a/mdadm.c b/mdadm.c +index 6200cd0e..2147b278 100644 +--- a/mdadm.c ++++ b/mdadm.c +@@ -732,6 +732,11 @@ int 
main(int argc, char *argv[]) + exit(2); + } + ++ if (mode != ASSEMBLE && ++ !is_name_posix_compatible(basename(optarg))) { ++ pr_err("%s Not POSIX compatible\n", basename(optarg)); ++ exit(2); ++ } + if (ident_set_name(&ident, optarg) != MDADM_STATUS_SUCCESS) + exit(2); + +@@ -1284,11 +1289,18 @@ int main(int argc, char *argv[]) + mode == GROW || (mode == ASSEMBLE && ! c.scan)) { + struct stat stb; + int ret; ++ char *bname = basename(devlist->devname); + + if (devs_found < 1) { + pr_err("an md device must be given in this mode\n"); + exit(2); + } ++ ++ if (mode != ASSEMBLE && !is_name_posix_compatible(bname)) { ++ pr_err("%s Not POSIX compatible\n", bname); ++ exit(2); ++ } ++ + if (ident_set_devname(&ident, devlist->devname) != MDADM_STATUS_SUCCESS) + exit(1); + +-- +2.41.0 + diff --git a/mdadm-fix-building-errors.patch b/mdadm-fix-building-errors.patch new file mode 100644 index 0000000000000000000000000000000000000000..d1c97cd7426a5417e0a2e7128d389a04d84abeea --- /dev/null +++ b/mdadm-fix-building-errors.patch @@ -0,0 +1,64 @@ +From 46940fbca6df3ddffa71541e459a277d79584fc0 Mon Sep 17 00:00:00 2001 +From: Xiao Ni +Date: Wed, 30 Apr 2025 06:47:08 -0400 +Subject: [PATCH 38/39] mdadm: fix building errors + +This is a rhel-only patch and this patch will be sent to upstream. 
+ +Signed-off-by: Xiao Ni +--- + super-ddf.c | 9 +++++---- + super-intel.c | 2 +- + 2 files changed, 6 insertions(+), 5 deletions(-) + +diff --git a/super-ddf.c b/super-ddf.c +index 6e7db924..285d3b8b 100644 +--- a/super-ddf.c ++++ b/super-ddf.c +@@ -1606,9 +1606,9 @@ static void examine_vd(int n, struct ddf_super *sb, char *guid) + map_num(ddf_sec_level, vc->srl) ?: "-unknown-"); + } + printf(" Device Size[%d] : %llu\n", n, +- be64_to_cpu(vc->blocks)/2); ++ (unsigned long long)(be64_to_cpu(vc->blocks)/2)); + printf(" Array Size[%d] : %llu\n", n, +- be64_to_cpu(vc->array_blocks)/2); ++ (unsigned long long)(be64_to_cpu(vc->array_blocks)/2)); + } + } + +@@ -1665,7 +1665,7 @@ static void examine_pds(struct ddf_super *sb) + printf(" %3d %08x ", i, + be32_to_cpu(pd->refnum)); + printf("%8lluK ", +- be64_to_cpu(pd->config_size)>>1); ++ (unsigned long long)be64_to_cpu(pd->config_size)>>1); + for (dl = sb->dlist; dl ; dl = dl->next) { + if (be32_eq(dl->disk.refnum, pd->refnum)) { + char *dv = map_dev(dl->major, dl->minor, 0); +@@ -2901,7 +2901,8 @@ static unsigned int find_unused_pde(const struct ddf_super *ddf) + static void _set_config_size(struct phys_disk_entry *pde, const struct dl *dl) + { + __u64 cfs, t; +- cfs = min(dl->size - 32*1024*2ULL, be64_to_cpu(dl->primary_lba)); ++ cfs = min((unsigned long long)dl->size - 32*1024*2ULL, ++ (unsigned long long)be64_to_cpu(dl->primary_lba)); + t = be64_to_cpu(dl->secondary_lba); + if (t != ~(__u64)0) + cfs = min(cfs, t); +diff --git a/super-intel.c b/super-intel.c +index b7b030a2..caa583d8 100644 +--- a/super-intel.c ++++ b/super-intel.c +@@ -2325,7 +2325,7 @@ static void export_examine_super_imsm(struct supertype *st) + printf("MD_LEVEL=container\n"); + printf("MD_UUID=%s\n", nbuf+5); + printf("MD_DEVICES=%u\n", mpb->num_disks); +- printf("MD_CREATION_TIME=%llu\n", __le64_to_cpu(mpb->creation_time)); ++ printf("MD_CREATION_TIME=%llu\n", (unsigned long long)__le64_to_cpu(mpb->creation_time)); + } + + static void 
detail_super_imsm(struct supertype *st, char *homehost, +-- +2.41.0 + diff --git a/mdadm-use-standard-libc-nftw.patch b/mdadm-use-standard-libc-nftw.patch new file mode 100644 index 0000000000000000000000000000000000000000..c92cea84515faf02f9a96e5b8385c9dfcce59cb3 --- /dev/null +++ b/mdadm-use-standard-libc-nftw.patch @@ -0,0 +1,53 @@ +commit e549ac6ab2ce5e7ec182310f8f5f2e41c6ac9233 +Author: Xiao Ni +Date: Wed May 7 18:06:59 2025 +0800 + + mdadm: use standard libc nftw + + commit bd648e3bec3d ("mdadm: Remove klibc and uclibc support") removes + macro HAVE_NFTW/HAVE_FTW and uses libc header ftw.h. But it leaves the + codes in lib.c which let mdadm command call nftw defined in lib.c. It + needs to remove these codes. + + The bug can be reproduced by: + mdadm -CR /dev/md0 --level raid5 --metadata=1.1 --chunk=32 --raid-disks 3 + --size 10000 /dev/loop1 /dev/loop2 /dev/loop3 + mdadm /dev/md0 --grow --chunk=64 + mdadm: /dev/md0: cannot open component -unknown- + + Fixes: bd648e3bec3d ("mdadm: Remove klibc and uclibc support") + Signed-off-by: Xiao Ni + +diff --git a/lib.c b/lib.c +index f36ae03a..eb6cc119 100644 +--- a/lib.c ++++ b/lib.c +@@ -245,28 +245,6 @@ int add_dev(const char *name, const struct stat *stb, int flag, struct FTW *s) + return 0; + } + +-#ifndef HAVE_NFTW +-#ifdef HAVE_FTW +-int add_dev_1(const char *name, const struct stat *stb, int flag) +-{ +- return add_dev(name, stb, flag, NULL); +-} +-int nftw(const char *path, +- int (*han)(const char *name, const struct stat *stb, +- int flag, struct FTW *s), int nopenfd, int flags) +-{ +- return ftw(path, add_dev_1, nopenfd); +-} +-#else +-int nftw(const char *path, +- int (*han)(const char *name, const struct stat *stb, +- int flag, struct FTW *s), int nopenfd, int flags) +-{ +- return 0; +-} +-#endif /* HAVE_FTW */ +-#endif /* HAVE_NFTW */ +- + /* + * Find a block device with the right major/minor number. + * If we find multiple names, choose the shortest. 
diff --git a/mdadm.spec b/mdadm.spec index a3c8c53e43e45bb2c5b0bfd47c5960896b13154c..1cb8ea11608e4aacf5c4a2897240abe3586206e8 100644 --- a/mdadm.spec +++ b/mdadm.spec @@ -1,24 +1,38 @@ -%define anolis_release 1 +%define anolis_release 1 Name: mdadm -Version: 4.3 +Version: 4.4 Release: %{anolis_release}%{?dist} Summary: The mdadm program controls Linux md devices (software RAID arrays) URL: http://www.kernel.org/pub/linux/utils/raid/mdadm/ +Source0: mdadm-raid-check-sysconfig +Source1: md-auto-readd.rule +Source2: mdadm_event.conf +Source3: raid-check +Source4: mdcheck +Source5: https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/snapshot/mdadm-4.4.tar.gz +Source6: raid-check.timer +Source7: md-auto-readd.sh +Source8: mdmonitor.service +Source9: raid-check.service +Source10: mdadm.conf License: GPLv2+ -Source: https://mirrors.edge.kernel.org/pub/linux/utils/raid/mdadm/%{name}-%{version}.tar.xz -Source1: raid-check -Source2: mdadm.rules -Source3: mdadm-raid-check-sysconfig -Source4: mdmonitor.service -Source5: mdadm.conf -Source6: mdadm_event.conf -Source7: raid-check.timer -Source8: raid-check.service # Build without -Werror. 
Patch00: disable-Werror.patch +Patch1: 0024-mdmon-imsm-fix-metadata-corruption-when-managing-new.patch +Patch2: mdadm-fix-building-errors.patch +Patch3: 0027-imsm-Fix-RAID0-to-RAID10-migration.patch +Patch4: mdadm-check-posix-name-before-setting-name-and-devna.patch +Patch5: 0018-super-ddf-Prevent-crash-when-handling-DDF-metadata.patch +Patch6: 0025-Regression-fix-156.patch +Patch7: mdadm-use-standard-libc-nftw.patch +Patch8: 0001-Coverity-fixes-resources-leaks.patch +Patch9: 0026-super1-Clear-extra-flags-when-initializing-metadata.patch +Patch10: 0014-mdopen-add-sbin-path-to-env-PATH-when-call-system-mo.patch +Patch11: 0013-mdadm-raid6check-add-xmalloc.h-to-raid6check.c.patch +Patch12: 0016-mdadm-fix-grow-with-add-for-linear.patch BuildRequires: make BuildRequires: systemd-rpm-macros binutils-devel gcc systemd-devel @@ -90,6 +104,20 @@ install -m644 %{SOURCE6} %{buildroot}/etc/libreport/events.d /etc/libreport/events.d/* %changelog +* Thu Aug 28 2025 wenyuzifang - 4.4-1 +- Update to version 4.4 +- Prevents metadata corruption and ensures safe buffer switching in IMSM arrays. +- Fixes type mismatches and ensures portability for clean compilation and upstream readiness. +- Apply patch to prevent UEFI boot hangs and data loss during RAID0 to RAID10 migration. +- Fix backward compatibility for assembling arrays with non-POSIX names. +- Prevents crashes during RAID assembly with DDF metadata. +- Removes stale code, resolves RAID management errors, ensures standard libc usage. +- Fix resource leaks, improve reliability, and ensure proper error handling in mdadm. +- Apply patch to prevent disk addition failures on newer kernels. +- Enhance PATH to ensure modprobe runs successfully in udev context during boot. +- Fix compilation error, ensures correct build, improves code compliance with modern C standards. +- Fix incorrect bitmap type check to allow --add with --grow in linear RAID arrays.
+ * Tue Mar 18 2025 mgb01105731 - 4.3-1 - Update to 4.3 from 4.2 diff --git a/mdcheck b/mdcheck new file mode 100644 index 0000000000000000000000000000000000000000..700c3e252e72e526807a12673ee54ddc289f3e3d --- /dev/null +++ b/mdcheck @@ -0,0 +1,166 @@ +#!/bin/bash + +# Copyright (C) 2014-2017 Neil Brown +# +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# Author: Neil Brown +# Email: + +# This script should be run periodically to automatically +# perform a 'check' on any md arrays. +# +# It supports a 'time budget' such that any incomplete 'check' +# will be checkpointed when that time has expired. +# A subsequent invocation can allow the 'check' to continue. +# +# Options are: +# --continue Don't start new checks, only continue old ones. +# --duration This is passed to "date --date=$duration" to find out +# when to finish +# +# To support '--continue', arrays are identified by UUID and the 'sync_completed' +# value is stored in /var/lib/mdcheck/$UUID + +# convert a /dev/md name into /sys/.../md equivalent +sysname() { + set `ls -lLd $1` + maj=${5%,} + min=$6 + readlink -f /sys/dev/block/$maj:$min +} + +args=$(getopt -o hcd: -l help,continue,duration: -n mdcheck -- "$@") +rv=$? 
+if [ $rv -ne 0 ]; then exit $rv; fi
+
+# getopt(1) output is already shell-quoted: quote it so it is not re-split or globbed.
+eval set -- "$args"
+
+cont=
+endtime=
+while [ " $1" != " --" ]
+do
+	case $1 in
+	-h | --help )
+		echo >&2 'Usage: mdcheck [--continue] [--duration time-offset]'
+		echo >&2 ' time-offset must be understood by "date --date"'
+		exit 0
+		;;
+	-c | --continue ) cont=yes ;;
+	-d | --duration ) shift; dur=$1
+		# An unparsable offset is fatal: otherwise the check would run with no time budget.
+		endtime=$(date --date "$dur" "+%s") || exit 1
+		;;
+	esac
+	shift
+done
+shift
+
+# We need a temp file occasionally...
+tmp=/var/lib/mdcheck/.md-check-$$
+trap 'rm -f "$tmp"' 0 2 3 15
+
+# firstly, clean out really old state files
+mkdir -p /var/lib/mdcheck
+find /var/lib/mdcheck -name "MD_UUID*" -type f -mtime +180 -exec rm {} \;
+
+# Now look at each md device.
+cnt=0
+for dev in /dev/md?*
+do
+	[ -e "$dev" ] || continue
+	sys=$(sysname "$dev")
+	# Skip arrays that cannot be checked, or that are busy with another sync action.
+	[ -f "$sys/md/sync_action" ] || continue
+	[ "$(cat "$sys/md/sync_action")" = 'idle' ] || continue
+
+	# Capture the MD_UUID=... line of mdadm's export output and source it to set MD_UUID.
+	mdadm --detail --export "$dev" | grep '^MD_UUID=' > "$tmp" || continue
+	source "$tmp"
+	fl="/var/lib/mdcheck/MD_UUID_$MD_UUID"
+	if [ -z "$cont" ]
+	then
+		start=0
+		logger -p daemon.info mdcheck start checking "$dev"
+	elif [ -z "$MD_UUID" ] || [ ! -f "$fl" ]
+	then
+		# Nothing to continue here
+		continue
+	else
+		start=$(cat "$fl")
+		logger -p daemon.info mdcheck continue checking "$dev" from "$start"
+	fi
+
+	# Remember state file, sysfs dir and device name as MD_<n>_{fl,sys,dev}.
+	cnt=$((cnt+1))
+	eval MD_${cnt}_fl=\$fl
+	eval MD_${cnt}_sys=\$sys
+	eval MD_${cnt}_dev=\$dev
+	# Record the start position, then kick off the check from there.
+	echo "$start" > "$fl"
+	echo "$start" > "$sys/md/sync_min"
+	echo check > "$sys/md/sync_action"
+done
+
+# Without a time budget there is nothing to supervise.
+[ -n "$endtime" ] || exit 0
+
+# Until the deadline, checkpoint the progress of every check that is still running.
+while [ "$(date +%s)" -lt "$endtime" ]
+do
+	any=
+	for ((i = 1; i <= cnt; i++))
+	do
+		eval fl=\$MD_${i}_fl
+		eval sys=\$MD_${i}_sys
+		eval dev=\$MD_${i}_dev
+
+		if [ -z "$fl" ]; then continue; fi
+
+		if [ "$(cat "$sys/md/sync_action")" != 'check' ]
+		then
+			logger -p daemon.info mdcheck finished checking "$dev"
+			eval MD_${i}_fl=
+			rm -f "$fl"
+			continue
+		fi
+		# Save the first word of sync_completed as the resume point.
+		read a rest < "$sys/md/sync_completed"
+		echo "$a" > "$fl"
+		any=yes
+	done
+	if [ -z "$any" ]; then exit 0; fi
+	sleep 120
+done
+
+# We've waited, and there are still checks running.
+# Time to stop them.
+for ((i = 1; i <= cnt; i++))
+do
+	eval fl=\$MD_${i}_fl
+	eval sys=\$MD_${i}_sys
+	eval dev=\$MD_${i}_dev
+
+	if [ -z "$fl" ]; then continue; fi
+
+	if [ "$(cat "$sys/md/sync_action")" != 'check' ]
+	then
+		eval MD_${i}_fl=
+		rm -f "$fl"
+		continue
+	fi
+	# Stop the check and save sync_min so a later --continue run can resume from it.
+	echo idle > "$sys/md/sync_action"
+	cat "$sys/md/sync_min" > "$fl"
+	logger -p daemon.info pause checking "$dev" at "$(cat "$fl")"
+done