From 3041e3270ec8043b7efe464501ed44ceaf3c17fd Mon Sep 17 00:00:00 2001 From: wangyueliang Date: Tue, 7 May 2024 09:28:23 +0800 Subject: [PATCH] Switch to virtiofs from virtio-9p by default [upstream] dedb310ccbe3fd548a6a501dd60394d155d29bc5 b91594c7f8aba85da2d6b81122a5ef6d82f94305 be8ddec472d6c2bfa87d9975d2d47b5a17d91150 --- mantle/cmd/kola/qemuexec.go | 16 ++--- mantle/platform/conf/conf.go | 15 +++-- mantle/platform/qemu.go | 126 +++++++++++++++++++++++++++++------ src/cmd-run | 2 +- src/cmdlib.sh | 2 +- src/deps.txt | 2 +- src/supermin-init-prelude.sh | 19 +++--- 7 files changed, 134 insertions(+), 48 deletions(-) diff --git a/mantle/cmd/kola/qemuexec.go b/mantle/cmd/kola/qemuexec.go index 6491202c..30b8b827 100644 --- a/mantle/cmd/kola/qemuexec.go +++ b/mantle/cmd/kola/qemuexec.go @@ -20,7 +20,6 @@ import ( "context" "fmt" "os" - "path/filepath" "strconv" "strings" @@ -90,8 +89,8 @@ func init() { cmdQemuExec.Flags().BoolVarP(&devshellConsole, "devshell-console", "c", false, "Connect directly to serial console in devshell mode") cmdQemuExec.Flags().StringVarP(&ignition, "ignition", "i", "", "Path to Ignition config") cmdQemuExec.Flags().StringVarP(&butane, "butane", "B", "", "Path to Butane config") - cmdQemuExec.Flags().StringArrayVar(&bindro, "bind-ro", nil, "Mount readonly via 9pfs a host directory (use --bind-ro=/path/to/host,/var/mnt/guest") - cmdQemuExec.Flags().StringArrayVar(&bindrw, "bind-rw", nil, "Same as above, but writable") + cmdQemuExec.Flags().StringArrayVar(&bindro, "bind-ro", nil, "Mount $hostpath,$guestpath readonly; for example --bind-ro=/path/on/host,/var/mnt/guest)") + cmdQemuExec.Flags().StringArrayVar(&bindrw, "bind-rw", nil, "Mount $hostpath,$guestpath writable; for example --bind-rw=/path/on/host,/var/mnt/guest)") cmdQemuExec.Flags().BoolVarP(&forceConfigInjection, "inject-ignition", "", false, "Force injecting Ignition config using guestfs") cmdQemuExec.Flags().BoolVar(&propagateInitramfsFailure, "propagate-initramfs-failure", false, "Error out if the system fails in the initramfs") cmdQemuExec.Flags().StringVarP(&consoleFile, "console-to-file", "", "", "Filepath in which to save serial console logs") @@ -210,8 +209,7 @@ func runQemuExec(cmd *cobra.Command, args []string) error { ignitionFragments = append(ignitionFragments, "autologin") cpuCountHost = true usernet = true - // Can't use 9p on RHEL8, need https://virtio-fs.gitlab.io/ instead in the future - if kola.Options.CosaWorkdir != "" && !strings.HasPrefix(filepath.Base(kola.QEMUOptions.DiskImage), "rhcos") && !strings.HasPrefix(filepath.Base(kola.QEMUOptions.DiskImage), "scos") && kola.Options.Distribution != "rhcos" && kola.Options.Distribution != "scos" { + if kola.Options.CosaWorkdir != "" { // Conservatively bind readonly to avoid anything in the guest (stray tests, whatever) // from destroying stuff bindro = append(bindro, fmt.Sprintf("%s,/var/mnt/workdir", kola.Options.CosaWorkdir)) @@ -287,18 +285,18 @@ func runQemuExec(cmd *cobra.Command, args []string) error { if err != nil { return err } - builder.Mount9p(src, dest, true) + builder.MountHost(src, dest, true) ensureConfig() - config.Mount9p(dest, true) + config.MountHost(dest, true) } for _, b := range bindrw { src, dest, err := parseBindOpt(b) if err != nil { return err } - builder.Mount9p(src, dest, false) + builder.MountHost(src, dest, false) ensureConfig() - config.Mount9p(dest, false) + config.MountHost(dest, false) } builder.ForceConfigInjection = forceConfigInjection if len(firstbootkargs) > 0 { diff --git a/mantle/platform/conf/conf.go b/mantle/platform/conf/conf.go index 05c30716..7ac78d94 100644 --- a/mantle/platform/conf/conf.go +++ b/mantle/platform/conf/conf.go @@ -1136,11 +1136,12 @@ resize_terminal() { PROMPT_COMMAND+=(resize_terminal)`, 0644) } -// Mount9p adds an Ignition config to mount an folder with 9p -func (c *Conf) Mount9p(dest string, readonly bool) { - readonlyStr := "" +// MountHost adds an Ignition config to mount an folder +func (c *Conf) MountHost(dest string, readonly bool) { + mountType := "virtiofs" + options := "" if readonly { - readonlyStr = ",ro" + options = "ro" } content := fmt.Sprintf(`[Unit] DefaultDependencies=no @@ -1149,11 +1150,11 @@ Before=basic.target [Mount] What=%s Where=%s -Type=9p -Options=trans=virtio,version=9p2000.L%s,msize=10485760 +Type=%s +Options=%s [Install] WantedBy=multi-user.target -`, dest, dest, readonlyStr) +`, dest, dest, mountType, options) c.AddSystemdUnit(fmt.Sprintf("%s.mount", systemdunit.UnitNameEscape(dest[1:])), content, Enable) } diff --git a/mantle/platform/qemu.go b/mantle/platform/qemu.go index 7ccbaad3..833af78a 100644 --- a/mantle/platform/qemu.go +++ b/mantle/platform/qemu.go @@ -17,7 +17,7 @@ // Why not libvirt? // Two main reasons. First, we really do want to use qemu, and not // something else. We rely on qemu features/APIs and there's a general -// assumption that the qemu process is local (e.g. we expose 9p filesystem +// assumption that the qemu process is local (e.g. we expose 9p/virtiofs filesystem // sharing). Second, libvirt runs as a daemon, but we want the // VMs "lifecycle bound" to their creating process (e.g. kola), // so that e.g. Ctrl-C (SIGINT) kills both reliably. @@ -145,10 +145,12 @@ type bootIso struct { // QemuInstance holds an instantiated VM through its lifecycle. type QemuInstance struct { - qemu exec.Cmd - tempdir string - swtpm exec.Cmd - nbdServers []exec.Cmd + qemu exec.Cmd + architecture string + tempdir string + swtpm exec.Cmd + // Helpers are child processes such as nbd or virtiofsd that should be lifecycle bound to qemu + helpers []exec.Cmd hostForwardedPorts []HostForwardPort journalPipe *os.File @@ -286,12 +288,12 @@ func (inst *QemuInstance) Destroy() { inst.swtpm.Kill() //nolint // Ignore errors inst.swtpm = nil } - for _, nbdServ := range inst.nbdServers { - if nbdServ != nil { - nbdServ.Kill() //nolint // Ignore errors + for _, p := range inst.helpers { + if p != nil { + p.Kill() //nolint // Ignore errors } } - inst.nbdServers = nil + inst.helpers = nil if inst.tempdir != "" { if err := os.RemoveAll(inst.tempdir); err != nil { @@ -420,6 +422,13 @@ func (inst *QemuInstance) RemovePrimaryBlockDevice() (err2 error) { return nil } +// A directory mounted from the host into the guest, via 9p or virtiofs +type HostMount struct { + src string + dest string + readonly bool +} + // QemuBuilder is a configurator that can then create a qemu instance type QemuBuilder struct { // ConfigFile is a path to Ignition configuration @@ -476,9 +485,10 @@ type QemuBuilder struct { finalized bool diskID uint disks []*Disk - fs9pID uint // virtioSerialID is incremented for each device virtioSerialID uint + // hostMounts is an array of directories mounted (via 9p or virtiofs) from the host + hostMounts []HostMount // fds is file descriptors we own to pass to qemu fds []*os.File @@ -701,16 +711,11 @@ func (builder *QemuBuilder) encryptIgnitionConfig() error { return nil } -// Mount9p sets up a mount point from the host to guest. To be replaced -// with https://virtio-fs.gitlab.io/ once it lands everywhere. -func (builder *QemuBuilder) Mount9p(source, destHint string, readonly bool) { - builder.fs9pID++ - readonlyStr := "" - if readonly { - readonlyStr = ",readonly=on" - } - builder.Append("--fsdev", fmt.Sprintf("local,id=fs%d,path=%s,security_model=mapped%s", builder.fs9pID, source, readonlyStr)) - builder.Append("-device", virtio(builder.architecture, "9p", fmt.Sprintf("fsdev=fs%d,mount_tag=%s", builder.fs9pID, destHint))) +// MountHost sets up a mount point from the host to guest. +// Note that virtiofs does not currently support read-only mounts (which is really surprising!). +// We do mount it read-only by default in the guest, however. +func (builder *QemuBuilder) MountHost(source, dest string, readonly bool) { + builder.hostMounts = append(builder.hostMounts, HostMount{src: source, dest: dest, readonly: readonly}) } // supportsFwCfg if the target system supports injecting @@ -1573,6 +1578,30 @@ func (builder *QemuBuilder) VirtioJournal(config *conf.Conf, queryArguments stri return stream, nil } +// createVirtiofsCmd returns a new command instance configured to launch virtiofsd. +func createVirtiofsCmd(directory, socketPath string) exec.Cmd { + args := []string{"--sandbox", "none", "--socket-path", socketPath, "--shared-dir", "."} + // Work around https://gitlab.com/virtio-fs/virtiofsd/-/merge_requests/197 + if os.Getuid() == 0 { + args = append(args, "--modcaps=-mknod:-setfcap") + } + // We don't need seccomp filtering; we trust our workloads. This incidentally + // works around issues like https://gitlab.com/virtio-fs/virtiofsd/-/merge_requests/200. + args = append(args, "--seccomp=none") + cmd := exec.Command("/usr/libexec/virtiofsd", args...) + // This sets things up so that the `.` we passed in the arguments is the target directory + cmd.Dir = directory + // Quiet the daemon by default + cmd.Env = append(cmd.Env, "RUST_LOG=ERROR") + // But we do want to see errors + cmd.Stderr = os.Stderr + // Like other processes, "lifecycle bind" it to us + cmd.SysProcAttr = &syscall.SysProcAttr{ + Pdeathsig: syscall.SIGTERM, + } + return cmd +} + // Exec tries to run a QEMU instance with the given settings. func (builder *QemuBuilder) Exec() (*QemuInstance, error) { builder.finalize() @@ -1689,7 +1718,7 @@ func (builder *QemuBuilder) Exec() (*QemuInstance, error) { if err := cmd.Start(); err != nil { return nil, errors.Wrapf(err, "spawing nbd server") } - inst.nbdServers = append(inst.nbdServers, cmd) + inst.helpers = append(inst.helpers, cmd) } } @@ -1769,6 +1798,61 @@ func (builder *QemuBuilder) Exec() (*QemuInstance, error) { return nil, err } + // Process virtiofs mounts + if len(builder.hostMounts) > 0 { + if err := builder.ensureTempdir(); err != nil { + return nil, err + } + + plog.Debug("creating virtiofs helpers") + + // Spawn off a virtiofsd helper per mounted path + virtiofsHelpers := make(map[string]exec.Cmd) + for i, hostmnt := range builder.hostMounts { + // By far the most common failure to spawn virtiofsd will be a typo'd source directory, + // so let's synchronously check that ourselves here. + if _, err := os.Stat(hostmnt.src); err != nil { + return nil, fmt.Errorf("failed to access virtiofs source directory %s", hostmnt.src) + } + virtiofsChar := fmt.Sprintf("virtiofschar%d", i) + virtiofsdSocket := filepath.Join(builder.tempdir, fmt.Sprintf("virtiofsd-%d.sock", i)) + builder.Append("-chardev", fmt.Sprintf("socket,id=%s,path=%s", virtiofsChar, virtiofsdSocket)) + builder.Append("-device", fmt.Sprintf("vhost-user-fs-pci,queue-size=1024,chardev=%s,tag=%s", virtiofsChar, hostmnt.dest)) + plog.Debugf("creating virtiofs helper for %s", hostmnt.src) + // TODO: Honor hostmnt.readonly somehow here (add an option to virtiofsd) + p := createVirtiofsCmd(hostmnt.src, virtiofsdSocket) + if err := p.Start(); err != nil { + return nil, fmt.Errorf("failed to start virtiofsd") + } + virtiofsHelpers[virtiofsdSocket] = p + } + // Loop waiting for the sockets to appear + err := util.RetryUntilTimeout(10*time.Minute, 1*time.Second, func() error { + found := []string{} + for sockpath := range virtiofsHelpers { + if _, err := os.Stat(sockpath); err == nil { + found = append(found, sockpath) + } + } + for _, sockpath := range found { + helper := virtiofsHelpers[sockpath] + inst.helpers = append(inst.helpers, helper) + delete(virtiofsHelpers, sockpath) + } + if len(virtiofsHelpers) == 0 { + return nil + } + waitingFor := []string{} + for socket := range virtiofsHelpers { + waitingFor = append(waitingFor, socket) + } + return fmt.Errorf("waiting for virtiofsd sockets: %s", strings.Join(waitingFor, " ")) + }) + if err != nil { + return nil, err + } + } + fdnum := 3 // first additional file starts at position 3 for i := range builder.fds { fdset := i + 1 // Start at 1 diff --git a/src/cmd-run b/src/cmd-run index a26b4325..23515160 100755 --- a/src/cmd-run +++ b/src/cmd-run @@ -9,7 +9,7 @@ set -euo pipefail # You can exit via either exiting the login session cleanly # in SSH (ctrl-d/exit in bash), or via `poweroff`. # -# If 9p is available and this is started from a coreos-assembler +# If this is started from a coreos-assembler # working directory, the build directory will be mounted readonly # at /var/mnt/workdir, and the tmp/ directory will be read *write* # at /var/mnt/workdir-tmp. This allows you to easily exchange diff --git a/src/cmdlib.sh b/src/cmdlib.sh index 2a2b70ea..5ee1d1cb 100755 --- a/src/cmdlib.sh +++ b/src/cmdlib.sh @@ -475,7 +475,7 @@ meta_key() { } # runvm generates and runs a minimal VM which we use to "bootstrap" our build -# process. It mounts the workdir via 9p. If you need to add new packages into +# process. It mounts the workdir via virtiofs. If you need to add new packages into # the vm, update `vmdeps.txt`. # If you need to debug it, one trick is to change the `-serial file` below # into `-serial stdio`, drop the <&- and virtio-serial stuff and then e.g. add diff --git a/src/deps.txt b/src/deps.txt index ac0cdc99..50cbafc2 100644 --- a/src/deps.txt +++ b/src/deps.txt @@ -29,7 +29,7 @@ genisoimage make git rpm-build # virt dependencies -libguestfs-tools libguestfs-tools-c /usr/bin/qemu-img qemu-kvm swtpm +libguestfs-tools libguestfs-tools-c virtiofsd /usr/bin/qemu-img qemu-kvm swtpm # And the main arch emulators for cross-arch testing #qemu-system-aarch64-core qemu-system-ppc-core qemu-system-s390x-core qemu-system-x86-core qemu-system-aarch64-core qemu-system-x86-core diff --git a/src/supermin-init-prelude.sh b/src/supermin-init-prelude.sh index 9aaac4d7..100e1e14 100644 --- a/src/supermin-init-prelude.sh +++ b/src/supermin-init-prelude.sh @@ -14,8 +14,6 @@ mount -t tmpfs tmpfs /dev/shm # load selinux policy LANG=C /sbin/load_policy -i -# load kernel module for 9pnet_virtio for 9pfs mount -/sbin/modprobe 9pnet_virtio # need fuse module for rofiles-fuse/bwrap during post scripts run /sbin/modprobe fuse @@ -33,14 +31,19 @@ fi umask 002 # set up workdir -# For 9p mounts set msize to 100MiB # https://github.com/coreos/coreos-assembler/issues/2171 mkdir -p "${workdir:?}" -mount -t 9p -o rw,trans=virtio,version=9p2000.L,msize=10485760 workdir "${workdir}" -if [ -L "${workdir}"/src/config ]; then - mkdir -p "$(readlink "${workdir}"/src/config)" - mount -t 9p -o rw,trans=virtio,version=9p2000.L,msize=10485760 source "${workdir}"/src/config -fi +mount -t virtiofs -o rw workdir "${workdir}" + +# This loop pairs with virtfs setups for qemu in cmdlib.sh. Keep them in sync. +for maybe_symlink in "${workdir}"/{src/config,src/yumrepos}; do + if [ -L "${maybe_symlink}" ]; then + bn=$(basename "${maybe_symlink}") + mkdir -p "$(readlink "${maybe_symlink}")" + mount -t virtiofs -o ro "/cosa/src/${bn}" "${maybe_symlink}" + fi +done + mkdir -p "${workdir}"/cache cachedev=$(blkid -lt LABEL=cosa-cache -o device || true) if [ -n "${cachedev}" ]; then -- Gitee