From c268ddf88102714c76cdccac3888f459d23a728e Mon Sep 17 00:00:00 2001 From: Jie Yang Date: Tue, 1 Mar 2022 16:39:35 +0800 Subject: [PATCH] Update version to 2.1.0-3 Signed-off-by: Jie Yang (cherry picked from commit 76b5920296ad84c4886f39335adc5471d49418d3) --- ...ment-a-safe-offset_of-macro-function.patch | 47 + ...-the-bug-that-parked-event-not-remov.patch | 51 + ...gration-use-device-id-as-snapshot-id.patch | 926 ++++++++++++++++++ ...ests-fix-the-test_standvm_quickstart.patch | 38 + ...t-the-log-of-printing-device-info-du.patch | 34 + 0011-tests-add-stand-kata-testcases.patch | 387 ++++++++ ...e-bug-when-tap-is-abnormally-removed.patch | 50 + ...ate-detailed-usage-for-standard-boot.patch | 376 +++++++ ...-error-access-queue-s-host-virtual-a.patch | 78 ++ ...create-a-new-document-for-using-vfio.patch | 146 +++ stratovirt.spec | 19 +- 11 files changed, 2150 insertions(+), 2 deletions(-) create mode 100644 0006-Implement-a-safe-offset_of-macro-function.patch create mode 100644 0007-loop_context-fix-the-bug-that-parked-event-not-remov.patch create mode 100644 0008-migration-use-device-id-as-snapshot-id.patch create mode 100644 0009-tests-fix-the-test_standvm_quickstart.patch create mode 100644 0010-root_port-correct-the-log-of-printing-device-info-du.patch create mode 100644 0011-tests-add-stand-kata-testcases.patch create mode 100644 0012-net-fix-the-bug-when-tap-is-abnormally-removed.patch create mode 100644 0013-docs-boot-update-detailed-usage-for-standard-boot.patch create mode 100644 0014-virtio-queue-fix-error-access-queue-s-host-virtual-a.patch create mode 100644 0015-vfio-doc-create-a-new-document-for-using-vfio.patch diff --git a/0006-Implement-a-safe-offset_of-macro-function.patch b/0006-Implement-a-safe-offset_of-macro-function.patch new file mode 100644 index 0000000..33e373b --- /dev/null +++ b/0006-Implement-a-safe-offset_of-macro-function.patch @@ -0,0 +1,47 @@ +From 03fe93ad1f5a5d8f3a132ea419e4a509069e2130 Mon Sep 17 00:00:00 2001 +From: ace yan 
+Date: Sun, 20 Feb 2022 14:57:34 +0800 +Subject: [PATCH 01/10] Implement a safe offset_of macro function +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The original offset_of macro implementation creates an unaligned reference to a packed field, +and creating a misaligned reference is undefined behavior. +In higher versions of the rustc compiler, +this produces a compilation warning: this was previously accepted by the compiler +but is being phased out; it will become a hard error in a future release! + +Signed-off-by: Yan Wen +--- + util/src/offsetof.rs | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +diff --git a/util/src/offsetof.rs b/util/src/offsetof.rs +index c2696c8..a6b55d8 100644 +--- a/util/src/offsetof.rs ++++ b/util/src/offsetof.rs +@@ -13,9 +13,17 @@ + /// Macro: Calculate offset of specified field in a type. + #[macro_export] + macro_rules! __offset_of { +- ($type_name:ty, $field:ident) => { +- unsafe { &(*(std::ptr::null::<$type_name>())).$field as *const _ as usize } +- }; ++ ($type_name:ty, $field:ident) => {{ ++ let tmp = core::mem::MaybeUninit::<$type_name>::uninit(); ++ let outer = tmp.as_ptr(); ++ // Safe because the pointer is valid and aligned, just not initialised; `addr_of` ensures that ++ // we don't actually read from `outer` (which would be UB) nor create an intermediate reference. ++ let inner = unsafe { core::ptr::addr_of!((*outer).$field) } as *const u8; ++ // Safe because the two pointers are within the same allocation block. ++ unsafe { ++ inner.offset_from(outer as *const u8) as usize ++ } ++ }}; + } + + /// Macro: Calculate offset of a field in a recursive type. 
+-- +2.25.1 + diff --git a/0007-loop_context-fix-the-bug-that-parked-event-not-remov.patch b/0007-loop_context-fix-the-bug-that-parked-event-not-remov.patch new file mode 100644 index 0000000..b855503 --- /dev/null +++ b/0007-loop_context-fix-the-bug-that-parked-event-not-remov.patch @@ -0,0 +1,51 @@ +From 4f51bd38fbc248e045dc1bcebba54c7bc5e4c66b Mon Sep 17 00:00:00 2001 +From: zhouli57 +Date: Tue, 15 Feb 2022 18:06:29 +0800 +Subject: [PATCH 02/10] loop_context: fix the bug that parked event not remove + from events_map + +Signed-off-by: zhouli57 +--- + util/src/loop_context.rs | 24 ++++++++++++------------ + 1 file changed, 12 insertions(+), 12 deletions(-) + +diff --git a/util/src/loop_context.rs b/util/src/loop_context.rs +index 9ac8ed7..2c9c8c0 100644 +--- a/util/src/loop_context.rs ++++ b/util/src/loop_context.rs +@@ -222,20 +222,20 @@ impl EventLoopContext { + let mut events_map = self.events.write().unwrap(); + match events_map.get_mut(&event.raw_fd) { + Some(notifier) => { +- if let EventStatus::Parked = notifier.status { +- return Ok(()); +- } +- +- if let Err(error) = self.epoll.ctl( +- ControlOperation::Delete, +- notifier.raw_fd, +- EpollEvent::default(), +- ) { +- let error_num = error.raw_os_error().unwrap(); +- if error_num != libc::EBADF && error_num != libc::ENOENT { +- return Err(ErrorKind::BadSyscall(error).into()); ++ if let EventStatus::Alive = notifier.status { ++ // No need to delete fd if status is Parked, it's done in park_event. 
++ if let Err(error) = self.epoll.ctl( ++ ControlOperation::Delete, ++ notifier.raw_fd, ++ EpollEvent::default(), ++ ) { ++ let error_num = error.raw_os_error().unwrap(); ++ if error_num != libc::EBADF && error_num != libc::ENOENT { ++ return Err(ErrorKind::BadSyscall(error).into()); ++ } + } + } ++ + notifier.status = EventStatus::Removed; + + if let Some(parked_fd) = notifier.parked_fd { +-- +2.25.1 + diff --git a/0008-migration-use-device-id-as-snapshot-id.patch b/0008-migration-use-device-id-as-snapshot-id.patch new file mode 100644 index 0000000..9c4da35 --- /dev/null +++ b/0008-migration-use-device-id-as-snapshot-id.patch @@ -0,0 +1,926 @@ +From 030d9763b2fc3b04a45ae764821914e9369e0491 Mon Sep 17 00:00:00 2001 +From: zhouli57 +Date: Wed, 5 Jan 2022 09:48:27 +0800 +Subject: [PATCH 03/10] migration: use device id as snapshot id + +At present, snapshots use the device initialization sequence as the key, +but after the introduction of the device hot-plug mechanism, due to the +dynamic addition of devices, the sequence of snapshot restoration may be +inconsistent with the original sequence, resulting in abnormal device +state restoration. Therefore, a new interface is provided, which can +specify the device id during registration, so as to ensure that the +snapshot is restored to match the original device. +In addition, an unregister interface has been added for device +destruction to clean up related resources. 
+ +Signed-off-by: zhouli57 +--- + Cargo.lock | 1 + + address_space/src/state.rs | 4 +- + .../src/interrupt_controller/aarch64/gicv3.rs | 10 +- + hypervisor/src/kvm/mod.rs | 6 +- + machine/src/lib.rs | 17 +- + machine/src/standard_vm/mod.rs | 44 +++-- + migration/Cargo.toml | 1 + + migration/src/device_state.rs | 7 - + migration/src/lib.rs | 4 +- + migration/src/manager.rs | 181 +++++++++++++----- + migration/src/snapshot.rs | 63 ++++-- + migration_derive/src/struct_parser.rs | 11 +- + pci/src/msix.rs | 10 +- + pci/src/root_port.rs | 9 +- + virtio/src/block.rs | 4 + + virtio/src/net.rs | 4 + + virtio/src/virtio_pci.rs | 20 +- + 17 files changed, 287 insertions(+), 109 deletions(-) + +diff --git a/Cargo.lock b/Cargo.lock +index df5dc88..215f1d5 100644 +--- a/Cargo.lock ++++ b/Cargo.lock +@@ -327,6 +327,7 @@ version = "2.1.0" + dependencies = [ + "error-chain", + "kvm-ioctls", ++ "log", + "migration_derive", + "once_cell", + "serde", +diff --git a/address_space/src/state.rs b/address_space/src/state.rs +index 2347378..eb34e91 100644 +--- a/address_space/src/state.rs ++++ b/address_space/src/state.rs +@@ -17,7 +17,7 @@ use std::sync::Arc; + + use crate::{AddressSpace, FileBackend, GuestAddress, HostMemMapping, Region}; + use migration::errors::{ErrorKind, Result, ResultExt}; +-use migration::{DeviceStateDesc, FieldDesc, MigrationHook, MigrationManager, StateTransfer}; ++use migration::{DeviceStateDesc, FieldDesc, MigrationHook, StateTransfer}; + use util::byte_code::ByteCode; + use util::unix::host_page_size; + +@@ -76,7 +76,7 @@ impl StateTransfer for AddressSpace { + } + + impl MigrationHook for AddressSpace { +- fn pre_save(&self, _id: u64, writer: &mut dyn Write) -> Result<()> { ++ fn pre_save(&self, _id: &str, writer: &mut dyn Write) -> Result<()> { + let ram_state = self.get_state_vec()?; + writer.write_all(&ram_state)?; + let padding_buffer = +diff --git a/devices/src/interrupt_controller/aarch64/gicv3.rs b/devices/src/interrupt_controller/aarch64/gicv3.rs 
+index 21008a6..79ebb27 100644 +--- a/devices/src/interrupt_controller/aarch64/gicv3.rs ++++ b/devices/src/interrupt_controller/aarch64/gicv3.rs +@@ -21,7 +21,7 @@ use super::{ + use crate::interrupt_controller::errors::{ErrorKind, Result, ResultExt}; + use hypervisor::kvm::KVM_FDS; + use machine_manager::machine::{KvmVmState, MachineLifecycle}; +-use migration::MigrationManager; ++use migration::{MigrationManager, MigrationRestoreOrder}; + use util::device_tree::{self, FdtBuilder}; + + // See arch/arm64/include/uapi/asm/kvm.h file from the linux kernel. +@@ -393,10 +393,14 @@ impl GICDevice for GICv3 { + MigrationManager::register_device_instance( + GICv3ItsState::descriptor(), + gicv3.its_dev.as_ref().unwrap().clone(), +- true, ++ MigrationRestoreOrder::Gicv3Its, + ); + } +- MigrationManager::register_device_instance(GICv3State::descriptor(), gicv3.clone(), true); ++ MigrationManager::register_device_instance( ++ GICv3State::descriptor(), ++ gicv3.clone(), ++ MigrationRestoreOrder::Gicv3, ++ ); + + Ok(gicv3) + } +diff --git a/hypervisor/src/kvm/mod.rs b/hypervisor/src/kvm/mod.rs +index a318a65..80012bb 100644 +--- a/hypervisor/src/kvm/mod.rs ++++ b/hypervisor/src/kvm/mod.rs +@@ -27,6 +27,8 @@ use once_cell::sync::Lazy; + use vmm_sys_util::eventfd::EventFd; + + use crate::errors::{Result, ResultExt}; ++#[cfg(target_arch = "x86_64")] ++use migration::{MigrationManager, MigrationRestoreOrder}; + + // See: https://elixir.bootlin.com/linux/v4.19.123/source/include/uapi/asm-generic/kvm.h + pub const KVM_SET_DEVICE_ATTR: u32 = 0x4018_aee1; +@@ -124,10 +126,10 @@ impl KVMFds { + }; + + #[cfg(target_arch = "x86_64")] +- migration::MigrationManager::register_device_instance( ++ MigrationManager::register_device_instance( + state::KvmDeviceState::descriptor(), + Arc::new(state::KvmDevice {}), +- false, ++ MigrationRestoreOrder::Default, + ); + + kvm_fds +diff --git a/machine/src/lib.rs b/machine/src/lib.rs +index 4421deb..7f88b22 100644 +--- a/machine/src/lib.rs ++++ 
b/machine/src/lib.rs +@@ -132,7 +132,7 @@ use machine_manager::config::{ + }; + use machine_manager::event_loop::EventLoop; + use machine_manager::machine::{KvmVmState, MachineInterface}; +-use migration::MigrationManager; ++use migration::{MigrationManager, MigrationRestoreOrder}; + use util::loop_context::{EventNotifier, NotifierCallback, NotifierOperation}; + use util::seccomp::{BpfRule, SeccompOpt, SyscallFilter}; + use vfio::{VfioDevice, VfioPciDevice}; +@@ -243,7 +243,11 @@ pub trait MachineOps { + )); + cpus.push(cpu.clone()); + +- MigrationManager::register_device_instance(cpu::ArchCPU::descriptor(), cpu, false); ++ MigrationManager::register_device_instance( ++ cpu::ArchCPU::descriptor(), ++ cpu, ++ MigrationRestoreOrder::Default, ++ ); + } + + if let Some(boot_config) = boot_cfg { +@@ -486,7 +490,11 @@ pub trait MachineOps { + let device_cfg = parse_blk(vm_config, cfg_args)?; + let device = Arc::new(Mutex::new(Block::new(device_cfg.clone()))); + self.add_virtio_pci_device(&device_cfg.id, &bdf, device.clone(), multi_func)?; +- MigrationManager::register_device_instance_mutex(BlockState::descriptor(), device); ++ MigrationManager::register_device_instance_mutex_with_id( ++ BlockState::descriptor(), ++ device, ++ &device_cfg.id, ++ ); + self.reset_bus(&device_cfg.id)?; + Ok(()) + } +@@ -502,9 +510,10 @@ pub trait MachineOps { + ))) + } else { + let device = Arc::new(Mutex::new(virtio::Net::new(device_cfg.clone()))); +- MigrationManager::register_device_instance_mutex( ++ MigrationManager::register_device_instance_mutex_with_id( + VirtioNetState::descriptor(), + device.clone(), ++ &device_cfg.id, + ); + device + }; +diff --git a/machine/src/standard_vm/mod.rs b/machine/src/standard_vm/mod.rs +index 1fca3bf..c96f89a 100644 +--- a/machine/src/standard_vm/mod.rs ++++ b/machine/src/standard_vm/mod.rs +@@ -80,10 +80,11 @@ use machine_manager::config::{ + }; + use machine_manager::machine::{DeviceInterface, KvmVmState}; + use machine_manager::qmp::{qmp_schema, 
QmpChannel, Response}; ++use migration::MigrationManager; + use pci::hotplug::{handle_plug, handle_unplug_request}; + use pci::PciBus; + use util::byte_code::ByteCode; +-use virtio::{qmp_balloon, qmp_query_balloon, Block, VhostKern, VirtioDevice}; ++use virtio::{qmp_balloon, qmp_query_balloon, Block, BlockState, VhostKern, VirtioNetState}; + + #[cfg(target_arch = "aarch64")] + use aarch64::{LayoutEntryType, MEM_LAYOUT}; +@@ -549,7 +550,7 @@ impl StdMachine { + + let blk = if let Some(conf) = self.get_vm_config().lock().unwrap().drives.get(drive) { + let dev = BlkDevConfig { +- id: conf.id.clone(), ++ id: args.id.clone(), + path_on_host: conf.path_on_host.clone(), + read_only: conf.read_only, + direct: conf.direct, +@@ -558,13 +559,22 @@ impl StdMachine { + iops: conf.iops, + }; + dev.check()?; +- Arc::new(Mutex::new(Block::new(dev))) ++ dev + } else { + bail!("Drive not found"); + }; + +- self.add_virtio_pci_device(&args.id, pci_bdf, blk, multifunction) +- .chain_err(|| "Failed to add virtio pci block device") ++ let blk_id = blk.id.clone(); ++ let blk = Arc::new(Mutex::new(Block::new(blk))); ++ self.add_virtio_pci_device(&args.id, pci_bdf, blk.clone(), multifunction) ++ .chain_err(|| "Failed to add virtio pci block device")?; ++ ++ MigrationManager::register_device_instance_mutex_with_id( ++ BlockState::descriptor(), ++ blk, ++ &blk_id, ++ ); ++ Ok(()) + } + + fn plug_virtio_pci_net( +@@ -581,7 +591,7 @@ impl StdMachine { + + let dev = if let Some(conf) = self.get_vm_config().lock().unwrap().netdevs.get(netdev) { + let dev = NetworkInterfaceConfig { +- id: conf.id.clone(), ++ id: args.id.clone(), + host_dev_name: conf.ifname.clone(), + mac: args.mac.clone(), + tap_fd: conf.tap_fd, +@@ -595,14 +605,22 @@ impl StdMachine { + bail!("Netdev not found"); + }; + +- let net: Arc> = if dev.vhost_type.is_some() { +- Arc::new(Mutex::new(VhostKern::Net::new(&dev, self.get_sys_mem()))) ++ if dev.vhost_type.is_some() { ++ let net = Arc::new(Mutex::new(VhostKern::Net::new(&dev, 
self.get_sys_mem()))); ++ self.add_virtio_pci_device(&args.id, &pci_bdf, net, multifunction) ++ .chain_err(|| "Failed to add virtio net device")?; + } else { +- Arc::new(Mutex::new(virtio::Net::new(dev))) +- }; +- +- self.add_virtio_pci_device(&args.id, &pci_bdf, net, multifunction) +- .chain_err(|| "Failed to add virtio pci net device") ++ let net_id = dev.id.clone(); ++ let net = Arc::new(Mutex::new(virtio::Net::new(dev))); ++ self.add_virtio_pci_device(&args.id, &pci_bdf, net.clone(), multifunction) ++ .chain_err(|| "Failed to add virtio net device")?; ++ MigrationManager::register_device_instance_mutex_with_id( ++ VirtioNetState::descriptor(), ++ net, ++ &net_id, ++ ); ++ } ++ Ok(()) + } + + fn plug_vfio_pci_device( +diff --git a/migration/Cargo.toml b/migration/Cargo.toml +index fc877ad..6991804 100644 +--- a/migration/Cargo.toml ++++ b/migration/Cargo.toml +@@ -11,6 +11,7 @@ kvm-ioctls = "0.6.0" + serde = { version = ">=1.0.114", features = ["derive"] } + serde_json = "1.0.55" + once_cell = "1.9.0" ++log = "0.4.8" + + [dev-dependencies] + migration_derive = { path = "../migration_derive" } +diff --git a/migration/src/device_state.rs b/migration/src/device_state.rs +index 75bf3b9..de9c16c 100644 +--- a/migration/src/device_state.rs ++++ b/migration/src/device_state.rs +@@ -171,13 +171,6 @@ pub mod tests { + use super::{DeviceStateDesc, FieldDesc, StateTransfer, VersionCheck}; + use util::byte_code::ByteCode; + +- struct MigrationManager {} +- impl MigrationManager { +- fn desc_db_len() -> u64 { +- 0 +- } +- } +- + #[derive(Default)] + // A simple device version 1. 
+ pub struct DeviceV1 { +diff --git a/migration/src/lib.rs b/migration/src/lib.rs +index 9751fd3..9ccab5f 100644 +--- a/migration/src/lib.rs ++++ b/migration/src/lib.rs +@@ -19,6 +19,8 @@ extern crate error_chain; + #[cfg(test)] + #[macro_use] + extern crate migration_derive; ++#[macro_use] ++extern crate log; + + mod device_state; + mod header; +@@ -27,7 +29,7 @@ mod snapshot; + mod status; + + pub use device_state::{DeviceStateDesc, FieldDesc, StateTransfer}; +-pub use manager::{MigrationHook, MigrationManager}; ++pub use manager::{MigrationHook, MigrationManager, MigrationRestoreOrder}; + pub use status::MigrationStatus; + + pub mod errors { +diff --git a/migration/src/manager.rs b/migration/src/manager.rs +index f5d52b1..ef903f9 100644 +--- a/migration/src/manager.rs ++++ b/migration/src/manager.rs +@@ -10,8 +10,11 @@ + // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + // See the Mulan PSL v2 for more details. + +-use std::collections::{BTreeMap, HashMap}; ++use std::cmp; ++use std::collections::hash_map::DefaultHasher; ++use std::collections::HashMap; + use std::fs::File; ++use std::hash::{Hash, Hasher}; + use std::io::{Read, Write}; + use std::sync::{Arc, Mutex, RwLock}; + +@@ -23,11 +26,27 @@ use util::byte_code::ByteCode; + + /// Glocal MigrationManager to manage all migration combined interface. + pub(crate) static MIGRATION_MANAGER: Lazy = Lazy::new(|| MigrationManager { +- entry: Arc::new(RwLock::new(BTreeMap::::new())), ++ entry: Arc::new(RwLock::new([ ++ Vec::<(String, MigrationEntry)>::new(), ++ Vec::<(String, MigrationEntry)>::new(), ++ Vec::<(String, MigrationEntry)>::new(), ++ ])), + desc_db: Arc::new(RwLock::new(HashMap::::new())), + status: Arc::new(RwLock::new(MigrationStatus::None)), + }); + ++/// Used to map Device id from String to u64 only. ++/// Because instance_id in InstanceId can't be String for it has no Copy trait. ++/// ++/// # Arguments ++/// ++/// * `dev_id` - The device id. 
++pub fn id_remap(dev_id: &str) -> u64 { ++ let mut hash = DefaultHasher::new(); ++ dev_id.hash(&mut hash); ++ hash.finish() ++} ++ + /// A hook for `Device` to save device state to `Write` object and load device + /// from `[u8]` slice. + /// +@@ -44,7 +63,7 @@ pub trait MigrationHook: StateTransfer { + /// * `id` - This unique id to represent a single device. It can be treated + /// as `object_id` in `InstanceId`. + /// * `writer` - The `Write` trait object to store or receive data. +- fn pre_save(&self, id: u64, writer: &mut dyn Write) -> Result<()> { ++ fn pre_save(&self, id: &str, writer: &mut dyn Write) -> Result<()> { + let state_data = self + .get_state_vec() + .chain_err(|| "Failed to get device state")?; +@@ -52,7 +71,7 @@ pub trait MigrationHook: StateTransfer { + let device_alias = self.get_device_alias(); + let instance_id = InstanceId { + object_type: device_alias, +- object_id: id, ++ object_id: id_remap(&id), + }; + + writer +@@ -131,11 +150,36 @@ pub enum MigrationEntry { + Memory(Arc), + } + ++/// Ensure the recovery sequence of different devices based on priorities. ++/// At present, we need to ensure that the state recovery of the gic device ++/// must be after the cpu, so different priorities are defined. ++#[derive(Debug)] ++pub enum MigrationRestoreOrder { ++ Default = 0, ++ Gicv3 = 1, ++ Gicv3Its = 2, ++ Max = 3, ++} ++ ++impl From for u16 { ++ fn from(order: MigrationRestoreOrder) -> u16 { ++ match order { ++ MigrationRestoreOrder::Default => 0, ++ MigrationRestoreOrder::Gicv3 => 1, ++ MigrationRestoreOrder::Gicv3Its => 2, ++ _ => 3, ++ } ++ } ++} ++ ++/// The entry list size is the same as the MigrationRestoreOrder number ++type MigrationEntryList = [Vec<(String, MigrationEntry)>; 3]; ++ + /// This structure is to manage all resource during migration. + /// It is also the only way to call on `MIGRATION_MANAGER`. + pub struct MigrationManager { + /// The map offers the device_id and combined migratable device entry. 
+- pub(crate) entry: Arc>>, ++ pub(crate) entry: Arc>, + /// The map offers the device type and its device state describe structure. + pub(crate) desc_db: Arc>>, + /// The status of migration work. +@@ -161,28 +205,23 @@ impl MigrationManager { + /// + /// * `device_desc` - The `DeviceStateDesc` of device instance. + /// * `entry` - Device instance with migratable interface. +- /// * `reverse` - Register device in order or in the reverse order. ++ /// * `restore_order` - device restore order. + pub fn register_device_instance( + device_desc: DeviceStateDesc, + device_entry: Arc, +- reverse: bool, ++ restore_order: MigrationRestoreOrder, + ) where + T: MigrationHook + Sync + Send + 'static, + { ++ let name = device_desc.name.clone(); + Self::register_device_desc(device_desc); + + let entry = MigrationEntry::Safe(device_entry); +- let nr_entry = if reverse { +- !0 - Self::entry_db_len() +- } else { +- Self::entry_db_len() +- }; +- +- MIGRATION_MANAGER +- .entry +- .write() +- .unwrap() +- .insert(nr_entry, entry); ++ info!( ++ "Register device instance: id {} order {:?}", ++ &name, &restore_order ++ ); ++ MigrationManager::insert_entry(name, restore_order.into(), entry, true); + } + + /// Register mutex device instance to entry hashmap with instance_id. 
+@@ -197,16 +236,34 @@ impl MigrationManager { + ) where + T: MigrationHook + Sync + Send + 'static, + { ++ let name = device_desc.name.clone(); ++ let order = MigrationRestoreOrder::Default.into(); + Self::register_device_desc(device_desc); + + let entry = MigrationEntry::Mutex(device_entry); +- let nr_entry = Self::entry_db_len(); ++ info!("Register device instance mutex: id {}", &name); ++ MigrationManager::insert_entry(name, order, entry, true); ++ } ++ ++ pub fn register_device_instance_mutex_with_id( ++ device_desc: DeviceStateDesc, ++ device_entry: Arc>, ++ id: &str, ++ ) where ++ T: MigrationHook + Sync + Send + 'static, ++ { ++ let name = device_desc.name.clone() + "/" + id; ++ let order = MigrationRestoreOrder::Default.into(); ++ Self::register_device_desc(device_desc); ++ let entry = MigrationEntry::Mutex(device_entry); ++ info!("Register device instance with id: id {}", &name); ++ MigrationManager::insert_entry(name, order, entry, false); ++ } + +- MIGRATION_MANAGER +- .entry +- .write() +- .unwrap() +- .insert(nr_entry, entry); ++ pub fn unregister_device_instance_mutex_by_id(device_desc: DeviceStateDesc, id: &str) { ++ let name = device_desc.name + "/" + id; ++ info!("Unregister device instance: id {}", &name); ++ MigrationManager::remove_entry(&name); + } + + /// Register memory instance. +@@ -219,23 +276,55 @@ impl MigrationManager { + T: MigrationHook + Sync + Send + 'static, + { + let entry = MigrationEntry::Memory(entry); +- let nr_entry = Self::entry_db_len(); +- +- MIGRATION_MANAGER +- .entry +- .write() +- .unwrap() +- .insert(nr_entry, entry); ++ info!("Register memory instance"); ++ MigrationManager::insert_entry(String::from("MemoryState/Memory"), 0, entry, true); + } + +- /// Get entry_db's length. +- pub fn entry_db_len() -> u64 { +- MIGRATION_MANAGER.entry.read().unwrap().len() as u64 ++ /// Insert entry. If the name is duplicated, you should set gen_instance_id to true to ++ /// generated instance id to ensure that the id is unique. 
++ /// ++ /// # Arguments ++ /// ++ /// * `name` - Entry name. ++ /// * `order` - Restore order. ++ /// * `entry` - Instance with migratable interface. ++ /// * `gen_instance_id` - If auto-generated instance id. ++ fn insert_entry(name: String, order: u16, entry: MigrationEntry, gen_instance_id: bool) { ++ let mut entrys = MIGRATION_MANAGER.entry.write().unwrap(); ++ let mut index = 0; ++ if gen_instance_id { ++ for (key, _) in &entrys[order as usize] { ++ if let Some(pos) = key.rfind(':') { ++ let (tmp_id, num_id) = key.split_at(pos); ++ if tmp_id == name { ++ let num = num_id.strip_prefix(':').unwrap(); ++ index = cmp::max(index, num.parse::().unwrap() + 1); ++ } ++ } ++ } ++ } ++ // ID is format as "{name}:{instance_id}" ++ let id = format!("{}:{}", name, index); ++ debug!("Insert entry: id {}", &id); ++ entrys[order as usize].push((id, entry)); + } + +- /// Get desc_db's length. +- pub fn desc_db_len() -> u64 { +- MIGRATION_MANAGER.desc_db.read().unwrap().len() as u64 ++ /// Remove entry by the unique name. Not support to remove the entry with instance id. ++ /// ++ /// # Arguments ++ /// ++ /// * `name` - Entry name. ++ fn remove_entry(name: &str) { ++ let eid = format!("{}:0", name); ++ let mut entrys = MIGRATION_MANAGER.entry.write().unwrap(); ++ for (i, item) in entrys.iter().enumerate() { ++ let pos = item.iter().position(|(key, _)| key == &eid); ++ if let Some(index) = pos { ++ debug!("Remove entry: eid {}", &eid); ++ entrys[i].remove(index); ++ return; ++ } ++ } + } + + /// Get `Device`'s alias from device type string. +@@ -244,12 +333,7 @@ impl MigrationManager { + /// + /// * `device_type` - The type string of device instance. 
+ pub fn get_desc_alias(device_type: &str) -> Option { +- MIGRATION_MANAGER +- .desc_db +- .read() +- .unwrap() +- .get(device_type) +- .map(|desc| desc.alias) ++ Some(id_remap(device_type)) + } + + /// Return `desc_db` value len(0 restored as `serde_json`) +@@ -340,23 +424,26 @@ mod tests { + let device_v2 = Arc::new(DeviceV2::default()); + let device_v2_mutex = Arc::new(Mutex::new(DeviceV2::default())); + +- MigrationManager::register_device_instance(DeviceV1State::descriptor(), device_v1, false); ++ MigrationManager::register_device_instance( ++ DeviceV1State::descriptor(), ++ device_v1, ++ MigrationRestoreOrder::Default, ++ ); + MigrationManager::register_memory_instance(device_v2); + MigrationManager::register_device_instance_mutex( + DeviceV2State::descriptor(), + device_v2_mutex, + ); + +- assert_eq!(MigrationManager::desc_db_len(), 2); + assert!(MigrationManager::get_desc_alias("DeviceV1State").is_some()); + assert_eq!( + MigrationManager::get_desc_alias("DeviceV1State").unwrap(), +- 0 ++ id_remap("DeviceV1State") + ); + assert!(MigrationManager::get_desc_alias("DeviceV2State").is_some()); + assert_eq!( + MigrationManager::get_desc_alias("DeviceV2State").unwrap(), +- 0 ++ id_remap("DeviceV2State") + ); + } + } +diff --git a/migration/src/snapshot.rs b/migration/src/snapshot.rs +index edea8ce..30ee13c 100644 +--- a/migration/src/snapshot.rs ++++ b/migration/src/snapshot.rs +@@ -23,7 +23,7 @@ use util::unix::host_page_size; + use crate::device_state::{DeviceStateDesc, VersionCheck}; + use crate::errors::{ErrorKind, Result, ResultExt}; + use crate::header::{FileFormat, MigrationHeader}; +-use crate::manager::{InstanceId, MigrationEntry, MigrationManager, MIGRATION_MANAGER}; ++use crate::manager::{id_remap, InstanceId, MigrationEntry, MigrationManager, MIGRATION_MANAGER}; + use crate::status::MigrationStatus; + + /// The length of `MigrationHeader` part occupies bytes in snapshot file. 
+@@ -187,10 +187,13 @@ impl MigrationManager { + /// + /// * `writer` - The `Write` trait object. + fn save_memory(writer: &mut dyn Write) -> Result<()> { +- for (id, entry) in MIGRATION_MANAGER.entry.read().unwrap().iter() { +- if let MigrationEntry::Memory(i) = entry { +- i.pre_save(*id, writer) +- .chain_err(|| "Failed to save vm memory")?; ++ let entry = MIGRATION_MANAGER.entry.read().unwrap(); ++ for item in entry.iter() { ++ for (id, entry) in item.iter() { ++ if let MigrationEntry::Memory(i) = entry { ++ i.pre_save(id, writer) ++ .chain_err(|| "Failed to save vm memory")?; ++ } + } + } + +@@ -205,10 +208,13 @@ impl MigrationManager { + fn load_memory(file: &mut File) -> Result<()> { + let mut state_bytes = [0_u8].repeat((host_page_size() as usize) * 2 - HEADER_LENGTH); + file.read_exact(&mut state_bytes)?; +- for (_, entry) in MIGRATION_MANAGER.entry.read().unwrap().iter() { +- if let MigrationEntry::Memory(i) = entry { +- i.pre_load(&state_bytes, Some(file)) +- .chain_err(|| "Failed to load vm memory")?; ++ let entry = MIGRATION_MANAGER.entry.read().unwrap(); ++ for item in entry.iter() { ++ for (_, entry) in item.iter() { ++ if let MigrationEntry::Memory(i) = entry { ++ i.pre_load(&state_bytes, Some(file)) ++ .chain_err(|| "Failed to load vm memory")?; ++ } + } + } + +@@ -221,11 +227,14 @@ impl MigrationManager { + /// + /// * `writer` - The `Write` trait object. 
+ fn save_device_state(writer: &mut dyn Write) -> Result<()> { +- for (device_id, entry) in MIGRATION_MANAGER.entry.read().unwrap().iter() { +- match entry { +- MigrationEntry::Safe(i) => i.pre_save(*device_id, writer)?, +- MigrationEntry::Mutex(i) => i.lock().unwrap().pre_save(*device_id, writer)?, +- _ => {} ++ let entry = MIGRATION_MANAGER.entry.read().unwrap(); ++ for item in entry.iter() { ++ for (id, entry) in item.iter() { ++ match entry { ++ MigrationEntry::Safe(i) => i.pre_save(id, writer)?, ++ MigrationEntry::Mutex(i) => i.lock().unwrap().pre_save(id, writer)?, ++ _ => {} ++ } + } + } + +@@ -275,10 +284,19 @@ impl MigrationManager { + } + } + +- match device_entry.get(&instance_id.object_id).unwrap() { +- MigrationEntry::Safe(i) => i.pre_load(&state_data, None)?, +- MigrationEntry::Mutex(i) => i.lock().unwrap().pre_load_mut(&state_data, None)?, +- _ => {} ++ for item in device_entry.iter() { ++ for (key, state) in item { ++ if id_remap(key) == instance_id.object_id { ++ info!("Load VM state: key {}", key); ++ match state { ++ MigrationEntry::Safe(i) => i.pre_load(&state_data, None)?, ++ MigrationEntry::Mutex(i) => { ++ i.lock().unwrap().pre_load_mut(&state_data, None)? ++ } ++ _ => {} ++ } ++ } ++ } + } + } + +@@ -288,9 +306,12 @@ impl MigrationManager { + /// Resume recovered device. + /// This function will be called after restore device state. + fn resume() -> Result<()> { +- for (_, entry) in MIGRATION_MANAGER.entry.read().unwrap().iter() { +- if let MigrationEntry::Mutex(i) = entry { +- i.lock().unwrap().resume()? ++ let entry = MIGRATION_MANAGER.entry.read().unwrap(); ++ for item in entry.iter() { ++ for (_, state) in item { ++ if let MigrationEntry::Mutex(i) = state { ++ i.lock().unwrap().resume()? 
++ } + } + } + Ok(()) +diff --git a/migration_derive/src/struct_parser.rs b/migration_derive/src/struct_parser.rs +index bc7d9d2..4e66d7e 100644 +--- a/migration_derive/src/struct_parser.rs ++++ b/migration_derive/src/struct_parser.rs +@@ -26,10 +26,19 @@ pub fn parse_struct( + + let fields = parse_fields(&input.fields, ident); + ++ use std::collections::hash_map::DefaultHasher; ++ use std::hash::{Hash, Hasher}; ++ ++ let id_remap = |s: &str| -> u64 { ++ let mut hash = DefaultHasher::new(); ++ s.hash(&mut hash); ++ hash.finish() ++ }; ++ let alias = id_remap(&name); + quote! { + #struct_ident { + name: #name.to_string(), +- alias: MigrationManager::desc_db_len(), ++ alias: #alias, + size: std::mem::size_of::<#ident>() as u32, + current_version: #current_version, + compat_version: #compat_version, +diff --git a/pci/src/msix.rs b/pci/src/msix.rs +index 83b3d05..71d172e 100644 +--- a/pci/src/msix.rs ++++ b/pci/src/msix.rs +@@ -405,6 +405,7 @@ pub fn init_msix( + vector_nr: u32, + config: &mut PciConfig, + dev_id: Arc, ++ id: &str, + ) -> Result<()> { + if vector_nr > MSIX_TABLE_SIZE_MAX as u32 + 1 { + bail!("Too many msix vectors."); +@@ -439,7 +440,7 @@ pub fn init_msix( + config.msix = Some(msix.clone()); + + #[cfg(not(test))] +- MigrationManager::register_device_instance_mutex(MsixState::descriptor(), msix); ++ MigrationManager::register_device_instance_mutex_with_id(MsixState::descriptor(), msix, id); + + Ok(()) + } +@@ -469,11 +470,12 @@ mod tests { + 0, + MSIX_TABLE_SIZE_MAX as u32 + 2, + &mut pci_config, +- Arc::new(AtomicU16::new(0)) ++ Arc::new(AtomicU16::new(0)), ++ "msix" + ) + .is_err()); + +- init_msix(1, 2, &mut pci_config, Arc::new(AtomicU16::new(0))).unwrap(); ++ init_msix(1, 2, &mut pci_config, Arc::new(AtomicU16::new(0)), "msix").unwrap(); + let msix_cap_start = 64_u8; + assert_eq!(pci_config.last_cap_end, 64 + MSIX_CAP_SIZE as u16); + // Capabilities pointer +@@ -538,7 +540,7 @@ mod tests { + #[test] + fn test_write_config() { + let mut pci_config = 
PciConfig::new(PCI_CONFIG_SPACE_SIZE, 2); +- init_msix(0, 2, &mut pci_config, Arc::new(AtomicU16::new(0))).unwrap(); ++ init_msix(0, 2, &mut pci_config, Arc::new(AtomicU16::new(0)), "msix").unwrap(); + let msix = pci_config.msix.as_ref().unwrap(); + let mut locked_msix = msix.lock().unwrap(); + locked_msix.enabled = false; +diff --git a/pci/src/root_port.rs b/pci/src/root_port.rs +index b00c14a..286e92d 100644 +--- a/pci/src/root_port.rs ++++ b/pci/src/root_port.rs +@@ -289,7 +289,7 @@ impl PciDevOps for RootPort { + .add_pcie_cap(self.devfn, self.port_num, PcieDevType::RootPort as u8)?; + + self.dev_id.store(self.devfn as u16, Ordering::SeqCst); +- init_msix(0, 1, &mut self.config, self.dev_id.clone())?; ++ init_msix(0, 1, &mut self.config, self.dev_id.clone(), &self.name)?; + + let parent_bus = self.parent_bus.upgrade().unwrap(); + let mut locked_parent_bus = parent_bus.lock().unwrap(); +@@ -303,6 +303,7 @@ impl PciDevOps for RootPort { + .add_subregion(self.sec_bus.lock().unwrap().mem_region.clone(), 0) + .chain_err(|| "Failed to register subregion in memory space.")?; + ++ let name = self.name.clone(); + let root_port = Arc::new(Mutex::new(self)); + #[allow(unused_mut)] + let mut locked_root_port = root_port.lock().unwrap(); +@@ -327,7 +328,11 @@ impl PciDevOps for RootPort { + } + // Need to drop locked_root_port in order to register root_port instance. 
+ drop(locked_root_port); +- MigrationManager::register_device_instance_mutex(RootPortState::descriptor(), root_port); ++ MigrationManager::register_device_instance_mutex_with_id( ++ RootPortState::descriptor(), ++ root_port, ++ &name, ++ ); + + Ok(()) + } +diff --git a/virtio/src/block.rs b/virtio/src/block.rs +index e0ced06..a2e35e8 100644 +--- a/virtio/src/block.rs ++++ b/virtio/src/block.rs +@@ -967,6 +967,10 @@ impl VirtioDevice for Block { + } + + fn unrealize(&mut self) -> Result<()> { ++ MigrationManager::unregister_device_instance_mutex_by_id( ++ BlockState::descriptor(), ++ &self.blk_cfg.id, ++ ); + Ok(()) + } + +diff --git a/virtio/src/net.rs b/virtio/src/net.rs +index bbb1cc7..096121b 100644 +--- a/virtio/src/net.rs ++++ b/virtio/src/net.rs +@@ -620,6 +620,10 @@ impl VirtioDevice for Net { + } + + fn unrealize(&mut self) -> Result<()> { ++ MigrationManager::unregister_device_instance_mutex_by_id( ++ VirtioNetState::descriptor(), ++ &self.net_cfg.id, ++ ); + Ok(()) + } + +diff --git a/virtio/src/virtio_pci.rs b/virtio/src/virtio_pci.rs +index bf1f58a..9eae777 100644 +--- a/virtio/src/virtio_pci.rs ++++ b/virtio/src/virtio_pci.rs +@@ -24,7 +24,7 @@ use pci::config::{ + VENDOR_ID, + }; + use pci::errors::{ErrorKind, Result as PciResult, ResultExt}; +-use pci::msix::update_dev_id; ++use pci::msix::{update_dev_id, MsixState}; + use pci::{ + config::PciConfig, init_msix, init_multifunction, le_write_u16, ranges_overlap, PciBus, + PciDevOps, +@@ -939,6 +939,7 @@ impl PciDevOps for VirtioPciDevice { + nvectors as u32, + &mut self.config, + self.dev_id.clone(), ++ &self.name, + )?; + + self.assign_interrupt_cb(); +@@ -964,6 +965,7 @@ impl PciDevOps for VirtioPciDevice { + .realize() + .chain_err(|| "Failed to realize virtio device")?; + ++ let name = self.name.clone(); + let devfn = self.devfn; + let dev = Arc::new(Mutex::new(self)); + let pci_bus = dev.lock().unwrap().parent_bus.upgrade().unwrap(); +@@ -978,7 +980,11 @@ impl PciDevOps for VirtioPciDevice { + 
pci_device.unwrap().lock().unwrap().name() + ); + } +- MigrationManager::register_device_instance_mutex(VirtioPciState::descriptor(), dev); ++ MigrationManager::register_device_instance_mutex_with_id( ++ VirtioPciState::descriptor(), ++ dev, ++ &name, ++ ); + + Ok(()) + } +@@ -992,6 +998,15 @@ impl PciDevOps for VirtioPciDevice { + + let bus = self.parent_bus.upgrade().unwrap(); + self.config.unregister_bars(&bus)?; ++ ++ MigrationManager::unregister_device_instance_mutex_by_id( ++ MsixState::descriptor(), ++ &self.name, ++ ); ++ MigrationManager::unregister_device_instance_mutex_by_id( ++ VirtioPciState::descriptor(), ++ &self.name, ++ ); + Ok(()) + } + +@@ -1491,6 +1506,7 @@ mod tests { + virtio_pci.device.lock().unwrap().queue_num() as u32 + 1, + &mut virtio_pci.config, + virtio_pci.dev_id.clone(), ++ &virtio_pci.name, + ) + .unwrap(); + // Prepare valid queue config +-- +2.25.1 + diff --git a/0009-tests-fix-the-test_standvm_quickstart.patch b/0009-tests-fix-the-test_standvm_quickstart.patch new file mode 100644 index 0000000..176a71f --- /dev/null +++ b/0009-tests-fix-the-test_standvm_quickstart.patch @@ -0,0 +1,38 @@ +From e72471b34c285b6eb70860be23a87dfaea928280 Mon Sep 17 00:00:00 2001 +From: zhouli57 +Date: Thu, 6 Jan 2022 15:27:59 +0800 +Subject: [PATCH 04/10] tests: fix the test_standvm_quickstart + +Currently, device IDs are used to distinguish devices in snapshot. +Therefore, ensure that IDs before and after shapshot are consistent. 
+ +Signed-off-by: zhouli57 +--- + tests/hydropper/virt/standvm.py | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/tests/hydropper/virt/standvm.py b/tests/hydropper/virt/standvm.py +index 5208ddd..ae6cc43 100644 +--- a/tests/hydropper/virt/standvm.py ++++ b/tests/hydropper/virt/standvm.py +@@ -205,13 +205,13 @@ class StandVM(BaseVM): + # only one device is supported(pcie root port) + if self.pcie_root_port_remain <= 0: + raise PcierootportError +- _temp_device_args = "virtio-net-pci,netdev=%s,id=%s,bus=pcie.%s,addr=0x0"\ +- % (tapname, tapname, self.pcie_root_port_remain + 4) ++ _temp_device_args = "virtio-net-pci,netdev=%s,id=net-%s,bus=pcie.%s,addr=0x0"\ ++ % (tapname, i, self.pcie_root_port_remain + 4) + self.pcie_root_port_remain -= 1 + self.pcie_root_port["net"] = False + else: +- _temp_device_args = "virtio-net-pci,netdev=%s,id=%s,bus=pcie.0,addr=%s.0x0"\ +- % (tapname, tapname, hex(i)) ++ _temp_device_args = "virtio-net-pci,netdev=%s,id=net-%s,bus=pcie.0,addr=%s.0x0"\ ++ % (tapname, i, hex(i)) + if self.multifunction["net"]: + _temp_device_args += ",multifunction=on" + if self.net_iothread: +-- +2.25.1 + diff --git a/0010-root_port-correct-the-log-of-printing-device-info-du.patch b/0010-root_port-correct-the-log-of-printing-device-info-du.patch new file mode 100644 index 0000000..f7afb62 --- /dev/null +++ b/0010-root_port-correct-the-log-of-printing-device-info-du.patch @@ -0,0 +1,34 @@ +From 415d62b9115181bcfc16a23b3b02542ee9753334 Mon Sep 17 00:00:00 2001 +From: zhouli57 +Date: Mon, 21 Feb 2022 16:38:18 +0800 +Subject: [PATCH 05/10] root_port: correct the log of printing device info + during hot unplug + +Signed-off-by: zhouli57 +--- + pci/src/root_port.rs | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/pci/src/root_port.rs b/pci/src/root_port.rs +index 286e92d..3dc97dd 100644 +--- a/pci/src/root_port.rs ++++ b/pci/src/root_port.rs +@@ -168,6 +168,7 @@ impl RootPort { + error!("{}", e.display_chain()); + 
error!("Failed to unrealize device {}.", locked_dev.name()); + } ++ info!("Device {} unplug from {}", locked_dev.name(), self.name); + + // Send QMP event for successful hot unplugging. + if QmpChannel::is_connected() { +@@ -237,7 +238,6 @@ impl RootPort { + && (old_ctl & PCI_EXP_SLTCTL_PCC != PCI_EXP_SLTCTL_PCC + || old_ctl & PCI_EXP_SLTCTL_PWR_IND_OFF != PCI_EXP_SLTCTL_PWR_IND_OFF) + { +- info!("Device {} unplug", self.name()); + self.remove_devices(); + + if let Err(e) = self.update_register_status() { +-- +2.25.1 + diff --git a/0011-tests-add-stand-kata-testcases.patch b/0011-tests-add-stand-kata-testcases.patch new file mode 100644 index 0000000..b730e99 --- /dev/null +++ b/0011-tests-add-stand-kata-testcases.patch @@ -0,0 +1,387 @@ +From b714b4272f8c84060a08f4966b87247e054680c6 Mon Sep 17 00:00:00 2001 +From: Zhu Huankai +Date: Tue, 18 Jan 2022 20:55:42 +0800 +Subject: [PATCH 06/10] tests:add stand kata testcases + +Add new testcode to test kata container of standvm and move +some functions of vfio to utils_coommon. + +Add some new testcases for standvm of isula: +1.test start kata container with initrd. +2.test start kata container with rootfs. +3.test kata container create template and start from template. 
+4.test start kata container in sandbox +5.test start kata container with vfio net device +6.test start kata container with vfrtio fs + +Signed-off-by: Zhu Huankai +--- + .../standvm/functional/test_standvm_isula.py | 228 ++++++++++++++++++ + .../standvm/functional/test_standvm_vfio.py | 34 +-- + tests/hydropper/utils/utils_common.py | 31 ++- + 3 files changed, 265 insertions(+), 28 deletions(-) + create mode 100644 tests/hydropper/testcases/standvm/functional/test_standvm_isula.py + +diff --git a/tests/hydropper/testcases/standvm/functional/test_standvm_isula.py b/tests/hydropper/testcases/standvm/functional/test_standvm_isula.py +new file mode 100644 +index 0000000..5e01685 +--- /dev/null ++++ b/tests/hydropper/testcases/standvm/functional/test_standvm_isula.py +@@ -0,0 +1,228 @@ ++# Copyright (c) 2021 Huawei Technologies Co.,Ltd. All rights reserved. ++# ++# StratoVirt is licensed under Mulan PSL v2. ++# You can use this software according to the terms and conditions of the Mulan ++# PSL v2. ++# You may obtain a copy of Mulan PSL v2 at: ++# http:#license.coscl.org.cn/MulanPSL2 ++# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY ++# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ++# NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. ++# See the Mulan PSL v2 for more details. 
++"""Test standvm isula""" ++ ++import os ++import logging ++import subprocess ++import pytest ++import utils.utils_common as utils ++from utils.utils_logging import TestLog ++ ++LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s" ++logging.basicConfig(filename="/var/log/pytest.log", level=logging.DEBUG, format=LOG_FORMAT) ++LOG = TestLog.get_global_log() ++SHELL_TIMEOUT = 10 ++ ++def test_standvm_isula_initrd(container): ++ """ ++ Test run isula with initrd: ++ ++ 1) run isula with initrd ++ 2) execute shell command in isula ++ """ ++ LOG.info("----------test_standvm_isula_initrd----------") ++ kata_container = container ++ container_id = None ++ try: ++ kata_container.replace_configuration(cig_name='configuration-initrd-stand.toml') ++ container_id = kata_container.run_isula(options="-tid", ++ runtime="io.containerd.kata.v2", ++ image="busybox:latest", ++ name="initrd1-hydropper-stand") ++ LOG.info("initrd-stand container id:%s", container_id) ++ ++ session = kata_container.create_isula_shellsession("initrd1-hydropper-stand") ++ status, _ = session.cmd_status_output("ls", timeout=SHELL_TIMEOUT) ++ assert status == 0 ++ ++ session.close() ++ kata_container.stop_isula("initrd1-hydropper-stand") ++ finally: ++ kata_container.remove_isula_force("initrd1-hydropper-stand") ++ ++def test_standvm_isula_rootfs(container): ++ """ ++ Test run isula with rootfs: ++ ++ 1) run isula with rootfs ++ 2) execute shell command in isula ++ """ ++ LOG.info("----------test_standvm_isula_rootfs----------") ++ kata_container = container ++ container_id = None ++ try: ++ kata_container.replace_configuration(cig_name='configuration-rootfs-stand.toml') ++ container_id = kata_container.run_isula(options="-tid", ++ runtime="io.containerd.kata.v2", ++ image="busybox:latest", ++ name="rootfs1-hydropper-stand") ++ LOG.info("rootfs-stand container id:%s", container_id) ++ ++ session = kata_container.create_isula_shellsession("rootfs1-hydropper-stand") ++ status, _ = 
session.cmd_status_output("ls", timeout=SHELL_TIMEOUT) ++ assert status == 0 ++ ++ session.close() ++ kata_container.stop_isula("rootfs1-hydropper-stand") ++ finally: ++ kata_container.remove_isula_force("rootfs1-hydropper-stand") ++ ++def test_standvm_isula_template(container): ++ """ ++ Test run isula with template: ++ ++ 1) run template isula and create a template auto matically ++ 2) assert template has been created. ++ 3) run a new isula container from template ++ """ ++ LOG.info("----------test_standvm_isula_template----------") ++ kata_container = container ++ container_id1 = container_id2 = None ++ if os.path.exists("/run/vc/vm/template/"): ++ subprocess.run("rm -rf /run/vc/vm/template/", shell=True, check=True) ++ try: ++ kata_container.replace_configuration(cig_name='configuration-template-stand.toml') ++ container_id1 = kata_container.run_isula(options="-tid", ++ runtime="io.containerd.kata.v2", ++ image="busybox:latest", ++ name="template1-hydropper-stand") ++ LOG.info("template container id:%s", container_id1) ++ session = kata_container.create_isula_shellsession("template1-hydropper-stand") ++ status, _ = session.cmd_status_output("ls", timeout=SHELL_TIMEOUT) ++ assert status == 0 ++ session.close() ++ ++ assert os.path.exists("/run/vc/vm/template/") ++ ++ container_id2 = kata_container.run_isula(options="-tid", ++ runtime="io.containerd.kata.v2", ++ image="busybox:latest", ++ name="template2-hydropper-stand") ++ LOG.info("run container from template, id:%s", container_id2) ++ session = kata_container.create_isula_shellsession("template2-hydropper-stand") ++ status, _ = session.cmd_status_output("ls", timeout=SHELL_TIMEOUT) ++ assert status == 0 ++ session.close() ++ ++ kata_container.stop_isula("template1-hydropper-stand") ++ kata_container.stop_isula("template2-hydropper-stand") ++ finally: ++ kata_container.remove_isula_force("template1-hydropper-stand") ++ kata_container.remove_isula_force("template2-hydropper-stand") ++ if 
os.path.exists("/run/vc/vm/template/"): ++ subprocess.run("rm -rf /run/vc/vm/template/", shell=True, check=True) ++ ++def test_standvm_isula_sandbox(container): ++ """ ++ Test run isula with sandbox: ++ ++ 1) run podsandbox container firstly. ++ 2) run a new container in podsanbox. ++ """ ++ LOG.info("----------test_standvm_isula_sandbox----------") ++ kata_container = container ++ container_id = podsandbox_id = None ++ try: ++ kata_container.replace_configuration(cig_name='configuration-initrd-stand.toml') ++ podsandbox_id = kata_container.run_isula(options="-tid", ++ runtime="io.containerd.kata.v2", ++ image="busybox:latest", ++ name="sandbox1-hydropper-stand", ++ annotation="io.kubernetes.docker.type=podsandbox") ++ LOG.info("podsandbox container id:%s", podsandbox_id) ++ ++ podsandbox_id = podsandbox_id.strip('\n') ++ container_id = kata_container.run_isula(options="-tid", ++ runtime="io.containerd.kata.v2", ++ image="busybox:latest", ++ name="sandbox2-hydropper-stand", ++ annotation=["io.kubernetes.docker.type=container", ++ ("io.kubernetes.sandbox.id=%s" % podsandbox_id)]) ++ LOG.info("container id:%s", container_id) ++ session = kata_container.create_isula_shellsession("sandbox2-hydropper-stand") ++ status, _ = session.cmd_status_output("ls", timeout=SHELL_TIMEOUT) ++ assert status == 0 ++ session.close() ++ ++ kata_container.stop_isula("sandbox2-hydropper-stand") ++ kata_container.stop_isula("sandbox1-hydropper-stand") ++ finally: ++ kata_container.remove_isula_force("sandbox2-hydropper-stand") ++ kata_container.remove_isula_force("sandbox1-hydropper-stand") ++ ++@pytest.mark.skip ++@pytest.mark.parametrize("net_type, bdf, pf_name", ++ [('1822', '0000:03:00.0', 'enp3s0')]) ++def test_standvm_isula_vfionet(container, net_type, bdf, pf_name): ++ """ ++ Test run isula with vfio net device: ++ """ ++ LOG.info("----------test_standvm_isula_vfionet----------") ++ kata_container = container ++ container_id = None ++ vf_bdf = bdf.split('.')[0] + '.1' ++ try: ++ 
kata_container.replace_configuration(cig_name='configuration-initrd-stand.toml') ++ utils.config_host_vfio(net_type=net_type, number='2', bdf=bdf) ++ utils.check_vf(pf_name=pf_name) ++ subprocess.run("modprobe vfio-pci", shell=True, check=True) ++ utils.rebind_vfio_pci(bdf=vf_bdf) ++ iommu_group = utils.get_iommu_group(vf_bdf) ++ container_id = kata_container.run_isula(options="-tid", ++ runtime="io.containerd.kata.v2", ++ device="/dev/vfio/%s" % iommu_group, ++ net="none", ++ image="busybox:latest", ++ name="vfionet1-hydropper-stand") ++ LOG.info("vfio net container id:%s", container_id) ++ ++ session = kata_container.create_isula_shellsession("vfionet1-hydropper-stand") ++ status, _ = session.cmd_status_output("ip a", timeout=SHELL_TIMEOUT) ++ assert status == 0 ++ ++ session.close() ++ kata_container.stop_isula("vfionet1-hydropper-stand") ++ finally: ++ utils.clean_vf(bdf=bdf) ++ kata_container.remove_isula_force("vfionet1-hydropper-stand") ++ ++@pytest.mark.skip ++def test_standvm_isula_virtiofs(container): ++ """ ++ Test run isula with virtio fs: ++ """ ++ LOG.info("----------test_standvm_isula_virtiofs----------") ++ kata_container = container ++ container_id = None ++ test_dir = "/tmp/hydropper_virtio_fs" ++ if not os.path.exists(test_dir): ++ subprocess.run("mkdir %s" % test_dir, shell=True, check=True) ++ subprocess.run("touch %s/hydropper1.log" % test_dir, shell=True, check=True) ++ try: ++ kata_container.replace_configuration(cig_name='configuration-virtiofs-stand.toml') ++ container_id = kata_container.run_isula(options="-tid", ++ runtime="io.containerd.kata.v2", ++ net="none -v %s:/tmp/" % test_dir, ++ image="busybox:latest", ++ name="virtiofs1-hydropper-stand") ++ LOG.info("virtio fs container id:%s", container_id) ++ ++ session = kata_container.create_isula_shellsession("virtiofs1-hydropper-stand") ++ status, _ = session.cmd_status_output("ls /tmp/hydropper1.log", timeout=SHELL_TIMEOUT) ++ assert status == 0 ++ ++ session.close() ++ 
kata_container.stop_isula("virtiofs1-hydropper-stand") ++ finally: ++ kata_container.remove_isula_force("virtiofs1-hydropper-stand") ++ subprocess.run("rm -rf /tmp/hydropper_virtio_fs", shell=True, check=True) +diff --git a/tests/hydropper/testcases/standvm/functional/test_standvm_vfio.py b/tests/hydropper/testcases/standvm/functional/test_standvm_vfio.py +index e6ca2b3..dc399a5 100644 +--- a/tests/hydropper/testcases/standvm/functional/test_standvm_vfio.py ++++ b/tests/hydropper/testcases/standvm/functional/test_standvm_vfio.py +@@ -15,33 +15,13 @@ import logging + import pytest + import platform + from subprocess import run +- ++import utils.utils_common as utils + from utils.utils_logging import TestLog + + LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s" + logging.basicConfig(filename="/var/log/pytest.log", level=logging.DEBUG, format=LOG_FORMAT) + LOG = TestLog.get_global_log() + +-def config_host_vfio(net_type, number, bdf): +- """configure vf in host""" +- ret = run("lspci -v | grep 'Eth' | grep %s" % net_type, shell=True, check=True).stdout +- LOG.debug(ret) +- ret = run("echo %s > /sys/bus/pci/devices/%s/sriov_numvfs" % (number, bdf), shell=True, check=True) +- +-def rebind_vfio_pci(bdf): +- """unbind old driver and bind a new one""" +- run("echo %s > /sys/bus/pci/devices/%s/driver/unbind" % (bdf, bdf), shell=True, check=True) +- run("echo `lspci -ns %s | awk -F':| ' '{print $5\" \"$6}'` > /sys/bus/pci/drivers/vfio-pci/new_id"\ +- %bdf, shell=True, check=True) +- +-def check_vf(pf_name): +- """check whether vf is enabled""" +- run("ip link show %s | grep vf" % pf_name, shell=True, check=True) +- +-def clean_vf(bdf): +- """clean host vf""" +- ret = run("echo 0 > /sys/bus/pci/devices/%s/sriov_numvfs" % bdf, shell=True, check=True) +- + @pytest.mark.standvm_accept + @pytest.mark.parametrize("host_ip, net_type, bdf, pf_name", + [('9.13.7.139', '1822', '0000:03:00.0', 'enp3s0')]) +@@ -57,11 +37,11 @@ def test_standvm_vfio_net(standvm, host_ip, net_type, 
bdf, pf_name): + flag = False + + testvm = standvm +- config_host_vfio(net_type=net_type, number='2', bdf=bdf) ++ utils.config_host_vfio(net_type=net_type, number='2', bdf=bdf) + try: +- check_vf(pf_name=pf_name) ++ utils.check_vf(pf_name=pf_name) + run("modprobe vfio-pci", shell=True, check=True) +- rebind_vfio_pci(bdf=vf_bdf) ++ utils.rebind_vfio_pci(bdf=vf_bdf) + testvm.basic_config(vfio=True, bdf=vf_bdf) + testvm.launch() + _cmd = "ip a | awk '{ print $2 }' | cut -d ':' -f 1" +@@ -80,7 +60,7 @@ def test_standvm_vfio_net(standvm, host_ip, net_type, bdf, pf_name): + assert flag == True + finally: + testvm.shutdown() +- clean_vf(bdf=bdf) ++ utils.clean_vf(bdf=bdf) + + @pytest.mark.standvm_accept + @pytest.mark.parametrize("bdf",[('0000:08:00.0')]) +@@ -95,7 +75,7 @@ def test_standvm_vfio_ssd(standvm, bdf): + testvm = standvm + run("lspci | grep 'Non-Volatile memory'", shell=True, check=True) + run("modprobe vfio-pci", shell=True, check=True) +- rebind_vfio_pci(bdf=bdf) ++ utils.rebind_vfio_pci(bdf=bdf) + testvm.basic_config(vfio=True, bdf=bdf) + testvm.launch() + session = testvm.create_ssh_session() +@@ -111,4 +91,4 @@ def test_standvm_vfio_ssd(standvm, bdf): + assert ret == 0 + + session.close() +- testvm.shutdown() +\ No newline at end of file ++ testvm.shutdown() +diff --git a/tests/hydropper/utils/utils_common.py b/tests/hydropper/utils/utils_common.py +index 949cb5c..7713bef 100644 +--- a/tests/hydropper/utils/utils_common.py ++++ b/tests/hydropper/utils/utils_common.py +@@ -14,6 +14,8 @@ import os + import errno + import ctypes + import shutil ++from subprocess import run ++from subprocess import PIPE + from utils.utils_logging import TestLog + + LOG = TestLog.get_global_log() +@@ -57,4 +59,31 @@ def get_timestamp(timestamp): + minute = int(datetime.split(':')[1]) + second = int(datetime.split(':')[2]) + +- return float(str(second + minute * 60 + hour * 60 * 24) + '.' 
+ mill) +\ No newline at end of file ++ return float(str(second + minute * 60 + hour * 60 * 24) + '.' + mill) ++ ++ ++def config_host_vfio(net_type, number, bdf): ++ """configure vf in host""" ++ ret = run("lspci -v | grep 'Eth' | grep %s" % net_type, shell=True, check=True).stdout ++ LOG.debug(ret) ++ ret = run("echo %s > /sys/bus/pci/devices/%s/sriov_numvfs" % (number, bdf), shell=True, check=True) ++ ++def rebind_vfio_pci(bdf): ++ """unbind old driver and bind a new one""" ++ run("echo %s > /sys/bus/pci/devices/%s/driver/unbind" % (bdf, bdf), shell=True, check=True) ++ run("echo `lspci -ns %s | awk -F':| ' '{print $5\" \"$6}'` > /sys/bus/pci/drivers/vfio-pci/new_id"\ ++ %bdf, shell=True, check=True) ++ ++def check_vf(pf_name): ++ """check whether vf is enabled""" ++ run("ip link show %s | grep vf" % pf_name, shell=True, check=True) ++ ++def clean_vf(bdf): ++ """clean host vf""" ++ ret = run("echo 0 > /sys/bus/pci/devices/%s/sriov_numvfs" % bdf, shell=True, check=True) ++ ++def get_iommu_group(bdf): ++ """get iommu group id""" ++ read_cmd = "readlink /sys/bus/pci/devices/%s/iommu_group" % bdf ++ return run(read_cmd, shell=True, check=True, stdout=PIPE) \ ++ .stdout.decode('utf-8').splitlines()[0].split('/')[-1] +-- +2.25.1 + diff --git a/0012-net-fix-the-bug-when-tap-is-abnormally-removed.patch b/0012-net-fix-the-bug-when-tap-is-abnormally-removed.patch new file mode 100644 index 0000000..7230a52 --- /dev/null +++ b/0012-net-fix-the-bug-when-tap-is-abnormally-removed.patch @@ -0,0 +1,50 @@ +From ea2c4cb7831aaa7a98f7d3d7379f6262bbbf2153 Mon Sep 17 00:00:00 2001 +From: zhouli57 +Date: Tue, 22 Feb 2022 10:51:40 +0800 +Subject: [PATCH 07/10] net: fix the bug when tap is abnormally removed + +If the backend tap device is removed, readv returns less than 0. +At this time, the content in the tap needs to be cleaned up. +Here, read is called to process, otherwise handle_rx may be triggered +all the time, resulting in an infinite loop. 
+ +Signed-off-by: zhouli57 +--- + virtio/src/net.rs | 14 +++++++++++++- + 1 file changed, 13 insertions(+), 1 deletion(-) + +diff --git a/virtio/src/net.rs b/virtio/src/net.rs +index bbb1cc7..69ecc36 100644 +--- a/virtio/src/net.rs ++++ b/virtio/src/net.rs +@@ -149,6 +149,18 @@ impl NetIoHandler { + if e.kind() == std::io::ErrorKind::WouldBlock { + break; + } ++ ++ // If the backend tap device is removed, readv returns less than 0. ++ // At this time, the content in the tap needs to be cleaned up. ++ // Here, read is called to process, otherwise handle_rx may be triggered all the time. ++ let mut buf = [0; 1024]; ++ match tap.read(&mut buf) { ++ Ok(cnt) => error!("Failed to call readv but tap read is ok: cnt {}", cnt), ++ Err(e) => { ++ // When the backend tap device is abnormally removed, read return EBADFD. ++ error!("Failed to read tap: {}", e); ++ } ++ } + bail!("Failed to call readv for net handle_rx: {}", e); + } + +@@ -446,7 +458,7 @@ impl EventNotifierHelper for NetIoHandler { + tap_fd, + Some(handler), + NotifierOperation::AddShared, +- EventSet::IN, ++ EventSet::IN | EventSet::EDGE_TRIGGERED, + )); + } + +-- +2.25.1 + diff --git a/0013-docs-boot-update-detailed-usage-for-standard-boot.patch b/0013-docs-boot-update-detailed-usage-for-standard-boot.patch new file mode 100644 index 0000000..f8b0f85 --- /dev/null +++ b/0013-docs-boot-update-detailed-usage-for-standard-boot.patch @@ -0,0 +1,376 @@ +From 08eae86c65a91827a0f0c8de3849b6f46f097fba Mon Sep 17 00:00:00 2001 +From: Jiajie Li +Date: Mon, 21 Feb 2022 18:10:01 +0800 +Subject: [PATCH 08/10] docs/boot: update detailed usage for standard boot + +1. Fix some clerical error in boot cmdline. +2. Add download link for kernel image. +3. Differentiate the two modes supported by standard boot to + make it easier to understand. 
+ +Signed-off-by: Jiajie Li +--- + docs/boot.ch.md | 94 +++++++++++++++++++++++++++---------------------- + docs/boot.md | 89 +++++++++++++++++++++++++--------------------- + 2 files changed, 99 insertions(+), 84 deletions(-) + +diff --git a/docs/boot.ch.md b/docs/boot.ch.md +index b8eec27..31153a9 100644 +--- a/docs/boot.ch.md ++++ b/docs/boot.ch.md +@@ -1,13 +1,13 @@ + # StratoVirt 启动准备 + +-StratoVirt提供了微虚拟机和标准虚拟机两种机型。两种机型的启动过程如下。 ++StratoVirt提供了轻量虚拟机和标准虚拟机两种机型。两种机型的启动过程如下。 + +-## 微虚拟机启动过程 ++## 轻量虚拟机启动过程 + + ### 1. 构建内核镜像 + +-StratoVirt的微虚拟机机型在x86_64和aarch64平台都支持PE格式或是bzImage格式 +-(仅x86_64平台支持)的内核镜像。通过以下步骤来构建内核镜像: ++StratoVirt的轻量虚拟机机型在x86_64平台上支持PE格式或是bzImage格式的内核镜像,在 ++aarch64平台上支持PE格式的内核镜像。通过以下步骤来构建内核镜像: + + 1. 首先,获取openEuler内核源码: + +@@ -22,7 +22,7 @@ StratoVirt的微虚拟机机型在x86_64和aarch64平台都支持PE格式或是b + $ cd /usr/src/linux-5.10.0-0.0.0.7.oe1.$(uname -m)/ + ``` + +-2. 配置linux内核信息。你可以使用 [我们提供的微虚拟机内核配置文件](./kernel_config/micro_vm) ++2. 配置linux内核信息。你可以使用 [我们提供的轻量虚拟机内核配置文件](./kernel_config/micro_vm) + 并且将配置文件重命名为`.config`拷贝至`kernel`路径下。 当然你也可以通过命令修改内 + 核编译选项: + +@@ -33,12 +33,12 @@ StratoVirt的微虚拟机机型在x86_64和aarch64平台都支持PE格式或是b + 3. 构建并将内核镜像转换为PE格式。 + + ```shell +- $ make -j vmlinux && objcopy -O binary vmlinux vmlinux.bin ++ $ make -j$(nproc) vmlinux && objcopy -O binary vmlinux vmlinux.bin + ``` + + 4. 如果你想要在x86_64平台编译bzImage格式内核镜像。 + ```shell +- $ make -j bzImage ++ $ make -j$(nproc) bzImage + ``` + + ### 2. 构建rootfs镜像 +@@ -66,33 +66,10 @@ Rootfs镜像是一种文件系统镜像。在StratoVirt启动时可以挂载带 + 标准虚拟机有两种启动方式,第一种使用kernel+rootfs;另一种是使用预先安装好guest 操 + 作系统的raw格式镜像。 + +-### 1. 构建内核镜像 +- +-StratoVirt的标准虚拟机机型支持x86_64平台的bzImage格式内核镜像和aarch64平台的PE格 +-式内核镜像。内核镜像构建如下: +- +-1. 首先,获取openEuler内核源码: +- +- ```shell +- $ git clone -b kernel-5.10 --depth=1 https://gitee.com/openeuler/kernel +- $ cd kernel +- ``` +- +-2. 配置linux内核信息。你可以使用 [我们提供的标准虚拟机内核配置文件](./kernel_config/standard_vm) +- 并且将配置文件重命名为`.config`拷贝至`kernel`路径下。 +- +-3. 
构建内核镜像 +- +- ```shell +- # 在aarch64平台,将内核镜像转换为PE格式。 +- $ make -j vmlinux && objcopy -O binary vmlinux vmlinux.bin +- +- # 在x86_64平台,将内核镜像转换为bzImage格式. +- $ make -j bzImage +- ``` +- ++接下来讲解如何通过以上所述的两种方式启动标准虚拟机。以上两种启动方式均需使用标准启动 ++固件,为此首先讲解如何获取标准启动固件。 + +-### 2. 获取标准启动固件 ++### 1. 获取标准启动固件 + + 标准启动需要启动固件。Stratovirt仅支持在x86_64和aarch64平台上从UEFI(统一可扩展 + 固件接口)启动。 +@@ -103,7 +80,7 @@ EDK2是一个实现了UEFI规范的开源项目。我们使用EDK2作为固件 + 有两种方法可以获取EDK2二进制文件,通过yum源直接安装或从源代码编译。具体步骤如下。 + 请注意,EDK2二进制文件包含两个文件,一个用于存储可执行代码,另一个用于存储引导数据。 + +-#### 2.1 直接安装EDK2 ++#### 1.1 直接安装EDK2 + + 在x86_64平台, 运行 + +@@ -121,7 +98,7 @@ $ sudo yum install -y edk2-aarch64 + `/usr/share/edk2/ovmf` 目录下。 在aarch64平台, `QEMU_EFI-pflash.raw` 和 + `vars-template-pflash.raw` 文件会存在于`/usr/share/edk2/aarch64` 目录下。 + +-#### 2.2 从源代码编译 ++#### 1.2 从源代码编译 + + ```shell + # 安装必要依赖包用于编译edk2。 +@@ -169,13 +146,44 @@ fi + 目录下。在aarch64平台, `STRATOVIRT_EFI.raw` 和 `STRATOVIRT_VAR.raw` 文件会位于 + `/home` 目录下. + +-### 3. 构建rootfs镜像 ++### 2. 以 kernel + rootfs 方式启动标准虚拟机 ++ ++#### 2.1 构建内核镜像 ++ ++StratoVirt的标准虚拟机机型支持x86_64平台的bzImage格式内核镜像和aarch64平台的PE格 ++式内核镜像。内核镜像构建如下: ++ ++1. 获取openEuler内核源码: ++ ++ ```shell ++ $ git clone -b kernel-5.10 --depth=1 https://gitee.com/openeuler/kernel ++ $ cd kernel ++ ``` ++ ++2. 配置linux内核信息。你可以使用我们提供的标准虚拟机 [内核配置文件](./kernel_config/standard_vm) ++ 并且将配置文件重命名为`.config`拷贝至`kernel`路径下。 ++ ++3. 构建内核镜像 ++ ++ ```shell ++ # 在aarch64平台,将内核镜像转换为PE格式。 ++ $ make -j$(nproc) vmlinux && objcopy -O binary vmlinux vmlinux.bin ++ ++ # 在x86_64平台,将内核镜像转换为bzImage格式. ++ $ make -j$(nproc) bzImage ++ ``` ++ ++除了手动构建内核镜像的方式以外,也可以直接从 openEuler 官网下载对应的 ++[内核镜像](https://repo.openeuler.org/openEuler-21.09/stratovirt_img/x86_64/std-vmlinuxz)。 ++ ++#### 2.2 构建rootfs镜像 + +-为标准虚拟机构建rootfs镜像实际上与微虚拟机相同。你可以通过[附录](#2附录)查看更多 ++为标准虚拟机构建rootfs镜像实际上与轻量虚拟机相同。你可以通过[附录](#2附录)查看更多 + 的详细信息。 + ++### 3. 以 raw 格式镜像启动标准虚拟机 + +-### 4. 
获取 raw 格式镜像 ++#### 3.1 获取 raw 格式镜像 + + 你可以从 openEuler 官网下载已经安装好的 [qcow2 镜像](https://repo.openeuler.org/openEuler-21.03/virtual_machine_img/x86_64/openEuler-21.03-x86_64.qcow2.xz)。 + +@@ -189,7 +197,7 @@ $ qemu-img convert -f qcow2 -O raw openEuler-21.03-x86_64.qcow2 openEuler-21.03- + + 至此就获得了可以使用的 raw 格式镜像。 + +-### 5. 启动命令行样例 ++### 4. 启动命令行样例 + + 请注意,标准虚拟机需要两个PFlash设备,它们将使用来自与EDK2二进制的两个固件文件。 + 如果你不需要保持启动信息,单元序列为1的数据存储文件可以被省略。但是单元序号为0的 +@@ -215,11 +223,11 @@ fi + -kernel /path/to/kernel \ + -smp 1 \ + -m 2G \ +- -append "console=${con} reboot=k panic=1 root=/dev/vda" \ ++ -append "console=${con} reboot=k panic=1 root=/dev/vda rw" \ + -drive file=/path/to/rootfs,id=rootfs,readonly=off,direct=off \ + -device virtio-blk-device,drive=rootfs,id=rootfs \ + -drive file=/path/to/OVMF_CODE.fd,if=pflash,unit=0,readonly=true \ +- -drive file=/path/to/OVMF_VARS.fd,if=pfalsh,unit=1 \ ++ -drive file=/path/to/OVMF_VARS.fd,if=pflash,unit=1 \ + -qmp unix:/path/to/socket,server,nowait \ + -serial stdio + ``` +@@ -232,11 +240,11 @@ fi + -kernel /path/to/kernel \ + -smp 1 \ + -m 2G \ +- -append "console=${con} reboot=k panic=1 root=/dev/vda" \ ++ -append "console=${con} reboot=k panic=1 root=/dev/vda rw" \ + -drive file=/path/to/rootfs,id=rootfs,readonly=off,direct=off \ + -device virtio-blk-device,drive=rootfs \ + -drive file=/path/to/OVMF_CODE.fd,if=pflash,unit=0,readonly=true \ +- -drive file=/path/to/OVMF_VARS.fd,if=pfalsh,unit=1 \ ++ -drive file=/path/to/OVMF_VARS.fd,if=pflash,unit=1 \ + -qmp unix:/path/to/socket,server,nowait \ + -serial stdio + ``` +diff --git a/docs/boot.md b/docs/boot.md +index f7227df..64d75b4 100644 +--- a/docs/boot.md ++++ b/docs/boot.md +@@ -7,9 +7,9 @@ boot process of these two machines are as follows. + + ### 1. Build kernel + +-The microvm machine type of StratoVirt supports PE or bzImage (only x86_64) format +-kernel images on both x86_64 and aarch64 platforms. 
Kernel image can be built with +-following steps: ++The microvm machine type of StratoVirt supports PE or bzImage format kernel images ++on x86_64 platforms, and supports PE format kernel images on aarch64 platforms. ++Kernel image can be built with following steps: + + 1. Firstly, get the openEuler kernel source code with: + +@@ -35,12 +35,12 @@ and copy it to `kernel` path as `.config`. You can also modify config options by + 3. Build and transform kernel image to PE format. + + ```shell +- $ make -j vmlinux && objcopy -O binary vmlinux vmlinux.bin ++ $ make -j$(nproc) vmlinux && objcopy -O binary vmlinux vmlinux.bin + ``` + + 4. If you want to compile bzImage format kernel in x86_64. + ```shell +- $ make -j bzImage ++ $ make -j$(nproc) bzImage + ``` + + ### 2. Build rootfs +@@ -68,34 +68,10 @@ be mounted at boot time in StratoVirt. You can check [Appendix](#2Appendix). + Standard VMs can boot in two modes. The first mode is kernel + rootfs.The other + is to use the raw image that has been preinstalled with the guest OS. + +-### 1. Build kernel +- +-The standard_ machine in StratoVirt supports bzImage format kernel image +-on x86_64 platform; and supports PE format kernel image on aarch64 platform. +-Kernel image can be built with: +- +-1. Firstly, get the openEuler kernel source code with: +- +- ```shell +- $ git clone -b kernel-5.10 --depth=1 https://gitee.com/openeuler/kernel +- $ cd kernel +- ``` +- +-2. Configure your linux kernel. You should use [our recommended standard_vm config] +-(./kernel_config/standard_vm) and copy it to `kernel` path as `.config`. +- +-3. Build kernel image +- +- ```shell +- # on aarch64 platform, transform kernel image to PE format. +- $ make -j vmlinux && objcopy -O binary vmlinux vmlinux.bin +- +- # on x86_64 platform, get bzImage format kernel image. +- $ make -j bzImage +- ``` +- ++The preceding two boot modes both require standard boot firmware. So we first ++describe how to obtain the standard boot firmware. + +-### 2. 
Get firmware for standard boot ++### 1. Get firmware for standard boot + + Standard boot needs firmware. Stratovirt only supports booting from UEFI (Unified + Extensible Firmware Interface) on x86_64 and aarch64 platform. +@@ -108,7 +84,7 @@ or compiling from source code. The specific steps are as follows. Notes that EDK + binary contains two files, one for executable code storage and the other for boot + data storage. + +-#### 2.1 Directly install EDK II ++#### 1.1 Directly install EDK II + + On x86_64 platform, run + +@@ -126,7 +102,7 @@ After installing edk2, on x86_64 platform, `OVMF_CODE.fd` and `OVMF_VARS.fd` are + located in `/usr/share/edk2/ovmf` directory. On aarch64 platform, `QEMU_EFI-pflash.raw` + and `vars-template-pflash.raw` are located in `/usr/share/edk2/aarch64` directory. + +-#### 2.2 Compile from source code ++#### 1.2 Compile from source code + + ```shell + # Install necessary packages to compile edk2. +@@ -174,13 +150,44 @@ After compiling edk2, on x86_64 platform, `OVMF_CODE.fd` and `OVMF_VARS.fd` loca + underneath `/home` directory. On aarch64 platform, `STRATOVIRT_EFI.raw` and + `STRATOVIRT_VAR.raw` locates underneath `/home` directory. + +-### 3. Build rootfs ++### 2. Boot with kernel and rootfs ++#### 2.1 Build kernel ++ ++The standard_ machine in StratoVirt supports bzImage format kernel image ++on x86_64 platform; and supports PE format kernel image on aarch64 platform. ++Kernel image can be built with: ++ ++1. Firstly, get the openEuler kernel source code with: ++ ++ ```shell ++ $ git clone -b kernel-5.10 --depth=1 https://gitee.com/openeuler/kernel ++ $ cd kernel ++ ``` ++ ++2. Configure your linux kernel. You should use [our recommended standard_vm config] ++(./kernel_config/standard_vm) and copy it to `kernel` path as `.config`. ++ ++3. Build kernel image ++ ++ ```shell ++ # on aarch64 platform, transform kernel image to PE format. 
++ $ make -j$(nproc) vmlinux && objcopy -O binary vmlinux vmlinux.bin ++ ++ # on x86_64 platform, get bzImage format kernel image. ++ $ make -j$(nproc) bzImage ++ ``` ++In addition to manually building the kernel image, you can also download the ++[kernel image](https://repo.openeuler.org/openEuler-21.09/stratovirt_img/x86_64/std-vmlinuxz) ++from the openEuler official website. ++ ++#### 2.2 Build rootfs + + The building of rootfs for standard VM is exactly the same with microvm. You can + check [Appendix](#2Appendix) for more detailed information. + + +-### 4. Get raw image ++### 3. Boot with raw image ++#### 3.1 Get raw image + + You can download the installed [qcow2 image](https://repo.openeuler.org/openEuler-21.03/virtual_machine_img/x86_64/openEuler-21.03-x86_64.qcow2.xz) + from the OpenEuler official website. +@@ -195,7 +202,7 @@ $ qemu-img convert -f qcow2 -O raw openEuler-21.03-x86_64.qcow2 openEuler-21.03- + + Now the available raw image is obtained. + +-### 5. Boot command line sample ++### 4. Boot command line sample + + Note that standard need two PFlash devices which will use two firmware files from + EDK II binary. 
If you don't need to store boot information, data storage file can +@@ -221,11 +228,11 @@ fi + -kernel /path/to/kernel \ + -smp 1 \ + -m 2G \ +- -append "console=${con} reboot=k panic=1 root=/dev/vda" \ ++ -append "console=${con} reboot=k panic=1 root=/dev/vda rw" \ + -drive file=/path/to/rootfs,id=rootfs,readonly=off,direct=off \ + -device virtio-blk-device,drive=rootfs,id=rootfs \ + -drive file=/path/to/OVMF_CODE.fd,if=pflash,unit=0,readonly=true \ +- -drive file=/path/to/OVMF_VARS.fd,if=pfalsh,unit=1 \ ++ -drive file=/path/to/OVMF_VARS.fd,if=pflash,unit=1 \ + -qmp unix:/path/to/socket,server,nowait \ + -serial stdio + ``` +@@ -240,10 +247,10 @@ The command for booting with the raw image is as follows: + -drive file=/path/to/raw_image,id=raw_image,readonly=off,direct=off \ + -device virtio-blk-device,drive=raw_image \ + -drive file=/path/to/OVMF_CODE.fd,if=pflash,unit=0,readonly=true \ +- -drive file=/path/to/OVMF_VARS.fd,if=pfalsh,unit=1 \ ++ -drive file=/path/to/OVMF_VARS.fd,if=pflash,unit=1 \ + -qmp unix:/path/to/socket,server,nowait \ + -serial stdio +-```F ++``` + + ## Appendix + +-- +2.25.1 + diff --git a/0014-virtio-queue-fix-error-access-queue-s-host-virtual-a.patch b/0014-virtio-queue-fix-error-access-queue-s-host-virtual-a.patch new file mode 100644 index 0000000..82bd783 --- /dev/null +++ b/0014-virtio-queue-fix-error-access-queue-s-host-virtual-a.patch @@ -0,0 +1,78 @@ +From 55c4ba8eb8b6a6bf58709ece92f689b3e3556a0d Mon Sep 17 00:00:00 2001 +From: "Xinle.Guo" +Date: Sat, 26 Feb 2022 12:05:10 +0800 +Subject: [PATCH 09/10] virtio/queue: fix error access queue's host virtual + address on x86_64 architecture + +On x86_64 architecture, there is a hole(2G ~ 4G) in the memory +layout. If just convert guest physical address(gpa) directly to +the host virtual address(hva), it may access address out of bounds. +Call `get_host_address()` function to get hva from gpa. 
+ +Signed-off-by: Xinle.Guo +--- + virtio/src/queue.rs | 31 ++++++++++++------------------- + 1 file changed, 12 insertions(+), 19 deletions(-) + +diff --git a/virtio/src/queue.rs b/virtio/src/queue.rs +index bb39723..b2c7132 100644 +--- a/virtio/src/queue.rs ++++ b/virtio/src/queue.rs +@@ -423,8 +423,6 @@ impl SplitVringDesc { + fn get_indirect_desc( + &self, + sys_mem: &Arc, +- desc_table: GuestAddress, +- desc_table_host: u64, + index: u16, + cache: &mut Option, + elem: &mut Element, +@@ -434,13 +432,15 @@ impl SplitVringDesc { + } + + let desc_num = self.get_desc_num(); +- let desc_hva = desc_table_host + self.addr.0 - desc_table.0; +- let desc_table = self.addr; ++ let desc_hva = match sys_mem.get_host_address(self.addr) { ++ Some(addr) => addr, ++ None => bail!("Failed to get descriptor table entry host address"), ++ }; + let desc = Self::next_desc(sys_mem, desc_hva, desc_num, 0, cache)?; + Self::get_element(sys_mem, desc_hva, desc_num, index, desc, cache, elem) + .chain_err(|| +- format!("Failed to get element from indirect descriptor chain {}, table addr: 0x{:X}, size: {}", +- index, desc_table.raw_value(), desc_num) ++ format!("Failed to get element from indirect descriptor chain {}, table entry addr: 0x{:X}, size: {}", ++ index, self.addr.0, desc_num) + ) + } + +@@ -765,19 +765,12 @@ impl SplitVring { + bail!("Unexpected descriptor for writing only for popping avail ring"); + } + +- desc.get_indirect_desc( +- sys_mem, +- self.desc_table, +- self.addr_cache.desc_table_host, +- desc_index, +- &mut self.cache, +- elem, +- ) +- .map(|elem| { +- self.next_avail += Wrapping(1); +- elem +- }) +- .chain_err(|| "Failed to get indirect desc for popping avail ring")? ++ desc.get_indirect_desc(sys_mem, desc_index, &mut self.cache, elem) ++ .map(|elem| { ++ self.next_avail += Wrapping(1); ++ elem ++ }) ++ .chain_err(|| "Failed to get indirect desc for popping avail ring")? 
+ } else { + desc.get_nonindirect_desc( + sys_mem, +-- +2.25.1 + diff --git a/0015-vfio-doc-create-a-new-document-for-using-vfio.patch b/0015-vfio-doc-create-a-new-document-for-using-vfio.patch new file mode 100644 index 0000000..ac9d8c1 --- /dev/null +++ b/0015-vfio-doc-create-a-new-document-for-using-vfio.patch @@ -0,0 +1,146 @@ +From f2d1cd2444616b91be7b99a0fefa9b4d0a5174b8 Mon Sep 17 00:00:00 2001 +From: "Xinle.Guo" +Date: Fri, 18 Feb 2022 16:17:57 +0800 +Subject: [PATCH 10/10] vfio/doc: create a new document for using vfio + +Signed-off-by: Xinle.Guo +--- + docs/config_guidebook.md | 26 +++++-------- + docs/vfio.md | 79 ++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 88 insertions(+), 17 deletions(-) + create mode 100644 docs/vfio.md + +diff --git a/docs/config_guidebook.md b/docs/config_guidebook.md +index e0b8bdc..f541e16 100644 +--- a/docs/config_guidebook.md ++++ b/docs/config_guidebook.md +@@ -447,32 +447,24 @@ Four properties can be set for PFlash device. + ``` + + ### 2.11 VFIO +-The VFIO driver is an IOMMU/device agnostic framework for exposing direct access to userspace, in a secure, IOMMU protected environment. Virtual machine often makes use of direct device access when configured for the highest possible I/O performance. ++The VFIO driver is an IOMMU/device agnostic framework for exposing direct access to userspace, in a secure, ++IOMMU protected environment. Virtual machine often makes use of direct device access when configured for the highest ++possible I/O performance. + +-In order to successfully use VFIO device, it is mandatory that hardware supports virtualization and IOMMU groups. +- +-Assume user wants to access PCI device 0000:1a:00.3. +-The device is attached to PCI bus, therefore user will make use of vfio-pci to manage the group: +-```shell +-# cmdline +-modprobe vfio-pci +-``` +-Binding this device to the vfio-pci driver, it will create the VFIO group character devices for this group. 
+-```shell +-# cmdline +-echo 0000:1a:00.3 > /sys/bus/pci/devices/0000:1a:00.3/driver/unbind +-echo `lspci -ns 0000:1a:00.3 | awk -F':| ' '{print $5" "$6}'` > /sys/bus/pci/drivers/vfio-pci/new_id +-``` + Four properties are supported for VFIO device +-* host: PCI device info in the system that contains domain, bus number, slot number and function number. ++* host: PCI device info in the system that contains domain, bus number, slot number and function number. + * id: VFIO device name. + * bus: bus number of VFIO device. + * addr: including slot number and function number. ++ + ```shell +-# cmdline + -device vfio-pci,host=0000:1a:00.3,id=net,bus=pcie.0,addr=0x03.0x0[,multifunction=on] + ``` + ++Note: the kernel must contain physical device drivers, otherwise it cannot be loaded normally. ++ ++See [VFIO](./vfio.md) for more details. ++ + ### 2.12 Chardev + The type of chardev backend could be: stdio, pty, socket and file(output only). + +diff --git a/docs/vfio.md b/docs/vfio.md +new file mode 100644 +index 0000000..bd0aeea +--- /dev/null ++++ b/docs/vfio.md +@@ -0,0 +1,79 @@ ++# VFIO User Manual ++ ++## Introduction ++ ++The VFIO driver is an IOMMU/device agnostic framework for exposing direct access to userspace, in a secure, ++IOMMU protected environment. Virtual machine often makes use of direct device access when configured for the highest ++possible I/O performance. ++ ++## Preparation ++ ++In order to successfully use VFIO device, it is mandatory that hardware supports virtualization and IOMMU groups. ++Execute the following command on your host OS to check whether the IOMMU has been turned on. 
++```shell ++# dmesg | grep iommu ++``` ++If the IOMMU is turned on, the terminal display as follows: ++```shell ++iommu: Default domain type: Translated ++hibmc-drm 0000:0a:00.0: Adding to iommu group 0 ++ehci-pci 0000:7a:01.0: Adding to iommu group 1 ++ehci-pci 0000:ba:01.0: Adding to iommu group 2 ++ohci-pci 0000:7a:00.0: Adding to iommu group 3 ++ohci-pci 0000:ba:00.0: Adding to iommu group 4 ++xhci_hcd 0000:7a:02.0: Adding to iommu group 5 ++... ++``` ++Assume user wants to access PCI device 0000:1a:00.3. ++The device is attached to PCI bus, therefore user will make use of vfio-pci to manage the group: ++```shell ++# modprobe vfio-pci ++``` ++ ++## Bind VFIO device ++ ++Binding this device to the vfio-pci driver, it will create the VFIO group character devices for this group. ++```shell ++# echo 0000:1a:00.3 > /sys/bus/pci/devices/0000:1a:00.3/driver/unbind ++# echo `lspci -ns 0000:1a:00.3 | awk -F':| ' '{print $5" "$6}'` > /sys/bus/pci/drivers/vfio-pci/new_id ++``` ++ ++## Command line ++ ++Four properties are supported for VFIO device ++* host: PCI device info in the system that contains domain, bus number, slot number and function number. ++* id: VFIO device name. ++* bus: bus number of VFIO device. ++* addr: including slot number and function number. ++```shell ++-device vfio-pci,host=0000:1a:00.3,id=net,bus=pcie.0,addr=0x03.0x0[,multifunction=on] ++``` ++Note: the kernel must contain physical device drivers, otherwise it cannot be loaded normally. ++ ++## Hot plug management ++ ++StratoVirt standard VM supports hot-plug VFIO devices with QMP. ++Refer to qmp.md for specific command line parameters. 
++ ++### Example ++ ++hot plug VFIO device: ++```json ++<- {"execute":"device_add", "arguments":{"id":"vfio-0", "driver":"vfio-pci", "bus": "pcie.1", "addr":"0x0", "host": "0000:1a:00.3"}} ++-> {"return": {}} ++``` ++hot unplug VFIO device: ++```json ++<- {"execute": "device_del", "arguments": {"id": "vfio-0"}} ++-> {"event":"DEVICE_DELETED","data":{"device":"vfio-0","path":"vfio-0"},"timestamp":{"seconds":1614310541,"microseconds":554250}} ++-> {"return": {}} ++``` ++ ++## Unbind VFIO device ++ ++If it is necessary to unbind VFIO device directly, you can execute the following command. ++Note: assume uses hinic driver ++```shell ++# echo 0000:03:00.0 > /sys/bus/pci/drivers/vfio-pci/unbind ++# echo 0000:03:00.0 > /sys/bus/pci/drivers/hinic/bind ++``` +-- +2.25.1 + diff --git a/stratovirt.spec b/stratovirt.spec index 8678b70..5f67662 100644 --- a/stratovirt.spec +++ b/stratovirt.spec @@ -6,7 +6,7 @@ Name: stratovirt Version: 2.1.0 -Release: 2 +Release: 3 Summary: StratoVirt is an opensource VMM(Virtual Machine Manager) which aims to perform next generation virtualization. 
License: Mulan PSL v2 @@ -18,7 +18,16 @@ Patch002: 0002-legacy-fwcfg-fix-bug-of-wrong-size-judgment.patch Patch003: 0003-standard_vm-add-FACS-acpi-table-on-x86-plantform.patch Patch004: 0004-docs-build_guide-Update-suggested-rustc-version.patch Patch005: 0005-standard_vm-syscall-Add-new-seccomp-rules.patch - +Patch006: 0006-Implement-a-safe-offset_of-macro-function.patch +Patch007: 0007-loop_context-fix-the-bug-that-parked-event-not-remov.patch +Patch008: 0008-migration-use-device-id-as-snapshot-id.patch +Patch009: 0009-tests-fix-the-test_standvm_quickstart.patch +Patch010: 0010-root_port-correct-the-log-of-printing-device-info-du.patch +Patch011: 0011-tests-add-stand-kata-testcases.patch +Patch012: 0012-net-fix-the-bug-when-tap-is-abnormally-removed.patch +Patch013: 0013-docs-boot-update-detailed-usage-for-standard-boot.patch +Patch014: 0014-virtio-queue-fix-error-access-queue-s-host-virtual-a.patch +Patch015: 0015-vfio-doc-create-a-new-document-for-using-vfio.patch ExclusiveArch: x86_64 aarch64 @@ -73,6 +82,12 @@ chmod 555 ${RPM_BUILD_ROOT}/usr/bin/stratovirt chmod 555 ${RPM_BUILD_ROOT}/usr/bin/ozone %changelog +* Tue Mar 01 2022 Jie Yang - 2.1.0-3 +- Fix memory snapshot failure with hotplugged devices. +- Fix address translation for virtio devices. +- Add some test cases for microvm. +- Update some documents. + * Fri Feb 18 2022 Jie Yang - 2.1.0-2 - Fix VFIO hotplugging failure caused by missing seccomp rules. - Fix booting failure from disk image on x86_64. -- Gitee