diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 29b9964733ed118f330753c04146bebf4580cddf..2bf034118240547f9a4e30836d1bd0339f91fc79 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -743,6 +743,163 @@ void xen_load_linux(PCMachineState *pcms)
     x86ms->fw_cfg = fw_cfg;
 }
 
+/*
+ * Make sure at least @page_num free 2MB hugepages exist on the host,
+ * raising nr_hugepages through sysfs when the free pool is too small.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int try_create_2MB_page(uint32_t page_num)
+{
+    char nr_hp_num_s[256] = {0};
+    char free_hp_num_s[256] = {0};
+    const char *nr_hugepages_dir =
+        "/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages";
+    const char *free_hugepages_dir =
+        "/sys/kernel/mm/hugepages/hugepages-2048kB/free_hugepages";
+    int nr_hp_num = -1, free_hp_num = -1, ret = -1;
+    int nr_fd = -1, free_fd = -1;
+
+    /* Open one file at a time so that errno belongs to the failing call */
+    nr_fd = qemu_open_old(nr_hugepages_dir, O_RDWR);
+    if (nr_fd < 0) {
+        error_report("%s: open %s failed: %s", __func__,
+                     nr_hugepages_dir, strerror(errno));
+        goto end;
+    }
+    free_fd = qemu_open_old(free_hugepages_dir, O_RDONLY);
+    if (free_fd < 0) {
+        error_report("%s: open %s failed: %s", __func__,
+                     free_hugepages_dir, strerror(errno));
+        goto end;
+    }
+
+    /* Read at most size - 1 bytes so the buffers stay NUL-terminated */
+    if (read(nr_fd, nr_hp_num_s, sizeof(nr_hp_num_s) - 1) < 0 ||
+        read(free_fd, free_hp_num_s, sizeof(free_hp_num_s) - 1) < 0) {
+        error_report("%s: read failed: %s", __func__, strerror(errno));
+        goto end;
+    }
+
+    nr_hp_num = atoi(nr_hp_num_s);
+    free_hp_num = atoi(free_hp_num_s);
+    if (nr_hp_num < 0 || free_hp_num < 0) {
+        goto end;
+    }
+
+    /* Enough free hugepages already, nothing to reserve */
+    if (page_num <= (uint32_t)free_hp_num) {
+        ret = 0;
+        goto end;
+    }
+
+    nr_hp_num += (page_num - free_hp_num);
+    snprintf(nr_hp_num_s, sizeof(nr_hp_num_s), "%d", nr_hp_num);
+    if (write(nr_fd, nr_hp_num_s, strlen(nr_hp_num_s)) < 0) {
+        error_report("%s: write %s failed: %s", __func__,
+                     nr_hugepages_dir, strerror(errno));
+        goto end;
+    }
+
+    ret = 0;
+end:
+    if (nr_fd >= 0) {
+        qemu_close(nr_fd);
+    }
+    if (free_fd >= 0) {
+        qemu_close(free_fd);
+    }
+    return ret;
+}
+
+#define HUGEPAGE_NUM_MAX 128
+#define HUGEPAGE_SIZE (1024*1024*2)
+/*
+ * Back the guest range [ram2_base, ram2_base + ram2_size) with
+ * pre-faulted 2MB host hugepages, one "mem2-<index>" RAM region per page.
+ * Exits QEMU on any failure.
+ */
+static void mem2_init(MachineState *ms, MemoryRegion *system_memory)
+{
+    MemoryRegion *mr;
+    char mr_name[128] = {0};
+    void *ram = NULL;
+    int ret = 0, lock_fd = 0;
+    const char *lock_file =
+        "/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages";
+    uint64_t page_num = ms->ram2_size / HUGEPAGE_SIZE;
+    uint32_t i;
+
+    /* Compare in 64 bits so an oversized request cannot wrap around */
+    if (page_num > HUGEPAGE_NUM_MAX) {
+        error_report("\"-mem2 'size='\" needs to Less than %dM",
+                     (HUGEPAGE_SIZE * HUGEPAGE_NUM_MAX) / (1024 * 1024));
+        exit(EXIT_FAILURE);
+    }
+
+    /*
+     * Apply for hugepages from the OS and use them; this sequence needs to
+     * be synchronized, so it runs under a file lock on nr_hugepages.
+     */
+    lock_fd = qemu_open_old(lock_file, O_WRONLY);
+    if (lock_fd < 0) {
+        error_report("%s: open %s failed: %s", __func__, lock_file,
+                     strerror(errno));
+        exit(EXIT_FAILURE);
+    }
+
+    /* The lock attempt is non-blocking: poll until it succeeds */
+    while (qemu_lock_fd(lock_fd, 0, 0, true)) {
+        if (errno != EACCES && errno != EAGAIN) {
+            error_report("qemu_lock_fd failed: %s", strerror(errno));
+            exit(EXIT_FAILURE);
+        }
+        g_usleep(1000);
+    }
+
+    /*
+     * Try to create hugepages.
+     * If there are enough free hugepages, then do nothing.
+     */
+    ret = try_create_2MB_page(page_num);
+    if (ret) {
+        error_report("%s: Failed to allocate hugepage", __func__);
+        goto unlock;
+    }
+
+    for (i = 0; i < page_num; ++i) {
+        ram = mmap(NULL, HUGEPAGE_SIZE, PROT_READ | PROT_WRITE,
+                   MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE | MAP_HUGETLB,
+                   -1, 0);
+        if (ram == MAP_FAILED) {
+            error_report("%s: mmap failed: %s", __func__, strerror(errno));
+            /* Without this the failure would be reported but then ignored */
+            ret = -1;
+            goto unlock;
+        }
+
+        /* Allocated only once mmap() succeeded, so nothing leaks on failure */
+        mr = g_new0(MemoryRegion, 1);
+        snprintf(mr_name, sizeof(mr_name), "mem2-%u", i);
+        memory_region_init_ram_ptr(mr, NULL, mr_name, HUGEPAGE_SIZE, ram);
+        memory_region_add_subregion(system_memory,
+                                    ms->ram2_base + (hwaddr)i * HUGEPAGE_SIZE,
+                                    mr);
+    }
+
+unlock:
+    qemu_unlock_fd(lock_fd, 0, 0);
+    qemu_close(lock_fd);
+
+    if (ret) {
+        /*
+         * Regions created before the failure are already subregions of
+         * system_memory, so they are left alone; the process exits now.
+         */
+        exit(EXIT_FAILURE);
+    }
+}
+
 #define PC_ROM_MIN_VGA 0xc0000
 #define PC_ROM_MIN_OPTION 0xc8000
 #define PC_ROM_MAX 0xe0000
@@ -965,6 +1122,22 @@ void pc_memory_init(PCMachineState *pcms,
                        E820_RAM);
     }
 
+    if (machine->ram2_size && machine->ram2_base) {
+        if (0x100000000ULL + x86ms->above_4g_mem_size > machine->ram2_base) {
+            error_report("\"-mem2 'base'\" needs to greater 0x%llx",
+                         0x100000000ULL + x86ms->above_4g_mem_size);
+            exit(EXIT_FAILURE);
+        }
+        if (machine->ram2_base & (HUGEPAGE_SIZE - 1) ||
+            machine->ram2_size & (HUGEPAGE_SIZE - 1)) {
+            error_report("\"-mem2 'base|size'\" needs to aligned to 0x%x", HUGEPAGE_SIZE);
+            exit(EXIT_FAILURE);
+        }
+
+        mem2_init(machine, system_memory);
+        e820_add_entry(machine->ram2_base, machine->ram2_size, E820_RAM);
+    }
+
     if (pcms->sgx_epc.size != 0) {
         e820_add_entry(pcms->sgx_epc.base, pcms->sgx_epc.size, E820_RESERVED);
     }
diff --git a/hw/misc/psp.c b/hw/misc/psp.c
index da0a69efdb80d91bcbbf8f3a4a5e8a5b781381a4..c2af21d34c5a13942321fe045d525fc5bcf4e255 100644
--- a/hw/misc/psp.c
+++ b/hw/misc/psp.c
@@ -15,6 +15,10 @@
 #include "migration/vmstate.h"
 #include "hw/qdev-properties.h"
 #include "sysemu/runstate.h"
+#include "exec/memory.h"
+#include "exec/address-spaces.h"
+#include "exec/ramblock.h"
+#include "hw/i386/e820_memory_layout.h"
 #include <sys/ioctl.h>
 
 #define TYPE_PSP_DEV "psp"
@@ -35,6 +39,8 @@ struct PSPDevState {
      * the TKM module uses different key spaces based on different vids.
      */
     uint32_t vid;
+    /* pinned hugepage numbers */
+    int hp_num;
 };
 
 #define PSP_DEV_PATH "/dev/hygon_psp_config"
@@ -42,20 +48,168 @@ struct PSPDevState {
 #define PSP_IOC_MUTEX_ENABLE _IOWR(HYGON_PSP_IOC_TYPE, 1, NULL)
 #define PSP_IOC_MUTEX_DISABLE _IOWR(HYGON_PSP_IOC_TYPE, 2, NULL)
 #define PSP_IOC_VPSP_OPT _IOWR(HYGON_PSP_IOC_TYPE, 3, NULL)
+#define PSP_IOC_PIN_USER_PAGE _IOWR(HYGON_PSP_IOC_TYPE, 4, NULL)
+#define PSP_IOC_UNPIN_USER_PAGE _IOWR(HYGON_PSP_IOC_TYPE, 5, NULL)
 
 enum VPSP_DEV_CTRL_OPCODE {
     VPSP_OP_VID_ADD,
     VPSP_OP_VID_DEL,
+    VPSP_OP_SET_DEFAULT_VID_PERMISSION,
+    VPSP_OP_GET_DEFAULT_VID_PERMISSION,
+    VPSP_OP_SET_GPA,
 };
 
 struct psp_dev_ctrl {
     unsigned char op;
+    unsigned char resv[3];
     union {
         unsigned int vid;
+        /* Set or check the permissions for the default VID */
+        unsigned int def_vid_perm;
+        struct {
+            uint64_t gpa_start;
+            uint64_t gpa_end;
+        } gpa;
         unsigned char reserved[128];
-    } data;
+    } __attribute__ ((packed)) data;
 };
 
+/*
+ * Depth-first search of the region tree rooted at @root for a region
+ * whose name equals @name. Returns the region, or NULL if none matches.
+ */
+static MemoryRegion *find_memory_region_by_name(MemoryRegion *root,
+                                                const char *name)
+{
+    MemoryRegion *subregion;
+    MemoryRegion *result;
+
+    /* A region may have no name; never hand a NULL pointer to strcmp() */
+    if (root->name && strcmp(root->name, name) == 0) {
+        return root;
+    }
+
+    QTAILQ_FOREACH(subregion, &root->subregions, subregions_link) {
+        result = find_memory_region_by_name(subregion, name);
+        if (result) {
+            return result;
+        }
+    }
+
+    return NULL;
+}
+
+/*
+ * Ask the PSP device behind @fd to pin the hugepage at host address @vaddr.
+ * Returns 0 on success, and also when the ioctl fails with EINVAL;
+ * otherwise returns the ioctl() result.
+ */
+static int pin_user_hugepage(int fd, uint64_t vaddr)
+{
+    int ret;
+
+    ret = ioctl(fd, PSP_IOC_PIN_USER_PAGE, vaddr);
+    /* 22: Invalid argument, some old kernel doesn't support this ioctl command */
+    if (ret != 0 && errno == EINVAL) {
+        ret = 0;
+    }
+    return ret;
+}
+
+/*
+ * Ask the PSP device behind @fd to unpin the hugepage at host address @vaddr.
+ * Returns 0 on success, and also when the ioctl fails with EINVAL;
+ * otherwise returns the ioctl() result.
+ */
+static int unpin_user_hugepage(int fd, uint64_t vaddr)
+{
+    int ret;
+
+    ret = ioctl(fd, PSP_IOC_UNPIN_USER_PAGE, vaddr);
+    /* 22: Invalid argument, some old kernel doesn't support this ioctl command */
+    if (ret != 0 && errno == EINVAL) {
+        ret = 0;
+    }
+    return ret;
+}
+
+/*
+ * Pin every "mem2-<index>" region (index < hp_num) found below @root via the
+ * PSP device. On failure, the regions pinned so far are unpinned again.
+ * Returns 0 on success, non-zero on failure.
+ */
+static int pin_psp_user_hugepages(struct PSPDevState *state, MemoryRegion *root)
+{
+    int ret = 0;
+    char mr_name[128] = {0};
+    int i, pinned_num;
+    MemoryRegion *find_mr = NULL;
+
+    for (i = 0; i < state->hp_num; ++i) {
+        snprintf(mr_name, sizeof(mr_name), "mem2-%d", i);
+        find_mr = find_memory_region_by_name(root, mr_name);
+        if (!find_mr) {
+            error_report("fail to find memory region by name %s.", mr_name);
+            ret = -ENOMEM;
+            goto end;
+        }
+
+        ret = pin_user_hugepage(state->dev_fd,
+                                (uintptr_t)find_mr->ram_block->host);
+        if (ret) {
+            error_report("fail to pin_user_hugepage, ret: %d.", ret);
+            goto end;
+        }
+    }
+end:
+    if (ret) {
+        /* Roll back: regions [0, i) were pinned before the failure */
+        pinned_num = i;
+        for (i = 0; i < pinned_num; ++i) {
+            snprintf(mr_name, sizeof(mr_name), "mem2-%d", i);
+            find_mr = find_memory_region_by_name(root, mr_name);
+            if (!find_mr) {
+                continue;
+            }
+            unpin_user_hugepage(state->dev_fd,
+                                (uintptr_t)find_mr->ram_block->host);
+        }
+    }
+    return ret;
+}
+
+/*
+ * Unpin every "mem2-<index>" region (index < hp_num) found below @root.
+ * All regions are attempted even if one fails; returns 0 when every unpin
+ * succeeded, otherwise the first non-zero result.
+ */
+static int unpin_psp_user_hugepages(struct PSPDevState *state, MemoryRegion *root)
+{
+    int ret = 0, err;
+    char mr_name[128] = {0};
+    int i;
+    MemoryRegion *find_mr = NULL;
+
+    for (i = 0; i < state->hp_num; ++i) {
+        snprintf(mr_name, sizeof(mr_name), "mem2-%d", i);
+        find_mr = find_memory_region_by_name(root, mr_name);
+        if (!find_mr) {
+            continue;
+        }
+
+        err = unpin_user_hugepage(state->dev_fd,
+                                  (uintptr_t)find_mr->ram_block->host);
+        if (err) {
+            error_report("fail to unpin_user_hugepage, ret: %d.", err);
+            /* Keep going so the remaining pages do not stay pinned */
+            if (!ret) {
+                ret = err;
+            }
+        }
+    }
+    return ret;
+}
+
 static void psp_dev_destroy(PSPDevState *state)
 {
     struct psp_dev_ctrl ctrl = { 0 };
@@ -64,6 +218,11 @@ static void psp_dev_destroy(PSPDevState *state)
             ctrl.op = VPSP_OP_VID_DEL;
             if (ioctl(state->dev_fd, PSP_IOC_VPSP_OPT, &ctrl) < 0) {
                 error_report("VPSP_OP_VID_DEL: %d", -errno);
+            }
+
+            /* Unpin hugepage memory */
+            if (unpin_psp_user_hugepages(state, get_system_memory())) {
+                error_report("unpin_psp_user_hugepages failed");
             } else {
                 state->enabled = false;
             }
@@ -88,8 +247,13 @@ static void psp_dev_shutdown_notify(Notifier *notifier, void *data)
 
 static void psp_dev_realize(DeviceState *dev, Error **errp)
 {
+    int i;
+    char mr_name[128] = {0};
     struct psp_dev_ctrl ctrl = { 0 };
     PSPDevState *state = PSP_DEV(dev);
+    MemoryRegion *root_mr = get_system_memory();
+    MemoryRegion *find_mr = NULL;
+    uint64_t ram2_start = 0, ram2_end = 0;
 
     state->dev_fd = qemu_open_old(PSP_DEV_PATH, O_RDWR);
     if (state->dev_fd < 0) {
@@ -104,9 +268,49 @@ static void psp_dev_realize(DeviceState *dev, Error **errp)
         goto end;
     }
 
+    /* Count the "mem2-<index>" regions and record the GPA range they cover */
+    for (i = 0;; ++i) {
+        snprintf(mr_name, sizeof(mr_name), "mem2-%d", i);
+        find_mr = find_memory_region_by_name(root_mr, mr_name);
+        if (!find_mr) {
+            break;
+        }
+
+        if (i == 0) {
+            ram2_start = find_mr->addr;
+        }
+        /* Use the accessor: the raw size field is an Int128, not an integer */
+        ram2_end = find_mr->addr + memory_region_size(find_mr) - 1;
+    }
+
+    state->hp_num = i;
+
+    if (state->hp_num > 0) {
+        ctrl.op = VPSP_OP_SET_GPA;
+        ctrl.data.gpa.gpa_start = ram2_start;
+        ctrl.data.gpa.gpa_end = ram2_end;
+        if (ioctl(state->dev_fd, PSP_IOC_VPSP_OPT, &ctrl) < 0) {
+            error_setg(errp, "psp_dev_realize VPSP_OP_SET_GPA (start 0x%" PRIx64
+                       ", end 0x%" PRIx64 "), return %d",
+                       ram2_start, ram2_end, -errno);
+            goto del_vid;
+        }
+
+        /* Pin hugepage memory */
+        if (pin_psp_user_hugepages(state, root_mr)) {
+            error_setg(errp, "pin_psp_user_hugepages failed.");
+            goto del_vid;
+        }
+    }
+
     state->enabled = true;
     state->shutdown_notifier.notify = psp_dev_shutdown_notify;
     qemu_register_shutdown_notifier(&state->shutdown_notifier);
+
+    return;
+del_vid:
+    ctrl.op = VPSP_OP_VID_DEL;
+    ioctl(state->dev_fd, PSP_IOC_VPSP_OPT, &ctrl);
 end:
     return;
 }
diff --git a/include/hw/boards.h b/include/hw/boards.h
index da85f86efb9185df05a444bd22eeec0557a3e3c8..8ac8cad2a2225e97df1475c2f20118ccf2af394e 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -389,6 +389,8 @@ struct MachineState {
 
     ram_addr_t ram_size;
     ram_addr_t maxram_size;
+    ram_addr_t ram2_base;
+    ram_addr_t ram2_size;
     uint64_t ram_slots;
     BootConfiguration boot_config;
     char *kernel_filename;
diff --git a/qemu-options.hx b/qemu-options.hx
index c260117a96cf8f2c072768111945aa36bc21a6b4..caeca1d9bdd7f87704c001fef750870ea0bf7a1c 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -5856,6 +5856,18 @@ SRST
         (qemu) qom-set /objects/iothread1 poll-max-ns 100000
 ERST
 
+DEF("mem2", HAS_ARG, QEMU_OPTION_mem2,
+    "-mem2 base=addr[G],size=n[MG]\n"
+    "                Map guest memory using host hugepages\n"
+    "                base: starting position of guest physical address\n"
+    "                size: the size of mmaped memory\n"
+    "NOTE: Both `base` and `size` need to be aligned according to 2MB\n",
+    QEMU_ARCH_I386)
+SRST
+``-mem2 base=addr[G],size=n[MG]``
+    Map the host's large page memory at the specified guest address
+    so that some devices can use larger contiguous physical memory.
+ERST
 
 HXCOMM This is the last statement. Insert new options before this line!
 
diff --git a/system/vl.c b/system/vl.c
index e18fa3ce4654818683f18520d1361dcb4430c7fa..101c2df87713b54609b8fc9c2ff476bc44d6bacf 100644
--- a/system/vl.c
+++ b/system/vl.c
@@ -503,6 +503,23 @@ static QemuOptsList qemu_action_opts = {
     },
 };
 
+static QemuOptsList qemu_mem2_opts = {
+    .name = "mem2",
+    .merge_lists = true,
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_mem2_opts.head),
+    .desc = {
+        {
+            .name = "base",
+            .type = QEMU_OPT_SIZE,
+        },
+        {
+            .name = "size",
+            .type = QEMU_OPT_SIZE,
+        },
+        { /* end of list */ }
+    },
+};
+
 const char *qemu_get_vm_name(void)
 {
     return qemu_name;
@@ -2090,6 +2107,53 @@ static void parse_memory_options(void)
     loc_pop(&loc);
 }
 
+/*
+ * Read the parsed "-mem2 base=...,size=..." option and store both values
+ * in current_machine. Does nothing when neither key is set; exits when
+ * only one of them is set or when a value parses to zero.
+ */
+static void parse_mem2_options(void)
+{
+    uint64_t sz, base;
+    const char *sz_str = NULL, *base_str = NULL;
+    QemuOpts *opts = qemu_find_opts_singleton("mem2");
+    Location loc;
+
+    loc_push_none(&loc);
+    qemu_opts_loc_restore(opts);
+
+    base_str = qemu_opt_get(opts, "base");
+    sz_str = qemu_opt_get(opts, "size");
+
+    /* Leave through "out" so the location pushed above is always popped */
+    if (!base_str && !sz_str) {
+        goto out;
+    }
+
+    if ((!base_str || !*base_str)
+        || (!sz_str || !*sz_str)) {
+        error_report("missing 'base' or 'size' argument for -mem2 option");
+        exit(EXIT_FAILURE);
+    }
+
+    base = qemu_opt_get_size(opts, "base", 0);
+    if (!base) {
+        error_report("invalid 'base' value");
+        exit(EXIT_FAILURE);
+    }
+    current_machine->ram2_base = base;
+
+    sz = qemu_opt_get_size(opts, "size", 0);
+    if (!sz) {
+        error_report("invalid 'size' value");
+        exit(EXIT_FAILURE);
+    }
+    current_machine->ram2_size = sz;
+
+out:
+    loc_pop(&loc);
+}
+
 static void qemu_create_machine(QDict *qdict)
 {
     MachineClass *machine_class = select_machine(qdict, &error_fatal);
@@ -2776,6 +2840,7 @@ void qemu_init(int argc, char **argv)
     qemu_add_opts(&qemu_semihosting_config_opts);
     qemu_add_opts(&qemu_fw_cfg_opts);
     qemu_add_opts(&qemu_action_opts);
+    qemu_add_opts(&qemu_mem2_opts);
     qemu_add_run_with_opts();
     module_call_init(MODULE_INIT_OPTS);
 
@@ -3635,7 +3700,13 @@ void qemu_init(int argc, char **argv)
                 break;
             }
 #endif /* CONFIG_POSIX */
-
+            case QEMU_OPTION_mem2:
+                opts = qemu_opts_parse_noisily(qemu_find_opts("mem2"),
+                                               optarg, false);
+                if (!opts) {
+                    exit(EXIT_FAILURE);
+                }
+                break;
             default:
                 error_report("Option not supported in this build");
                 exit(1);
@@ -3686,6 +3757,8 @@ void qemu_init(int argc, char **argv)
 
     qemu_create_machine(machine_opts_dict);
 
+    parse_mem2_options();
+
     suspend_mux_open();
 
     qemu_disable_default_devices();