From 4970683a636a4e51f1fb312f9b0146a471d5ad7f Mon Sep 17 00:00:00 2001 From: Weifeng Su Date: Wed, 8 Jun 2022 20:40:20 +0800 Subject: [PATCH] Add support for HSAK Signed-off-by: Weifeng Su --- ...SAK-needed-head-file-and-API-to-spdk.patch | 870 ++++++ 0018-lib-bdev-Add-bdev-support-for-HSAK.patch | 953 ++++++ ...ib-env_dpdk-Add-config-args-for-HSAK.patch | 176 ++ 0020-lib-nvme-Add-nvme-support-for-HSAK.patch | 1441 +++++++++ ...dev-Add-bdev-module-support-for-HSAK.patch | 2729 +++++++++++++++++ ..._cmd_dataset_management-and-delete-s.patch | 122 + 0023-spdk-add-nvme-support-for-HSAK.patch | 1651 ++++++++++ 0024-Add-CUSE-switch-for-nvme-ctrlr.patch | 55 + ...-serial-vendor-special-opcode-in-CUS.patch | 45 + ...on-in-continuous-setup-and-teardown-.patch | 124 + 0027-Change-log-level-in-poll-timeout.patch | 28 + spdk.spec | 36 +- 12 files changed, 8228 insertions(+), 2 deletions(-) create mode 100644 0017-add-HSAK-needed-head-file-and-API-to-spdk.patch create mode 100644 0018-lib-bdev-Add-bdev-support-for-HSAK.patch create mode 100644 0019-lib-env_dpdk-Add-config-args-for-HSAK.patch create mode 100644 0020-lib-nvme-Add-nvme-support-for-HSAK.patch create mode 100644 0021-module-bdev-Add-bdev-module-support-for-HSAK.patch create mode 100644 0022-use-spdk_nvme_ns_cmd_dataset_management-and-delete-s.patch create mode 100644 0023-spdk-add-nvme-support-for-HSAK.patch create mode 100644 0024-Add-CUSE-switch-for-nvme-ctrlr.patch create mode 100644 0025-Adapt-for-ES3000-serial-vendor-special-opcode-in-CUS.patch create mode 100644 0026-Fix-race-condition-in-continuous-setup-and-teardown-.patch create mode 100644 0027-Change-log-level-in-poll-timeout.patch diff --git a/0017-add-HSAK-needed-head-file-and-API-to-spdk.patch b/0017-add-HSAK-needed-head-file-and-API-to-spdk.patch new file mode 100644 index 0000000..92e1ebd --- /dev/null +++ b/0017-add-HSAK-needed-head-file-and-API-to-spdk.patch @@ -0,0 +1,870 @@ +From f0710b6c37214457ab46bd1859f00ec413b01a7f Mon Sep 17 00:00:00 2001 
+From: sunshihao +Date: Thu, 18 Feb 2021 10:52:24 +0800 +Subject: [PATCH 17/27] add HSAK needed head file and API to spdk + +Signed-off-by: sunshihao +--- + CONFIG | 3 + + Makefile | 6 + + configure | 8 ++ + etc/spdk/nvme.conf.in | 88 ++++++++++++ + include/spdk/bdev.h | 85 +++++++++++ + include/spdk/bdev_module.h | 89 ++++++++++++ + include/spdk/log.h | 2 +- + include/spdk/nvme.h | 230 ++++++++++++++++++++++++++++++ + include/spdk/thread.h | 18 +++ + include/spdk_internal/bdev_stat.h | 63 ++++++++ + include/spdk_internal/debug.h | 43 ++++++ + include/spdk_internal/thread.h | 2 + + mk/spdk.app_vars.mk | 4 +- + 13 files changed, 639 insertions(+), 2 deletions(-) + create mode 100644 etc/spdk/nvme.conf.in + create mode 100644 include/spdk_internal/bdev_stat.h + create mode 100644 include/spdk_internal/debug.h + +diff --git a/CONFIG b/CONFIG +index b5fffae..214e59e 100644 +--- a/CONFIG ++++ b/CONFIG +@@ -43,6 +43,9 @@ CONFIG_CROSS_PREFIX= + # Build with debug logging. Turn off for performance testing and normal usage + CONFIG_DEBUG=n + ++# Enable read and write NVMe for application ++CONFIG_APP_RW=n ++ + # Treat warnings as errors (fail the build on any warning). 
+ CONFIG_WERROR=n + +diff --git a/Makefile b/Makefile +index a50fa94..1c98268 100644 +--- a/Makefile ++++ b/Makefile +@@ -37,6 +37,12 @@ S := + SPDK_ROOT_DIR := $(CURDIR) + include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + ++ifeq ($(CONFIG_APP_RW),y) ++# secure compile option ++CFLAGS += -fPIE -pie -fPIC -fstack-protector-strong -D_FORTIFY_SOURCE=2 -O2 -Wall -Werror ++CFLAGS += -Wl,-z,relro,-z,now,-z,noexecstack -Wtrampolines ++endif ++ + DIRS-y += lib + DIRS-y += module + DIRS-$(CONFIG_SHARED) += shared_lib +diff --git a/configure b/configure +index 5b48696..964322e 100644 +--- a/configure ++++ b/configure +@@ -25,6 +25,8 @@ function usage() + echo " example: aarch64-linux-gnu" + echo "" + echo " --enable-debug Configure for debug builds" ++ echo " --enable-err-injc Enable error injection feature" ++ echo " --enable-raw Enable read and write NVMe disk feature." + echo " --enable-werror Treat compiler warnings as errors" + echo " --enable-asan Enable address sanitizer" + echo " --enable-ubsan Enable undefined behavior sanitizer" +@@ -204,6 +206,12 @@ for i in "$@"; do + --disable-debug) + CONFIG[DEBUG]=n + ;; ++ --enable-raw) ++ CONFIG[APP_RW]=y ++ ;; ++ --enable-err-injc) ++ CONFIG[ERR_INJC]=y ++ ;; + --enable-asan) + CONFIG[ASAN]=y + ;; +diff --git a/etc/spdk/nvme.conf.in b/etc/spdk/nvme.conf.in +new file mode 100644 +index 0000000..a3df92b +--- /dev/null ++++ b/etc/spdk/nvme.conf.in +@@ -0,0 +1,88 @@ ++#NVME configuration file ++# ++# Please write all parameters using ASCII. ++# The parameter must be quoted if it includes whitespace. ++# ++# Configuration syntax: ++# Leading whitespace is ignored. ++# Lines starting with '#' are comments. ++# Lines ending with '\' are concatenated with the next line. ++# Bracketed ([]) names define sections ++ ++[Global] ++ # Users can restrict work items to only run on certain cores by specifying a ReactorMask. ++ # Can not specify the NO. 0 core. ++ ReactorMask 0x2 ++ ++ # The print level of log. 
++ # 0: Print ERROR log only; 1: Print WARNING and ERROR log; and so on, 4: Print all level log ++ LogLevel 1 ++ ++ # The sizes of Memory for Libstorge(Unit: MB). The minimum value is 300MB. ++ # If parameter "SocketMem" was set corrected, MemSize was useless ++ MemSize 300 ++ ++ # The same block device supports multiple queues. ++ MultiQ No ++ ++ # End-to-end data protection. This item is only used if the namespace is formatted to use end-to-end protection information. ++ # if the value is set to '1', then the protection information are generated by controller, and the logical block data and protection information are written to NVM. ++ # if the value is set to '2', then the protection information are transferred from the host buffer to NVM. ++ E2eDif 2 ++ ++ # Open IOstat or not ++ IoStat No ++ ++ # Poll time threshold in millisecond, It will count exceptional polling thread call which duration exceed the value and display in stat report. ++ # This item is only used when UseReactor = No, Set to 0 means disable this measurement. ++ PollTime 0 ++ ++ # Preallocate specified amounts of memory(Unit: MB) per socket. ++ # The parameter is a comma-sprated list of values, For example: ++ # SocketMem 1024,2048 ++ # This will allocate 1 gigabyte of memory on socket 0, and 2048 megabytes of memory on socket 1. ++ # The sum of socket memory must be greater than 300MB. ++ # if SocketMem was set corrected, The parameter "MemSize" was useless ++ # SocketMem 300 ++ ++ # Place a per-socket upper limit on memory use (non-legacy memory mode only). ++ # 0 will disable the limit for a particular socket. ++ # SocketLimit 1024,1 ++ # This will set upper limit of 1 gigabyte on socket 0, and 1 megabytes of memory on socket 1. ++ # if the value is set to empty, means disable the limit per socket. ++ # if SocketMem was empty, the parameter was useless. 
++ # SocketLimit 300 ++ ++ #Decide whether to start rpc server or not ++ RpcServer Yes ++ ++# NVMe configuration options ++[Nvme] ++ # NVMe Device Whitelist ++ # Users may specify which NVMe devices to claim by their transport id. ++ # See spdk_nvme_transport_id_parse() in spdk/nvme.h for the correct format. ++ # The second argument is the assigned name, which can be referenced from ++ # other sections in the configuration file. For NVMe devices, a namespace ++ # is automatically appended to each name in the format nY, where ++ # Y is the NSID (starts at 1). ++ #TransportID "trtype:PCIe traddr:0000:81:00.0" nvme0 ++ #TransportID "trtype:PCIe traddr:0000:01:00.0" nvme1 ++ ++ # The number of attempts per I/O when an I/O fails. Do not include ++ # this key to get the default behavior. ++ RetryCount 4 ++ # Timeout for each command, in microseconds. If 0, don't track timeouts. ++ TimeoutUsec 0 ++ # Action to take on command time out. Only valid when Timeout is greater ++ # than 0. This may be 'Reset' to reset the controller, 'Abort' to abort ++ # the command, or 'None' to just print a message but do nothing. ++ # Admin command timeouts will always result in a reset. ++ ActionOnTimeout None ++ # Set how often the admin queue is polled for asynchronous events. ++ # Units in microseconds. ++ AdminPollRate 100000 ++ ++[Reactor] ++ # Batch size of IO for one-time release by reactor. ++ # The maximum value is 32. ++ BatchSize 8 +diff --git a/include/spdk/bdev.h b/include/spdk/bdev.h +index d894646..2951660 100644 +--- a/include/spdk/bdev.h ++++ b/include/spdk/bdev.h +@@ -53,6 +53,8 @@ extern "C" { + + #define SPDK_BDEV_SMALL_BUF_MAX_SIZE 8192 + #define SPDK_BDEV_LARGE_BUF_MAX_SIZE (64 * 1024) ++#define SPDK_BDEV_SMALL_BUF_WITH_MAX_MD 512 ++#define SPDK_BDEV_LARGE_BUF_WITH_MAX_MD 1024 + + /* Increase the buffer size to store interleaved metadata. Increment is the + * amount necessary to store metadata per data block. 
16 byte metadata per +@@ -116,6 +118,42 @@ enum spdk_bdev_status { + SPDK_BDEV_STATUS_REMOVING, + }; + ++#ifdef SPDK_CONFIG_APP_RW ++/** ns status */ ++enum spdk_bdev_ns_status { ++ SPDK_BDEV_NS_STATUS_INVALID, ++ SPDK_BDEV_NS_STATUS_READY, ++ SPDK_BDEV_NS_STATUS_REMOVING, ++ SPDK_BDEV_NS_STATUS_UNREGISTER, ++}; ++ ++typedef void (*LIBSTORAGE_CALLBACK_FUNC)(int32_t cb_status, int32_t sct_code, void *cb_arg); ++ ++typedef struct libstorage_io { ++ uint8_t *buf; ++ struct iovec *iovs; /* array of iovecs to transfer */ ++ int iovcnt; /* Number of iovecs in iovs array */ ++ int32_t fd; /* File Descriptor */ ++ uint16_t opcode; /* r/w */ ++ uint16_t streamId; /* Stream ID for IO */ ++ uint8_t pi_action; ++ uint8_t fua; ++ uint8_t location; ++ bool inSubmit; /* In the I/0 phase or not. Use in nopoll model */ ++ uint32_t count; ++ uint32_t nbytes; ++ uint64_t offset; ++ uint8_t *md_buf; ++ uint32_t md_len; ++ uint32_t magic; ++ /*Save the error code returned by the callback */ ++ int32_t err; ++ int32_t reserved; ++ LIBSTORAGE_CALLBACK_FUNC cb; ++ void *cb_arg; ++} LIBSTORAGE_IO_T; ++#endif ++ + /** + * \brief Handle to an opened SPDK block device. 
+ */ +@@ -140,6 +178,13 @@ enum spdk_bdev_io_type { + SPDK_BDEV_IO_TYPE_COMPARE, + SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE, + SPDK_BDEV_IO_TYPE_ABORT, ++#ifdef SPDK_CONFIG_APP_RW ++ SPDK_BDEV_IO_TYPE_READ_NVME, ++ SPDK_BDEV_IO_TYPE_WRITE_NVME, ++ SPDK_BDEV_IO_TYPE_READV_NVME, ++ SPDK_BDEV_IO_TYPE_WRITEV_NVME, ++ SPDK_BDEV_IO_TYPE_UNMAP_BLOCKS, ++#endif + SPDK_BDEV_NUM_IO_TYPES /* Keep last */ + }; + +@@ -181,6 +226,14 @@ struct spdk_bdev_io_stat { + uint64_t write_latency_ticks; + uint64_t unmap_latency_ticks; + uint64_t ticks_rate; ++#ifdef SPDK_CONFIG_APP_RW ++ int io_stat_id; ++ uint64_t io_ticks; ++ uint64_t pre_ticks; ++ uint64_t cur_ticks; ++ uint64_t start_tsc; ++ uint64_t interval_tsc; ++#endif + }; + + struct spdk_bdev_opts { +@@ -1342,6 +1395,38 @@ int spdk_bdev_unmap(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + uint64_t offset, uint64_t nbytes, + spdk_bdev_io_completion_cb cb, void *cb_arg); + ++#ifdef SPDK_CONFIG_APP_RW ++/** ++ * Submit an unmap request to the block device. Unmap is sometimes also called trim or ++ * deallocate. This notifies the device that the data in the blocks described is no ++ * longer valid. Reading blocks that have been unmapped results in indeterminate data. ++ * ++ * \param bdev Block device description ++ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). ++ * \param unmap_d An array of unmap descriptors. ++ * \param bdesc_count The number of elements in unmap_d. ++ * \param cb Called when the request is complete. ++ * \param cb_arg Argument passed to cb. ++ * ++ * \return 0 on success. On success, the callback will always ++ * be called (even if the request ultimately failed). Return ++ * negated errno on failure, in which case the callback will not be called. 
++ */ ++int ++spdk_bdev_unmap_multiblocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, ++ void *unmap_d, uint16_t unmap_count, ++ spdk_bdev_io_completion_cb cb, void *cb_arg); ++ ++void* ++spdk_bdev_get_channel_group(struct spdk_io_channel *io_ch); ++ ++void* ++spdk_bdev_io_get_pool(size_t nbytes); ++ ++bool ++spdk_bdev_have_io_in_channel(struct spdk_io_channel *bdevIoCh); ++#endif ++ + /** + * Submit an unmap request to the block device. Unmap is sometimes also called trim or + * deallocate. This notifies the device that the data in the blocks described is no +diff --git a/include/spdk/bdev_module.h b/include/spdk/bdev_module.h +index bbb9f94..c2fd81d 100644 +--- a/include/spdk/bdev_module.h ++++ b/include/spdk/bdev_module.h +@@ -222,8 +222,67 @@ struct spdk_bdev_fn_table { + + /** Get bdev module context. */ + void *(*get_module_ctx)(void *ctx); ++ ++#ifdef SPDK_CONFIG_APP_RW ++ uint16_t (*get_io_channel_id)(struct spdk_io_channel *ch); ++ ++ int (*bdev_poll_rsp)(void *pollCh); ++ ++ uint64_t (*get_timeout_count)(struct spdk_io_channel *ch); ++#endif ++}; ++ ++#ifdef SPDK_CONFIG_APP_RW ++static inline void spdk_bdev_set_io_location(void *bdev_ctx, uint8_t location) ++{ ++ struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bdev_ctx); ++ uint8_t *ioLoc = (uint8_t *)bdev_io->internal.caller_ctx; ++ *ioLoc = location; ++} ++ ++enum spdk_bdev_driver_ctx { ++ SPDK_BDEV_IO_ACTION_PI, ++ SPDK_BDEV_IO_ACTION_FUA, ++ SPDK_BDEV_IO_STREAM_ID_0, ++ SPDK_BDEV_IO_STREAM_ID_1, ++}; ++ ++enum spdk_bdev_io_e2e_pi_action{ ++ IO_NO_PROTECTION = 0, ++ IO_HALF_WAY_PROTECTION = 1, ++ IO_E2E_PROTECTION = 2 + }; + ++#define FLAG_NO_REF 0x10//bit 4 : 1, disable ctrl ref tag check; 0, enable check ++#define FLAG_CALCRC 0x08//bit 3 : 1, libstorage calculate crc; 0, app calculate crc ++#define FLAG_PRCHK 0x04//bit 2 : 1, enable ctrl guard crc check; 0, disable check ++ ++enum spdk_bdev_io_fua{ ++ IO_FUA_NO = 0, ++ IO_FUA_YES = 1 ++}; ++ ++void spdk_bdev_nvme_remove_cb(void 
*cb_ctx, void *ctrlr); ++ ++void spdk_bdev_fail_ctrlr(const char* traddr); ++ ++void *nvme_channel_get_group(void *io_ch); ++ ++enum reqLocation_E ++{ ++ LOCAL_RECEIVE_APP = 1, ++ LOCAL_LIBSTORAGE_SUBMIT = 2, ++ LOCAL_LIBSTORAGE_ASYNC_REQ = 3, ++ LOCAL_LIBSTORAGE_BDEV_NVME = 4, ++ LOCAL_LIBSTORAGE_HUNG_REQ = 5, ++ LOCAL_LIBSTORAGE_TO_DISK = 6, ++ LOCAL_LIBSTORAGE_FROM_DISK = 7, ++ LOCAL_LIBSTORAGE_CALLBACK = 8, ++ LOCAL_LIBSTORAGE_SUBMIT_RETRY = 9, ++ LOCAL_LIBSTORAGE_BDEV_NOMEM = 10, ++}; ++#endif ++ + /** bdev I/O completion status */ + enum spdk_bdev_io_status { + SPDK_BDEV_IO_STATUS_AIO_ERROR = -8, +@@ -407,6 +466,10 @@ struct spdk_bdev { + /** The bdev status */ + enum spdk_bdev_status status; + ++#ifdef SPDK_CONFIG_APP_RW ++ enum spdk_bdev_ns_status ns_status; ++#endif ++ + /** + * Pointer to the module that has claimed this bdev for purposes of creating virtual + * bdevs on top of it. Set to NULL if the bdev has not been claimed. +@@ -528,6 +591,11 @@ struct spdk_bdev_io { + /** Starting offset (in blocks) of the bdev for this I/O. */ + uint64_t offset_blocks; + ++#ifdef SPDK_CONFIG_APP_RW ++ /* The number of bytes to transfer */ ++ size_t nbytes; ++#endif ++ + /** stored user callback in case we split the I/O and use a temporary callback */ + spdk_bdev_io_completion_cb stored_user_cb; + +@@ -595,6 +663,27 @@ struct spdk_bdev_io { + /* The data buffer */ + void *buf; + } zone_mgmt; ++#ifdef SPDK_CONFIG_APP_RW ++ struct { ++ /* The data buffer to transfer */ ++ void *buf; ++ ++ /* The meta data buffer to transfer */ ++ void *md_buf; ++ ++ /** Total size of data(in blocks) to be transferred. */ ++ uint64_t num_blocks; ++ ++ /* The number of bytes to transfer */ ++ size_t nbytes; ++ ++ /** Starting offset (in blocks) of the bdev for this I/O. */ ++ size_t offset_blocks; ++ ++ /* meta data buffer size to transfer */ ++ size_t md_len; ++ } contig; ++#endif + } u; + + /** It may be used by modules to put the bdev_io into its own list. 
*/ +diff --git a/include/spdk/log.h b/include/spdk/log.h +index ad850ab..e16035c 100644 +--- a/include/spdk/log.h ++++ b/include/spdk/log.h +@@ -173,7 +173,7 @@ enum spdk_log_level spdk_log_get_print_level(void); + * \param format Format string to the message. + */ + void spdk_log(enum spdk_log_level level, const char *file, const int line, const char *func, +- const char *format, ...) __attribute__((__format__(__printf__, 5, 6))); ++ const char *format, ...) __attribute__((weak)) __attribute__((__format__(__printf__, 5, 6))); + + /** + * Same as spdk_log except that instead of being called with variable number of +diff --git a/include/spdk/nvme.h b/include/spdk/nvme.h +index 45b9f94..8e05139 100644 +--- a/include/spdk/nvme.h ++++ b/include/spdk/nvme.h +@@ -2465,6 +2465,7 @@ enum spdk_nvme_ns_flags { + part of the logical block that it is associated with */ + SPDK_NVME_NS_WRITE_UNCORRECTABLE_SUPPORTED = 0x40, /**< The write uncorrectable command is supported */ + SPDK_NVME_NS_COMPARE_SUPPORTED = 0x80, /**< The compare command is supported */ ++ SPDK_NVME_NS_DPS_PI_MDSTART = 0x100 /**< protection info transferred at start of metadata */ + }; + + /** +@@ -3434,6 +3435,235 @@ struct spdk_nvme_transport_ops { + */ + void spdk_nvme_transport_register(const struct spdk_nvme_transport_ops *ops); + ++#ifdef SPDK_CONFIG_APP_RW ++struct nvme_ctrlr_info { ++ char ctrlName[16]; ++ char pciAddr[24]; ++ uint64_t tnvmcap; /* Total NVM Capacity in bytes */ ++ uint64_t unvmcap; /* Unallocated NVM Capacity in bytes */ ++ int8_t sn[20]; /* Serial number */ ++ int8_t mn[40]; /* Model number */ ++ uint8_t fr[8]; /* Firmware revision */ ++ uint32_t max_num_ns; /* Number of namespaces */ ++ uint32_t version; /* Version of the NVM Express specification that the controller implementation supports */ ++ uint16_t num_io_queues; /* num of io queues */ ++ uint16_t io_queue_size; /* io queue size */ ++ uint16_t device_id; /* Device id */ ++ uint16_t subdevice_id; /* Subsystem device id */ ++ 
uint16_t vid; /* Vendor id */ ++ uint16_t ssvid; /* Subsystem vendor id */ ++ uint16_t ctrlid; /* Controller id */ ++ uint16_t trtype; /* Transport type */ ++ uint16_t support_ns :1; /* Supports the Namespace Management and Namespace Attachment commands */ ++ uint16_t directives :1; /* Supports Directives */ ++ uint16_t streams :1; /* Supports Streams Directives */ ++ uint16_t dsm :1; /* Supports the controller supports the Dataset Management command */ ++ uint16_t reserved :12; ++ uint16_t reserved2[3]; ++}; ++ ++struct nvme_ctrlr; ++struct nvme_bdev_ctrlr; ++struct spdk_bdev; ++struct nvme_bdev; ++struct spdk_nvme_ns; ++struct spdk_nvme_qpair; ++int32_t nvme_ctrlr_get_info(const char* ctrlName, struct nvme_ctrlr_info** ppCtrlr); ++struct spdk_nvme_ctrlr* spdk_nvme_ctrlr_get_by_name(const char* ctrlname); ++struct spdk_nvme_ctrlr* spdk_nvme_ctrlr_get_by_ctrlr(const struct nvme_bdev_ctrlr *nvme_bdev_ctrlr); ++struct nvme_bdev_ctrlr* nvme_ctrlr_get_by_name(const char* ctrlname); ++void nvme_ctrlr_clear_iostat_by_name(const char* ctrlname); ++void nvme_ctrlr_clear_iostat_all(void); ++struct nvme_bdev_ctrlr* bdev_nvme_get_ctrlr_by_bdev_desc(void *bdev); ++struct spdk_nvme_ns* bdev_nvme_get_ns(struct nvme_bdev *nbdev); ++void bdev_nvme_update_block_by_nvme_ctrlr(struct spdk_nvme_ctrlr *ctrlr); ++int bdev_nvme_update_ns(struct nvme_bdev_ctrlr *nvme_ctrlr, uint32_t nsid); ++bool spdk_bdev_can_remove(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid); ++void spdk_bdev_set_ns_normal(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid); ++void spdk_nvme_ctrlr_set_shutdown(struct spdk_nvme_ctrlr *ctrlr, bool is_shutdown); ++bool spdk_nvme_ctrlr_is_smart_per_namespace_supported(struct spdk_nvme_ctrlr *ctrlr); ++int spdk_nvme_ctrlr_get_smart_info(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, struct spdk_nvme_health_information_page *health_payload); ++int spdk_nvme_ctrlr_get_error_info(struct spdk_nvme_ctrlr *ctrlr, uint32_t err_entries, struct 
spdk_nvme_error_information_entry *error_info); ++struct spdk_nvme_ctrlr_opts* spdk_nvme_ctrlr_get_opts(struct spdk_nvme_ctrlr *ctrlr); ++int nvme_ns_get_common_data(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns_data *nsdata); ++bool spdk_nvme_ns_is_allocated(struct spdk_nvme_ctrlr *ctrlr, uint16_t nsid); ++bool spdk_nvme_ctrlr_is_ns_manage_supported(struct spdk_nvme_ctrlr *ctrlr); ++bool spdk_nvme_ctrlr_is_format_supported(struct spdk_nvme_ctrlr *ctrlr); ++bool spdk_nvme_ctrlr_is_format_all_ns(struct spdk_nvme_ctrlr *ctrlr); ++bool spdk_nvme_ctrlr_is_directive_supported(struct spdk_nvme_ctrlr *ctrlr); ++bool spdk_nvme_ctrlr_is_streams_supported(struct spdk_nvme_ctrlr *ctrlr); ++int32_t spdk_nvme_ctrlr_identify_directives(struct spdk_nvme_ctrlr *ctrlr, uint16_t nsid, void *payload); ++int32_t spdk_nvme_ctrlr_enable_streams(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid); ++int32_t spdk_nvme_ctrlr_ret_streams_param(struct spdk_nvme_ctrlr *ctrlr, void *payload); ++int32_t spdk_nvme_ns_ret_streams_param(struct spdk_nvme_ns *ns, void *payload); ++int32_t spdk_nvme_ns_get_streams_status(struct spdk_nvme_ns *ns, void *payload); ++int32_t spdk_nvme_ns_alloc_streams_res(struct spdk_nvme_ns *ns, uint16_t nsr); ++int32_t spdk_nvme_ns_release_streams_id(struct spdk_nvme_ns *ns, uint16_t streamsId); ++int32_t spdk_nvme_ns_release_streams_res(struct spdk_nvme_ns *ns); ++void spdk_nvme_use_streams(bool use); ++ ++/** ++ * \brief Get the ctrlr is_failed state, for an I/O sent to the given namespace. ++ * ++ * This function is thread safe and can be called at any point while the controller is attached to ++ * the SPDK NVMe driver. 
++ */ ++bool spdk_nvme_ns_ctrl_is_failed(struct spdk_nvme_ns *ns); ++#define NVME_MAX_CONTROLLERS 1024 ++ ++/* check nvme whether exist by access cc register */ ++bool nvme_ctrlr_is_exist(struct spdk_nvme_ctrlr *ctrlr); ++ ++/* create ctrlr for new added device */ ++int spdk_bdev_nvme_create_self(struct spdk_nvme_transport_id *trid, const char *base_name, ++ const char **names, size_t *count, const char *hostnqn); ++ ++int spdk_nvme_detach_ublock(struct spdk_nvme_ctrlr *ctrlr); ++void spdk_nvme_ctrlr_update_unvmcap(struct spdk_nvme_ctrlr *ctrlr); ++ ++#define SPDK_NVME_UEVENT_SUBSYSTEM_UIO 1 ++#define SPDK_NVME_UEVENT_SUBSYSTEM_NVME 2 ++ ++enum spdk_nvme_uevent_action { ++ SPDK_NVME_UEVENT_ADD = 0, ++ SPDK_NVME_UEVENT_REMOVE = 1, ++}; ++ ++struct spdk_uevent { ++ /* remove or add */ ++ enum spdk_nvme_uevent_action action; ++ int subsystem; ++ /* pci address of device */ ++ char traddr[SPDK_NVMF_TRADDR_MAX_LEN + 1]; ++}; ++ ++/* make a socket to get uevent */ ++int nvme_uevent_connect(void); ++ ++/* get uevent from socket fd */ ++int nvme_get_uevent(int fd, struct spdk_uevent *uevent); ++ ++/* blocked to get uevent from socket fd */ ++int nvme_get_uevent_block(int fd, struct spdk_uevent *uevent); ++ ++/** ++ * @Description: bind device with pci_addr to driver ++ * @param pci_addr: device's pci_addr,like "0000:08:00.0" ++ * @param driver: driver name which device bind to ++ */ ++int32_t spdk_rebind_driver(char *pci_addr, char *driver_name); ++ ++/** ++ * \brief True if the protection information transferred at the start of metadata ++ * when end-to-end data protection enabled. ++ * ++ * This function is thread safe and can be called at any point while the controller is attached to ++ * the SPDK NVMe driver. ++ */ ++bool spdk_nvme_ns_pi_md_start(struct spdk_nvme_ns *ns); ++ ++/** ++ * \brief True if the namespace supports Dataset Management command. 
++ * ++ * This function is thread safe and can be called at any point while the controller is attached to ++ * the SPDK NVMe driver. ++ */ ++bool spdk_nvme_ns_is_dataset_mng_supported(struct spdk_nvme_ns *ns); ++ ++/** ++ * Submit a data set management request to the specified NVMe namespace. Data set ++ * management operations are designed to optimize interaction with the block ++ * translation layer inside the device. The most common type of operation is ++ * deallocate, which is often referred to as TRIM or UNMAP. ++ * ++ * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair(). ++ * The user must ensure that only one thread submits I/O on a given qpair at any ++ * given time. ++ * ++ * This is a convenience wrapper that will automatically allocate and construct ++ * the correct data buffers. Therefore, ranges does not need to be allocated from ++ * pinned memory and can be placed on the stack. If a higher performance, zero-copy ++ * version of DSM is required, simply build and submit a raw command using ++ * spdk_nvme_ctrlr_cmd_io_raw(). ++ * ++ * \param ns NVMe namespace to submit the DSM request ++ * \param type A bit field constructed from \ref spdk_nvme_dsm_attribute. ++ * \param qpair I/O queue pair to submit the request ++ * \param ranges An array of \ref spdk_nvme_dsm_range elements describing the LBAs ++ * to operate on. ++ * \param num_ranges The number of elements in the ranges array. ++ * \param cb_fn Callback function to invoke when the I/O is completed ++ * \param cb_arg Argument to pass to the callback function ++ * ++ * \return 0 if successfully submitted, negated POSIX errno values otherwise. ++ */ ++int spdk_nvme_ns_cmd_unmap_blocks(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, ++ uint32_t type, ++ const struct spdk_nvme_dsm_range *ranges, ++ uint16_t num_ranges, ++ spdk_nvme_cmd_cb cb_fn, ++ void *cb_arg); ++/** ++ * \brief Submits a write I/O to the specified NVMe namespace. 
++ * ++ * \param ns NVMe namespace to submit the write I/O ++ * \param qpair I/O queue pair to submit the request ++ * \param lba starting LBA to write the data ++ * \param lba_count length (in sectors) for the write operation ++ * \param streamId The stream id for write I/O ++ * \param cb_fn callback function to invoke when the I/O is completed ++ * \param cb_arg argument to pass to the callback function ++ * \param io_flags set flags, defined in nvme_spec.h, for this I/O ++ * \param reset_sgl_fn callback function to reset scattered payload ++ * \param next_sge_fn callback function to iterate each scattered ++ * payload memory segment ++ * ++ * \return 0 if successfully submitted, ENOMEM if an nvme_request ++ * structure cannot be allocated for the I/O request ++ * ++ * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair(). ++ * The user must ensure that only one thread submits I/O on a given qpair at any given time. ++ */ ++int spdk_nvme_ns_cmd_writev_stream(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, ++ uint64_t lba, uint32_t lba_count, uint16_t streamId, ++ spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags, ++ spdk_nvme_req_reset_sgl_cb reset_sgl_fn, ++ spdk_nvme_req_next_sge_cb next_sge_fn); ++ ++/** ++ * \brief Send comman to NVMe controller to start or abort a self-test operation. ++ * ++ * \param ctrlr NVMe controller to operate self-test command. ++ * \param nsid Depending on the log page, this may be 0, a namespace identifier, or SPDK_NVME_GLOBAL_NS_TAG. ++ * \param stc self-test code, which specifies the action taken by the Device Self-test command. ++ * \param payload The pointer to the payload buffer. it doesn't work actually. ++ * \param payload_size The size of payload buffer. it doesn't work actually. ++ * \param cb_fn Callback function to invoke when the feature has been retrieved. ++ * \param cb_arg Argument to pass to the callback function. 
++ * ++ * \return 0 if successfully submitted, ENOMEM if resources could not be allocated for this request ++ * ++ * This function is thread safe and can be called at any point while the controller is attached to ++ * the SPDK NVMe driver. ++ * ++ * Call \ref spdk_nvme_ctrlr_process_admin_completions() to poll for completion ++ * of commands submitted through this function. ++ * ++ * \sa spdk_nvme_ctrlr_cmd_self_test_operation() ++ */ ++int spdk_nvme_ctrlr_cmd_self_test_operation(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, uint32_t stc, ++ void *payload, uint32_t payload_size, ++ spdk_nvme_cmd_cb cb_fn, void *cb_arg); ++ ++/** ++ *\get I/O queue pair id ++ *\param qpair I/O queue pair to submit the request ++ *\ ++ *\return I/O queue pair id ++ */ ++uint16_t spdk_nvme_get_qpair_id(struct spdk_nvme_qpair *qpair); ++#endif ++ + /* + * Macro used to register new transports. + */ +diff --git a/include/spdk/thread.h b/include/spdk/thread.h +index 4b7e650..7c52433 100644 +--- a/include/spdk/thread.h ++++ b/include/spdk/thread.h +@@ -42,6 +42,9 @@ + + #include "spdk/cpuset.h" + #include "spdk/queue.h" ++#ifdef SPDK_CONFIG_APP_RW ++#include "rte_config.h" ++#endif + + #ifdef __cplusplus + extern "C" { +@@ -57,6 +60,21 @@ enum spdk_thread_poller_rc { + */ + struct spdk_thread; + ++#ifdef SPDK_CONFIG_APP_RW ++struct spdk_iodev_thread_info { ++ struct spdk_thread *thread; ++ volatile int32_t state; ++ uint32_t bdevnum; ++}; ++extern struct spdk_iodev_thread_info lcore_thread_info[RTE_MAX_LCORE]; ++ ++void spdk_reactors_use(bool useOrNot); ++ ++bool spdk_get_reactor_type(void); ++ ++void spdk_set_thread_exited(struct spdk_thread *thread); ++#endif ++ + /** + * A function repeatedly called on the same spdk_thread. + */ +diff --git a/include/spdk_internal/bdev_stat.h b/include/spdk_internal/bdev_stat.h +new file mode 100644 +index 0000000..f1ba1df +--- /dev/null ++++ b/include/spdk_internal/bdev_stat.h +@@ -0,0 +1,63 @@ ++/* ++ * Copyright (C) 2021. 
Huawei Technologies Co., Ltd. All rights reserved. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 and ++ * only version 2 as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++*/ ++ ++#ifndef LIBSTORAGE_STAT_H ++#define LIBSTORAGE_STAT_H ++ ++#include ++#include ++#include ++#include ++ ++//share memory file name ++#define LIBSTORAGE_STAT_SHM_FILE_NAME "libstorage_stat.shm.\ ++49ce4ec241e017c65812b71b9832a50865f0b7d9b4d5f18d3d03283b" ++ ++//max number of channel+bdev ++#define STAT_MAX_NUM 8192 ++ ++extern int32_t g_libstorage_iostat; ++extern int32_t g_polltime_threshold; ++ ++extern pthread_mutex_t *g_io_stat_map_mutex; ++ ++/* libstorage iostat status */ ++enum libstorage_iostat_status { ++ LIBSTORAGE_IOSTAT_DISABLE = 0, ++ LIBSTORAGE_IOSTAT_ENABLE = 1, ++ LIBSTORAGE_IOSTAT_QUERY = 2, ++}; ++ ++struct libstorage_bdev_io_stat ++{ ++ bool used; ++ uint16_t channel_id; ++ char bdev_name[24]; ++ uint64_t num_read_ops; ++ uint64_t num_write_ops; ++ uint64_t bytes_read; ++ uint64_t bytes_written; ++ uint64_t io_outstanding; ++ uint64_t read_latency_ticks; ++ uint64_t write_latency_ticks; ++ uint64_t io_ticks; ++ bool poll_time_used; ++ uint64_t num_poll_timeout; ++}; ++ ++extern struct libstorage_bdev_io_stat *g_io_stat_map; ++ ++int libstorage_stat_init(void); ++ ++int libstorage_stat_exit(void); ++#endif +diff --git a/include/spdk_internal/debug.h b/include/spdk_internal/debug.h +new file mode 100644 +index 0000000..5d6e623 +--- /dev/null ++++ b/include/spdk_internal/debug.h +@@ -0,0 +1,43 @@ ++/* ++ * Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved. 
++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 and ++ * only version 2 as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++*/ ++ ++#ifndef LIBSTORAGE_INTERNAL_DEBUG_H ++#define LIBSTORAGE_INTERNAL_DEBUG_H ++#include "spdk/stdinc.h" ++ ++struct spdk_debug_subsystem ++{ ++ const char *name; ++ void (*output)(FILE *file); ++ TAILQ_ENTRY(spdk_debug_subsystem) tailq; ++}; ++ ++void spdk_add_debug_subsystem(struct spdk_debug_subsystem *subsystem); ++ ++/** ++ * \brief Register a new subsystem ++ */ ++#define SPDK_DEBUG_REGISTER(_name, _output) \ ++ struct spdk_debug_subsystem __spdk_debug_subsystem_ ## _name = \ ++ { \ ++ .name = #_name, \ ++ .output = _output, \ ++ }; \ ++ __attribute__((constructor)) static void _name ## _debug_register(void) \ ++ { \ ++ spdk_add_debug_subsystem(&__spdk_debug_subsystem_ ## _name); \ ++ } ++ ++void spdk_output_debug_info(void); ++ ++#endif +diff --git a/include/spdk_internal/thread.h b/include/spdk_internal/thread.h +index 5bab452..7d1811b 100644 +--- a/include/spdk_internal/thread.h ++++ b/include/spdk_internal/thread.h +@@ -80,6 +80,8 @@ struct spdk_poller { + }; + + enum spdk_thread_state { ++ SPDK_THREAD_STATE_INITIALIZED, ++ + /* The thread is pocessing poller and message by spdk_thread_poll(). 
*/ + SPDK_THREAD_STATE_RUNNING, + +diff --git a/mk/spdk.app_vars.mk b/mk/spdk.app_vars.mk +index 059a56e..ff8fad5 100644 +--- a/mk/spdk.app_vars.mk ++++ b/mk/spdk.app_vars.mk +@@ -57,8 +57,10 @@ SPDK_LIB_LINKER_ARGS = \ + -L$(SPDK_ROOT_DIR)/build/lib \ + -Wl,--whole-archive \ + -Wl,--no-as-needed \ ++ -Wl,-Bstatic \ + $(SPDK_DEPLIB_LIST:%=-lspdk_%) \ +- -Wl,--no-whole-archive ++ -Wl,--no-whole-archive \ ++ -Wl,-Bdynamic + + # This is primarily used for unit tests to ensure they link when shared library + # build is enabled. Shared libraries can't get their mock implementation from +-- +2.33.0 + diff --git a/0018-lib-bdev-Add-bdev-support-for-HSAK.patch b/0018-lib-bdev-Add-bdev-support-for-HSAK.patch new file mode 100644 index 0000000..0ed2ef9 --- /dev/null +++ b/0018-lib-bdev-Add-bdev-support-for-HSAK.patch @@ -0,0 +1,953 @@ +From 214b56fd5a6fd40113c3bf912f0cf1ca7a07abae Mon Sep 17 00:00:00 2001 +From: sunshihao +Date: Thu, 18 Feb 2021 16:49:16 +0800 +Subject: [PATCH 18/27] lib/bdev: Add bdev support for HSAK + +Signed-off-by: sunshihao +--- + include/spdk/bdev.h | 21 ++- + include/spdk/bdev_module.h | 9 +- + include/spdk/nvme.h | 42 +++--- + include/spdk_internal/bdev_stat.h | 14 +- + include/spdk_internal/debug.h | 5 +- + lib/accel/accel_engine.c | 4 + + lib/bdev/Makefile | 1 + + lib/bdev/bdev.c | 173 ++++++++++++++++++++++-- + lib/bdev/bdev_internal.h | 18 +++ + lib/bdev/bdev_self.c | 217 ++++++++++++++++++++++++++++++ + 10 files changed, 449 insertions(+), 55 deletions(-) + create mode 100644 lib/bdev/bdev_self.c + +diff --git a/include/spdk/bdev.h b/include/spdk/bdev.h +index 2951660..22b87ec 100644 +--- a/include/spdk/bdev.h ++++ b/include/spdk/bdev.h +@@ -131,23 +131,22 @@ typedef void (*LIBSTORAGE_CALLBACK_FUNC)(int32_t cb_status, int32_t sct_code, vo + + typedef struct libstorage_io { + uint8_t *buf; +- struct iovec *iovs; /* array of iovecs to transfer */ +- int iovcnt; /* Number of iovecs in iovs array */ +- int32_t fd; /* File Descriptor */ +- uint16_t 
opcode; /* r/w */ +- uint16_t streamId; /* Stream ID for IO */ ++ struct iovec *iovs; /* array of iovecs to transfer */ ++ int iovcnt; /* Number of iovecs in iovs array */ ++ int32_t fd; /* File Descriptor */ ++ uint16_t opcode; /* r/w */ ++ uint16_t streamId; /* Stream ID for IO */ + uint8_t pi_action; + uint8_t fua; + uint8_t location; +- bool inSubmit; /* In the I/0 phase or not. Use in nopoll model */ ++ bool inSubmit; /* In the I/0 phase or not. Use in nopoll model */ + uint32_t count; + uint32_t nbytes; + uint64_t offset; + uint8_t *md_buf; + uint32_t md_len; + uint32_t magic; +- /*Save the error code returned by the callback */ +- int32_t err; ++ int32_t err; /* Save the error code returned by the callback */ + int32_t reserved; + LIBSTORAGE_CALLBACK_FUNC cb; + void *cb_arg; +@@ -1395,7 +1394,7 @@ int spdk_bdev_unmap(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + uint64_t offset, uint64_t nbytes, + spdk_bdev_io_completion_cb cb, void *cb_arg); + +-#ifdef SPDK_CONFIG_APP_RW ++#ifdef SPDK_CONFIG_APP_RW + /** + * Submit an unmap request to the block device. Unmap is sometimes also called trim or + * deallocate. 
This notifies the device that the data in the blocks described is no +@@ -1417,10 +1416,10 @@ spdk_bdev_unmap_multiblocks(struct spdk_bdev_desc *desc, struct spdk_io_channel + void *unmap_d, uint16_t unmap_count, + spdk_bdev_io_completion_cb cb, void *cb_arg); + +-void* ++void * + spdk_bdev_get_channel_group(struct spdk_io_channel *io_ch); + +-void* ++void * + spdk_bdev_io_get_pool(size_t nbytes); + + bool +diff --git a/include/spdk/bdev_module.h b/include/spdk/bdev_module.h +index c2fd81d..3ff7e28 100644 +--- a/include/spdk/bdev_module.h ++++ b/include/spdk/bdev_module.h +@@ -247,7 +247,7 @@ enum spdk_bdev_driver_ctx { + SPDK_BDEV_IO_STREAM_ID_1, + }; + +-enum spdk_bdev_io_e2e_pi_action{ ++enum spdk_bdev_io_e2e_pi_action { + IO_NO_PROTECTION = 0, + IO_HALF_WAY_PROTECTION = 1, + IO_E2E_PROTECTION = 2 +@@ -257,19 +257,18 @@ enum spdk_bdev_io_e2e_pi_action{ + #define FLAG_CALCRC 0x08//bit 3 : 1, libstorage calculate crc; 0, app calculate crc + #define FLAG_PRCHK 0x04//bit 2 : 1, enable ctrl guard crc check; 0, disable check + +-enum spdk_bdev_io_fua{ ++enum spdk_bdev_io_fua { + IO_FUA_NO = 0, + IO_FUA_YES = 1 + }; + + void spdk_bdev_nvme_remove_cb(void *cb_ctx, void *ctrlr); + +-void spdk_bdev_fail_ctrlr(const char* traddr); ++void spdk_bdev_fail_ctrlr(const char *traddr); + + void *nvme_channel_get_group(void *io_ch); + +-enum reqLocation_E +-{ ++enum reqLocation_E { + LOCAL_RECEIVE_APP = 1, + LOCAL_LIBSTORAGE_SUBMIT = 2, + LOCAL_LIBSTORAGE_ASYNC_REQ = 3, +diff --git a/include/spdk/nvme.h b/include/spdk/nvme.h +index 8e05139..adda642 100644 +--- a/include/spdk/nvme.h ++++ b/include/spdk/nvme.h +@@ -3454,11 +3454,11 @@ struct nvme_ctrlr_info { + uint16_t ssvid; /* Subsystem vendor id */ + uint16_t ctrlid; /* Controller id */ + uint16_t trtype; /* Transport type */ +- uint16_t support_ns :1; /* Supports the Namespace Management and Namespace Attachment commands */ +- uint16_t directives :1; /* Supports Directives */ +- uint16_t streams :1; /* Supports Streams 
Directives */ +- uint16_t dsm :1; /* Supports the controller supports the Dataset Management command */ +- uint16_t reserved :12; ++ uint16_t support_ns : 1; /* Supports the Namespace Management and Namespace Attachment commands */ ++ uint16_t directives : 1; /* Supports Directives */ ++ uint16_t streams : 1; /* Supports Streams Directives */ ++ uint16_t dsm : 1; /* Supports the controller supports the Dataset Management command */ ++ uint16_t reserved : 12; + uint16_t reserved2[3]; + }; + +@@ -3468,23 +3468,25 @@ struct spdk_bdev; + struct nvme_bdev; + struct spdk_nvme_ns; + struct spdk_nvme_qpair; +-int32_t nvme_ctrlr_get_info(const char* ctrlName, struct nvme_ctrlr_info** ppCtrlr); +-struct spdk_nvme_ctrlr* spdk_nvme_ctrlr_get_by_name(const char* ctrlname); +-struct spdk_nvme_ctrlr* spdk_nvme_ctrlr_get_by_ctrlr(const struct nvme_bdev_ctrlr *nvme_bdev_ctrlr); +-struct nvme_bdev_ctrlr* nvme_ctrlr_get_by_name(const char* ctrlname); +-void nvme_ctrlr_clear_iostat_by_name(const char* ctrlname); ++int32_t nvme_ctrlr_get_info(const char *ctrlName, struct nvme_ctrlr_info **ppCtrlr); ++struct spdk_nvme_ctrlr *spdk_nvme_ctrlr_get_by_name(const char *ctrlname); ++struct spdk_nvme_ctrlr *spdk_nvme_ctrlr_get_by_ctrlr(const struct nvme_bdev_ctrlr *nvme_bdev_ctrlr); ++struct nvme_bdev_ctrlr *nvme_ctrlr_get_by_name(const char *ctrlname); ++void nvme_ctrlr_clear_iostat_by_name(const char *ctrlname); + void nvme_ctrlr_clear_iostat_all(void); +-struct nvme_bdev_ctrlr* bdev_nvme_get_ctrlr_by_bdev_desc(void *bdev); +-struct spdk_nvme_ns* bdev_nvme_get_ns(struct nvme_bdev *nbdev); ++struct nvme_bdev_ctrlr *bdev_nvme_get_ctrlr_by_bdev_desc(void *bdev); ++struct spdk_nvme_ns *bdev_nvme_get_ns(struct nvme_bdev *nbdev); + void bdev_nvme_update_block_by_nvme_ctrlr(struct spdk_nvme_ctrlr *ctrlr); + int bdev_nvme_update_ns(struct nvme_bdev_ctrlr *nvme_ctrlr, uint32_t nsid); + bool spdk_bdev_can_remove(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid); + void 
spdk_bdev_set_ns_normal(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid); + void spdk_nvme_ctrlr_set_shutdown(struct spdk_nvme_ctrlr *ctrlr, bool is_shutdown); + bool spdk_nvme_ctrlr_is_smart_per_namespace_supported(struct spdk_nvme_ctrlr *ctrlr); +-int spdk_nvme_ctrlr_get_smart_info(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, struct spdk_nvme_health_information_page *health_payload); +-int spdk_nvme_ctrlr_get_error_info(struct spdk_nvme_ctrlr *ctrlr, uint32_t err_entries, struct spdk_nvme_error_information_entry *error_info); +-struct spdk_nvme_ctrlr_opts* spdk_nvme_ctrlr_get_opts(struct spdk_nvme_ctrlr *ctrlr); ++int spdk_nvme_ctrlr_get_smart_info(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, ++ struct spdk_nvme_health_information_page *health_payload); ++int spdk_nvme_ctrlr_get_error_info(struct spdk_nvme_ctrlr *ctrlr, uint32_t err_entries, ++ struct spdk_nvme_error_information_entry *error_info); ++struct spdk_nvme_ctrlr_opts *spdk_nvme_ctrlr_get_opts(struct spdk_nvme_ctrlr *ctrlr); + int nvme_ns_get_common_data(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns_data *nsdata); + bool spdk_nvme_ns_is_allocated(struct spdk_nvme_ctrlr *ctrlr, uint16_t nsid); + bool spdk_nvme_ctrlr_is_ns_manage_supported(struct spdk_nvme_ctrlr *ctrlr); +@@ -3492,7 +3494,8 @@ bool spdk_nvme_ctrlr_is_format_supported(struct spdk_nvme_ctrlr *ctrlr); + bool spdk_nvme_ctrlr_is_format_all_ns(struct spdk_nvme_ctrlr *ctrlr); + bool spdk_nvme_ctrlr_is_directive_supported(struct spdk_nvme_ctrlr *ctrlr); + bool spdk_nvme_ctrlr_is_streams_supported(struct spdk_nvme_ctrlr *ctrlr); +-int32_t spdk_nvme_ctrlr_identify_directives(struct spdk_nvme_ctrlr *ctrlr, uint16_t nsid, void *payload); ++int32_t spdk_nvme_ctrlr_identify_directives(struct spdk_nvme_ctrlr *ctrlr, uint16_t nsid, ++ void *payload); + int32_t spdk_nvme_ctrlr_enable_streams(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid); + int32_t spdk_nvme_ctrlr_ret_streams_param(struct spdk_nvme_ctrlr *ctrlr, void *payload); + int32_t 
spdk_nvme_ns_ret_streams_param(struct spdk_nvme_ns *ns, void *payload); +@@ -3651,9 +3654,10 @@ int spdk_nvme_ns_cmd_writev_stream(struct spdk_nvme_ns *ns, struct spdk_nvme_qpa + * + * \sa spdk_nvme_ctrlr_cmd_self_test_operation() + */ +-int spdk_nvme_ctrlr_cmd_self_test_operation(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, uint32_t stc, +- void *payload, uint32_t payload_size, +- spdk_nvme_cmd_cb cb_fn, void *cb_arg); ++int spdk_nvme_ctrlr_cmd_self_test_operation(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, ++ uint32_t stc, ++ void *payload, uint32_t payload_size, ++ spdk_nvme_cmd_cb cb_fn, void *cb_arg); + + /** + *\get I/O queue pair id +diff --git a/include/spdk_internal/bdev_stat.h b/include/spdk_internal/bdev_stat.h +index f1ba1df..58a5102 100644 +--- a/include/spdk_internal/bdev_stat.h ++++ b/include/spdk_internal/bdev_stat.h +@@ -9,21 +9,18 @@ + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+-*/ ++ */ + + #ifndef LIBSTORAGE_STAT_H + #define LIBSTORAGE_STAT_H + +-#include +-#include +-#include +-#include ++#include "spdk/stdinc.h" + +-//share memory file name ++/* share memory file name */ + #define LIBSTORAGE_STAT_SHM_FILE_NAME "libstorage_stat.shm.\ + 49ce4ec241e017c65812b71b9832a50865f0b7d9b4d5f18d3d03283b" + +-//max number of channel+bdev ++/* max number of channel+bdev */ + #define STAT_MAX_NUM 8192 + + extern int32_t g_libstorage_iostat; +@@ -38,8 +35,7 @@ enum libstorage_iostat_status { + LIBSTORAGE_IOSTAT_QUERY = 2, + }; + +-struct libstorage_bdev_io_stat +-{ ++struct libstorage_bdev_io_stat { + bool used; + uint16_t channel_id; + char bdev_name[24]; +diff --git a/include/spdk_internal/debug.h b/include/spdk_internal/debug.h +index 5d6e623..cf9b9e7 100644 +--- a/include/spdk_internal/debug.h ++++ b/include/spdk_internal/debug.h +@@ -9,14 +9,13 @@ + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+-*/ ++ */ + + #ifndef LIBSTORAGE_INTERNAL_DEBUG_H + #define LIBSTORAGE_INTERNAL_DEBUG_H + #include "spdk/stdinc.h" + +-struct spdk_debug_subsystem +-{ ++struct spdk_debug_subsystem { + const char *name; + void (*output)(FILE *file); + TAILQ_ENTRY(spdk_debug_subsystem) tailq; +diff --git a/lib/accel/accel_engine.c b/lib/accel/accel_engine.c +index ca3e248..865128a 100644 +--- a/lib/accel/accel_engine.c ++++ b/lib/accel/accel_engine.c +@@ -745,7 +745,11 @@ spdk_accel_engine_module_finish(void) + } + + if (g_accel_engine_module->module_fini) { ++#ifndef SPDK_CONFIG_APP_RW + spdk_thread_send_msg(spdk_get_thread(), g_accel_engine_module->module_fini, NULL); ++#else ++ g_accel_engine_module->module_fini(NULL); ++#endif + } else { + spdk_accel_engine_module_finish(); + } +diff --git a/lib/bdev/Makefile b/lib/bdev/Makefile +index 795fa6e..c23caf1 100644 +--- a/lib/bdev/Makefile ++++ b/lib/bdev/Makefile +@@ -42,6 +42,7 @@ CFLAGS += -I$(CONFIG_VTUNE_DIR)/include -I$(CONFIG_VTUNE_DIR)/sdk/src/ittnotify + endif + + C_SRCS = bdev.c bdev_rpc.c bdev_zone.c part.c scsi_nvme.c ++C_SRCS-$(CONFIG_APP_RW) += bdev_self.c + C_SRCS-$(CONFIG_VTUNE) += vtune.c + LIBNAME = bdev + +diff --git a/lib/bdev/bdev.c b/lib/bdev/bdev.c +index 2a642d6..bf102bb 100644 +--- a/lib/bdev/bdev.c ++++ b/lib/bdev/bdev.c +@@ -50,6 +50,13 @@ + #include "spdk/log.h" + #include "spdk/string.h" + ++#ifdef SPDK_CONFIG_APP_RW ++#include "spdk/stdinc.h" ++#include "spdk/barrier.h" ++#include ++#include "spdk_internal/bdev_stat.h" ++#endif ++ + #include "bdev_internal.h" + + #ifdef SPDK_CONFIG_VTUNE +@@ -1377,8 +1384,12 @@ spdk_bdev_initialize(spdk_bdev_init_cb cb_fn, void *cb_arg) + + g_bdev_mgr.buf_small_pool = spdk_mempool_create(mempool_name, + g_bdev_opts.small_buf_pool_size, ++#ifdef SPDK_CONFIG_APP_RW ++ SPDK_BDEV_SMALL_BUF_MAX_SIZE + SPDK_BDEV_SMALL_BUF_WITH_MAX_MD, ++#else + SPDK_BDEV_BUF_SIZE_WITH_MD(SPDK_BDEV_SMALL_BUF_MAX_SIZE) + + SPDK_BDEV_POOL_ALIGNMENT, ++#endif + cache_size, + 
SPDK_ENV_SOCKET_ID_ANY); + if (!g_bdev_mgr.buf_small_pool) { +@@ -1392,8 +1403,12 @@ spdk_bdev_initialize(spdk_bdev_init_cb cb_fn, void *cb_arg) + + g_bdev_mgr.buf_large_pool = spdk_mempool_create(mempool_name, + g_bdev_opts.large_buf_pool_size, ++#ifdef SPDK_CONFIG_APP_RW ++ SPDK_BDEV_LARGE_BUF_MAX_SIZE + SPDK_BDEV_LARGE_BUF_WITH_MAX_MD, ++#else + SPDK_BDEV_BUF_SIZE_WITH_MD(SPDK_BDEV_LARGE_BUF_MAX_SIZE) + + SPDK_BDEV_POOL_ALIGNMENT, ++#endif + cache_size, + SPDK_ENV_SOCKET_ID_ANY); + if (!g_bdev_mgr.buf_large_pool) { +@@ -1561,7 +1576,11 @@ bdev_finish_unregister_bdevs_iter(void *cb_arg, int bdeverrno) + * (like bdev part free) that will use this bdev (or private bdev driver ctx data) + * after returning. + */ ++#ifdef SPDK_CONFIG_APP_RW ++ bdev_module_finish_iter(NULL); ++#else + spdk_thread_send_msg(spdk_get_thread(), bdev_module_finish_iter, NULL); ++#endif + return; + } + +@@ -2296,6 +2315,17 @@ _bdev_io_submit(void *ctx) + bdev_io->internal.submit_tsc = tsc; + spdk_trace_record_tsc(tsc, TRACE_BDEV_IO_START, 0, 0, (uintptr_t)bdev_io, bdev_io->type); + ++#ifdef SPDK_CONFIG_APP_RW ++ struct spdk_bdev_io_stat *stat = &bdev_ch->stat; ++ if (bdev_ch->io_outstanding > 0) { ++ stat->pre_ticks = stat->cur_ticks; ++ stat->cur_ticks = tsc; ++ stat->io_ticks += stat->cur_ticks - stat->pre_ticks; ++ } else { ++ stat->cur_ticks = tsc; ++ } ++#endif ++ + if (spdk_likely(bdev_ch->flags == 0)) { + bdev_io_do_submit(bdev_ch, bdev_io); + return; +@@ -2307,6 +2337,9 @@ _bdev_io_submit(void *ctx) + if (spdk_unlikely(bdev_io->type == SPDK_BDEV_IO_TYPE_ABORT) && + bdev_abort_queued_io(&bdev->internal.qos->queued, bdev_io->u.abort.bio_to_abort)) { + _bdev_io_complete_in_submit(bdev_ch, bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); ++#ifdef SPDK_CONFIG_APP_RW ++ spdk_bdev_set_io_location(bdev_io->driver_ctx, (uint8_t)LOCAL_LIBSTORAGE_BDEV_NOMEM); ++#endif + } else { + TAILQ_INSERT_TAIL(&bdev->internal.qos->queued, bdev_io, internal.link); + bdev_qos_io_submit(bdev_ch, bdev->internal.qos); 
+@@ -2652,6 +2685,7 @@ bdev_desc_free(struct spdk_bdev_desc *desc) + pthread_mutex_destroy(&desc->mutex); + free(desc->media_events_buffer); + free(desc); ++ desc = NULL; + } + + static void +@@ -2837,6 +2871,9 @@ bdev_channel_create(void *io_device, void *ctx_buf) + ch->flags = 0; + ch->shared_resource = shared_resource; + ++#ifdef SPDK_CONFIG_APP_RW ++ spdk_bdev_init_iostat(ch, ch->bdev, ch->channel, &ch->stat); ++#endif + TAILQ_INIT(&ch->io_submitted); + TAILQ_INIT(&ch->io_locked); + +@@ -3075,6 +3112,10 @@ bdev_channel_destroy(void *io_device, void *ctx_buf) + spdk_histogram_data_free(ch->histogram); + } + ++#ifdef SPDK_CONFIG_APP_RW ++ spdk_bdev_destroy_iostat(ch, ch->bdev, ch->channel); ++#endif ++ + bdev_channel_destroy_resource(ch); + } + +@@ -3527,6 +3568,26 @@ _bdev_io_check_md_buf(const struct iovec *iovs, const void *md_buf) + return _is_buf_allocated(iovs) == (md_buf != NULL); + } + ++static void ++bdev_build_contig_io(uint8_t type, void *buf, void *md_buf, uint64_t offset_blocks, ++ uint64_t num_blocks, ++ struct libstorage_io *io, struct spdk_bdev_io *bdev_io) ++{ ++ bdev_io->type = type; ++ bdev_io->u.contig.buf = buf; ++ bdev_io->u.contig.md_buf = md_buf; ++ bdev_io->u.contig.offset_blocks = offset_blocks; ++ bdev_io->u.contig.num_blocks = num_blocks; ++ bdev_io->u.contig.nbytes = io->nbytes; ++ bdev_io->u.contig.md_len = io->md_len; ++ bdev_io->driver_ctx[SPDK_BDEV_IO_ACTION_PI] = io->pi_action; ++ bdev_io->driver_ctx[SPDK_BDEV_IO_ACTION_FUA] = io->fua; ++ if (type == SPDK_BDEV_IO_TYPE_WRITE_NVME) { ++ bdev_io->driver_ctx[SPDK_BDEV_IO_STREAM_ID_0] = io->streamId & 0xFF; ++ bdev_io->driver_ctx[SPDK_BDEV_IO_STREAM_ID_1] = (io->streamId >> 8) & 0xFF; ++ } ++} ++ + static int + bdev_read_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, void *buf, + void *md_buf, int64_t offset_blocks, uint64_t num_blocks, +@@ -3547,6 +3608,7 @@ bdev_read_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch + + 
bdev_io->internal.ch = channel; + bdev_io->internal.desc = desc; ++#ifndef SPDK_CONFIG_APP_RW + bdev_io->type = SPDK_BDEV_IO_TYPE_READ; + bdev_io->u.bdev.iovs = &bdev_io->iov; + bdev_io->u.bdev.iovs[0].iov_base = buf; +@@ -3555,6 +3617,12 @@ bdev_read_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch + bdev_io->u.bdev.md_buf = md_buf; + bdev_io->u.bdev.num_blocks = num_blocks; + bdev_io->u.bdev.offset_blocks = offset_blocks; ++#else ++ struct libstorage_io *io = (struct libstorage_io *)cb_arg; ++ bdev_build_contig_io(SPDK_BDEV_IO_TYPE_READ_NVME, buf, md_buf, offset_blocks, num_blocks, ++ io, bdev_io); ++ cb_arg = &io->location; ++#endif + bdev_io_init(bdev_io, bdev, cb_arg, cb); + + bdev_io_submit(bdev_io); +@@ -3592,7 +3660,7 @@ spdk_bdev_read_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channe + struct iovec iov = { + .iov_base = buf, + }; +- ++#ifndef SPDK_CONFIG_APP_RW + if (!spdk_bdev_is_md_separate(spdk_bdev_desc_get_bdev(desc))) { + return -EINVAL; + } +@@ -3600,7 +3668,7 @@ spdk_bdev_read_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channe + if (!_bdev_io_check_md_buf(&iov, md_buf)) { + return -EINVAL; + } +- ++#endif + return bdev_read_blocks_with_md(desc, ch, buf, md_buf, offset_blocks, num_blocks, + cb, cb_arg); + } +@@ -3647,6 +3715,14 @@ bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *c + bdev_io->u.bdev.md_buf = md_buf; + bdev_io->u.bdev.num_blocks = num_blocks; + bdev_io->u.bdev.offset_blocks = offset_blocks; ++#ifdef SPDK_CONFIG_APP_RW ++ struct libstorage_io *io = (struct libstorage_io *)cb_arg; ++ bdev_io->type = SPDK_BDEV_IO_TYPE_READV_NVME; ++ bdev_io->u.bdev.nbytes = io->nbytes; ++ bdev_io->driver_ctx[SPDK_BDEV_IO_ACTION_PI] = io->pi_action; ++ bdev_io->driver_ctx[SPDK_BDEV_IO_ACTION_FUA] = io->fua; ++ cb_arg = &io->location; ++#endif + bdev_io_init(bdev_io, bdev, cb_arg, cb); + + bdev_io_submit(bdev_io); +@@ -3668,6 +3744,7 @@ spdk_bdev_readv_blocks_with_md(struct 
spdk_bdev_desc *desc, struct spdk_io_chann + uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg) + { ++#ifndef SPDK_CONFIG_APP_RW + if (!spdk_bdev_is_md_separate(spdk_bdev_desc_get_bdev(desc))) { + return -EINVAL; + } +@@ -3675,7 +3752,7 @@ spdk_bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_chann + if (!_bdev_io_check_md_buf(iov, md_buf)) { + return -EINVAL; + } +- ++#endif + return bdev_readv_blocks_with_md(desc, ch, iov, iovcnt, md_buf, offset_blocks, + num_blocks, cb, cb_arg); + } +@@ -3689,9 +3766,11 @@ bdev_write_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *c + struct spdk_bdev_io *bdev_io; + struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); + ++#ifndef SPDK_CONFIG_APP_RW + if (!desc->write) { + return -EBADF; + } ++#endif + + if (!bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) { + return -EINVAL; +@@ -3704,6 +3783,7 @@ bdev_write_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *c + + bdev_io->internal.ch = channel; + bdev_io->internal.desc = desc; ++#ifndef SPDK_CONFIG_APP_RW + bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE; + bdev_io->u.bdev.iovs = &bdev_io->iov; + bdev_io->u.bdev.iovs[0].iov_base = buf; +@@ -3712,6 +3792,12 @@ bdev_write_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *c + bdev_io->u.bdev.md_buf = md_buf; + bdev_io->u.bdev.num_blocks = num_blocks; + bdev_io->u.bdev.offset_blocks = offset_blocks; ++#else ++ LIBSTORAGE_IO_T *io = (struct libstorage_io *)cb_arg; ++ bdev_build_contig_io(SPDK_BDEV_IO_TYPE_WRITE_NVME, buf, md_buf, offset_blocks, num_blocks, ++ io, bdev_io); ++ cb_arg = &io->location; ++#endif + bdev_io_init(bdev_io, bdev, cb_arg, cb); + + bdev_io_submit(bdev_io); +@@ -3751,6 +3837,7 @@ spdk_bdev_write_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_chann + .iov_base = buf, + }; + ++#ifndef SPDK_CONFIG_APP_RW + if (!spdk_bdev_is_md_separate(spdk_bdev_desc_get_bdev(desc))) { + 
return -EINVAL; + } +@@ -3758,7 +3845,7 @@ spdk_bdev_write_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_chann + if (!_bdev_io_check_md_buf(&iov, md_buf)) { + return -EINVAL; + } +- ++#endif + return bdev_write_blocks_with_md(desc, ch, buf, md_buf, offset_blocks, num_blocks, + cb, cb_arg); + } +@@ -3773,9 +3860,11 @@ bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel * + struct spdk_bdev_io *bdev_io; + struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); + ++#ifndef SPDK_CONFIG_APP_RW + if (!desc->write) { + return -EBADF; + } ++#endif + + if (!bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) { + return -EINVAL; +@@ -3794,6 +3883,16 @@ bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel * + bdev_io->u.bdev.md_buf = md_buf; + bdev_io->u.bdev.num_blocks = num_blocks; + bdev_io->u.bdev.offset_blocks = offset_blocks; ++#ifdef SPDK_CONFIG_APP_RW ++ struct libstorage_io *io = (struct libstorage_io *)cb_arg; ++ bdev_io->type = SPDK_BDEV_IO_TYPE_WRITEV_NVME; ++ bdev_io->u.bdev.nbytes = io->nbytes; ++ bdev_io->driver_ctx[SPDK_BDEV_IO_ACTION_PI] = io->pi_action; ++ bdev_io->driver_ctx[SPDK_BDEV_IO_ACTION_FUA] = io->fua; ++ bdev_io->driver_ctx[SPDK_BDEV_IO_STREAM_ID_0] = io->streamId & 0xFF; ++ bdev_io->driver_ctx[SPDK_BDEV_IO_STREAM_ID_1] = (io->streamId >> 8) & 0xFF; ++ cb_arg = &io->location; ++#endif + bdev_io_init(bdev_io, bdev, cb_arg, cb); + + bdev_io_submit(bdev_io); +@@ -3832,6 +3931,7 @@ spdk_bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_chan + uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg) + { ++#ifndef SPDK_CONFIG_APP_RW + if (!spdk_bdev_is_md_separate(spdk_bdev_desc_get_bdev(desc))) { + return -EINVAL; + } +@@ -3839,7 +3939,7 @@ spdk_bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_chan + if (!_bdev_io_check_md_buf(iov, md_buf)) { + return -EINVAL; + } +- ++#endif + return 
bdev_writev_blocks_with_md(desc, ch, iov, iovcnt, md_buf, offset_blocks, + num_blocks, cb, cb_arg); + } +@@ -5111,8 +5211,16 @@ bdev_io_complete(void *ctx) + default: + break; + } ++#ifdef SPDK_CONFIG_APP_RW ++ bdev_io_stat_update(bdev_io, tsc, &bdev_io->internal.ch->stat); ++#endif + } + ++#ifdef SPDK_CONFIG_APP_RW ++ bdev_update_iostat_map(bdev_io, tsc, &bdev_io->internal.ch->stat, bdev_io->internal.ch->channel, ++ bdev_io->internal.ch->io_outstanding); ++#endif ++ + #ifdef SPDK_CONFIG_VTUNE + uint64_t now_tsc = spdk_get_ticks(); + if (now_tsc > (bdev_io->internal.ch->start_tsc + bdev_io->internal.ch->interval_tsc)) { +@@ -5134,7 +5242,9 @@ bdev_io_complete(void *ctx) + #endif + + assert(bdev_io->internal.cb != NULL); ++#ifndef SPDK_CONFIG_APP_RW + assert(spdk_get_thread() == spdk_bdev_io_get_thread(bdev_io)); ++#endif + + bdev_io->internal.cb(bdev_io, bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS, + bdev_io->internal.caller_ctx); +@@ -5208,6 +5318,9 @@ spdk_bdev_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status sta + + if (spdk_unlikely(status == SPDK_BDEV_IO_STATUS_NOMEM)) { + TAILQ_INSERT_HEAD(&shared_resource->nomem_io, bdev_io, internal.link); ++#ifdef SPDK_CONFIG_APP_RW ++ spdk_bdev_set_io_location(bdev_io->driver_ctx, (uint8_t)LOCAL_LIBSTORAGE_BDEV_NOMEM); ++#endif + /* + * Wait for some of the outstanding I/O to complete before we + * retry any of the nomem_io. Normally we will wait for +@@ -5613,8 +5726,8 @@ bdev_unregister_unsafe(struct spdk_bdev *bdev) + * immediately closes its descriptor. 
+ */ + desc->refs++; +- spdk_thread_send_msg(desc->thread, _remove_notify, desc); + pthread_mutex_unlock(&desc->mutex); ++ spdk_thread_send_msg(desc->thread, _remove_notify, desc); + } + + /* If there are no descriptors, proceed removing the bdev */ +@@ -5858,9 +5971,9 @@ spdk_bdev_close(struct spdk_bdev_desc *desc) + + SPDK_DEBUGLOG(bdev, "Closing descriptor %p for bdev %s on thread %p\n", desc, bdev->name, + spdk_get_thread()); +- ++#ifndef SPDK_CONFIG_APP_RW + assert(desc->thread == spdk_get_thread()); +- ++#endif + spdk_poller_unregister(&desc->io_timeout_poller); + + pthread_mutex_lock(&bdev->internal.mutex); +@@ -6909,6 +7022,50 @@ bdev_unlock_lba_range(struct spdk_bdev_desc *desc, struct spdk_io_channel *_ch, + return 0; + } + ++#ifdef SPDK_CONFIG_APP_RW ++void * ++spdk_bdev_io_get_pool(size_t nbytes) ++{ ++ struct spdk_mempool *pool = NULL; ++ ++ if (nbytes == 0 || nbytes > SPDK_BDEV_LARGE_BUF_MAX_SIZE + SPDK_BDEV_LARGE_BUF_WITH_MAX_MD) { ++ SPDK_ERRLOG("The size of buffer[%zu] is incorrect!\n", nbytes); ++ return NULL; ++ } ++ ++ if (nbytes <= SPDK_BDEV_SMALL_BUF_MAX_SIZE + SPDK_BDEV_SMALL_BUF_WITH_MAX_MD) { ++ pool = g_bdev_mgr.buf_small_pool; ++ } else { ++ pool = g_bdev_mgr.buf_large_pool; ++ } ++ ++ return pool; ++} ++ ++void * ++spdk_bdev_get_channel_group(struct spdk_io_channel *io_ch) ++{ ++ struct spdk_bdev_channel *ch = spdk_io_channel_get_ctx(io_ch); ++ struct spdk_io_channel *under_io_ch = ch->channel; ++ void *nvme_io_ch = spdk_io_channel_get_ctx(under_io_ch); ++ ++ return nvme_channel_get_group(nvme_io_ch); ++} ++ ++bool ++spdk_bdev_have_io_in_channel(struct spdk_io_channel *io_ch) ++{ ++ struct spdk_bdev_channel *bdev_ch = NULL; ++ ++ if (io_ch != NULL) { ++ bdev_ch = spdk_io_channel_get_ctx(io_ch); ++ return bdev_ch->io_outstanding != 0; ++ } ++ ++ return false; ++} ++#endif ++ + SPDK_LOG_REGISTER_COMPONENT(bdev) + + SPDK_TRACE_REGISTER_FN(bdev_trace, "bdev", TRACE_GROUP_BDEV) +diff --git a/lib/bdev/bdev_internal.h b/lib/bdev/bdev_internal.h 
+index d1fa6e6..871387f 100644 +--- a/lib/bdev/bdev_internal.h ++++ b/lib/bdev/bdev_internal.h +@@ -47,4 +47,22 @@ void bdev_io_init(struct spdk_bdev_io *bdev_io, struct spdk_bdev *bdev, void *cb + + void bdev_io_submit(struct spdk_bdev_io *bdev_io); + ++#ifdef SPDK_CONFIG_APP_RW ++void ++spdk_bdev_init_iostat(struct spdk_bdev_channel *ch, struct spdk_bdev *bdev, ++ struct spdk_io_channel *io_ch, ++ struct spdk_bdev_io_stat *stat); ++ ++void ++spdk_bdev_destroy_iostat(struct spdk_bdev_channel *ch, struct spdk_bdev *bdev, ++ struct spdk_io_channel *io_ch); ++ ++void ++bdev_io_stat_update(struct spdk_bdev_io *bdev_io, uint64_t tsc, struct spdk_bdev_io_stat *stat); ++ ++void ++bdev_update_iostat_map(struct spdk_bdev_io *bdev_io, uint64_t tsc, struct spdk_bdev_io_stat *stat, ++ struct spdk_io_channel *channel, uint64_t io_outstanding); ++#endif ++ + #endif /* SPDK_BDEV_INTERNAL_H */ +diff --git a/lib/bdev/bdev_self.c b/lib/bdev/bdev_self.c +new file mode 100644 +index 0000000..7050c30 +--- /dev/null ++++ b/lib/bdev/bdev_self.c +@@ -0,0 +1,217 @@ ++/* ++ * Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 and ++ * only version 2 as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ */ ++ ++#include "bdev_internal.h" ++ ++#include "spdk/stdinc.h" ++#include "spdk/bdev.h" ++#include "spdk/bdev_module.h" ++#include "spdk/env.h" ++#include "spdk/nvme_spec.h" ++#include "spdk/log.h" ++ ++#include ++#include "spdk_internal/bdev_stat.h" ++ ++pthread_mutex_t *g_io_stat_map_mutex = NULL; ++/* share memory for libstorage iostat */ ++struct libstorage_bdev_io_stat *g_io_stat_map; ++/* libstorage iostat enable or disable switch */ ++int32_t g_libstorage_iostat = 0; ++int32_t g_polltime_threshold = 0; ++ ++void ++spdk_bdev_init_iostat(struct spdk_bdev_channel *ch, struct spdk_bdev *bdev, ++ struct spdk_io_channel *io_ch, ++ struct spdk_bdev_io_stat *stat) ++{ ++ int i = 0; ++ bool find = false; ++ uint16_t channel_id; ++ ++ if (bdev->fn_table->get_io_channel_id) { ++ channel_id = bdev->fn_table->get_io_channel_id(io_ch); ++ for (i = 0; i < STAT_MAX_NUM; i++) { ++ /* Reuse last record */ ++ if (g_io_stat_map[i].used && !strcmp(g_io_stat_map[i].bdev_name, bdev->name) ++ && g_io_stat_map[i].channel_id == channel_id) { ++ stat->io_stat_id = i; ++ find = true; ++ g_io_stat_map[i].num_read_ops = 0; ++ g_io_stat_map[i].num_write_ops = 0; ++ g_io_stat_map[i].bytes_read = 0; ++ g_io_stat_map[i].bytes_written = 0; ++ g_io_stat_map[i].io_outstanding = 0; ++ g_io_stat_map[i].read_latency_ticks = 0; ++ g_io_stat_map[i].write_latency_ticks = 0; ++ g_io_stat_map[i].io_ticks = 0; ++ g_io_stat_map[i].poll_time_used = false; ++ g_io_stat_map[i].num_poll_timeout = 0; ++ break; ++ } ++ } ++ if (!find) { ++ /* Add lock when multi thread or process */ ++ if (pthread_mutex_lock(g_io_stat_map_mutex) == EOWNERDEAD) { ++ if (pthread_mutex_consistent(g_io_stat_map_mutex)) { ++ SPDK_WARNLOG("[libstorage] the iostat_map process mutex is not normal any more.\n"); ++ } ++ } ++ for (i = 0; i < STAT_MAX_NUM; i++) { ++ /* Find unused record, allocate it to this channel */ ++ if (!g_io_stat_map[i].used) { ++ g_io_stat_map[i].used = true; ++ if (strncpy_s(g_io_stat_map[i].bdev_name, 
sizeof(g_io_stat_map[i].bdev_name), bdev->name, ++ sizeof(g_io_stat_map[i].bdev_name) - 1) != 0) { ++ SPDK_ERRLOG("[libstorage] string copy failed.\n"); ++ } ++ g_io_stat_map[i].channel_id = channel_id; ++ stat->io_stat_id = i; ++ find = true; ++ break; ++ } ++ } ++ pthread_mutex_unlock(g_io_stat_map_mutex); ++ } ++ if (!find) { ++ stat->io_stat_id = -1; ++ SPDK_ERRLOG("channel %u bdev %s allocate io stat memory failed.\n", channel_id, bdev->name); ++ } ++ } else { ++ /* It is not nvme disk, can use iostat. So do not do IO statistics in libstorage. */ ++ stat->io_stat_id = -1; ++ } ++ stat->start_tsc = spdk_get_ticks(); ++ stat->interval_tsc = spdk_get_ticks_hz() / 10; ++} ++ ++void ++spdk_bdev_destroy_iostat(struct spdk_bdev_channel *ch, struct spdk_bdev *bdev, ++ struct spdk_io_channel *io_ch) ++{ ++ int i = 0; ++ uint16_t channel_id; ++ ++ if (bdev->fn_table->get_io_channel_id) { ++ channel_id = bdev->fn_table->get_io_channel_id(io_ch); ++ for (i = 0; i < STAT_MAX_NUM; i++) { ++ /* clear channel iostat info in share memory */ ++ if (g_io_stat_map[i].used && !strcmp(g_io_stat_map[i].bdev_name, bdev->name) ++ && g_io_stat_map[i].channel_id == channel_id) { ++ g_io_stat_map[i].channel_id = 0; ++ memset(g_io_stat_map[i].bdev_name, 0, sizeof(g_io_stat_map[i].bdev_name)); ++ g_io_stat_map[i].num_read_ops = 0; ++ g_io_stat_map[i].num_write_ops = 0; ++ g_io_stat_map[i].bytes_read = 0; ++ g_io_stat_map[i].bytes_written = 0; ++ g_io_stat_map[i].io_outstanding = 0; ++ g_io_stat_map[i].read_latency_ticks = 0; ++ g_io_stat_map[i].write_latency_ticks = 0; ++ g_io_stat_map[i].io_ticks = 0; ++ /* used flag set false in last avoid race in channel create */ ++ g_io_stat_map[i].used = false; ++ g_io_stat_map[i].poll_time_used = false; ++ g_io_stat_map[i].num_poll_timeout = 0; ++ } ++ } ++ } ++} ++ ++int ++spdk_bdev_unmap_multiblocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, ++ void *unmap_d, uint16_t unmap_count, ++ spdk_bdev_io_completion_cb cb, void *cb_arg) ++{ 
++ struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc); ++ struct spdk_bdev_io *bdev_io = NULL; ++ struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); ++ ++ bdev_io = bdev_channel_get_io(channel); ++ if (bdev_io == NULL) { ++ return -ENOMEM; ++ } ++ ++ bdev_io->internal.ch = channel; ++ bdev_io->internal.desc = desc; ++ bdev_io->type = SPDK_BDEV_IO_TYPE_UNMAP_BLOCKS; ++ bdev_io->u.contig.buf = unmap_d; ++ bdev_io->u.contig.num_blocks = unmap_count; ++ bdev_io_init(bdev_io, bdev, cb_arg, cb); ++ ++ bdev_io_submit(bdev_io); ++ return 0; ++} ++ ++void ++bdev_io_stat_update(struct spdk_bdev_io *bdev_io, uint64_t tsc, struct spdk_bdev_io_stat *stat) ++{ ++ switch (bdev_io->type) { ++ case SPDK_BDEV_IO_TYPE_READ_NVME: ++ stat->bytes_read += bdev_io->u.contig.nbytes + bdev_io->u.contig.md_len; ++ stat->num_read_ops++; ++ stat->read_latency_ticks += (tsc - bdev_io->internal.submit_tsc); ++ break; ++ case SPDK_BDEV_IO_TYPE_WRITE_NVME: ++ stat->bytes_written += bdev_io->u.contig.nbytes + bdev_io->u.contig.md_len; ++ stat->num_write_ops++; ++ stat->write_latency_ticks += (tsc - bdev_io->internal.submit_tsc); ++ break; ++ case SPDK_BDEV_IO_TYPE_READV_NVME: ++ stat->bytes_read += bdev_io->u.bdev.nbytes; ++ stat->num_read_ops++; ++ stat->read_latency_ticks += (tsc - bdev_io->internal.submit_tsc); ++ break; ++ case SPDK_BDEV_IO_TYPE_WRITEV_NVME: ++ stat->bytes_written += bdev_io->u.bdev.nbytes; ++ stat->num_write_ops++; ++ stat->write_latency_ticks += (tsc - bdev_io->internal.submit_tsc); ++ break; ++ default: ++ break; ++ } ++} ++ ++void ++bdev_update_iostat_map(struct spdk_bdev_io *bdev_io, uint64_t tsc, struct spdk_bdev_io_stat *stat, ++ struct spdk_io_channel *channel, uint64_t io_outstanding) ++{ ++ uint64_t num_poll_timeout; ++ ++ stat->pre_ticks = stat->cur_ticks; ++ stat->cur_ticks = tsc; ++ stat->io_ticks += stat->cur_ticks - stat->pre_ticks; ++ ++ if (g_libstorage_iostat) { ++ if ((stat->io_stat_id >= 0) && (stat->io_stat_id < STAT_MAX_NUM)) { ++ 
g_io_stat_map[stat->io_stat_id].io_outstanding = io_outstanding; ++ if (tsc > (stat->start_tsc + stat->interval_tsc)) { ++ g_io_stat_map[stat->io_stat_id].num_read_ops = stat->num_read_ops; ++ g_io_stat_map[stat->io_stat_id].num_write_ops = stat->num_write_ops; ++ g_io_stat_map[stat->io_stat_id].bytes_read = stat->bytes_read; ++ g_io_stat_map[stat->io_stat_id].bytes_written = stat->bytes_written; ++ g_io_stat_map[stat->io_stat_id].read_latency_ticks = stat->read_latency_ticks; ++ g_io_stat_map[stat->io_stat_id].write_latency_ticks = stat->write_latency_ticks; ++ g_io_stat_map[stat->io_stat_id].io_ticks = stat->io_ticks; ++ ++ stat->start_tsc = tsc; ++ ++ if (g_polltime_threshold) { ++ num_poll_timeout = bdev_io->bdev->fn_table->get_timeout_count ? \ ++ bdev_io->bdev->fn_table->get_timeout_count(channel) : 0; ++ g_io_stat_map[stat->io_stat_id].poll_time_used = true; ++ g_io_stat_map[stat->io_stat_id].num_poll_timeout = num_poll_timeout; ++ } ++ } ++ } ++ } ++} +-- +2.33.0 + diff --git a/0019-lib-env_dpdk-Add-config-args-for-HSAK.patch b/0019-lib-env_dpdk-Add-config-args-for-HSAK.patch new file mode 100644 index 0000000..c68041a --- /dev/null +++ b/0019-lib-env_dpdk-Add-config-args-for-HSAK.patch @@ -0,0 +1,176 @@ +From c359ae7d0ee0593b032f2e2543442fa9f6df3827 Mon Sep 17 00:00:00 2001 +From: sunshihao +Date: Mon, 22 Feb 2021 19:58:17 +0800 +Subject: [PATCH 19/27] lib/env_dpdk: Add config args for HSAK + +Signed-off-by: sunshihao +--- + lib/env_dpdk/init.c | 7 +++++++ + lib/event/reactor.c | 36 +++++++++++++++++++++++++++++--- + lib/jsonrpc/jsonrpc_internal.h | 2 +- + lib/jsonrpc/jsonrpc_server_tcp.c | 4 ++-- + 4 files changed, 43 insertions(+), 6 deletions(-) + +diff --git a/lib/env_dpdk/init.c b/lib/env_dpdk/init.c +index e6464c9..3bb713d 100644 +--- a/lib/env_dpdk/init.c ++++ b/lib/env_dpdk/init.c +@@ -398,6 +398,13 @@ build_eal_cmdline(const struct spdk_env_opts *opts) + + #ifdef __linux__ + ++#ifdef SPDK_CONFIG_APP_RW ++ /* set IOVA use phys addr and keep same 
with DPDK16.11 */ ++ args = push_arg(args, &argcount, _sprintf_alloc("--iova-mode=pa")); ++ if (args == NULL) { ++ return -1; ++ } ++#endif + if (opts->iova_mode) { + args = push_arg(args, &argcount, _sprintf_alloc("--iova-mode=%s", opts->iova_mode)); + if (args == NULL) { +diff --git a/lib/event/reactor.c b/lib/event/reactor.c +index 724371c..9fb9e0f 100644 +--- a/lib/event/reactor.c ++++ b/lib/event/reactor.c +@@ -42,6 +42,8 @@ + #include "spdk/util.h" + #include "spdk/string.h" + #include "spdk/fd_group.h" ++#include "spdk_internal/thread.h" ++#include "spdk/conf.h" + + #ifdef __linux__ + #include +@@ -54,6 +56,10 @@ + + #define SPDK_EVENT_BATCH_SIZE 8 + ++#ifdef SPDK_CONFIG_APP_RW ++struct spdk_iodev_thread_info lcore_thread_info[RTE_MAX_LCORE]; ++#endif ++ + static struct spdk_reactor *g_reactors; + static uint32_t g_reactor_count; + static struct spdk_cpuset g_reactor_core_mask; +@@ -62,6 +68,7 @@ static enum spdk_reactor_state g_reactor_state = SPDK_REACTOR_STATE_UNINITIALIZE + static bool g_framework_context_switch_monitor_enabled = true; + + static struct spdk_mempool *g_spdk_event_mempool = NULL; ++static int16_t g_reactor_batch_size = SPDK_EVENT_BATCH_SIZE; + + TAILQ_HEAD(, spdk_scheduler) g_scheduler_list + = TAILQ_HEAD_INITIALIZER(g_scheduler_list); +@@ -250,6 +257,20 @@ spdk_reactors_init(void) + uint32_t i, current_core; + char mempool_name[32]; + ++#ifdef SPDK_CONFIG_APP_RW ++ struct spdk_conf_section *sp; ++ sp = spdk_conf_find_section(NULL, "Reactor"); ++ if (sp != 0) { ++ g_reactor_batch_size = spdk_conf_section_get_intval(sp, "BatchSize"); ++ if (g_reactor_batch_size <= 0 || g_reactor_batch_size > SPDK_EVENT_BATCH_SIZE) { ++ g_reactor_batch_size = SPDK_EVENT_BATCH_SIZE; ++ } ++ syslog(LOG_INFO,"BatchSize is set to %d\n", g_reactor_batch_size); ++ } else { ++ SPDK_ERRLOG("config file does not contain [Reactor] section, which need to be provided\n"); ++ } ++#endif ++ + snprintf(mempool_name, sizeof(mempool_name), "evtpool_%d", getpid()); + 
g_spdk_event_mempool = spdk_mempool_create(mempool_name, + 262144 - 1, /* Power of 2 minus 1 is optimal for memory consumption */ +@@ -557,7 +578,7 @@ event_queue_run_batch(struct spdk_reactor *reactor) + return -errno; + } + +- count = spdk_ring_dequeue(reactor->events, events, SPDK_EVENT_BATCH_SIZE); ++ count = spdk_ring_dequeue(reactor->events, events, g_reactor_batch_size); + + if (spdk_ring_count(reactor->events) != 0) { + /* Trigger new notification if there are still events in event-queue waiting for processing. */ +@@ -568,7 +589,7 @@ event_queue_run_batch(struct spdk_reactor *reactor) + } + } + } else { +- count = spdk_ring_dequeue(reactor->events, events, SPDK_EVENT_BATCH_SIZE); ++ count = spdk_ring_dequeue(reactor->events, events, g_reactor_batch_size); + } + + if (count == 0) { +@@ -948,6 +969,9 @@ reactor_run(void *arg) + } + + if (g_reactor_state != SPDK_REACTOR_STATE_RUNNING) { ++#ifdef SPDK_CONFIG_APP_RW ++ lcore_thread_info[reactor->lcore].state = SPDK_THREAD_STATE_EXITED; ++#endif + break; + } + } +@@ -1039,11 +1063,16 @@ spdk_reactors_start(void) + spdk_cpuset_zero(&tmp_cpumask); + spdk_cpuset_set_cpu(&tmp_cpumask, i, true); + ++#ifdef SPDK_CONFIG_APP_RW ++ lcore_thread_info[reactor->lcore].thread = spdk_thread_create(thread_name, &tmp_cpumask); ++ lcore_thread_info[reactor->lcore].state = SPDK_THREAD_STATE_RUNNING; ++#else + spdk_thread_create(thread_name, &tmp_cpumask); ++#endif + } + spdk_cpuset_set_cpu(&g_reactor_core_mask, i, true); + } +- ++#ifndef SPDK_CONFIG_APP_RW + /* Start the main reactor */ + reactor = spdk_reactor_get(current_core); + assert(reactor != NULL); +@@ -1052,6 +1081,7 @@ spdk_reactors_start(void) + spdk_env_thread_wait_all(); + + g_reactor_state = SPDK_REACTOR_STATE_SHUTDOWN; ++#endif + } + + void +diff --git a/lib/jsonrpc/jsonrpc_internal.h b/lib/jsonrpc/jsonrpc_internal.h +index 4e5852e..331ee00 100644 +--- a/lib/jsonrpc/jsonrpc_internal.h ++++ b/lib/jsonrpc/jsonrpc_internal.h +@@ -40,7 +40,7 @@ + + #include 
"spdk/log.h" + +-#define SPDK_JSONRPC_RECV_BUF_SIZE (32 * 1024) ++#define SPDK_JSONRPC_RECV_BUF_SIZE (4 * 1024 * 1024) + #define SPDK_JSONRPC_SEND_BUF_SIZE_INIT (32 * 1024) + #define SPDK_JSONRPC_SEND_BUF_SIZE_MAX (32 * 1024 * 1024) + #define SPDK_JSONRPC_ID_MAX_LEN 128 +diff --git a/lib/jsonrpc/jsonrpc_server_tcp.c b/lib/jsonrpc/jsonrpc_server_tcp.c +index 71f3b5c..5173aea 100644 +--- a/lib/jsonrpc/jsonrpc_server_tcp.c ++++ b/lib/jsonrpc/jsonrpc_server_tcp.c +@@ -319,7 +319,7 @@ jsonrpc_server_conn_recv(struct spdk_jsonrpc_server_conn *conn) + } + + offset += rc; +- } while (rc > 0); ++ } while (rc > 1000); + + if (offset > 0) { + /* +@@ -375,7 +375,7 @@ more: + return 0; + } + +- SPDK_DEBUGLOG(rpc, "send() failed: %s\n", spdk_strerror(errno)); ++ SPDK_ERRLOG("send() failed: %s\n", spdk_strerror(errno)); + return -1; + } + +-- +2.33.0 + diff --git a/0020-lib-nvme-Add-nvme-support-for-HSAK.patch b/0020-lib-nvme-Add-nvme-support-for-HSAK.patch new file mode 100644 index 0000000..4bc4cea --- /dev/null +++ b/0020-lib-nvme-Add-nvme-support-for-HSAK.patch @@ -0,0 +1,1441 @@ +From a2e7a4a2694bc01e480f12c535485445a62828e1 Mon Sep 17 00:00:00 2001 +From: sunshihao +Date: Thu, 25 Feb 2021 16:15:02 +0800 +Subject: [PATCH 20/27] lib/nvme: Add nvme support for HSAK + +Signed-off-by: sunshihao +--- + lib/event/reactor.c | 2 +- + lib/nvme/Makefile | 1 + + lib/nvme/nvme.c | 123 +++++++++++++++++ + lib/nvme/nvme_ctrlr.c | 31 +++++ + lib/nvme/nvme_ctrlr_cmd.c | 18 +++ + lib/nvme/nvme_ctrlr_self.c | 239 ++++++++++++++++++++++++++++++++ + lib/nvme/nvme_internal.h | 14 +- + lib/nvme/nvme_ns.c | 5 + + lib/nvme/nvme_ns_cmd.c | 140 ++++++++++++++++++- + lib/nvme/nvme_ns_self.c | 93 +++++++++++++ + lib/nvme/nvme_pcie.c | 26 +++- + lib/nvme/nvme_pcie_common.c | 12 ++ + lib/nvme/nvme_qpair.c | 10 +- + lib/nvme/nvme_rebind.c | 262 ++++++++++++++++++++++++++++++++++++ + lib/nvme/nvme_uevent.c | 6 +- + lib/nvme/nvme_uevent.h | 2 + + 16 files changed, 972 insertions(+), 12 deletions(-) + create 
mode 100644 lib/nvme/nvme_ctrlr_self.c + create mode 100644 lib/nvme/nvme_ns_self.c + create mode 100644 lib/nvme/nvme_rebind.c + +diff --git a/lib/event/reactor.c b/lib/event/reactor.c +index 9fb9e0f..3eb8799 100644 +--- a/lib/event/reactor.c ++++ b/lib/event/reactor.c +@@ -265,7 +265,7 @@ spdk_reactors_init(void) + if (g_reactor_batch_size <= 0 || g_reactor_batch_size > SPDK_EVENT_BATCH_SIZE) { + g_reactor_batch_size = SPDK_EVENT_BATCH_SIZE; + } +- syslog(LOG_INFO,"BatchSize is set to %d\n", g_reactor_batch_size); ++ syslog(LOG_INFO, "BatchSize is set to %d\n", g_reactor_batch_size); + } else { + SPDK_ERRLOG("config file does not contain [Reactor] section, which need to be provided\n"); + } +diff --git a/lib/nvme/Makefile b/lib/nvme/Makefile +index fcc9d29..c5f9691 100644 +--- a/lib/nvme/Makefile ++++ b/lib/nvme/Makefile +@@ -42,6 +42,7 @@ C_SRCS = nvme_ctrlr_cmd.c nvme_ctrlr.c nvme_fabric.c nvme_ns_cmd.c nvme_ns.c nvm + C_SRCS-$(CONFIG_VFIO_USER) += nvme_vfio_user.c + C_SRCS-$(CONFIG_RDMA) += nvme_rdma.c + C_SRCS-$(CONFIG_NVME_CUSE) += nvme_cuse.c ++C_SRCS-$(CONFIG_APP_RW) += nvme_rebind.c nvme_ctrlr_self.c nvme_ns_self.c + + LIBNAME = nvme + LOCAL_SYS_LIBS = +diff --git a/lib/nvme/nvme.c b/lib/nvme/nvme.c +index a23abf1..fca2f41 100644 +--- a/lib/nvme/nvme.c ++++ b/lib/nvme/nvme.c +@@ -36,6 +36,9 @@ + #include "nvme_internal.h" + #include "nvme_io_msg.h" + #include "nvme_uevent.h" ++#include "spdk/nvme.h" ++#include "spdk_internal/debug.h" ++#include "spdk/bdev_module.h" + + #define SPDK_NVME_DRIVER_NAME "spdk_nvme_driver" + +@@ -56,6 +59,91 @@ nvme_ctrlr_shared(const struct spdk_nvme_ctrlr *ctrlr) + return ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE; + } + ++#ifdef SPDK_CONFIG_APP_RW ++static pthread_t g_admin_timer_thread; ++ ++bool nvme_ctrlr_is_exist(struct spdk_nvme_ctrlr *ctrlr) ++{ ++ union spdk_nvme_cc_register cc; ++ ++ if (NULL == ctrlr) { ++ SPDK_ERRLOG("invalid paramter\n"); ++ return false; ++ } ++ ++ if (nvme_transport_ctrlr_get_reg_4(ctrlr, 
offsetof(struct spdk_nvme_registers, cc.raw), ++ &cc.raw) != 0) { ++ return false; ++ } ++ ++ return true; ++} ++ ++static void admin_timer_timeout(void) ++{ ++ struct spdk_nvme_ctrlr *ctrlr = NULL; ++ ++ nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock); ++ ++ TAILQ_FOREACH(ctrlr, &g_spdk_nvme_driver->shared_attached_ctrlrs, tailq) { ++ nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); ++ if (ctrlr->is_resetting) { ++ nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); ++ continue; ++ } ++ spdk_nvme_ctrlr_process_admin_completions(ctrlr); ++ nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); ++ } ++ ++ nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock); ++} ++ ++static void *nvme_ctrlr_run_admin_timer(void *arg) ++{ ++ sleep(20); ++ ++ while (1) { ++ admin_timer_timeout(); ++ usleep(10000); ++ } ++ ++ return ((void *)0); ++} ++ ++static int ++nvme_ctrlr_start_admin_timer(void) ++{ ++ if (g_admin_timer_thread == 0) { ++ if (pthread_create(&g_admin_timer_thread, NULL, nvme_ctrlr_run_admin_timer, NULL) != 0) { ++ SPDK_ERRLOG("Failed to create admin timer thread.\n"); ++ return -1; ++ } ++ } ++ ++ return 0; ++} ++ ++int ++spdk_nvme_detach_ublock(struct spdk_nvme_ctrlr *ctrlr) ++{ ++ nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock); ++ ++ nvme_ctrlr_proc_put_ref(ctrlr); ++ ++ if (nvme_ctrlr_get_ref_count(ctrlr) == 0) { ++ if (nvme_ctrlr_shared(ctrlr)) { ++ TAILQ_REMOVE(&g_spdk_nvme_driver->shared_attached_ctrlrs, ctrlr, tailq); ++ } else { ++ TAILQ_REMOVE(&g_nvme_attached_ctrlrs, ctrlr, tailq); ++ } ++ nvme_ctrlr_destruct_ublock(ctrlr); ++ } ++ ++ nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock); ++ return 0; ++} ++#endif ++ + void + nvme_ctrlr_connected(struct spdk_nvme_probe_ctx *probe_ctx, + struct spdk_nvme_ctrlr *ctrlr) +@@ -239,11 +327,13 @@ nvme_completion_poll_cb(void *arg, const struct spdk_nvme_cpl *cpl) + { + struct nvme_completion_poll_status *status = arg; + ++#ifndef SPDK_CONFIG_APP_RW + if (status->timed_out) { + /* There is no routine waiting for the completion of 
this request, free allocated memory */ + free(status); + return; + } ++#endif + + /* + * Copy status into the argument passed by the caller, so that +@@ -476,7 +566,11 @@ nvme_request_check_timeout(struct nvme_request *req, uint16_t cid, + + assert(active_proc->timeout_cb_fn != NULL); + ++#ifndef SPDK_CONFIG_APP_RW + if (req->timed_out || req->submit_tick == 0) { ++#else ++ if (req->submit_tick == 0) { ++#endif + return 0; + } + +@@ -493,6 +587,7 @@ nvme_request_check_timeout(struct nvme_request *req, uint16_t cid, + return 1; + } + ++#ifndef SPDK_CONFIG_APP_RW + req->timed_out = true; + + /* +@@ -503,6 +598,28 @@ nvme_request_check_timeout(struct nvme_request *req, uint16_t cid, + active_proc->timeout_cb_fn(active_proc->timeout_cb_arg, ctrlr, + nvme_qpair_is_admin_queue(qpair) ? NULL : qpair, + cid); ++#else ++ if (!nvme_qpair_is_admin_queue(qpair) && (req->cmd.opc == SPDK_NVME_OPC_WRITE || ++ req->cmd.opc == SPDK_NVME_OPC_READ)) { ++ SPDK_WARNLOG("IO timeout, OP[%u] NS[%u] LBA[%lu].\n", req->cmd.opc, req->cmd.nsid, ++ *(uint64_t *)&req->cmd.cdw10); ++ } else { ++ SPDK_WARNLOG("%s Command[%u] timeout.\n", nvme_qpair_is_admin_queue(qpair) ? ++ "Admin" : "IO", req->cmd.opc); ++ } ++ if (req->timed_out) { ++ /* Reset the controller if the command was already timed out. */ ++ SPDK_WARNLOG("IO Command[%u] timeout again, reset controller.\n", cid); ++ active_proc->timeout_cb_fn(active_proc->timeout_cb_arg, ctrlr, NULL, cid); ++ } else { ++ req->timed_out = true; ++ active_proc->timeout_cb_fn(active_proc->timeout_cb_arg, ctrlr, ++ nvme_qpair_is_admin_queue(qpair) ? NULL : qpair, ++ cid); ++ /* Timing again. 
Reset the controller if it times out again */ ++ req->submit_tick = spdk_get_ticks(); ++ } ++#endif + return 0; + } + +@@ -811,6 +928,12 @@ nvme_probe_internal(struct spdk_nvme_probe_ctx *probe_ctx, + return -1; + } + ++#ifdef SPDK_CONFIG_APP_RW ++ if (nvme_ctrlr_start_admin_timer() != 0) { ++ return -1; ++ } ++#endif ++ + nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock); + + rc = nvme_transport_ctrlr_scan(probe_ctx, direct_connect); +diff --git a/lib/nvme/nvme_ctrlr.c b/lib/nvme/nvme_ctrlr.c +index ff27771..fa28f07 100644 +--- a/lib/nvme/nvme_ctrlr.c ++++ b/lib/nvme/nvme_ctrlr.c +@@ -38,6 +38,9 @@ + + #include "spdk/env.h" + #include "spdk/string.h" ++#ifdef SPDK_CONFIG_APP_RW ++#include "spdk_internal/debug.h" ++#endif + + struct nvme_active_ns_ctx; + +@@ -3064,8 +3067,13 @@ nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr) + * Not using sleep() to avoid blocking other controller's initialization. + */ + if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) { ++#ifdef SPDK_CONFIG_APP_RW ++ SPDK_DEBUGLOG(nvme, "Applying quirk: delay 2 seconds before reading registers\n"); ++ ctrlr->sleep_timeout_tsc = spdk_get_ticks() + 2 * spdk_get_ticks_hz(); ++#else + SPDK_DEBUGLOG(nvme, "Applying quirk: delay 2.5 seconds before reading registers\n"); + ctrlr->sleep_timeout_tsc = ticks + (2500 * spdk_get_ticks_hz() / 1000); ++#endif + } + return 0; + } else { +@@ -3268,11 +3276,15 @@ nvme_ctrlr_construct(struct spdk_nvme_ctrlr *ctrlr) + { + int rc; + ++#ifdef SPDK_CONFIG_APP_RW ++ nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE); ++#else + if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT_DELAY, NVME_TIMEOUT_INFINITE); + } else { + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE); + } ++#endif + + if (ctrlr->opts.admin_queue_size > SPDK_NVME_ADMIN_QUEUE_MAX_ENTRIES) { + SPDK_ERRLOG("admin_queue_size %u exceeds max defined by NVMe spec, use max value\n", +@@ -4209,3 
+4221,22 @@ spdk_nvme_map_prps(void *prv, struct spdk_nvme_cmd *cmd, struct iovec *iovs, + + return iovcnt; + } ++ ++#ifdef SPDK_CONFIG_APP_RW ++void ++nvme_ctrlr_destruct_ublock(struct spdk_nvme_ctrlr *ctrlr) ++{ ++ struct spdk_nvme_qpair *qpair = NULL; ++ struct spdk_nvme_qpair *tmp = NULL; ++ ++ SPDK_DEBUGLOG(nvme, "Prepare to destruct SSD: %s\n", ctrlr->trid.traddr); ++ TAILQ_FOREACH_SAFE(qpair, &ctrlr->active_io_qpairs, tailq, tmp) { ++ spdk_nvme_ctrlr_free_io_qpair(qpair); ++ } ++ ++ nvme_ctrlr_free_doorbell_buffer(ctrlr); ++ nvme_ctrlr_destruct_namespaces(ctrlr); ++ spdk_bit_array_free(&ctrlr->free_io_qids); ++ nvme_transport_ctrlr_destruct(ctrlr); ++} ++#endif +diff --git a/lib/nvme/nvme_ctrlr_cmd.c b/lib/nvme/nvme_ctrlr_cmd.c +index 29d76f3..d335bc6 100644 +--- a/lib/nvme/nvme_ctrlr_cmd.c ++++ b/lib/nvme/nvme_ctrlr_cmd.c +@@ -581,17 +581,35 @@ nvme_ctrlr_retry_queued_abort(struct spdk_nvme_ctrlr *ctrlr) + rc = nvme_ctrlr_submit_admin_request(ctrlr, next); + if (rc < 0) { + SPDK_ERRLOG("Failed to submit queued abort.\n"); ++#ifndef SPDK_CONFIG_APP_RW + memset(&next->cpl, 0, sizeof(next->cpl)); + next->cpl.status.sct = SPDK_NVME_SCT_GENERIC; + next->cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; + next->cpl.status.dnr = 1; + nvme_complete_request(next->cb_fn, next->cb_arg, next->qpair, next, &next->cpl); + nvme_free_request(next); ++#else ++ nvme_free_request(next); ++ break; ++#endif + } else { + /* If the first abort succeeds, stop iterating. 
*/ + break; + } + } ++ ++#ifdef SPDK_CONFIG_APP_RW ++ nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); ++ if (rc < 0) { ++ /* If abort fail, free all of the queued abort requests */ ++ STAILQ_FOREACH_SAFE(next, &ctrlr->queued_aborts, stailq, tmp) { ++ STAILQ_REMOVE_HEAD(&ctrlr->queued_aborts, stailq); ++ nvme_free_request(next); ++ ctrlr->outstanding_aborts--; ++ } ++ } ++ nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); ++#endif + } + + static int +diff --git a/lib/nvme/nvme_ctrlr_self.c b/lib/nvme/nvme_ctrlr_self.c +new file mode 100644 +index 0000000..d3937d9 +--- /dev/null ++++ b/lib/nvme/nvme_ctrlr_self.c +@@ -0,0 +1,239 @@ ++/* ++ * Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 and ++ * only version 2 as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ */ ++ ++#include "spdk/stdinc.h" ++#include "nvme_internal.h" ++ ++void ++spdk_nvme_ctrlr_set_shutdown(struct spdk_nvme_ctrlr *ctrlr, bool is_shutdown) ++{ ++ nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); ++ ctrlr-> is_destructed= is_shutdown; ++ nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); ++} ++ ++bool ++spdk_nvme_ctrlr_is_smart_per_namespace_supported(struct spdk_nvme_ctrlr *ctrlr) ++{ ++ if(NULL == ctrlr) { ++ SPDK_ERRLOG("spdk_nvme_ctrlr_is_smart_per_namespace_supported: Invalid Parameters!\n"); ++ return false; ++ } ++ ++ /* check Bit 0 of Log Page Attributes(LPA), ++ to find out whether the controller supports namespace basis or not. 
*/ ++ ++ if(0 == ctrlr->cdata.lpa.ns_smart) { ++ SPDK_NOTICELOG("This controller does not support the SMART information on a per namespace basis.\n"); ++ return false; ++ } ++ ++ return true; ++} ++ ++static int nvme_get_log_info(struct spdk_nvme_ctrlr *ctrlr, uint8_t log_page, uint32_t nsid, ++ void *payload, uint32_t payload_size) ++{ ++ struct nvme_completion_poll_status status = {0x0}; ++ int ret; ++ ++ status.done = false; ++ ret = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, log_page, nsid, payload, payload_size, 0, ++ nvme_completion_poll_cb, &status); ++ if (ret) { ++ return ret; ++ } ++ ++ while (status.done == false) { ++ nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); ++ spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); ++ nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); ++ } ++ if (spdk_nvme_cpl_is_error(&status.cpl)) { ++ SPDK_ERRLOG("spdk_nvme_ctrlr_get_smart_info failed! sc[0x%x], sct[0x%x]\n", ++ status.cpl.status.sc, status.cpl.status.sct); ++ return -ENXIO; ++ } ++ return 0; ++} ++ ++int ++spdk_nvme_ctrlr_get_smart_info(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, ++ struct spdk_nvme_health_information_page *smart_info) ++{ ++ struct spdk_nvme_ns *ns = NULL; ++ ++ if(NULL == ctrlr || NULL == smart_info) { ++ SPDK_ERRLOG("Invalid parameters!\n"); ++ return -EINVAL; ++ } ++ ++ /* if controller does not support namespase basis, then set the nsid to 0xFFFFFFFF, and continue the process. ++ and if nsid is 0, set the nsid to 0xFFFFFFFF too. */ ++ if(!spdk_nvme_ctrlr_is_smart_per_namespace_supported(ctrlr) || 0 == nsid) { ++ nsid = SPDK_NVME_GLOBAL_NS_TAG; ++ } ++ ++ /* nsid should be 0xffffffff or on a per namespace basis. */ ++ if(nsid != SPDK_NVME_GLOBAL_NS_TAG) { ++ ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); ++ if (NULL == ns) { ++ SPDK_ERRLOG("Invalid NS %u\n", nsid); ++ return -EINVAL; ++ } ++ ++ /* if the namespace specified is not active, set the nsid to 0xFFFFFFFF, and continue the process. 
*/ ++ if (!spdk_nvme_ns_is_active(ns)) { ++ SPDK_WARNLOG("NS %u is not active\n", nsid); ++ nsid = SPDK_NVME_GLOBAL_NS_TAG; ++ } ++ } ++ ++ return nvme_get_log_info(ctrlr, SPDK_NVME_LOG_HEALTH_INFORMATION, nsid, smart_info, ++ sizeof(struct spdk_nvme_health_information_page)); ++} ++ ++int ++spdk_nvme_ctrlr_get_error_info(struct spdk_nvme_ctrlr *ctrlr, uint32_t err_entries, ++ struct spdk_nvme_error_information_entry *error_info) ++{ ++ const struct spdk_nvme_ctrlr_data *cdata = NULL; ++ uint32_t nsid = SPDK_NVME_GLOBAL_NS_TAG; ++ int ret; ++ ++ if(NULL == ctrlr || NULL == error_info) { ++ SPDK_ERRLOG("Invalid parameters!\n"); ++ return -EINVAL; ++ } ++ ++ /* get cdata for judging the err_entries is bigger than the maximum number of entries supported or not. */ ++ cdata = spdk_nvme_ctrlr_get_data(ctrlr); ++ if (err_entries > (cdata->elpe + 1u)) { ++ /* if the parameter is bigger, then cut it into the maximum number supported. */ ++ SPDK_WARNLOG("Cannot get %d error log entries, the controller only support %d errors.\n", ++ err_entries, cdata->elpe + 1); ++ err_entries = cdata->elpe + 1; ++ } ++ ++ ret = nvme_get_log_info(ctrlr, SPDK_NVME_LOG_ERROR, nsid, error_info, ++ sizeof(struct spdk_nvme_error_information_entry) * err_entries); ++ if (ret) { ++ return ret; ++ } ++ /* return actual count of error log pages info. 
*/ ++ return err_entries; ++} ++ ++struct spdk_nvme_ctrlr_opts * ++spdk_nvme_ctrlr_get_opts(struct spdk_nvme_ctrlr *ctrlr) ++{ ++ return &ctrlr->opts; ++} ++ ++bool ++spdk_nvme_ctrlr_is_ns_manage_supported(struct spdk_nvme_ctrlr *ctrlr) ++{ ++ return ctrlr->cdata.oacs.ns_manage != 0; ++} ++ ++bool ++spdk_nvme_ctrlr_is_format_supported(struct spdk_nvme_ctrlr *ctrlr) ++{ ++ return ctrlr->cdata.oacs.format != 0; ++} ++ ++bool ++spdk_nvme_ctrlr_is_format_all_ns(struct spdk_nvme_ctrlr *ctrlr) ++{ ++ return ctrlr->cdata.fna.format_all_ns != 0; ++} ++ ++bool ++spdk_nvme_ctrlr_is_directive_supported(struct spdk_nvme_ctrlr *ctrlr) ++{ ++ return ctrlr->cdata.oacs.directives != 0; ++} ++ ++void ++spdk_nvme_ctrlr_update_unvmcap(struct spdk_nvme_ctrlr *ctrlr) ++{ ++ int rc; ++ struct nvme_completion_poll_status status; ++ struct spdk_nvme_ctrlr_data cdata; ++ if (ctrlr == NULL) { ++ SPDK_ERRLOG("Parameter error\n"); ++ return; ++ } ++ ++ status.done = false; ++ rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_CTRLR, 0, 0, 0, &cdata, ++ sizeof(cdata), nvme_completion_poll_cb, &status); ++ if (rc != 0) { ++ return; ++ } ++ ++ if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) { ++ SPDK_ERRLOG("Failed to identify ctrlr data, cannot update unvmcap, sct[%x], sc[%x]\n", ++ status.cpl.status.sct, status.cpl.status.sc); ++ return; ++ } ++ ++ ctrlr->cdata.unvmcap[0] = cdata.unvmcap[0]; ++ ctrlr->cdata.unvmcap[1] = cdata.unvmcap[1]; ++} ++ ++int32_t ++spdk_nvme_ctrlr_identify_directives(struct spdk_nvme_ctrlr *ctrlr, uint16_t nsid, void *payload) ++{ ++ struct nvme_completion_poll_status status; ++ int32_t res; ++ ++ if (ctrlr == NULL || payload == NULL) { ++ SPDK_ERRLOG("Parameter error\n"); ++ return -EINVAL; ++ } ++ ++ if (!spdk_nvme_ctrlr_is_directive_supported(ctrlr)) { ++ SPDK_WARNLOG("The controller[%s] does not support Directives.\n", ctrlr->trid.traddr); ++ return -EPERM; ++ } ++ ++ status.done = false; ++ res = 
nvme_ctrlr_cmd_directive_receive(ctrlr, nsid, SPDK_NVME_ID_RECV_OP_RET_PARA, ++ SPDK_NVME_DIR_TYPE_IDENTIFY, 0, payload, ++ sizeof(struct spdk_nvme_identify_recv_ret_para), ++ 0, nvme_completion_poll_cb, &status); ++ if (res != 0) { ++ return res; ++ } ++ ++ while (status.done == false) { ++ nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); ++ spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); ++ nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); ++ } ++ ++ if (spdk_nvme_cpl_is_error(&status.cpl)) { ++ SPDK_ERRLOG("Failed to Identify directive! sc[0x%x], sct[0x%x]\n", ++ status.cpl.status.sc, status.cpl.status.sct); ++ return -ENXIO; ++ } ++ ++ return 0; ++} ++ ++uint16_t ++spdk_nvme_get_qpair_id(struct spdk_nvme_qpair *qpair) ++{ ++ return qpair->id; ++} +diff --git a/lib/nvme/nvme_internal.h b/lib/nvme/nvme_internal.h +index 1626680..6934f9f 100644 +--- a/lib/nvme/nvme_internal.h ++++ b/lib/nvme/nvme_internal.h +@@ -173,7 +173,7 @@ extern pid_t g_spdk_nvme_pid; + #define DEFAULT_IO_QUEUE_SIZE (256) + #define DEFAULT_IO_QUEUE_SIZE_FOR_QUIRK (1024) /* Matches Linux kernel driver */ + +-#define DEFAULT_IO_QUEUE_REQUESTS (512) ++#define DEFAULT_IO_QUEUE_REQUESTS (2048) + + #define SPDK_NVME_DEFAULT_RETRY_COUNT (4) + +@@ -208,6 +208,10 @@ enum nvme_payload_type { + + /** nvme_request::u.sgl is valid for this request */ + NVME_PAYLOAD_TYPE_SGL, ++#ifdef SPDK_CONFIG_APP_RW ++ /** nvme_request::sgl is not extended LBA align */ ++ NVME_PAYLOAD_TYPE_SGL_PRP, ++#endif + }; + + /** +@@ -231,6 +235,9 @@ struct nvme_payload { + + /** Virtual memory address of a single virtually contiguous metadata buffer */ + void *md; ++#ifdef SPDK_CONFIG_APP_RW ++ enum nvme_payload_type payload_type; ++#endif + }; + + #define NVME_PAYLOAD_CONTIG(contig_, md_) \ +@@ -1063,6 +1070,11 @@ typedef int (*spdk_nvme_parse_ana_log_page_cb)( + int nvme_ctrlr_parse_ana_log_page(struct spdk_nvme_ctrlr *ctrlr, + spdk_nvme_parse_ana_log_page_cb cb_fn, void *cb_arg); + ++#ifdef SPDK_CONFIG_APP_RW ++void 
nvme_ctrlr_destruct_ublock(struct spdk_nvme_ctrlr *ctrlr); ++void nvme_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr); ++#endif ++ + static inline struct nvme_request * + nvme_allocate_request(struct spdk_nvme_qpair *qpair, + const struct nvme_payload *payload, uint32_t payload_size, uint32_t md_size, +diff --git a/lib/nvme/nvme_ns.c b/lib/nvme/nvme_ns.c +index f5cf75b..458d32f 100644 +--- a/lib/nvme/nvme_ns.c ++++ b/lib/nvme/nvme_ns.c +@@ -108,6 +108,11 @@ nvme_ns_set_identify_data(struct spdk_nvme_ns *ns) + ns->flags |= SPDK_NVME_NS_DPS_PI_SUPPORTED; + ns->pi_type = nsdata->dps.pit; + } ++#ifdef SPDK_CONFIG_APP_RW ++ if (nsdata->dps.md_start) { ++ ns->flags |= SPDK_NVME_NS_DPS_PI_MDSTART; ++ } ++#endif + } + + static int +diff --git a/lib/nvme/nvme_ns_cmd.c b/lib/nvme/nvme_ns_cmd.c +index 73246f8..4d706bc 100644 +--- a/lib/nvme/nvme_ns_cmd.c ++++ b/lib/nvme/nvme_ns_cmd.c +@@ -35,7 +35,7 @@ + + static inline struct nvme_request *_nvme_ns_cmd_rw(struct spdk_nvme_ns *ns, + struct spdk_nvme_qpair *qpair, +- const struct nvme_payload *payload, uint32_t payload_offset, uint32_t md_offset, ++ struct nvme_payload *payload, uint32_t payload_offset, uint32_t md_offset, + uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, + void *cb_arg, uint32_t opc, uint32_t io_flags, + uint16_t apptag_mask, uint16_t apptag, bool check_sgl); +@@ -79,7 +79,7 @@ _nvme_get_host_buffer_sector_size(struct spdk_nvme_ns *ns, uint32_t io_flags) + + static struct nvme_request * + _nvme_add_child_request(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, +- const struct nvme_payload *payload, ++ struct nvme_payload *payload, + uint32_t payload_offset, uint32_t md_offset, + uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t opc, + uint32_t io_flags, uint16_t apptag_mask, uint16_t apptag, +@@ -102,7 +102,7 @@ _nvme_add_child_request(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + static struct nvme_request * + 
_nvme_ns_cmd_split_request(struct spdk_nvme_ns *ns, + struct spdk_nvme_qpair *qpair, +- const struct nvme_payload *payload, ++ struct nvme_payload *payload, + uint32_t payload_offset, uint32_t md_offset, + uint64_t lba, uint32_t lba_count, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t opc, +@@ -174,7 +174,6 @@ _nvme_ns_cmd_setup_request(struct spdk_nvme_ns *ns, struct nvme_request *req, + + cmd->cdw12 = lba_count - 1; + cmd->cdw12 |= (io_flags & SPDK_NVME_IO_FLAGS_CDW12_MASK); +- + cmd->cdw15 = apptag_mask; + cmd->cdw15 = (cmd->cdw15 << 16 | apptag); + } +@@ -182,7 +181,7 @@ _nvme_ns_cmd_setup_request(struct spdk_nvme_ns *ns, struct nvme_request *req, + static struct nvme_request * + _nvme_ns_cmd_split_request_prp(struct spdk_nvme_ns *ns, + struct spdk_nvme_qpair *qpair, +- const struct nvme_payload *payload, ++ struct nvme_payload *payload, + uint32_t payload_offset, uint32_t md_offset, + uint64_t lba, uint32_t lba_count, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t opc, +@@ -301,10 +300,78 @@ _nvme_ns_cmd_split_request_prp(struct spdk_nvme_ns *ns, + return req; + } + ++#ifdef SPDK_CONFIG_APP_RW ++#define NVME_MAX_SGL_PRP_DESCRIPTORS (2048) ++ ++static int ++_nvme_ns_check_hw_sgl_request(struct spdk_nvme_qpair *qpair, struct nvme_payload *payload, ++ uint32_t payload_offset, uint32_t extended_lba_size, struct nvme_request *req) ++{ ++ void *virt_addr = NULL; ++ uint64_t phys_addr; ++ uint32_t remaining_transfer_len, remaining_user_sge_len, length; ++ uint32_t nseg = 0; ++ uint32_t nseg_idx; ++ ++ struct spdk_nvme_sgl_descriptor sgl[NVME_MAX_SGL_PRP_DESCRIPTORS]; ++ ++ /* ++ * check scattered payloads whether extended_lba_size align. 
++ */ ++ req->payload.reset_sgl_fn(req->payload.contig_or_cb_arg, payload_offset); ++ ++ remaining_transfer_len = req->payload_size; ++ ++ while (remaining_transfer_len > 0) { ++ req->payload.next_sge_fn(req->payload.contig_or_cb_arg, ++ &virt_addr, &remaining_user_sge_len); ++ ++ remaining_user_sge_len = spdk_min(remaining_user_sge_len, remaining_transfer_len); ++ remaining_transfer_len -= remaining_user_sge_len; ++ while (remaining_user_sge_len > 0) { ++ if (nseg >= NVME_MAX_SGL_PRP_DESCRIPTORS) { ++ SPDK_ERRLOG("Seg number of SGL should not greater than %d\n", NVME_MAX_SGL_PRP_DESCRIPTORS); ++ return -1; ++ } ++ ++ phys_addr = spdk_vtophys(virt_addr, NULL); ++ if (phys_addr == SPDK_VTOPHYS_ERROR) { ++ SPDK_ERRLOG("Cannot translate SGL data addr 0x%lx to physical addr.\n", (uint64_t)virt_addr); ++ return -1; ++ } ++ ++ length = spdk_min(remaining_user_sge_len, 0x200000 - _2MB_OFFSET(virt_addr)); ++ remaining_user_sge_len -= length; ++ virt_addr += length; ++ ++ if (nseg > 0 && phys_addr == ++ sgl[nseg - 1].address + sgl[nseg - 1].unkeyed.length) { ++ /* extend previous entry */ ++ sgl[nseg - 1].unkeyed.length += length; ++ continue; ++ } ++ ++ sgl[nseg].unkeyed.length = length; ++ sgl[nseg].address = phys_addr; ++ ++ nseg++; ++ } ++ } ++ ++ for (nseg_idx = 0; nseg_idx < nseg; nseg_idx++) { ++ if (sgl[nseg_idx].unkeyed.length % extended_lba_size) { ++ return 1; ++ } ++ } ++ ++ return 0; ++} ++#endif ++ + static struct nvme_request * + _nvme_ns_cmd_split_request_sgl(struct spdk_nvme_ns *ns, + struct spdk_nvme_qpair *qpair, +- const struct nvme_payload *payload, ++ struct nvme_payload *payload, + uint32_t payload_offset, uint32_t md_offset, + uint64_t lba, uint32_t lba_count, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t opc, +@@ -387,7 +454,7 @@ _nvme_ns_cmd_split_request_sgl(struct spdk_nvme_ns *ns, + + static inline struct nvme_request * + _nvme_ns_cmd_rw(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, +- const struct nvme_payload *payload, uint32_t 
payload_offset, uint32_t md_offset, ++ struct nvme_payload *payload, uint32_t payload_offset, uint32_t md_offset, + uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t opc, + uint32_t io_flags, uint16_t apptag_mask, uint16_t apptag, bool check_sgl) + { +@@ -404,6 +471,9 @@ _nvme_ns_cmd_rw(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + + req->payload_offset = payload_offset; + req->md_offset = md_offset; ++#ifdef SPDK_CONFIG_APP_RW ++ req->user_cb_arg = cb_arg; ++#endif + + /* Zone append commands cannot be split. */ + if (opc == SPDK_NVME_OPC_ZONE_APPEND) { +@@ -426,12 +496,33 @@ _nvme_ns_cmd_rw(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + cb_arg, opc, + io_flags, req, sectors_per_stripe, sectors_per_stripe - 1, apptag_mask, apptag); + } else if (lba_count > sectors_per_max_io) { ++#ifdef SPDK_CONFIG_APP_RW ++ if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL && check_sgl) { ++ if (ns->ctrlr->flags & SPDK_NVME_CTRLR_SGL_SUPPORTED) { ++ rc = _nvme_ns_check_hw_sgl_request(qpair, payload, payload_offset, sector_size, req); ++ } ++ } ++ if (rc > 0) { ++ req->payload.payload_type = NVME_PAYLOAD_TYPE_SGL_PRP; ++ payload->payload_type = NVME_PAYLOAD_TYPE_SGL_PRP; ++ } ++#endif + return _nvme_ns_cmd_split_request(ns, qpair, payload, payload_offset, md_offset, lba, lba_count, + cb_fn, + cb_arg, opc, + io_flags, req, sectors_per_max_io, 0, apptag_mask, apptag); + } else if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL && check_sgl) { + if (ns->ctrlr->flags & SPDK_NVME_CTRLR_SGL_SUPPORTED) { ++#ifdef SPDK_CONFIG_APP_RW ++ if ((payload->payload_type == NVME_PAYLOAD_TYPE_SGL_PRP) ++ || (_nvme_ns_check_hw_sgl_request(qpair, payload, payload_offset, sector_size, req) > 0)) { ++ req->payload.payload_type = NVME_PAYLOAD_TYPE_SGL_PRP; ++ payload->payload_type = NVME_PAYLOAD_TYPE_SGL_PRP; ++ return _nvme_ns_cmd_split_request_prp(ns, qpair, payload, payload_offset, md_offset, ++ lba, lba_count, cb_fn, 
cb_arg, opc, io_flags, ++ req, apptag_mask, apptag); ++ } ++#endif + return _nvme_ns_cmd_split_request_sgl(ns, qpair, payload, payload_offset, md_offset, + lba, lba_count, cb_fn, cb_arg, opc, io_flags, + req, apptag_mask, apptag); +@@ -1129,3 +1220,38 @@ spdk_nvme_ns_cmd_reservation_report(struct spdk_nvme_ns *ns, + + return nvme_qpair_submit_request(qpair, req); + } ++ ++#ifdef SPDK_CONFIG_APP_RW ++int ++spdk_nvme_ns_cmd_unmap_blocks(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, ++ uint32_t type, ++ const struct spdk_nvme_dsm_range *ranges, uint16_t num_ranges, ++ spdk_nvme_cmd_cb cb_fn, void *cb_arg) ++{ ++ struct nvme_request *req = NULL; ++ struct spdk_nvme_cmd *cmd = NULL; ++ struct nvme_payload payload; ++ ++ if (ranges == NULL) { ++ return -EINVAL; ++ } ++ ++ payload = NVME_PAYLOAD_CONTIG((void *)ranges, NULL); ++ ++ req = nvme_allocate_request(qpair, &payload, num_ranges * sizeof(struct spdk_nvme_dsm_range), ++ 0, cb_fn, cb_arg); ++ if (req == NULL) { ++ return -ENOMEM; ++ } ++ ++ req->user_cb_arg = cb_arg; ++ ++ cmd = &req->cmd; ++ cmd->opc = SPDK_NVME_OPC_DATASET_MANAGEMENT; ++ cmd->nsid = ns->id; ++ ++ cmd->cdw10 = num_ranges - 1; ++ cmd->cdw11 = type; ++ ++ return nvme_qpair_submit_request(qpair, req); ++} +diff --git a/lib/nvme/nvme_ns_self.c b/lib/nvme/nvme_ns_self.c +new file mode 100644 +index 0000000..5aabbaa +--- /dev/null ++++ b/lib/nvme/nvme_ns_self.c +@@ -0,0 +1,93 @@ ++/* ++ * Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 and ++ * only version 2 as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ */ ++#include "nvme_internal.h" ++ ++bool spdk_nvme_ns_pi_md_start(struct spdk_nvme_ns *ns) ++{ ++ return (ns->flags & SPDK_NVME_NS_DPS_PI_MDSTART) ? true : false; ++} ++ ++bool spdk_nvme_ns_is_dataset_mng_supported(struct spdk_nvme_ns *ns) ++{ ++ return (ns->flags & SPDK_NVME_NS_DEALLOCATE_SUPPORTED) ? true : false; ++} ++ ++int nvme_ns_get_common_data(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns_data *nsdata) ++{ ++ if (ctrlr == NULL || nsdata == NULL) { ++ SPDK_ERRLOG("Parameter error\n"); ++ return -1; ++ } ++ ++ struct nvme_completion_poll_status status; ++ int rc = 0; ++ status.done = false; ++ ++ if (spdk_nvme_ctrlr_is_ns_manage_supported(ctrlr)) { ++ rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_NS, 0, SPDK_NVME_GLOBAL_NS_TAG, 0, ++ nsdata, sizeof(*nsdata), nvme_completion_poll_cb, &status); ++ } ++ else { ++ rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_NS, 0, 1, 0, ++ nsdata, sizeof(*nsdata), nvme_completion_poll_cb, &status); ++ } ++ if (rc != 0) { ++ return rc; ++ } ++ ++ if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) { ++ SPDK_ERRLOG("Failed to identify nsdata, sct[%x], sc[%x]\n", status.cpl.status.sct, status.cpl.status.sc); ++ return -1; ++ } ++ ++ return 0; ++} ++ ++bool spdk_nvme_ns_is_allocated(struct spdk_nvme_ctrlr *ctrlr, uint16_t nsid) ++{ ++ struct spdk_nvme_ns_data nsdata; ++ struct nvme_completion_poll_status status; ++ int rc = 0; ++ ++ if (ctrlr == NULL || nsid == 0) { ++ SPDK_ERRLOG("Parameter error. ns[%u]\n", nsid); ++ return false; ++ } ++ ++ if (!spdk_nvme_ctrlr_is_ns_manage_supported(ctrlr)) { ++ return true; ++ } ++ ++ nsdata.ncap = 0; ++ status.done = false; ++ rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_NS_ALLOCATED, 0, nsid, 0, ++ &nsdata, sizeof(nsdata), nvme_completion_poll_cb, &status); ++ if (rc != 0) { ++ SPDK_ERRLOG("Failed to identify ns[%u]'s nsdata. 
rc[%d]\n", nsid, rc); ++ return false; ++ } ++ ++ if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) { ++ SPDK_ERRLOG("Failed to identify ns[%u]'s nsdata, sct[%x], sc[%x]\n", ++ nsid, status.cpl.status.sct, status.cpl.status.sc); ++ return false; ++ } ++ ++ return (nsdata.ncap != 0); ++} ++ ++bool ++spdk_nvme_ns_ctrl_is_failed(struct spdk_nvme_ns *ns) ++{ ++ return ns->ctrlr->is_failed; ++} +diff --git a/lib/nvme/nvme_pcie.c b/lib/nvme/nvme_pcie.c +index 27fc6bb..08fe344 100644 +--- a/lib/nvme/nvme_pcie.c ++++ b/lib/nvme/nvme_pcie.c +@@ -40,6 +40,7 @@ + #include "spdk/env.h" + #include "spdk/likely.h" + #include "spdk/string.h" ++#include "spdk/bdev_module.h" + #include "nvme_internal.h" + #include "nvme_pcie_internal.h" + #include "nvme_uevent.h" +@@ -97,6 +98,7 @@ nvme_pcie_ctrlr_setup_signal(void) + sigaction(SIGBUS, &sa, NULL); + } + ++#ifndef SPDK_CONFIG_APP_RW + static int + _nvme_pcie_hotplug_monitor(struct spdk_nvme_probe_ctx *probe_ctx) + { +@@ -176,6 +178,7 @@ _nvme_pcie_hotplug_monitor(struct spdk_nvme_probe_ctx *probe_ctx) + } + return 0; + } ++#endif + + static volatile void * + nvme_pcie_reg_addr(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset) +@@ -575,10 +578,12 @@ nvme_pcie_ctrlr_scan(struct spdk_nvme_probe_ctx *probe_ctx, + enum_ctx.has_pci_addr = true; + } + ++#ifndef SPDK_CONFIG_APP_RW + /* Only the primary process can monitor hotplug. 
*/ + if (spdk_process_is_primary()) { + _nvme_pcie_hotplug_monitor(probe_ctx); + } ++#endif + + if (enum_ctx.has_pci_addr == false) { + return spdk_pci_enumerate(spdk_pci_nvme_get_driver(), +@@ -1175,6 +1180,13 @@ static build_req_fn const g_nvme_pcie_build_req_table[][2] = { + [NVME_PAYLOAD_TYPE_SGL] = { + nvme_pcie_qpair_build_prps_sgl_request, /* PRP */ + nvme_pcie_qpair_build_hw_sgl_request /* SGL */ ++#ifdef SPDK_CONFIG_APP_RW ++ }, ++ /* the NVME_PAYLOAD_TYPE_SGL_PRP type only ues prps func */ ++ [NVME_PAYLOAD_TYPE_SGL_PRP] = { ++ nvme_pcie_qpair_build_prps_sgl_request, ++ nvme_pcie_qpair_build_prps_sgl_request ++#endif + } + }; + +@@ -1238,6 +1250,11 @@ nvme_pcie_qpair_submit_request(struct spdk_nvme_qpair *qpair, struct nvme_reques + if (tr == NULL) { + /* Inform the upper layer to try again later. */ + rc = -EAGAIN; ++#ifdef SPDK_CONFIG_APP_RW ++ if (nvme_qpair_is_io_queue(qpair)) { ++ spdk_bdev_set_io_location(req->user_cb_arg, (uint8_t)LOCAL_LIBSTORAGE_HUNG_REQ); ++ } ++#endif + goto exit; + } + +@@ -1250,11 +1267,18 @@ nvme_pcie_qpair_submit_request(struct spdk_nvme_qpair *qpair, struct nvme_reques + + if (req->payload_size != 0) { + payload_type = nvme_payload_type(&req->payload); ++#ifdef SPDK_CONFIG_APP_RW ++ if (payload_type == NVME_PAYLOAD_TYPE_SGL) { ++ if (req->payload.payload_type == NVME_PAYLOAD_TYPE_SGL_PRP) { ++ payload_type = NVME_PAYLOAD_TYPE_SGL_PRP; ++ } ++ } ++#endif + /* According to the specification, PRPs shall be used for all + * Admin commands for NVMe over PCIe implementations. 
+ */ + sgl_supported = (ctrlr->flags & SPDK_NVME_CTRLR_SGL_SUPPORTED) != 0 && +- !nvme_qpair_is_admin_queue(qpair); ++ !nvme_qpair_is_admin_queue(qpair) && (payload_type != NVME_PAYLOAD_TYPE_CONTIG); + + if (sgl_supported) { + /* Don't use SGL for DSM command */ +diff --git a/lib/nvme/nvme_pcie_common.c b/lib/nvme/nvme_pcie_common.c +index 0ef56cb..b0b14f6 100644 +--- a/lib/nvme/nvme_pcie_common.c ++++ b/lib/nvme/nvme_pcie_common.c +@@ -597,6 +597,12 @@ nvme_pcie_qpair_submit_tracker(struct spdk_nvme_qpair *qpair, struct nvme_tracke + if (!pqpair->flags.delay_cmd_submit) { + nvme_pcie_qpair_ring_sq_doorbell(qpair); + } ++ ++#ifdef SPDK_CONFIG_APP_RW ++ if (nvme_qpair_is_io_queue(qpair)) { ++ spdk_bdev_set_io_location(req->user_cb_arg, (uint8_t)LOCAL_LIBSTORAGE_TO_DISK); ++ } ++#endif + } + + void +@@ -668,6 +674,7 @@ nvme_pcie_qpair_abort_trackers(struct spdk_nvme_qpair *qpair, uint32_t dnr) + { + struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); + struct nvme_tracker *tr, *temp, *last; ++ int count = 0; + + last = TAILQ_LAST(&pqpair->outstanding_tr, nvme_outstanding_tr_head); + +@@ -676,6 +683,7 @@ nvme_pcie_qpair_abort_trackers(struct spdk_nvme_qpair *qpair, uint32_t dnr) + if (!qpair->ctrlr->opts.disable_error_logging) { + SPDK_ERRLOG("aborting outstanding command\n"); + } ++ count++; + nvme_pcie_qpair_manual_complete_tracker(qpair, tr, SPDK_NVME_SCT_GENERIC, + SPDK_NVME_SC_ABORTED_BY_REQUEST, dnr, true); + +@@ -683,6 +691,10 @@ nvme_pcie_qpair_abort_trackers(struct spdk_nvme_qpair *qpair, uint32_t dnr) + break; + } + } ++ ++ if (count != 0) { ++ SPDK_ERRLOG("Aborted %u qpair[%p]'s outstanding command\n", count, pqpair); ++ } + } + + void +diff --git a/lib/nvme/nvme_qpair.c b/lib/nvme/nvme_qpair.c +index 1e721df..3aabd63 100644 +--- a/lib/nvme/nvme_qpair.c ++++ b/lib/nvme/nvme_qpair.c +@@ -612,7 +612,8 @@ nvme_qpair_check_enabled(struct spdk_nvme_qpair *qpair) + * from the old transport connection and encourage the application to retry them. 
We also need + * to submit any queued requests that built up while we were in the connected or enabling state. + */ +- if (nvme_qpair_get_state(qpair) == NVME_QPAIR_CONNECTED && !qpair->ctrlr->is_resetting) { ++ if (nvme_qpair_get_state(qpair) == NVME_QPAIR_CONNECTED && !qpair->ctrlr->is_resetting ++ && !qpair->ctrlr->is_removed && !qpair->ctrlr->is_destructed) { + nvme_qpair_set_state(qpair, NVME_QPAIR_ENABLING); + /* + * PCIe is special, for fabrics transports, we can abort requests before disconnect during reset +@@ -856,6 +857,13 @@ _nvme_qpair_submit_request(struct spdk_nvme_qpair *qpair, struct nvme_request *r + rc = nvme_qpair_submit_request(qpair, child_req); + if (spdk_unlikely(rc != 0)) { + child_req_failed = true; ++#ifdef SPDK_CONFIG_APP_RW ++ if (rc == -ENXIO && child_req->num_children == 0) { ++ SPDK_WARNLOG("Warning: child req submit failed.\n"); ++ nvme_request_remove_child(req, child_req); ++ nvme_free_request(child_req); ++ } ++#endif + } + } else { /* free remaining child_reqs since one child_req fails */ + nvme_request_remove_child(req, child_req); +diff --git a/lib/nvme/nvme_rebind.c b/lib/nvme/nvme_rebind.c +new file mode 100644 +index 0000000..5836fa3 +--- /dev/null ++++ b/lib/nvme/nvme_rebind.c +@@ -0,0 +1,262 @@ ++/* ++ * Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 and ++ * only version 2 as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "spdk/log.h" ++#include "spdk/nvme.h" ++ ++#define PATH_LEN 4096 ++#define ID_LEN 16 ++ ++// nvme that fails to bind uio ++struct failed_nvme { ++ char *pci_addr; ++ TAILQ_ENTRY(failed_nvme) tailq; ++}; ++ ++/** ++ * failed nvmes list, failed nvme will send a "nvme add uevent" when we bind it back to nvme driver ++ * in spdk_rebind_driver, we should ignore this event or we wouldn't stop binding this nvme to uio. ++ */ ++static TAILQ_HEAD(failed_nvme_list, failed_nvme) g_failed_nvmes = TAILQ_HEAD_INITIALIZER(g_failed_nvmes); ++ ++// get vendor id from /sys/bus/pci/devices/pci_addr/vendor ++// get device id from /sys/bus/pci/devices/pci_addr/device ++static int32_t get_id_from_sysfs(const char *pci_addr, const char *id_type, char *ret_id, uint8_t ret_id_len) ++{ ++ int32_t fd = -1; ++ char sysfs_path[PATH_LEN]; ++ char tmp_id[ID_LEN] = {0}; ++ char *tmp = NULL; ++ ++ // id's length is 5 byte,like XXXX'\0' ++ if (ret_id_len < 5) { ++ SPDK_ERRLOG("ret_id_len is less than 5 bytes\n"); ++ return -1; ++ } ++ ++ // construct path in sysfs which stores id ++ if (snprintf_s(sysfs_path, PATH_LEN, PATH_LEN - 1, "/sys/bus/pci/devices/%s/%s", pci_addr, id_type) > 0) { ++ fd = open(sysfs_path, O_RDONLY); ++ } ++ if (fd < 0) { ++ SPDK_ERRLOG("fail to open %s, errno(%d): %s\n", sysfs_path, errno, strerror(errno)); ++ return -1; ++ } ++ ++ // id in sysfs is like 0xDDDD ++ if (read(fd, tmp_id, ID_LEN - 1) <= 0) { ++ SPDK_ERRLOG("fail to read id from %s, errno(%d): %s\n", sysfs_path, errno, strerror(errno)); ++ close(fd); ++ return -1; ++ } ++ ++ // 2 means skipping prefix "0x" of id read from sysfs ++ tmp = tmp_id + 2; ++ // 4 means the value of id read from sysfs, not including prefix "0x" ++ if (snprintf_s(ret_id, ret_id_len, 4, "%s", tmp) <= 0) { ++ SPDK_ERRLOG("string copy failed\n"); ++ } ++ ++ close(fd); ++ return 0; ++} ++ ++// get ven_dev_id which combines vendor id and 
device id ++static int32_t get_ven_dev_id(const char *pci_addr, char *ven_dev_id, uint8_t ven_dev_id_len) ++{ ++ char ven_id[ID_LEN], dev_id[ID_LEN]; ++ ++ // ven_dev_id combines with vendor id and device id,like "DDDD XXXX'\0'",length is 10 bytes ++ if (ven_dev_id_len < 10) { ++ SPDK_ERRLOG("ven_dev_id_len is less than 10 bytes\n"); ++ return -1; ++ } ++ ++ // get vendor id from sysfs,format is like "DDDD" ++ if (get_id_from_sysfs(pci_addr, "vendor", ven_id, ID_LEN) < 0) { ++ SPDK_ERRLOG("fail to get vendor id\n"); ++ return -1; ++ } ++ ++ // get device id from sysfs,format is like "XXXX" ++ if (get_id_from_sysfs(pci_addr, "device", dev_id, ID_LEN) < 0) { ++ SPDK_ERRLOG("fail to get device id\n"); ++ return -1; ++ } ++ ++ if (snprintf_s(ven_dev_id, ven_dev_id_len, ven_dev_id_len - 1, "%s %s", ven_id, dev_id) <= 0) { ++ SPDK_ERRLOG("string copy failed\n"); ++ return -1; ++ } ++ return 0; ++} ++ ++// unbind driver by writing remove_id and unbind files in sysfs ++static int32_t unbind_driver(char *pci_addr, const char *ven_dev_id) ++{ ++ char sysfs_dev_remove_id[PATH_LEN]; // remove_id file path in sysfs ++ char sysfs_dev_unbind[PATH_LEN]; // unbind file path in sysfs ++ int32_t remove_id_fd = -1; // file description of remove_id file ++ int32_t unbind_fd = -1; // file description of unbind file ++ int32_t ret; ++ ++ ret = snprintf_s(sysfs_dev_remove_id, PATH_LEN, PATH_LEN - 1, ++ "/sys/bus/pci/devices/%s/driver/remove_id", pci_addr); ++ if (ret <= 0) { ++ SPDK_ERRLOG("copy dev id failed\n"); ++ return -1; ++ } ++ ret = snprintf_s(sysfs_dev_unbind, PATH_LEN, PATH_LEN - 1, ++ "/sys/bus/pci/devices/%s/driver/unbind", pci_addr); ++ if (ret <= 0) { ++ SPDK_ERRLOG("copy dev unbind failed\n"); ++ return -1; ++ } ++ ++ remove_id_fd = open(sysfs_dev_remove_id, O_WRONLY); ++ if (remove_id_fd < 0) { ++ SPDK_ERRLOG("fail to open %s, errno(%d): %s\n", sysfs_dev_remove_id, errno, strerror(errno)); ++ return -1; ++ } ++ ++ (void)write(remove_id_fd, ven_dev_id, strlen(ven_dev_id) + 
1); ++ close(remove_id_fd); ++ ++ // unbind driver by wrting unbind file ++ unbind_fd = open(sysfs_dev_unbind, O_WRONLY); ++ if (unbind_fd < 0) { ++ SPDK_ERRLOG("fail to open %s, errno(%d): %s\n", sysfs_dev_unbind, errno, strerror(errno)); ++ return -1; ++ } ++ ++ ret = write(unbind_fd, pci_addr, strlen(pci_addr) + 1); ++ if (ret < 0) { ++ SPDK_ERRLOG("write %s to %s fail, errno(%d): %s\n",pci_addr, sysfs_dev_unbind, errno, strerror(errno)); ++ close(unbind_fd); ++ return -1; ++ } ++ ++ close(unbind_fd); ++ ++ return 0; ++} ++ ++// bind device to new driver by writing new_id and bind files in sysfs ++static int32_t bind_driver(const char *pci_addr, const char *ven_dev_id, const char *driver_name) ++{ ++ char sysfs_driver_new_id[PATH_LEN]; // new_id file path in sysfs ++ char sysfs_driver_bind[PATH_LEN]; // bind file path in sysfs ++ int32_t new_id_fd = -1; // file description of new_id file ++ int32_t bind_fd = -1; // file descriptoin of bind file ++ int rc; ++ ++ rc = snprintf_s(sysfs_driver_new_id, PATH_LEN, PATH_LEN - 1, "/sys/bus/pci/drivers/%s/new_id", driver_name); ++ if (rc > 0) { ++ rc = snprintf_s(sysfs_driver_bind, PATH_LEN, PATH_LEN - 1, "/sys/bus/pci/drivers/%s/bind", driver_name); ++ } ++ if (rc <= 0) { ++ SPDK_ERRLOG("string copy failed\n"); ++ return -1; ++ } ++ ++ // try to bind driver by write ven_dev_id to new_id file ++ new_id_fd = open(sysfs_driver_new_id, O_WRONLY); ++ if (new_id_fd < 0) { ++ SPDK_ERRLOG("fail to open %s, errno(%d): %s\n", sysfs_driver_new_id, errno, strerror(errno)); ++ return -1; ++ } ++ ++ (void)write(new_id_fd, ven_dev_id, strlen(ven_dev_id) + 1); ++ close(new_id_fd); ++ ++ // bind driver by writing pci_addr to bind file if writing new_id file failed ++ bind_fd = open(sysfs_driver_bind, O_WRONLY); ++ if (bind_fd < 0) { ++ SPDK_ERRLOG("fail to open %s, errno(%d): %s\n", sysfs_driver_bind, errno, strerror(errno)); ++ return -1; ++ } ++ ++ (void)write(bind_fd, pci_addr, strlen(pci_addr) + 1); ++ close(bind_fd); ++ return 0; 
++} ++ ++int32_t spdk_rebind_driver(char *pci_addr, char *driver_name) ++{ ++ char ven_dev_id[ID_LEN] = {0}; ++ struct failed_nvme *iter = NULL; ++ unsigned int sleep_time = 1000; ++ ++ if (pci_addr == NULL || driver_name == NULL) { ++ SPDK_ERRLOG("pci address and driver_name can't be NULL to rebind driver\n"); ++ return -1; ++ } ++ ++ // ignore event from binding pci back to nvme driver ++ TAILQ_FOREACH(iter, &g_failed_nvmes, tailq) { ++ if (strncmp(iter->pci_addr, pci_addr, strlen(iter->pci_addr)) == 0) { ++ // oncely ignore nvme add event from binding back to nvme,so do rebind when next hotplug of this pci happen ++ TAILQ_REMOVE(&g_failed_nvmes, iter, tailq); ++ free(iter->pci_addr); ++ free(iter); ++ SPDK_WARNLOG("ignore failed nvme %s\n", pci_addr); ++ return 0; ++ } ++ } ++ ++ if (get_ven_dev_id(pci_addr, ven_dev_id, ID_LEN) < 0) { ++ SPDK_ERRLOG("failed to get ven_dev_id\n"); ++ return -1; ++ } ++ ++ while (unbind_driver(pci_addr, ven_dev_id) < 0) { ++ usleep(sleep_time); ++ sleep_time = sleep_time * 2; ++ if (sleep_time > 1000000) { ++ SPDK_ERRLOG("failed to unbind driver of %s\n", pci_addr); ++ return -1; ++ } ++ } ++ ++ if (bind_driver(pci_addr, ven_dev_id, driver_name) < 0) { ++ // retry ++ if (bind_driver(pci_addr, ven_dev_id, driver_name) < 0) { ++ SPDK_ERRLOG("fail to bind %s to %s\n", pci_addr, driver_name); ++ // add fialed nvme to g_failed_nvmes ++ struct failed_nvme *failed_nvme = (struct failed_nvme *)malloc(sizeof(struct failed_nvme)); ++ if (failed_nvme == NULL) { ++ SPDK_ERRLOG("failed to malloc for failed_nvme,can't bind %s back to nvme\n", pci_addr); ++ return -1; ++ } ++ failed_nvme->pci_addr = strdup(pci_addr); ++ if (failed_nvme->pci_addr == NULL) { ++ SPDK_ERRLOG("failed to malloc for failed_nvme,can't bind %s back to nvme\n", pci_addr); ++ free(failed_nvme); ++ return -1; ++ } ++ TAILQ_INSERT_TAIL(&g_failed_nvmes, failed_nvme, tailq); ++ ++ // bind device back to nvme driver if failed to bind uio ++ bind_driver(pci_addr, ven_dev_id, 
"nvme"); ++ } ++ } ++ return 0; ++} +diff --git a/lib/nvme/nvme_uevent.c b/lib/nvme/nvme_uevent.c +index b413ceb..1bef985 100644 +--- a/lib/nvme/nvme_uevent.c ++++ b/lib/nvme/nvme_uevent.c +@@ -45,6 +45,8 @@ + #define SPDK_UEVENT_MSG_LEN 4096 + #define SPDK_UEVENT_RECVBUF_SIZE 1024 * 1024 + ++static int parse_event(const char *buf, struct spdk_uevent *event); ++ + int + nvme_uevent_connect(void) + { +@@ -152,7 +154,9 @@ parse_event(const char *buf, struct spdk_uevent *event) + return -1; + } + spdk_pci_addr_fmt(event->traddr, sizeof(event->traddr), &pci_addr); +- } else if (!strncmp(driver, "vfio-pci", 8)) { ++ return 1; ++ } ++ if (!strncmp(driver, "vfio-pci", 8)) { + struct spdk_pci_addr pci_addr; + + event->subsystem = SPDK_NVME_UEVENT_SUBSYSTEM_VFIO; +diff --git a/lib/nvme/nvme_uevent.h b/lib/nvme/nvme_uevent.h +index 94f6710..1921801 100644 +--- a/lib/nvme/nvme_uevent.h ++++ b/lib/nvme/nvme_uevent.h +@@ -41,6 +41,7 @@ + #ifndef SPDK_UEVENT_H_ + #define SPDK_UEVENT_H_ + ++#ifndef SPDK_CONFIG_APP_RW + #define SPDK_NVME_UEVENT_SUBSYSTEM_UNRECOGNIZED 0 + #define SPDK_NVME_UEVENT_SUBSYSTEM_UIO 1 + #define SPDK_NVME_UEVENT_SUBSYSTEM_VFIO 2 +@@ -58,5 +59,6 @@ struct spdk_uevent { + + int nvme_uevent_connect(void); + int nvme_get_uevent(int fd, struct spdk_uevent *uevent); ++#endif + + #endif /* SPDK_UEVENT_H_ */ +-- +2.33.0 + diff --git a/0021-module-bdev-Add-bdev-module-support-for-HSAK.patch b/0021-module-bdev-Add-bdev-module-support-for-HSAK.patch new file mode 100644 index 0000000..18dcc02 --- /dev/null +++ b/0021-module-bdev-Add-bdev-module-support-for-HSAK.patch @@ -0,0 +1,2729 @@ +From ddfaaf6e01ba245d1941a955e61488a730dce09a Mon Sep 17 00:00:00 2001 +From: sunshihao +Date: Thu, 25 Feb 2021 18:21:53 +0800 +Subject: [PATCH 21/27] module/bdev: Add bdev module support for HSAK + +Signed-off-by: sunshihao +--- + app/spdk_lspci/Makefile | 2 +- + examples/accel/perf/Makefile | 2 +- + examples/interrupt_tgt/Makefile | 2 +- + examples/sock/hello_world/Makefile | 2 +- 
+ include/spdk/bdev.h | 20 +- + include/spdk/bdev_module.h | 28 +- + include/spdk/nvme.h | 72 +--- + lib/bdev/bdev.c | 10 +- + lib/nvme/nvme_ctrlr_self.c | 75 ++-- + lib/nvme/nvme_ns.c | 5 - + lib/nvme/nvme_ns_cmd.c | 1 + + lib/nvme/nvme_ns_self.c | 11 +- + lib/nvme/nvme_pcie.c | 5 +- + lib/nvme/nvme_pcie_common.c | 1 + + lib/nvme/nvme_rebind.c | 86 ++-- + lib/rpc/rpc.c | 2 + + lib/thread/thread.c | 38 +- + mk/nvme.libtest.mk | 2 +- + mk/spdk.common.mk | 5 +- + mk/spdk.modules.mk | 2 +- + module/bdev/nvme/Makefile | 1 + + module/bdev/nvme/bdev_nvme.c | 285 ++++++++++++- + module/bdev/nvme/bdev_nvme.h | 42 ++ + module/bdev/nvme/bdev_nvme_self.c | 661 +++++++++++++++++++++++++++++ + module/bdev/nvme/bdev_nvme_self.h | 43 ++ + module/bdev/nvme/common.h | 4 + + scripts/setup_self.sh | 347 +++++++++++++++ + 27 files changed, 1538 insertions(+), 216 deletions(-) + create mode 100644 module/bdev/nvme/bdev_nvme_self.c + create mode 100644 module/bdev/nvme/bdev_nvme_self.h + create mode 100755 scripts/setup_self.sh + +diff --git a/app/spdk_lspci/Makefile b/app/spdk_lspci/Makefile +index 5efb95f..c4f11be 100644 +--- a/app/spdk_lspci/Makefile ++++ b/app/spdk_lspci/Makefile +@@ -39,6 +39,6 @@ APP = spdk_lspci + + C_SRCS := spdk_lspci.c + +-SPDK_LIB_LIST = $(SOCK_MODULES_LIST) nvme vmd ++SPDK_LIB_LIST = $(SOCK_MODULES_LIST) nvme vmd trace + + include $(SPDK_ROOT_DIR)/mk/spdk.app.mk +diff --git a/examples/accel/perf/Makefile b/examples/accel/perf/Makefile +index 53b9ae6..555ccf0 100644 +--- a/examples/accel/perf/Makefile ++++ b/examples/accel/perf/Makefile +@@ -39,6 +39,6 @@ APP = accel_perf + + C_SRCS := accel_perf.c + +-SPDK_LIB_LIST = $(ACCEL_MODULES_LIST) event_accel ++SPDK_LIB_LIST = $(ACCEL_MODULES_LIST) event_accel conf + + include $(SPDK_ROOT_DIR)/mk/spdk.app.mk +diff --git a/examples/interrupt_tgt/Makefile b/examples/interrupt_tgt/Makefile +index 90a2b8a..c27a2c7 100644 +--- a/examples/interrupt_tgt/Makefile ++++ b/examples/interrupt_tgt/Makefile +@@ -41,7 +41,7 @@ 
C_SRCS := interrupt_tgt.c + + SPDK_LIB_LIST = $(INTR_BLOCKDEV_MODULES_LIST) event_bdev conf + +-SPDK_LIB_LIST += event_nbd ++SPDK_LIB_LIST += event_nbd bdev_nvme + SPDK_LIB_LIST += event_vhost + + ifeq ($(SPDK_ROOT_DIR)/lib/env_dpdk,$(CONFIG_ENV)) +diff --git a/examples/sock/hello_world/Makefile b/examples/sock/hello_world/Makefile +index f86df44..ea5d552 100644 +--- a/examples/sock/hello_world/Makefile ++++ b/examples/sock/hello_world/Makefile +@@ -38,6 +38,6 @@ APP = hello_sock + C_SRCS := hello_sock.c + + SPDK_LIB_LIST = $(SOCK_MODULES_LIST) +-SPDK_LIB_LIST += event_net sock ++SPDK_LIB_LIST += event_net sock conf + + include $(SPDK_ROOT_DIR)/mk/spdk.app.mk +diff --git a/include/spdk/bdev.h b/include/spdk/bdev.h +index 22b87ec..d0284d9 100644 +--- a/include/spdk/bdev.h ++++ b/include/spdk/bdev.h +@@ -119,7 +119,6 @@ enum spdk_bdev_status { + }; + + #ifdef SPDK_CONFIG_APP_RW +-/** ns status */ + enum spdk_bdev_ns_status { + SPDK_BDEV_NS_STATUS_INVALID, + SPDK_BDEV_NS_STATUS_READY, +@@ -128,7 +127,6 @@ enum spdk_bdev_ns_status { + }; + + typedef void (*LIBSTORAGE_CALLBACK_FUNC)(int32_t cb_status, int32_t sct_code, void *cb_arg); +- + typedef struct libstorage_io { + uint8_t *buf; + struct iovec *iovs; /* array of iovecs to transfer */ +@@ -1411,19 +1409,13 @@ int spdk_bdev_unmap(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + * be called (even if the request ultimately failed). Return + * negated errno on failure, in which case the callback will not be called. 
+ */ +-int +-spdk_bdev_unmap_multiblocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, +- void *unmap_d, uint16_t unmap_count, +- spdk_bdev_io_completion_cb cb, void *cb_arg); +- +-void * +-spdk_bdev_get_channel_group(struct spdk_io_channel *io_ch); +- +-void * +-spdk_bdev_io_get_pool(size_t nbytes); ++int spdk_bdev_unmap_multiblocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, ++ void *unmap_d, uint16_t unmap_count, ++ spdk_bdev_io_completion_cb cb, void *cb_arg); + +-bool +-spdk_bdev_have_io_in_channel(struct spdk_io_channel *bdevIoCh); ++void *spdk_bdev_get_channel_group(struct spdk_io_channel *io_ch); ++void *spdk_bdev_io_get_pool(size_t nbytes); ++bool spdk_bdev_have_io_in_channel(struct spdk_io_channel *bdevIoCh); + #endif + + /** +diff --git a/include/spdk/bdev_module.h b/include/spdk/bdev_module.h +index 3ff7e28..55dc980 100644 +--- a/include/spdk/bdev_module.h ++++ b/include/spdk/bdev_module.h +@@ -225,21 +225,12 @@ struct spdk_bdev_fn_table { + + #ifdef SPDK_CONFIG_APP_RW + uint16_t (*get_io_channel_id)(struct spdk_io_channel *ch); +- + int (*bdev_poll_rsp)(void *pollCh); +- + uint64_t (*get_timeout_count)(struct spdk_io_channel *ch); + #endif + }; + + #ifdef SPDK_CONFIG_APP_RW +-static inline void spdk_bdev_set_io_location(void *bdev_ctx, uint8_t location) +-{ +- struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bdev_ctx); +- uint8_t *ioLoc = (uint8_t *)bdev_io->internal.caller_ctx; +- *ioLoc = location; +-} +- + enum spdk_bdev_driver_ctx { + SPDK_BDEV_IO_ACTION_PI, + SPDK_BDEV_IO_ACTION_FUA, +@@ -262,12 +253,6 @@ enum spdk_bdev_io_fua { + IO_FUA_YES = 1 + }; + +-void spdk_bdev_nvme_remove_cb(void *cb_ctx, void *ctrlr); +- +-void spdk_bdev_fail_ctrlr(const char *traddr); +- +-void *nvme_channel_get_group(void *io_ch); +- + enum reqLocation_E { + LOCAL_RECEIVE_APP = 1, + LOCAL_LIBSTORAGE_SUBMIT = 2, +@@ -280,6 +265,10 @@ enum reqLocation_E { + LOCAL_LIBSTORAGE_SUBMIT_RETRY = 9, + LOCAL_LIBSTORAGE_BDEV_NOMEM = 10, + }; ++ ++void 
spdk_bdev_nvme_remove_cb(void *cb_ctx, void *ctrlr); ++void spdk_bdev_fail_ctrlr(const char *traddr); ++void *nvme_channel_get_group(void *io_ch); + #endif + + /** bdev I/O completion status */ +@@ -1351,6 +1340,15 @@ int spdk_bdev_push_media_events(struct spdk_bdev *bdev, const struct spdk_bdev_m + */ + void spdk_bdev_notify_media_management(struct spdk_bdev *bdev); + ++#ifdef SPDK_CONFIG_APP_RW ++static inline void spdk_bdev_set_io_location(void *bdev_ctx, uint8_t location) ++{ ++ struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bdev_ctx); ++ uint8_t *ioLoc = (uint8_t *)bdev_io->internal.caller_ctx; ++ *ioLoc = location; ++} ++#endif ++ + /* + * Macro used to register module for later initialization. + */ +diff --git a/include/spdk/nvme.h b/include/spdk/nvme.h +index adda642..6393db3 100644 +--- a/include/spdk/nvme.h ++++ b/include/spdk/nvme.h +@@ -3436,6 +3436,7 @@ struct spdk_nvme_transport_ops { + void spdk_nvme_transport_register(const struct spdk_nvme_transport_ops *ops); + + #ifdef SPDK_CONFIG_APP_RW ++#define NVME_MAX_CONTROLLERS 1024 + struct nvme_ctrlr_info { + char ctrlName[16]; + char pciAddr[24]; +@@ -3454,10 +3455,10 @@ struct nvme_ctrlr_info { + uint16_t ssvid; /* Subsystem vendor id */ + uint16_t ctrlid; /* Controller id */ + uint16_t trtype; /* Transport type */ +- uint16_t support_ns : 1; /* Supports the Namespace Management and Namespace Attachment commands */ +- uint16_t directives : 1; /* Supports Directives */ +- uint16_t streams : 1; /* Supports Streams Directives */ +- uint16_t dsm : 1; /* Supports the controller supports the Dataset Management command */ ++ uint16_t support_ns : 1; /* Supports the Namespace Management and Namespace Attachment commands */ ++ uint16_t directives : 1; /* Supports Directives */ ++ uint16_t streams : 1; /* Supports Streams Directives */ ++ uint16_t dsm : 1; /* Supports the controller supports the Dataset Management command */ + uint16_t reserved : 12; + uint16_t reserved2[3]; + }; +@@ -3494,7 +3495,7 @@ 
bool spdk_nvme_ctrlr_is_format_supported(struct spdk_nvme_ctrlr *ctrlr); + bool spdk_nvme_ctrlr_is_format_all_ns(struct spdk_nvme_ctrlr *ctrlr); + bool spdk_nvme_ctrlr_is_directive_supported(struct spdk_nvme_ctrlr *ctrlr); + bool spdk_nvme_ctrlr_is_streams_supported(struct spdk_nvme_ctrlr *ctrlr); +-int32_t spdk_nvme_ctrlr_identify_directives(struct spdk_nvme_ctrlr *ctrlr, uint16_t nsid, ++int32_t spdk_nvme_ctrlr_identify_directives(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, + void *payload); + int32_t spdk_nvme_ctrlr_enable_streams(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid); + int32_t spdk_nvme_ctrlr_ret_streams_param(struct spdk_nvme_ctrlr *ctrlr, void *payload); +@@ -3540,38 +3541,13 @@ struct spdk_uevent { + char traddr[SPDK_NVMF_TRADDR_MAX_LEN + 1]; + }; + +-/* make a socket to get uevent */ + int nvme_uevent_connect(void); +- +-/* get uevent from socket fd */ + int nvme_get_uevent(int fd, struct spdk_uevent *uevent); +- +-/* blocked to get uevent from socket fd */ + int nvme_get_uevent_block(int fd, struct spdk_uevent *uevent); +- +-/** +- * @Description: bind device with pci_addr to driver +- * @param pci_addr: device's pci_addr,like "0000:08:00.0" +- * @param driver: driver name which device bind to +- */ + int32_t spdk_rebind_driver(char *pci_addr, char *driver_name); +- +-/** +- * \brief True if the protection information transferred at the start of metadata +- * when end-to-end data protection enabled. +- * +- * This function is thread safe and can be called at any point while the controller is attached to +- * the SPDK NVMe driver. +- */ + bool spdk_nvme_ns_pi_md_start(struct spdk_nvme_ns *ns); +- +-/** +- * \brief True if the namespace supports Dataset Management command. +- * +- * This function is thread safe and can be called at any point while the controller is attached to +- * the SPDK NVMe driver. 
+- */ + bool spdk_nvme_ns_is_dataset_mng_supported(struct spdk_nvme_ns *ns); ++uint16_t spdk_nvme_get_qpair_id(struct spdk_nvme_qpair *qpair); + + /** + * Submit a data set management request to the specified NVMe namespace. Data set +@@ -3632,40 +3608,6 @@ int spdk_nvme_ns_cmd_writev_stream(struct spdk_nvme_ns *ns, struct spdk_nvme_qpa + spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags, + spdk_nvme_req_reset_sgl_cb reset_sgl_fn, + spdk_nvme_req_next_sge_cb next_sge_fn); +- +-/** +- * \brief Send comman to NVMe controller to start or abort a self-test operation. +- * +- * \param ctrlr NVMe controller to operate self-test command. +- * \param nsid Depending on the log page, this may be 0, a namespace identifier, or SPDK_NVME_GLOBAL_NS_TAG. +- * \param stc self-test code, which specifies the action taken by the Device Self-test command. +- * \param payload The pointer to the payload buffer. it doesn't work actually. +- * \param payload_size The size of payload buffer. it doesn't work actually. +- * \param cb_fn Callback function to invoke when the feature has been retrieved. +- * \param cb_arg Argument to pass to the callback function. +- * +- * \return 0 if successfully submitted, ENOMEM if resources could not be allocated for this request +- * +- * This function is thread safe and can be called at any point while the controller is attached to +- * the SPDK NVMe driver. +- * +- * Call \ref spdk_nvme_ctrlr_process_admin_completions() to poll for completion +- * of commands submitted through this function. 
+- * +- * \sa spdk_nvme_ctrlr_cmd_self_test_operation() +- */ +-int spdk_nvme_ctrlr_cmd_self_test_operation(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, +- uint32_t stc, +- void *payload, uint32_t payload_size, +- spdk_nvme_cmd_cb cb_fn, void *cb_arg); +- +-/** +- *\get I/O queue pair id +- *\param qpair I/O queue pair to submit the request +- *\ +- *\return I/O queue pair id +- */ +-uint16_t spdk_nvme_get_qpair_id(struct spdk_nvme_qpair *qpair); + #endif + + /* +diff --git a/lib/bdev/bdev.c b/lib/bdev/bdev.c +index bf102bb..1d8ce99 100644 +--- a/lib/bdev/bdev.c ++++ b/lib/bdev/bdev.c +@@ -3568,6 +3568,7 @@ _bdev_io_check_md_buf(const struct iovec *iovs, const void *md_buf) + return _is_buf_allocated(iovs) == (md_buf != NULL); + } + ++#ifdef SPDK_CONFIG_APP_RW + static void + bdev_build_contig_io(uint8_t type, void *buf, void *md_buf, uint64_t offset_blocks, + uint64_t num_blocks, +@@ -3587,6 +3588,7 @@ bdev_build_contig_io(uint8_t type, void *buf, void *md_buf, uint64_t offset_bloc + bdev_io->driver_ctx[SPDK_BDEV_IO_STREAM_ID_1] = (io->streamId >> 8) & 0xFF; + } + } ++#endif + + static int + bdev_read_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, void *buf, +@@ -3664,11 +3666,11 @@ spdk_bdev_read_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channe + if (!spdk_bdev_is_md_separate(spdk_bdev_desc_get_bdev(desc))) { + return -EINVAL; + } +- ++#endif + if (!_bdev_io_check_md_buf(&iov, md_buf)) { + return -EINVAL; + } +-#endif ++ + return bdev_read_blocks_with_md(desc, ch, buf, md_buf, offset_blocks, num_blocks, + cb, cb_arg); + } +@@ -3841,11 +3843,11 @@ spdk_bdev_write_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_chann + if (!spdk_bdev_is_md_separate(spdk_bdev_desc_get_bdev(desc))) { + return -EINVAL; + } +- ++#endif + if (!_bdev_io_check_md_buf(&iov, md_buf)) { + return -EINVAL; + } +-#endif ++ + return bdev_write_blocks_with_md(desc, ch, buf, md_buf, offset_blocks, num_blocks, + cb, cb_arg); + } +diff --git 
a/lib/nvme/nvme_ctrlr_self.c b/lib/nvme/nvme_ctrlr_self.c +index d3937d9..4ac1925 100644 +--- a/lib/nvme/nvme_ctrlr_self.c ++++ b/lib/nvme/nvme_ctrlr_self.c +@@ -14,18 +14,16 @@ + #include "spdk/stdinc.h" + #include "nvme_internal.h" + +-void +-spdk_nvme_ctrlr_set_shutdown(struct spdk_nvme_ctrlr *ctrlr, bool is_shutdown) ++void spdk_nvme_ctrlr_set_shutdown(struct spdk_nvme_ctrlr *ctrlr, bool is_shutdown) + { + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); +- ctrlr-> is_destructed= is_shutdown; ++ ctrlr->is_destructed = is_shutdown; + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + } + +-bool +-spdk_nvme_ctrlr_is_smart_per_namespace_supported(struct spdk_nvme_ctrlr *ctrlr) ++bool spdk_nvme_ctrlr_is_smart_per_namespace_supported(struct spdk_nvme_ctrlr *ctrlr) + { +- if(NULL == ctrlr) { ++ if (NULL == ctrlr) { + SPDK_ERRLOG("spdk_nvme_ctrlr_is_smart_per_namespace_supported: Invalid Parameters!\n"); + return false; + } +@@ -33,7 +31,7 @@ spdk_nvme_ctrlr_is_smart_per_namespace_supported(struct spdk_nvme_ctrlr *ctrlr) + /* check Bit 0 of Log Page Attributes(LPA), + to find out whether the controller supports namespace basis or not. 
*/ + +- if(0 == ctrlr->cdata.lpa.ns_smart) { ++ if (0 == ctrlr->cdata.lpa.ns_smart) { + SPDK_NOTICELOG("This controller does not support the SMART information on a per namespace basis.\n"); + return false; + } +@@ -42,14 +40,14 @@ spdk_nvme_ctrlr_is_smart_per_namespace_supported(struct spdk_nvme_ctrlr *ctrlr) + } + + static int nvme_get_log_info(struct spdk_nvme_ctrlr *ctrlr, uint8_t log_page, uint32_t nsid, +- void *payload, uint32_t payload_size) ++ void *payload, uint32_t payload_size) + { + struct nvme_completion_poll_status status = {0x0}; + int ret; + + status.done = false; + ret = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, log_page, nsid, payload, payload_size, 0, +- nvme_completion_poll_cb, &status); ++ nvme_completion_poll_cb, &status); + if (ret) { + return ret; + } +@@ -61,31 +59,30 @@ static int nvme_get_log_info(struct spdk_nvme_ctrlr *ctrlr, uint8_t log_page, ui + } + if (spdk_nvme_cpl_is_error(&status.cpl)) { + SPDK_ERRLOG("spdk_nvme_ctrlr_get_smart_info failed! sc[0x%x], sct[0x%x]\n", +- status.cpl.status.sc, status.cpl.status.sct); ++ status.cpl.status.sc, status.cpl.status.sct); + return -ENXIO; + } + return 0; + } + +-int +-spdk_nvme_ctrlr_get_smart_info(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, +- struct spdk_nvme_health_information_page *smart_info) ++int spdk_nvme_ctrlr_get_smart_info(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, ++ struct spdk_nvme_health_information_page *smart_info) + { + struct spdk_nvme_ns *ns = NULL; + +- if(NULL == ctrlr || NULL == smart_info) { ++ if (NULL == ctrlr || NULL == smart_info) { + SPDK_ERRLOG("Invalid parameters!\n"); + return -EINVAL; + } + + /* if controller does not support namespase basis, then set the nsid to 0xFFFFFFFF, and continue the process. + and if nsid is 0, set the nsid to 0xFFFFFFFF too. 
*/ +- if(!spdk_nvme_ctrlr_is_smart_per_namespace_supported(ctrlr) || 0 == nsid) { ++ if (!spdk_nvme_ctrlr_is_smart_per_namespace_supported(ctrlr) || 0 == nsid) { + nsid = SPDK_NVME_GLOBAL_NS_TAG; + } + + /* nsid should be 0xffffffff or on a per namespace basis. */ +- if(nsid != SPDK_NVME_GLOBAL_NS_TAG) { ++ if (nsid != SPDK_NVME_GLOBAL_NS_TAG) { + ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); + if (NULL == ns) { + SPDK_ERRLOG("Invalid NS %u\n", nsid); +@@ -100,18 +97,17 @@ spdk_nvme_ctrlr_get_smart_info(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, + } + + return nvme_get_log_info(ctrlr, SPDK_NVME_LOG_HEALTH_INFORMATION, nsid, smart_info, +- sizeof(struct spdk_nvme_health_information_page)); ++ sizeof(struct spdk_nvme_health_information_page)); + } + +-int +-spdk_nvme_ctrlr_get_error_info(struct spdk_nvme_ctrlr *ctrlr, uint32_t err_entries, +- struct spdk_nvme_error_information_entry *error_info) ++int spdk_nvme_ctrlr_get_error_info(struct spdk_nvme_ctrlr *ctrlr, uint32_t err_entries, ++ struct spdk_nvme_error_information_entry *error_info) + { + const struct spdk_nvme_ctrlr_data *cdata = NULL; + uint32_t nsid = SPDK_NVME_GLOBAL_NS_TAG; + int ret; + +- if(NULL == ctrlr || NULL == error_info) { ++ if (NULL == ctrlr || NULL == error_info) { + SPDK_ERRLOG("Invalid parameters!\n"); + return -EINVAL; + } +@@ -121,7 +117,7 @@ spdk_nvme_ctrlr_get_error_info(struct spdk_nvme_ctrlr *ctrlr, uint32_t err_entri + if (err_entries > (cdata->elpe + 1u)) { + /* if the parameter is bigger, then cut it into the maximum number supported. 
*/ + SPDK_WARNLOG("Cannot get %d error log entries, the controller only support %d errors.\n", +- err_entries, cdata->elpe + 1); ++ err_entries, cdata->elpe + 1); + err_entries = cdata->elpe + 1; + } + +@@ -134,38 +130,32 @@ spdk_nvme_ctrlr_get_error_info(struct spdk_nvme_ctrlr *ctrlr, uint32_t err_entri + return err_entries; + } + +-struct spdk_nvme_ctrlr_opts * +-spdk_nvme_ctrlr_get_opts(struct spdk_nvme_ctrlr *ctrlr) ++struct spdk_nvme_ctrlr_opts * spdk_nvme_ctrlr_get_opts(struct spdk_nvme_ctrlr *ctrlr) + { + return &ctrlr->opts; + } + +-bool +-spdk_nvme_ctrlr_is_ns_manage_supported(struct spdk_nvme_ctrlr *ctrlr) ++bool spdk_nvme_ctrlr_is_ns_manage_supported(struct spdk_nvme_ctrlr *ctrlr) + { + return ctrlr->cdata.oacs.ns_manage != 0; + } + +-bool +-spdk_nvme_ctrlr_is_format_supported(struct spdk_nvme_ctrlr *ctrlr) ++bool spdk_nvme_ctrlr_is_format_supported(struct spdk_nvme_ctrlr *ctrlr) + { + return ctrlr->cdata.oacs.format != 0; + } + +-bool +-spdk_nvme_ctrlr_is_format_all_ns(struct spdk_nvme_ctrlr *ctrlr) ++bool spdk_nvme_ctrlr_is_format_all_ns(struct spdk_nvme_ctrlr *ctrlr) + { + return ctrlr->cdata.fna.format_all_ns != 0; + } + +-bool +-spdk_nvme_ctrlr_is_directive_supported(struct spdk_nvme_ctrlr *ctrlr) ++bool spdk_nvme_ctrlr_is_directive_supported(struct spdk_nvme_ctrlr *ctrlr) + { + return ctrlr->cdata.oacs.directives != 0; + } + +-void +-spdk_nvme_ctrlr_update_unvmcap(struct spdk_nvme_ctrlr *ctrlr) ++void spdk_nvme_ctrlr_update_unvmcap(struct spdk_nvme_ctrlr *ctrlr) + { + int rc; + struct nvme_completion_poll_status status; +@@ -192,8 +182,7 @@ spdk_nvme_ctrlr_update_unvmcap(struct spdk_nvme_ctrlr *ctrlr) + ctrlr->cdata.unvmcap[1] = cdata.unvmcap[1]; + } + +-int32_t +-spdk_nvme_ctrlr_identify_directives(struct spdk_nvme_ctrlr *ctrlr, uint16_t nsid, void *payload) ++int32_t spdk_nvme_ctrlr_identify_directives(struct spdk_nvme_ctrlr *ctrlr, uint16_t nsid, void *payload) + { + struct nvme_completion_poll_status status; + int32_t res; +@@ -209,10 +198,11 
@@ spdk_nvme_ctrlr_identify_directives(struct spdk_nvme_ctrlr *ctrlr, uint16_t nsid + } + + status.done = false; +- res = nvme_ctrlr_cmd_directive_receive(ctrlr, nsid, SPDK_NVME_ID_RECV_OP_RET_PARA, +- SPDK_NVME_DIR_TYPE_IDENTIFY, 0, payload, +- sizeof(struct spdk_nvme_identify_recv_ret_para), +- 0, nvme_completion_poll_cb, &status); ++ res = spdk_nvme_ctrlr_cmd_directive_receive(ctrlr, nsid, ++ SPDK_NVME_IDENTIFY_DIRECTIVE_RECEIVE_RETURN_PARAM, ++ SPDK_NVME_DIRECTIVE_TYPE_IDENTIFY, 0, payload, ++ sizeof(struct spdk_nvme_ns_identify_directive_param), ++ 0, 0, nvme_completion_poll_cb, &status); + if (res != 0) { + return res; + } +@@ -225,15 +215,14 @@ spdk_nvme_ctrlr_identify_directives(struct spdk_nvme_ctrlr *ctrlr, uint16_t nsid + + if (spdk_nvme_cpl_is_error(&status.cpl)) { + SPDK_ERRLOG("Failed to Identify directive! sc[0x%x], sct[0x%x]\n", +- status.cpl.status.sc, status.cpl.status.sct); ++ status.cpl.status.sc, status.cpl.status.sct); + return -ENXIO; + } + + return 0; + } + +-uint16_t +-spdk_nvme_get_qpair_id(struct spdk_nvme_qpair *qpair) ++uint16_t spdk_nvme_get_qpair_id(struct spdk_nvme_qpair *qpair) + { + return qpair->id; + } +diff --git a/lib/nvme/nvme_ns.c b/lib/nvme/nvme_ns.c +index 458d32f..f5cf75b 100644 +--- a/lib/nvme/nvme_ns.c ++++ b/lib/nvme/nvme_ns.c +@@ -108,11 +108,6 @@ nvme_ns_set_identify_data(struct spdk_nvme_ns *ns) + ns->flags |= SPDK_NVME_NS_DPS_PI_SUPPORTED; + ns->pi_type = nsdata->dps.pit; + } +-#ifdef SPDK_CONFIG_APP_RW +- if (nsdata->dps.md_start) { +- ns->flags |= SPDK_NVME_NS_DPS_PI_MDSTART; +- } +-#endif + } + + static int +diff --git a/lib/nvme/nvme_ns_cmd.c b/lib/nvme/nvme_ns_cmd.c +index 4d706bc..37dcdc2 100644 +--- a/lib/nvme/nvme_ns_cmd.c ++++ b/lib/nvme/nvme_ns_cmd.c +@@ -462,6 +462,7 @@ _nvme_ns_cmd_rw(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + uint32_t sector_size = _nvme_get_host_buffer_sector_size(ns, io_flags); + uint32_t sectors_per_max_io = ns->sectors_per_max_io; + uint32_t sectors_per_stripe = 
ns->sectors_per_stripe; ++ int rc; + + req = nvme_allocate_request(qpair, payload, lba_count * sector_size, lba_count * ns->md_size, + cb_fn, cb_arg); +diff --git a/lib/nvme/nvme_ns_self.c b/lib/nvme/nvme_ns_self.c +index 5aabbaa..9e9def8 100644 +--- a/lib/nvme/nvme_ns_self.c ++++ b/lib/nvme/nvme_ns_self.c +@@ -14,12 +14,13 @@ + + bool spdk_nvme_ns_pi_md_start(struct spdk_nvme_ns *ns) + { +- return (ns->flags & SPDK_NVME_NS_DPS_PI_MDSTART) ? true : false; ++ struct spdk_nvme_ns_data *nsdata = &ns->ctrlr->nsdata[ns->id - 1]; ++ return nsdata->dps.md_start == 1; + } + + bool spdk_nvme_ns_is_dataset_mng_supported(struct spdk_nvme_ns *ns) + { +- return (ns->flags & SPDK_NVME_NS_DEALLOCATE_SUPPORTED) ? true : false; ++ return (ns->flags & SPDK_NVME_NS_DEALLOCATE_SUPPORTED) == 1; + } + + int nvme_ns_get_common_data(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns_data *nsdata) +@@ -36,8 +37,7 @@ int nvme_ns_get_common_data(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns_d + if (spdk_nvme_ctrlr_is_ns_manage_supported(ctrlr)) { + rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_NS, 0, SPDK_NVME_GLOBAL_NS_TAG, 0, + nsdata, sizeof(*nsdata), nvme_completion_poll_cb, &status); +- } +- else { ++ } else { + rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_NS, 0, 1, 0, + nsdata, sizeof(*nsdata), nvme_completion_poll_cb, &status); + } +@@ -46,7 +46,8 @@ int nvme_ns_get_common_data(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns_d + } + + if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) { +- SPDK_ERRLOG("Failed to identify nsdata, sct[%x], sc[%x]\n", status.cpl.status.sct, status.cpl.status.sc); ++ SPDK_ERRLOG("Failed to identify nsdata, sct[%x], sc[%x]\n", status.cpl.status.sct, ++ status.cpl.status.sc); + return -1; + } + +diff --git a/lib/nvme/nvme_pcie.c b/lib/nvme/nvme_pcie.c +index 08fe344..0e9e24d 100644 +--- a/lib/nvme/nvme_pcie.c ++++ b/lib/nvme/nvme_pcie.c +@@ -51,9 +51,10 @@ struct nvme_pcie_enum_ctx { + bool 
has_pci_addr; + }; + ++#ifndef SPDK_CONFIG_APP_RW + static int nvme_pcie_ctrlr_attach(struct spdk_nvme_probe_ctx *probe_ctx, + struct spdk_pci_addr *pci_addr); +- ++#endif + static uint16_t g_signal_lock; + static bool g_sigset = false; + +@@ -594,6 +595,7 @@ nvme_pcie_ctrlr_scan(struct spdk_nvme_probe_ctx *probe_ctx, + } + } + ++#ifndef SPDK_CONFIG_APP_RW + static int + nvme_pcie_ctrlr_attach(struct spdk_nvme_probe_ctx *probe_ctx, struct spdk_pci_addr *pci_addr) + { +@@ -605,6 +607,7 @@ nvme_pcie_ctrlr_attach(struct spdk_nvme_probe_ctx *probe_ctx, struct spdk_pci_ad + + return spdk_pci_enumerate(spdk_pci_nvme_get_driver(), pcie_nvme_enum_cb, &enum_ctx); + } ++#endif + + static struct spdk_nvme_ctrlr *nvme_pcie_ctrlr_construct(const struct spdk_nvme_transport_id *trid, + const struct spdk_nvme_ctrlr_opts *opts, +diff --git a/lib/nvme/nvme_pcie_common.c b/lib/nvme/nvme_pcie_common.c +index b0b14f6..564f81b 100644 +--- a/lib/nvme/nvme_pcie_common.c ++++ b/lib/nvme/nvme_pcie_common.c +@@ -36,6 +36,7 @@ + + #include "spdk/stdinc.h" + #include "spdk/likely.h" ++#include "spdk/bdev_module.h" + #include "spdk/string.h" + #include "nvme_internal.h" + #include "nvme_pcie_internal.h" +diff --git a/lib/nvme/nvme_rebind.c b/lib/nvme/nvme_rebind.c +index 5836fa3..1d8dadf 100644 +--- a/lib/nvme/nvme_rebind.c ++++ b/lib/nvme/nvme_rebind.c +@@ -11,13 +11,8 @@ + * GNU General Public License for more details. 
+ */ + +-#include +-#include +-#include +-#include +-#include ++#include "spdk/stdinc.h" + #include +-#include + #include + #include "spdk/log.h" + #include "spdk/nvme.h" +@@ -25,7 +20,7 @@ + #define PATH_LEN 4096 + #define ID_LEN 16 + +-// nvme that fails to bind uio ++/* nvme that fails to bind uio */ + struct failed_nvme { + char *pci_addr; + TAILQ_ENTRY(failed_nvme) tailq; +@@ -35,25 +30,29 @@ struct failed_nvme { + * failed nvmes list, failed nvme will send a "nvme add uevent" when we bind it back to nvme driver + * in spdk_rebind_driver, we should ignore this event or we wouldn't stop binding this nvme to uio. + */ +-static TAILQ_HEAD(failed_nvme_list, failed_nvme) g_failed_nvmes = TAILQ_HEAD_INITIALIZER(g_failed_nvmes); ++static TAILQ_HEAD(failed_nvme_list, ++ failed_nvme) g_failed_nvmes = TAILQ_HEAD_INITIALIZER(g_failed_nvmes); + +-// get vendor id from /sys/bus/pci/devices/pci_addr/vendor +-// get device id from /sys/bus/pci/devices/pci_addr/device +-static int32_t get_id_from_sysfs(const char *pci_addr, const char *id_type, char *ret_id, uint8_t ret_id_len) ++/* get vendor id from /sys/bus/pci/devices/pci_addr/vendor ++ * get device id from /sys/bus/pci/devices/pci_addr/device ++ */ ++static int32_t get_id_from_sysfs(const char *pci_addr, const char *id_type, char *ret_id, ++ uint8_t ret_id_len) + { + int32_t fd = -1; + char sysfs_path[PATH_LEN]; + char tmp_id[ID_LEN] = {0}; + char *tmp = NULL; + +- // id's length is 5 byte,like XXXX'\0' ++ /* id's length is 5 byte,like XXXX'\0' */ + if (ret_id_len < 5) { + SPDK_ERRLOG("ret_id_len is less than 5 bytes\n"); + return -1; + } + +- // construct path in sysfs which stores id +- if (snprintf_s(sysfs_path, PATH_LEN, PATH_LEN - 1, "/sys/bus/pci/devices/%s/%s", pci_addr, id_type) > 0) { ++ /* construct path in sysfs which stores i */ ++ if (snprintf_s(sysfs_path, PATH_LEN, PATH_LEN - 1, "/sys/bus/pci/devices/%s/%s", pci_addr, ++ id_type) > 0) { + fd = open(sysfs_path, O_RDONLY); + } + if (fd < 0) { +@@ -61,16 
+60,16 @@ static int32_t get_id_from_sysfs(const char *pci_addr, const char *id_type, char + return -1; + } + +- // id in sysfs is like 0xDDDD ++ /* id in sysfs is like 0xDDDD */ + if (read(fd, tmp_id, ID_LEN - 1) <= 0) { + SPDK_ERRLOG("fail to read id from %s, errno(%d): %s\n", sysfs_path, errno, strerror(errno)); + close(fd); + return -1; + } + +- // 2 means skipping prefix "0x" of id read from sysfs ++ /* 2 means skipping prefix "0x" of id read from sysfs */ + tmp = tmp_id + 2; +- // 4 means the value of id read from sysfs, not including prefix "0x" ++ /* 4 means the value of id read from sysfs, not including prefix "0x" */ + if (snprintf_s(ret_id, ret_id_len, 4, "%s", tmp) <= 0) { + SPDK_ERRLOG("string copy failed\n"); + } +@@ -79,24 +78,24 @@ static int32_t get_id_from_sysfs(const char *pci_addr, const char *id_type, char + return 0; + } + +-// get ven_dev_id which combines vendor id and device id ++/* get ven_dev_id which combines vendor id and device id */ + static int32_t get_ven_dev_id(const char *pci_addr, char *ven_dev_id, uint8_t ven_dev_id_len) + { + char ven_id[ID_LEN], dev_id[ID_LEN]; + +- // ven_dev_id combines with vendor id and device id,like "DDDD XXXX'\0'",length is 10 bytes ++ /* ven_dev_id combines with vendor id and device id,like "DDDD XXXX'\0'",length is 10 bytes */ + if (ven_dev_id_len < 10) { + SPDK_ERRLOG("ven_dev_id_len is less than 10 bytes\n"); + return -1; + } + +- // get vendor id from sysfs,format is like "DDDD" ++ /* get vendor id from sysfs,format is like "DDDD" */ + if (get_id_from_sysfs(pci_addr, "vendor", ven_id, ID_LEN) < 0) { + SPDK_ERRLOG("fail to get vendor id\n"); + return -1; + } + +- // get device id from sysfs,format is like "XXXX" ++ /* get device id from sysfs,format is like "XXXX" */ + if (get_id_from_sysfs(pci_addr, "device", dev_id, ID_LEN) < 0) { + SPDK_ERRLOG("fail to get device id\n"); + return -1; +@@ -109,13 +108,13 @@ static int32_t get_ven_dev_id(const char *pci_addr, char *ven_dev_id, uint8_t ve + return 
0; + } + +-// unbind driver by writing remove_id and unbind files in sysfs ++/* unbind driver by writing remove_id and unbind files in sysfs */ + static int32_t unbind_driver(char *pci_addr, const char *ven_dev_id) + { +- char sysfs_dev_remove_id[PATH_LEN]; // remove_id file path in sysfs +- char sysfs_dev_unbind[PATH_LEN]; // unbind file path in sysfs +- int32_t remove_id_fd = -1; // file description of remove_id file +- int32_t unbind_fd = -1; // file description of unbind file ++ char sysfs_dev_remove_id[PATH_LEN]; /* remove_id file path in sysfs */ ++ char sysfs_dev_unbind[PATH_LEN]; /* unbind file path in sysfs */ ++ int32_t remove_id_fd = -1; /* file description of remove_id file */ ++ int32_t unbind_fd = -1; /* file description of unbind file */ + int32_t ret; + + ret = snprintf_s(sysfs_dev_remove_id, PATH_LEN, PATH_LEN - 1, +@@ -140,7 +139,7 @@ static int32_t unbind_driver(char *pci_addr, const char *ven_dev_id) + (void)write(remove_id_fd, ven_dev_id, strlen(ven_dev_id) + 1); + close(remove_id_fd); + +- // unbind driver by wrting unbind file ++ /* unbind driver by wrting unbind file */ + unbind_fd = open(sysfs_dev_unbind, O_WRONLY); + if (unbind_fd < 0) { + SPDK_ERRLOG("fail to open %s, errno(%d): %s\n", sysfs_dev_unbind, errno, strerror(errno)); +@@ -149,7 +148,8 @@ static int32_t unbind_driver(char *pci_addr, const char *ven_dev_id) + + ret = write(unbind_fd, pci_addr, strlen(pci_addr) + 1); + if (ret < 0) { +- SPDK_ERRLOG("write %s to %s fail, errno(%d): %s\n",pci_addr, sysfs_dev_unbind, errno, strerror(errno)); ++ SPDK_ERRLOG("write %s to %s fail, errno(%d): %s\n", pci_addr, sysfs_dev_unbind, errno, ++ strerror(errno)); + close(unbind_fd); + return -1; + } +@@ -159,25 +159,27 @@ static int32_t unbind_driver(char *pci_addr, const char *ven_dev_id) + return 0; + } + +-// bind device to new driver by writing new_id and bind files in sysfs ++/* bind device to new driver by writing new_id and bind files in sysfs */ + static int32_t bind_driver(const char 
*pci_addr, const char *ven_dev_id, const char *driver_name) + { +- char sysfs_driver_new_id[PATH_LEN]; // new_id file path in sysfs +- char sysfs_driver_bind[PATH_LEN]; // bind file path in sysfs +- int32_t new_id_fd = -1; // file description of new_id file +- int32_t bind_fd = -1; // file descriptoin of bind file ++ char sysfs_driver_new_id[PATH_LEN]; /* new_id file path in sysfs */ ++ char sysfs_driver_bind[PATH_LEN]; /* bind file path in sysfs */ ++ int32_t new_id_fd = -1; /* file description of new_id file */ ++ int32_t bind_fd = -1; /* file descriptoin of bind file */ + int rc; + +- rc = snprintf_s(sysfs_driver_new_id, PATH_LEN, PATH_LEN - 1, "/sys/bus/pci/drivers/%s/new_id", driver_name); ++ rc = snprintf_s(sysfs_driver_new_id, PATH_LEN, PATH_LEN - 1, "/sys/bus/pci/drivers/%s/new_id", ++ driver_name); + if (rc > 0) { +- rc = snprintf_s(sysfs_driver_bind, PATH_LEN, PATH_LEN - 1, "/sys/bus/pci/drivers/%s/bind", driver_name); ++ rc = snprintf_s(sysfs_driver_bind, PATH_LEN, PATH_LEN - 1, "/sys/bus/pci/drivers/%s/bind", ++ driver_name); + } + if (rc <= 0) { + SPDK_ERRLOG("string copy failed\n"); + return -1; + } + +- // try to bind driver by write ven_dev_id to new_id file ++ /* try to bind driver by write ven_dev_id to new_id file */ + new_id_fd = open(sysfs_driver_new_id, O_WRONLY); + if (new_id_fd < 0) { + SPDK_ERRLOG("fail to open %s, errno(%d): %s\n", sysfs_driver_new_id, errno, strerror(errno)); +@@ -187,7 +189,7 @@ static int32_t bind_driver(const char *pci_addr, const char *ven_dev_id, const c + (void)write(new_id_fd, ven_dev_id, strlen(ven_dev_id) + 1); + close(new_id_fd); + +- // bind driver by writing pci_addr to bind file if writing new_id file failed ++ /* bind driver by writing pci_addr to bind file if writing new_id file failed */ + bind_fd = open(sysfs_driver_bind, O_WRONLY); + if (bind_fd < 0) { + SPDK_ERRLOG("fail to open %s, errno(%d): %s\n", sysfs_driver_bind, errno, strerror(errno)); +@@ -210,10 +212,10 @@ int32_t spdk_rebind_driver(char 
*pci_addr, char *driver_name) + return -1; + } + +- // ignore event from binding pci back to nvme driver ++ /* ignore event from binding pci back to nvme driver */ + TAILQ_FOREACH(iter, &g_failed_nvmes, tailq) { + if (strncmp(iter->pci_addr, pci_addr, strlen(iter->pci_addr)) == 0) { +- // oncely ignore nvme add event from binding back to nvme,so do rebind when next hotplug of this pci happen ++ /* oncely ignore nvme add event from binding back to nvme,so do rebind when next hotplug of this pci happen */ + TAILQ_REMOVE(&g_failed_nvmes, iter, tailq); + free(iter->pci_addr); + free(iter); +@@ -237,10 +239,10 @@ int32_t spdk_rebind_driver(char *pci_addr, char *driver_name) + } + + if (bind_driver(pci_addr, ven_dev_id, driver_name) < 0) { +- // retry ++ /* retry */ + if (bind_driver(pci_addr, ven_dev_id, driver_name) < 0) { + SPDK_ERRLOG("fail to bind %s to %s\n", pci_addr, driver_name); +- // add fialed nvme to g_failed_nvmes ++ /* add fialed nvme to g_failed_nvmes */ + struct failed_nvme *failed_nvme = (struct failed_nvme *)malloc(sizeof(struct failed_nvme)); + if (failed_nvme == NULL) { + SPDK_ERRLOG("failed to malloc for failed_nvme,can't bind %s back to nvme\n", pci_addr); +@@ -254,7 +256,7 @@ int32_t spdk_rebind_driver(char *pci_addr, char *driver_name) + } + TAILQ_INSERT_TAIL(&g_failed_nvmes, failed_nvme, tailq); + +- // bind device back to nvme driver if failed to bind uio ++ /* bind device back to nvme driver if failed to bind uio */ + bind_driver(pci_addr, ven_dev_id, "nvme"); + } + } +diff --git a/lib/rpc/rpc.c b/lib/rpc/rpc.c +index 9662b88..9b2caed 100644 +--- a/lib/rpc/rpc.c ++++ b/lib/rpc/rpc.c +@@ -110,6 +110,8 @@ jsonrpc_handler(struct spdk_jsonrpc_request *request, + + assert(method != NULL); + ++ SPDK_NOTICELOG("[spdk] jsonrpc handle request: %p, handling method: %s\n", request, ++ (char *)method->start); + m = _get_rpc_method(method); + if (m == NULL) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_METHOD_NOT_FOUND, "Method not 
found"); +diff --git a/lib/thread/thread.c b/lib/thread/thread.c +index 08a1284..1ab822b 100644 +--- a/lib/thread/thread.c ++++ b/lib/thread/thread.c +@@ -52,6 +52,12 @@ + #define SPDK_MSG_BATCH_SIZE 8 + #define SPDK_MAX_DEVICE_NAME_LEN 256 + #define SPDK_THREAD_EXIT_TIMEOUT_SEC 5 ++#ifdef SPDK_CONFIG_APP_RW ++void spdk_set_thread_exited(struct spdk_thread *thread) ++{ ++ thread->state = SPDK_THREAD_STATE_EXITED; ++} ++#endif + + static pthread_mutex_t g_devlist_mutex = PTHREAD_MUTEX_INITIALIZER; + +@@ -296,7 +302,6 @@ spdk_thread_create(const char *name, struct spdk_cpuset *cpumask) + thread->msg_cache_count++; + } + } +- + if (name) { + snprintf(thread->name, sizeof(thread->name), "%s", name); + } else { +@@ -315,8 +320,8 @@ spdk_thread_create(const char *name, struct spdk_cpuset *cpumask) + g_thread_count++; + pthread_mutex_unlock(&g_devlist_mutex); + +- SPDK_DEBUGLOG(thread, "Allocating new thread (%" PRIu64 ", %s)\n", +- thread->id, thread->name); ++ SPDK_NOTICELOG("Allocating new thread (%" PRIu64 ", %s)\n", ++ thread->id, thread->name); + + if (spdk_interrupt_mode_is_enabled()) { + thread->interrupt_mode = true; +@@ -894,7 +899,6 @@ spdk_thread_send_msg(const struct spdk_thread *thread, spdk_msg_fn fn, void *ctx + int rc; + + assert(thread != NULL); +- + if (spdk_unlikely(thread->state == SPDK_THREAD_STATE_EXITED)) { + SPDK_ERRLOG("Thread %s is marked as exited.\n", thread->name); + return -EIO; +@@ -1143,6 +1147,11 @@ spdk_poller_unregister(struct spdk_poller **ppoller) + struct spdk_thread *thread; + struct spdk_poller *poller; + ++ if (!g_bRunReactor) { ++ *ppoller = NULL; ++ return; ++ } ++ + poller = *ppoller; + if (poller == NULL) { + return; +@@ -1427,8 +1436,12 @@ io_device_free(struct io_device *dev) + assert(dev->unregister_thread != NULL); + SPDK_DEBUGLOG(thread, "io_device %s (%p) needs to unregister from thread %s\n", + dev->name, dev->io_device, dev->unregister_thread->name); ++#ifndef SPDK_CONFIG_APP_RW + rc = 
spdk_thread_send_msg(dev->unregister_thread, _finish_unregister, dev); + assert(rc == 0); ++#else ++ _finish_unregister((void *)dev); ++#endif + } + } + +@@ -1779,8 +1792,13 @@ spdk_for_each_channel(void *io_device, spdk_channel_msg fn, void *ctx, + i->cur_thread = thread; + i->ch = ch; + pthread_mutex_unlock(&g_devlist_mutex); ++#ifndef SPDK_CONFIG_APP_RW + rc = spdk_thread_send_msg(thread, _call_channel, i); + assert(rc == 0); ++#else ++ _call_channel(i); ++#endif ++ assert(rc == 0); + return; + } + } +@@ -1788,8 +1806,12 @@ spdk_for_each_channel(void *io_device, spdk_channel_msg fn, void *ctx, + + pthread_mutex_unlock(&g_devlist_mutex); + ++#ifndef SPDK_CONFIG_APP_RW + rc = spdk_thread_send_msg(i->orig_thread, _call_completion, i); + assert(rc == 0); ++#else ++ _call_completion(i); ++#endif + } + + void +@@ -1814,8 +1836,12 @@ spdk_for_each_channel_continue(struct spdk_io_channel_iter *i, int status) + i->cur_thread = thread; + i->ch = ch; + pthread_mutex_unlock(&g_devlist_mutex); ++#ifndef SPDK_CONFIG_APP_RW + rc = spdk_thread_send_msg(thread, _call_channel, i); + assert(rc == 0); ++#else ++ _call_channel(i); ++#endif + return; + } + } +@@ -1827,8 +1853,12 @@ end: + i->ch = NULL; + pthread_mutex_unlock(&g_devlist_mutex); + ++#ifndef SPDK_CONFIG_APP_RW + rc = spdk_thread_send_msg(i->orig_thread, _call_completion, i); + assert(rc == 0); ++#else ++ _call_completion(i); ++#endif + } + + struct spdk_interrupt { +diff --git a/mk/nvme.libtest.mk b/mk/nvme.libtest.mk +index 201db50..03f4fe4 100644 +--- a/mk/nvme.libtest.mk ++++ b/mk/nvme.libtest.mk +@@ -38,6 +38,6 @@ include $(SPDK_ROOT_DIR)/mk/spdk.modules.mk + + C_SRCS := $(APP:%=%.c) + +-SPDK_LIB_LIST = $(SOCK_MODULES_LIST) nvme vmd ++SPDK_LIB_LIST = $(SOCK_MODULES_LIST) nvme vmd trace + + include $(SPDK_ROOT_DIR)/mk/spdk.app.mk +diff --git a/mk/spdk.common.mk b/mk/spdk.common.mk +index 8569687..6bdc1dd 100644 +--- a/mk/spdk.common.mk ++++ b/mk/spdk.common.mk +@@ -81,7 +81,7 @@ else ifeq ($(TARGET_MACHINE),aarch64) 
+ COMMON_CFLAGS += -march=$(TARGET_ARCHITECTURE) + COMMON_CFLAGS += -DPAGE_SIZE=$(shell getconf PAGESIZE) + else +-COMMON_CFLAGS += -march=$(TARGET_ARCHITECTURE) ++COMMON_CFLAGS += -march=core-avx-i + endif + + ifeq ($(CONFIG_WERROR), y) +@@ -248,12 +248,13 @@ endif + COMMON_CFLAGS += -pthread + LDFLAGS += -pthread + +-CFLAGS += $(COMMON_CFLAGS) -Wno-pointer-sign -Wstrict-prototypes -Wold-style-definition -std=gnu99 ++CFLAGS += $(COMMON_CFLAGS) -Wno-pointer-sign -Wstrict-prototypes -Wold-style-definition -std=gnu99 -include spdk/config.h + CXXFLAGS += $(COMMON_CFLAGS) + + SYS_LIBS += -lrt + SYS_LIBS += -luuid + SYS_LIBS += -lcrypto ++SYS_LIBS += -lsecurec + + ifneq ($(CONFIG_NVME_CUSE)$(CONFIG_FUSE),nn) + SYS_LIBS += -lfuse3 +diff --git a/mk/spdk.modules.mk b/mk/spdk.modules.mk +index 415a3b2..d45702c 100644 +--- a/mk/spdk.modules.mk ++++ b/mk/spdk.modules.mk +@@ -34,7 +34,7 @@ + BLOCKDEV_MODULES_LIST = bdev_malloc bdev_null bdev_nvme bdev_passthru bdev_lvol + BLOCKDEV_MODULES_LIST += bdev_raid bdev_error bdev_gpt bdev_split bdev_delay + BLOCKDEV_MODULES_LIST += bdev_zone_block +-BLOCKDEV_MODULES_LIST += blobfs blobfs_bdev blob_bdev blob lvol vmd nvme ++BLOCKDEV_MODULES_LIST += blobfs blobfs_bdev blob_bdev blob lvol vmd nvme conf + + # Some bdev modules don't have pollers, so they can directly run in interrupt mode + INTR_BLOCKDEV_MODULES_LIST = bdev_malloc bdev_passthru bdev_error bdev_gpt bdev_split bdev_raid +diff --git a/module/bdev/nvme/Makefile b/module/bdev/nvme/Makefile +index f9ddb23..9ad93ef 100644 +--- a/module/bdev/nvme/Makefile ++++ b/module/bdev/nvme/Makefile +@@ -39,6 +39,7 @@ SO_MINOR := 0 + + C_SRCS = bdev_nvme.c bdev_nvme_rpc.c nvme_rpc.c common.c bdev_ocssd.c bdev_ocssd_rpc.c + C_SRCS-$(CONFIG_NVME_CUSE) += bdev_nvme_cuse_rpc.c ++C_SRCS-$(CONFIG_APP_RW) += bdev_nvme_self.c + + ifeq ($(OS),Linux) + C_SRCS += vbdev_opal.c vbdev_opal_rpc.c +diff --git a/module/bdev/nvme/bdev_nvme.c b/module/bdev/nvme/bdev_nvme.c +index e9d730d..01d0238 100644 +--- 
a/module/bdev/nvme/bdev_nvme.c ++++ b/module/bdev/nvme/bdev_nvme.c +@@ -48,8 +48,14 @@ + + #include "spdk/bdev_module.h" + #include "spdk/log.h" ++#include "spdk/conf.h" + ++#ifdef SPDK_CONFIG_APP_RW ++#include "bdev_nvme_self.h" ++#define SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT false ++#else + #define SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT true ++#endif + #define SPDK_BDEV_NVME_DEFAULT_KEEP_ALIVE_TIMEOUT_IN_MS (10000) + + static int bdev_nvme_config_json(struct spdk_json_write_ctx *w); +@@ -170,7 +176,7 @@ static int bdev_nvme_abort(struct nvme_io_channel *nvme_ch, + struct nvme_bdev_io *bio, struct nvme_bdev_io *bio_to_abort); + static int bdev_nvme_reset(struct nvme_io_channel *nvme_ch, struct nvme_bdev_io *bio); + static int bdev_nvme_failover(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, bool remove); +-static void remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr); ++void remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr); + + typedef void (*populate_namespace_fn)(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, + struct nvme_bdev_ns *nvme_ns, struct nvme_async_probe_ctx *ctx); +@@ -256,6 +262,10 @@ bdev_nvme_poll(void *arg) + group->start_ticks = spdk_get_ticks(); + } + ++#ifdef SPDK_CONFIG_APP_RW ++ bdev_update_ch_timeout(group); ++#endif ++ + num_completions = spdk_nvme_poll_group_process_completions(group->group, 0, + bdev_nvme_disconnected_qpair_cb); + if (group->collect_spin_stat) { +@@ -270,9 +280,13 @@ bdev_nvme_poll(void *arg) + } + } + ++ if (!spdk_get_reactor_type()) { ++ return num_completions; ++ } + return num_completions > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE; + } + ++#ifndef SPDK_CONFIG_APP_RW + static int + bdev_nvme_poll_adminq(void *arg) + { +@@ -288,6 +302,7 @@ bdev_nvme_poll_adminq(void *arg) + + return rc == 0 ? 
SPDK_POLLER_IDLE : SPDK_POLLER_BUSY; + } ++#endif + + static int + bdev_nvme_destruct(void *ctx) +@@ -330,6 +345,7 @@ bdev_nvme_create_qpair(struct nvme_io_channel *nvme_ch) + g_opts.io_queue_requests = opts.io_queue_requests; + + nvme_ch->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, &opts, sizeof(opts)); ++ syslog(LOG_INFO, "open a new qpair=%p, thread=%lu.\n", nvme_ch->qpair, pthread_self()); + if (nvme_ch->qpair == NULL) { + return -1; + } +@@ -791,7 +807,11 @@ _bdev_nvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_ + static void + bdev_nvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) + { ++#ifdef SPDK_CONFIG_APP_RW ++ int rc = _bdev_nvme_submit_request_self(ch, bdev_io); ++#else + int rc = _bdev_nvme_submit_request(ch, bdev_io); ++#endif + + if (spdk_unlikely(rc != 0)) { + if (rc == -ENOMEM) { +@@ -824,6 +844,12 @@ bdev_nvme_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) + case SPDK_BDEV_IO_TYPE_NVME_ADMIN: + case SPDK_BDEV_IO_TYPE_NVME_IO: + case SPDK_BDEV_IO_TYPE_ABORT: ++#ifdef SPDK_CONFIG_APP_RW ++ case SPDK_BDEV_IO_TYPE_READ_NVME: ++ case SPDK_BDEV_IO_TYPE_WRITE_NVME: ++ case SPDK_BDEV_IO_TYPE_READV_NVME: ++ case SPDK_BDEV_IO_TYPE_WRITEV_NVME: ++#endif + return true; + + case SPDK_BDEV_IO_TYPE_COMPARE: +@@ -944,7 +970,7 @@ bdev_nvme_poll_group_create_cb(void *io_device, void *ctx_buf) + + group->poller = SPDK_POLLER_REGISTER(bdev_nvme_poll, group, g_opts.nvme_ioq_poll_period_us); + +- if (group->poller == NULL) { ++ if (group->poller == NULL && spdk_get_reactor_type()) { + spdk_nvme_poll_group_destroy(group->group); + return -1; + } +@@ -980,6 +1006,7 @@ bdev_nvme_get_module_ctx(void *ctx) + return bdev_nvme_get_ctrlr(&nvme_bdev->disk); + } + ++#ifndef SPDK_CONFIG_APP_RW + static int + bdev_nvme_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) + { +@@ -1093,6 +1120,7 @@ bdev_nvme_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) + + return 0; + } ++#endif + + static void + 
bdev_nvme_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) +@@ -1128,10 +1156,17 @@ static const struct spdk_bdev_fn_table nvmelib_fn_table = { + .submit_request = bdev_nvme_submit_request, + .io_type_supported = bdev_nvme_io_type_supported, + .get_io_channel = bdev_nvme_get_io_channel, +- .dump_info_json = bdev_nvme_dump_info_json, + .write_config_json = bdev_nvme_write_config_json, + .get_spin_time = bdev_nvme_get_spin_time, + .get_module_ctx = bdev_nvme_get_module_ctx, ++#ifdef SPDK_CONFIG_APP_RW ++ .dump_info_json = bdev_nvme_dump_info_json_self, ++ .bdev_poll_rsp = bdev_nvme_poll, ++ .get_io_channel_id = bdev_nvme_get_io_channel_id, ++ .get_timeout_count = bdev_nvme_get_timeout_count, ++#else ++ .dump_info_json = bdev_nvme_dump_info_json, ++#endif + }; + + static int +@@ -1157,7 +1192,12 @@ nvme_disk_create(struct spdk_bdev *disk, const char *base_name, + /* Enable if the Volatile Write Cache exists */ + disk->write_cache = 1; + } ++ ++#ifdef SPDK_CONFIG_APP_RW ++ disk->blocklen = spdk_nvme_ns_get_sector_size(ns); ++#else + disk->blocklen = spdk_nvme_ns_get_extended_sector_size(ns); ++#endif + disk->blockcnt = spdk_nvme_ns_get_num_sectors(ns); + disk->optimal_io_boundary = spdk_nvme_ns_get_optimal_io_boundary(ns); + +@@ -1356,14 +1396,14 @@ nvme_ctrlr_depopulate_standard_namespace(struct nvme_bdev_ns *nvme_ns) + nvme_ctrlr_depopulate_namespace_done(nvme_ns); + } + +-static void ++void + nvme_ctrlr_populate_namespace(struct nvme_bdev_ctrlr *ctrlr, struct nvme_bdev_ns *nvme_ns, + struct nvme_async_probe_ctx *ctx) + { + g_populate_namespace_fn[nvme_ns->type](ctrlr, nvme_ns, ctx); + } + +-static void ++void + nvme_ctrlr_depopulate_namespace(struct nvme_bdev_ctrlr *ctrlr, struct nvme_bdev_ns *nvme_ns) + { + g_depopulate_namespace_fn[nvme_ns->type](nvme_ns); +@@ -1579,8 +1619,10 @@ nvme_bdev_ctrlr_create(struct spdk_nvme_ctrlr *ctrlr, + sizeof(struct nvme_io_channel), + name); + ++#ifndef SPDK_CONFIG_APP_RW + 
nvme_bdev_ctrlr->adminq_timer_poller = SPDK_POLLER_REGISTER(bdev_nvme_poll_adminq, nvme_bdev_ctrlr, + g_opts.nvme_adminq_poll_period_us); ++#endif + + TAILQ_INSERT_TAIL(&g_nvme_bdev_ctrlrs, nvme_bdev_ctrlr, tailq); + +@@ -1618,7 +1660,7 @@ err_alloc_namespaces: + return rc; + } + +-static void ++void + attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) + { +@@ -1669,7 +1711,7 @@ _nvme_bdev_ctrlr_destruct(void *ctx) + nvme_bdev_ctrlr_destruct(nvme_bdev_ctrlr); + } + +-static void ++void + remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr) + { + struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = cb_ctx; +@@ -2174,6 +2216,9 @@ bdev_nvme_library_init(void) + bdev_nvme_poll_group_destroy_cb, + sizeof(struct nvme_bdev_poll_group), "bdev_nvme_poll_groups"); + ++#ifdef SPDK_CONFIG_APP_RW ++ return bdev_probe_ctrlr(); ++#endif + return 0; + } + +@@ -2363,11 +2408,14 @@ bdev_nvme_comparev_and_writev_done(void *ref, const struct spdk_nvme_cpl *cpl) + } + } + +-static void ++void + bdev_nvme_queued_done(void *ref, const struct spdk_nvme_cpl *cpl) + { + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx((struct nvme_bdev_io *)ref); + ++#ifdef SPDK_CONFIG_APP_RW ++ spdk_bdev_set_io_location(ref, (uint8_t)LOCAL_LIBSTORAGE_FROM_DISK); ++#endif + spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc); + } + +@@ -2412,7 +2460,7 @@ bdev_nvme_admin_passthru_done(void *ref, const struct spdk_nvme_cpl *cpl) + spdk_thread_send_msg(bio->orig_thread, bdev_nvme_admin_passthru_completion, bio); + } + +-static void ++void + bdev_nvme_queued_reset_sgl(void *ref, uint32_t sgl_offset) + { + struct nvme_bdev_io *bio = ref; +@@ -2429,7 +2477,7 @@ bdev_nvme_queued_reset_sgl(void *ref, uint32_t sgl_offset) + } + } + +-static int ++int + bdev_nvme_queued_next_sge(void *ref, void **address, uint32_t *length) + { + struct nvme_bdev_io *bio = ref; +@@ -2979,4 +3027,221 @@ 
bdev_nvme_get_ctrlr(struct spdk_bdev *bdev) + return SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk)->nvme_ns->ctrlr->ctrlr; + } + ++#ifdef SPDK_CONFIG_APP_RW ++void * ++nvme_channel_get_group(void *io_ch) ++{ ++ struct nvme_io_channel *nvme_io_ch = io_ch; ++ return nvme_io_ch->group; ++} ++struct nvme_bdev_io *nvme_bdev_io_update_args(struct nvme_bdev_io *bio, struct iovec *iov, ++ int iovcnt) ++{ ++ bio->iovs = iov; ++ bio->iovcnt = iovcnt; ++ bio->iovpos = 0; ++ bio->iov_offset = 0; ++ return bio; ++} ++ ++struct nvme_probe_ctx *bdev_nvme_create_probe_ctx(struct spdk_nvme_transport_id *trid, ++ const char *base_name, const char *hostnqn) ++{ ++ struct nvme_probe_ctx *probe_ctx = calloc(1, sizeof(*probe_ctx)); ++ if (probe_ctx == NULL) { ++ SPDK_ERRLOG("Failed to allocate probe_ctx\n"); ++ return NULL; ++ } ++ ++ probe_ctx->count = 1; ++ probe_ctx->trids[0] = *trid; ++ probe_ctx->names[0] = base_name; ++ probe_ctx->hostnqn = hostnqn; ++ return probe_ctx; ++} ++ ++bool ++probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, ++ struct spdk_nvme_ctrlr_opts *opts) ++{ ++ struct nvme_probe_ctx *ctx = cb_ctx; ++ ++ SPDK_DEBUGLOG(nvme, "Probing device %s\n", trid->traddr); ++ ++ if (nvme_bdev_ctrlr_get(trid)) { ++ SPDK_ERRLOG("A controller with the provided trid (traddr: %s) already exists.\n", ++ trid->traddr); ++ return false; ++ } ++ ++ if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) { ++ bool claim_device = false; ++ size_t i; ++ ++ for (i = 0; i < ctx->count; i++) { ++ if (spdk_nvme_transport_id_compare(trid, &ctx->trids[i]) == 0) { ++ claim_device = true; ++ break; ++ } ++ } ++ ++ if (!claim_device) { ++ SPDK_DEBUGLOG(nvme, "Not claiming device at %s\n", trid->traddr); ++ return false; ++ } ++ } ++ ++ if (ctx->hostnqn) { ++ snprintf(opts->hostnqn, sizeof(opts->hostnqn), "%s", ctx->hostnqn); ++ } ++ ++ opts->arbitration_burst = (uint8_t)g_opts.arbitration_burst; ++ opts->low_priority_weight = (uint8_t)g_opts.low_priority_weight; ++ 
opts->medium_priority_weight = (uint8_t)g_opts.medium_priority_weight; ++ opts->high_priority_weight = (uint8_t)g_opts.high_priority_weight; ++ ++ return true; ++} ++ ++int bdev_probe_ctrlr(void) ++{ ++ struct spdk_conf_section *sp; ++ const char *val; ++ int rc = 0; ++ int64_t intval = 0; ++ size_t i; ++ struct nvme_probe_ctx *probe_ctx = NULL; ++ int retry_count; ++ uint32_t local_nvme_num = 0; ++ ++ sp = spdk_conf_find_section(NULL, "Nvme"); ++ if (sp == NULL) { ++ SPDK_ERRLOG("config file does not contain [Nvme] section, which need to be provided\n"); ++ goto end; ++ } ++ ++ probe_ctx = calloc(1, sizeof(*probe_ctx)); ++ if (probe_ctx == NULL) { ++ SPDK_ERRLOG("Failed to allocate probe_ctx\n"); ++ rc = -1; ++ goto end; ++ } ++ ++ retry_count = spdk_conf_section_get_intval(sp, "RetryCount"); ++ if (retry_count >= 0) { ++ g_opts.retry_count = retry_count; ++ } ++ if (retry_count > 255) { ++ SPDK_WARNLOG("RetryCount:%d should not be greater than 255, set it to 255 this time\n", ++ retry_count); ++ retry_count = 255; ++ } ++ syslog(LOG_INFO, "RetryCount is set to %d\n", retry_count); ++ ++ val = spdk_conf_section_get_val(sp, "TimeoutUsec"); ++ if (val != NULL) { ++ intval = spdk_strtoll(val, 10); ++ if (intval < 0) { ++ SPDK_ERRLOG("Invalid TimeoutUsec value\n"); ++ rc = -1; ++ goto end; ++ } ++ } ++ syslog(LOG_INFO, "TimeoutUsec is set to %ld\n", intval); ++ g_opts.timeout_us = intval; ++ ++ if (g_opts.timeout_us > 0) { ++ val = spdk_conf_section_get_val(sp, "ActionOnTimeout"); ++ if (val != NULL) { ++ if (!strcasecmp(val, "Reset")) { ++ g_opts.action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET; ++ } else if (!strcasecmp(val, "Abort")) { ++ g_opts.action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT; ++ } ++ } ++ } ++ ++ intval = spdk_conf_section_get_intval(sp, "AdminPollRate"); ++ if (intval > 0) { ++ g_opts.nvme_adminq_poll_period_us = intval; ++ } ++ syslog(LOG_INFO, "AdminPollRate is set to %lu\n", g_opts.nvme_adminq_poll_period_us); ++ intval = 
spdk_conf_section_get_intval(sp, "IOPollRate"); ++ if (intval > 0) { ++ g_opts.nvme_ioq_poll_period_us = intval; ++ } ++ ++ g_opts.delay_cmd_submit = spdk_conf_section_get_boolval(sp, "DelayCmdSubmit", ++ SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT); ++ ++ for (i = 0; i < NVME_MAX_CONTROLLERS; i++) { ++ val = spdk_conf_section_get_nmval(sp, "TransportID", i, 0); ++ if (val == NULL) { ++ break; ++ } ++ ++ rc = spdk_nvme_transport_id_parse(&probe_ctx->trids[i], val); ++ if (rc < 0) { ++ SPDK_ERRLOG("Unable to parse TransportID: %s\n", val); ++ rc = -1; ++ goto end; ++ } ++ ++ rc = spdk_nvme_host_id_parse(&probe_ctx->hostids[i], val); ++ if (rc < 0) { ++ SPDK_ERRLOG("Unable to parse HostID: %s\n", val); ++ rc = -1; ++ goto end; ++ } ++ ++ val = spdk_conf_section_get_nmval(sp, "TransportID", i, 1); ++ if (val == NULL) { ++ SPDK_ERRLOG("No name provided for TransportID\n"); ++ rc = -1; ++ goto end; ++ } ++ ++ probe_ctx->names[i] = val; ++ ++ val = spdk_conf_section_get_nmval(sp, "TransportID", i, 2); ++ if (val != NULL) { ++ rc = spdk_nvme_prchk_flags_parse(&probe_ctx->prchk_flags[i], val); ++ if (rc < 0) { ++ SPDK_ERRLOG("Unable to parse prchk: %s\n", val); ++ rc = -1; ++ goto end; ++ } ++ } ++ ++ probe_ctx->count++; ++ ++ if (probe_ctx->trids[i].trtype == SPDK_NVME_TRANSPORT_PCIE) { ++ local_nvme_num++; ++ } ++ } ++ ++ if (local_nvme_num > 0) { ++ /* used to probe local NVMe device */ ++ if (spdk_nvme_probe(NULL, probe_ctx, probe_cb, attach_cb, remove_cb)) { ++ rc = -1; ++ goto end; ++ } ++ ++ for (i = 0; i < probe_ctx->count; i++) { ++ if (probe_ctx->trids[i].trtype != SPDK_NVME_TRANSPORT_PCIE) { ++ continue; ++ } ++ ++ if (!nvme_bdev_ctrlr_get(&probe_ctx->trids[i])) { ++ SPDK_ERRLOG("NVMe SSD \"%s\" could not be found.\n", probe_ctx->trids[i].traddr); ++ SPDK_ERRLOG("Check PCIe BDF and that it is attached to UIO/VFIO driver.\n"); ++ } ++ } ++ } ++end: ++ free(probe_ctx); ++ return rc; ++} ++#endif ++ + SPDK_LOG_REGISTER_COMPONENT(bdev_nvme) +diff --git 
a/module/bdev/nvme/bdev_nvme.h b/module/bdev/nvme/bdev_nvme.h +index e789371..4c81466 100644 +--- a/module/bdev/nvme/bdev_nvme.h ++++ b/module/bdev/nvme/bdev_nvme.h +@@ -42,6 +42,9 @@ + + #include "common.h" + ++struct nvme_bdev_io; ++struct nvme_probe_ctx; ++ + enum spdk_bdev_timeout_action { + SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE = 0, + SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET, +@@ -89,4 +92,43 @@ struct spdk_nvme_ctrlr *bdev_nvme_get_ctrlr(struct spdk_bdev *bdev); + */ + int bdev_nvme_delete(const char *name); + ++#ifdef SPDK_CONFIG_APP_RW ++void ++bdev_nvme_queued_done(void *ref, const struct spdk_nvme_cpl *cpl); ++ ++void ++bdev_nvme_queued_reset_sgl(void *ref, uint32_t sgl_offset); ++ ++int ++bdev_nvme_queued_next_sge(void *ref, void **address, uint32_t *length); ++ ++bool ++probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, ++ struct spdk_nvme_ctrlr_opts *opts); ++ ++void ++attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, ++ struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts); ++ ++void ++remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr); ++ ++void ++nvme_ctrlr_populate_namespace(struct nvme_bdev_ctrlr *ctrlr, struct nvme_bdev_ns *ns, ++ struct nvme_async_probe_ctx *ctx); ++ ++void ++nvme_ctrlr_depopulate_namespace(struct nvme_bdev_ctrlr *ctrlr, struct nvme_bdev_ns *ns); ++ ++int ++bdev_probe_ctrlr(void); ++ ++struct nvme_bdev_io * ++nvme_bdev_io_update_args(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt); ++ ++struct nvme_probe_ctx * ++bdev_nvme_create_probe_ctx(struct spdk_nvme_transport_id *trid, const char *base_name, ++ const char *hostnqn); ++#endif ++ + #endif /* SPDK_BDEV_NVME_H */ +diff --git a/module/bdev/nvme/bdev_nvme_self.c b/module/bdev/nvme/bdev_nvme_self.c +new file mode 100644 +index 0000000..7371ecb +--- /dev/null ++++ b/module/bdev/nvme/bdev_nvme_self.c +@@ -0,0 +1,661 @@ ++/* ++ * Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved. 
++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 and ++ * only version 2 as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ */ ++#include "bdev_nvme.h" ++ ++#include "spdk/json.h" ++#include "spdk/likely.h" ++#include "spdk/bdev_module.h" ++#include "spdk/nvme_ocssd.h" ++#include "spdk/nvme.h" ++ ++#include "spdk_internal/bdev_stat.h" ++#include "bdev_nvme_self.h" ++#include "common.h" ++#include ++ ++enum data_direction { ++ BDEV_DISK_READ = 0, ++ BDEV_DISK_WRITE = 1 ++}; ++ ++void bdev_update_ch_timeout(struct nvme_bdev_poll_group *group) ++{ ++ uint64_t current_ticks = 0; ++ uint64_t poll_ticks = 0; ++ int64_t poll_time = 0; ++ ++ current_ticks = spdk_get_ticks(); ++ ++ if (spdk_unlikely(g_polltime_threshold)) { ++ if (group->save_start_ticks) { ++ poll_ticks = current_ticks - group->save_start_ticks; ++ poll_time = (poll_ticks * 1000ULL) / spdk_get_ticks_hz(); ++ if (poll_time >= g_polltime_threshold) { ++ group->num_poll_timeout++; ++ SPDK_WARNLOG("group[%p] poll timeout in %ldms", group, poll_time); ++ } ++ } ++ group->save_start_ticks = current_ticks; ++ } ++} ++ ++int ++_bdev_nvme_submit_request_self(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) ++{ ++ struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); ++ ++ if (nvme_ch->qpair == NULL) { ++ /* The device is currently resetting */ ++ return -1; ++ } ++ ++ switch (bdev_io->type) { ++ case SPDK_BDEV_IO_TYPE_READ_NVME: ++ SPDK_DEBUGLOG(bdev_nvme, "read %lu lbas with offset %#lx\n", bdev_io->u.contig.num_blocks, ++ bdev_io->u.contig.offset_blocks); ++ return bdev_nvme_queue_cmd_with_md((struct nvme_bdev *)bdev_io->bdev->ctxt, nvme_ch->qpair, 
++ bdev_io->driver_ctx, bdev_io->u.contig.buf, ++ bdev_io->u.contig.md_buf, BDEV_DISK_READ, ++ bdev_io->u.contig.num_blocks, bdev_io->u.contig.offset_blocks); ++ case SPDK_BDEV_IO_TYPE_WRITE_NVME: ++ SPDK_DEBUGLOG(bdev_nvme, "write %lu lbas with offset %#lx\n", bdev_io->u.contig.num_blocks, ++ bdev_io->u.contig.offset_blocks); ++ return bdev_nvme_queue_cmd_with_md((struct nvme_bdev *)bdev_io->bdev->ctxt, nvme_ch->qpair, ++ bdev_io->driver_ctx, bdev_io->u.contig.buf, ++ bdev_io->u.contig.md_buf, BDEV_DISK_WRITE, ++ bdev_io->u.contig.num_blocks, bdev_io->u.contig.offset_blocks); ++ case SPDK_BDEV_IO_TYPE_READV_NVME: ++ SPDK_DEBUGLOG(bdev_nvme, "readv %lu lbas with offset %#lx\n", bdev_io->u.bdev.num_blocks, ++ bdev_io->u.bdev.offset_blocks); ++ return bdev_nvme_queue_cmd_v_with_md((struct nvme_bdev *)bdev_io->bdev->ctxt, nvme_ch->qpair, ++ bdev_io->driver_ctx, BDEV_DISK_READ, ++ bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, ++ bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.offset_blocks); ++ case SPDK_BDEV_IO_TYPE_WRITEV_NVME: ++ SPDK_DEBUGLOG(bdev_nvme, "writev %lu lbas with offset %#lx\n", bdev_io->u.bdev.num_blocks, ++ bdev_io->u.bdev.offset_blocks); ++ return bdev_nvme_queue_cmd_v_with_md((struct nvme_bdev *)bdev_io->bdev->ctxt, nvme_ch->qpair, ++ bdev_io->driver_ctx, BDEV_DISK_WRITE, ++ bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, ++ bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.offset_blocks); ++ case SPDK_BDEV_IO_TYPE_UNMAP_BLOCKS: ++ return bdev_nvme_unmap_blocks((struct nvme_bdev *)bdev_io->bdev->ctxt, ++ ch, ++ (void *)bdev_io->driver_ctx, ++ (struct spdk_nvme_dsm_range *)bdev_io->u.contig.buf, ++ bdev_io->u.contig.num_blocks); ++ default: ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++int ++bdev_nvme_dump_info_json_self(void *ctx, struct spdk_json_write_ctx *w) ++{ ++ return 0; ++} ++ ++uint16_t ++bdev_nvme_get_io_channel_id(struct spdk_io_channel *ch) ++{ ++ struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); ++ uint16_t channel_id; ++ struct 
spdk_nvme_qpair *qpair = nvme_ch->qpair; ++ channel_id = spdk_nvme_get_qpair_id(qpair); ++ return channel_id; ++} ++ ++uint64_t ++bdev_nvme_get_timeout_count(struct spdk_io_channel *ch) ++{ ++ struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); ++ return nvme_ch->group->num_poll_timeout; ++} ++ ++int32_t ++nvme_ctrlr_get_info(const char *ctrlName, struct nvme_ctrlr_info **ppCtrlr) ++{ ++ uint32_t num_ctrlr = 0, i = 0; ++ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = NULL; ++ struct nvme_ctrlr_info *pCtrlrInfo = NULL; ++ const struct spdk_nvme_ctrlr_data *cdata = NULL; ++ struct spdk_nvme_ctrlr_opts *opts = NULL; ++ ++ struct spdk_pci_device *pci_dev = NULL; ++ int rc; ++ ++ TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) { ++ num_ctrlr++; ++ } ++ if (num_ctrlr == 0) { ++ SPDK_NOTICELOG("No any nvme controller.\n"); ++ return 0; ++ } ++ if (ctrlName != NULL) { ++ num_ctrlr = 1; ++ } ++ pCtrlrInfo = calloc(num_ctrlr, sizeof(struct nvme_ctrlr_info)); ++ if (pCtrlrInfo == NULL) { ++ SPDK_ERRLOG("Failed to alloc memory for getting controller infomation.\n"); ++ return -1; ++ } ++ TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) { ++ if (i >= num_ctrlr) { /* prevent having controllers be added or deleted */ ++ i++; ++ continue; ++ } ++ if (ctrlName != NULL) { ++ if (strcmp(nvme_bdev_ctrlr->name, ctrlName) != 0) { ++ continue; ++ } ++ } ++ cdata = spdk_nvme_ctrlr_get_data(nvme_bdev_ctrlr->ctrlr); ++ opts = spdk_nvme_ctrlr_get_opts(nvme_bdev_ctrlr->ctrlr); ++ pci_dev = spdk_nvme_ctrlr_get_pci_device(nvme_bdev_ctrlr->ctrlr); ++ if (pci_dev == NULL) { ++ SPDK_ERRLOG("Failed to get pci device\n"); ++ break; ++ } ++ rc = strcpy_s(pCtrlrInfo[i].ctrlName, sizeof(pCtrlrInfo[i].ctrlName), nvme_bdev_ctrlr->name); ++ if (rc != 0) { ++ SPDK_ERRLOG("String copy failed\n"); ++ } ++ rc = strcpy_s(pCtrlrInfo[i].pciAddr, sizeof(pCtrlrInfo[i].pciAddr), ++ nvme_bdev_ctrlr->connected_trid->traddr); ++ if (rc != 0) { ++ SPDK_ERRLOG("String copy failed\n"); ++ } ++ 
++ rc = memcpy_s(pCtrlrInfo[i].sn, sizeof(pCtrlrInfo[i].sn), cdata->sn, 20); ++ if (rc != 0) { ++ SPDK_ERRLOG("Memory copy failed\n"); ++ } ++ ++ rc = memcpy_s(pCtrlrInfo[i].fr, sizeof(pCtrlrInfo[i].fr), cdata->fr, 8); ++ if (rc != 0) { ++ SPDK_ERRLOG("Memory copy failed\n"); ++ } ++ ++ rc = memcpy_s(pCtrlrInfo[i].mn, sizeof(pCtrlrInfo[i].mn), cdata->mn, 40); ++ if (rc != 0) { ++ SPDK_ERRLOG("Memory copy failed\n"); ++ } ++ ++ pCtrlrInfo[i].trtype = (uint16_t)nvme_bdev_ctrlr->connected_trid->trtype; ++ pCtrlrInfo[i].tnvmcap = cdata->tnvmcap[0]; ++ pCtrlrInfo[i].unvmcap = cdata->unvmcap[0]; ++ pCtrlrInfo[i].support_ns = cdata->oacs.ns_manage; ++ pCtrlrInfo[i].directives = cdata->oacs.directives; ++ pCtrlrInfo[i].dsm = cdata->oncs.dsm; ++ pCtrlrInfo[i].max_num_ns = cdata->nn; ++ pCtrlrInfo[i].num_io_queues = opts->num_io_queues; ++ pCtrlrInfo[i].io_queue_size = opts->io_queue_size; ++ pCtrlrInfo[i].device_id = spdk_pci_device_get_device_id(pci_dev); ++ pCtrlrInfo[i].subdevice_id = spdk_pci_device_get_subdevice_id(pci_dev); ++ pCtrlrInfo[i].vid = cdata->vid; ++ pCtrlrInfo[i].ssvid = cdata->ssvid; ++ pCtrlrInfo[i].ctrlid = cdata->cntlid; ++ pCtrlrInfo[i].version = spdk_nvme_ctrlr_get_regs_vs(nvme_bdev_ctrlr->ctrlr).raw; ++ i++; ++ if (ctrlName != NULL) { ++ break; ++ } ++ } ++ if (i != num_ctrlr) { ++ SPDK_ERRLOG("It has controller been added or deleted when fetched infomation, please try again later.\n"); ++ free(pCtrlrInfo); ++ return -1; ++ } ++ *ppCtrlr = pCtrlrInfo; ++ return num_ctrlr; ++} ++ ++struct nvme_bdev_ctrlr * ++nvme_ctrlr_get_by_name(const char *name) ++{ ++ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = NULL; ++ ++ if (name == NULL) { ++ return NULL; ++ } ++ ++ TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) { ++ if (strcmp(name, nvme_bdev_ctrlr->name) == 0) { ++ return nvme_bdev_ctrlr; ++ } ++ } ++ ++ return NULL; ++} ++ ++struct spdk_nvme_ctrlr * ++spdk_nvme_ctrlr_get_by_name(const char *ctrlname) ++{ ++ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr 
= NULL; ++ ++ TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) { ++ if (strcmp(nvme_bdev_ctrlr->name, ctrlname) == 0) { ++ return nvme_bdev_ctrlr->ctrlr; ++ } ++ } ++ ++ return NULL; ++} ++ ++struct spdk_nvme_ctrlr * ++spdk_nvme_ctrlr_get_by_ctrlr(const struct nvme_bdev_ctrlr *nvme_bdev_ctrlr) ++{ ++ if (nvme_bdev_ctrlr == NULL) { ++ return NULL; ++ } ++ return nvme_bdev_ctrlr->ctrlr; ++} ++ ++void ++nvme_ctrlr_clear_iostat_by_name(const char *ctrlname) ++{ ++ int i; ++ size_t size = strnlen(ctrlname, 24); ++ ++ for (i = 0; i < STAT_MAX_NUM; i++) { ++ if (strncmp(g_io_stat_map[i].bdev_name, ctrlname, size) == 0) { ++ if ((g_io_stat_map[i].bdev_name[size] == 'n') && isdigit(g_io_stat_map[i].bdev_name[size + 1])) { ++ g_io_stat_map[i].channel_id = 0; ++ memset(g_io_stat_map[i].bdev_name, 0, sizeof(g_io_stat_map[i].bdev_name)); ++ g_io_stat_map[i].num_read_ops = 0; ++ g_io_stat_map[i].num_write_ops = 0; ++ g_io_stat_map[i].bytes_read = 0; ++ g_io_stat_map[i].bytes_written = 0; ++ g_io_stat_map[i].io_outstanding = 0; ++ g_io_stat_map[i].read_latency_ticks = 0; ++ g_io_stat_map[i].write_latency_ticks = 0; ++ g_io_stat_map[i].io_ticks = 0; ++ /* used flag set false in last avoid race in channel create */ ++ g_io_stat_map[i].used = false; ++ } ++ } ++ } ++} ++ ++void ++nvme_ctrlr_clear_iostat_all(void) ++{ ++ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = NULL; ++ ++ TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) { ++ nvme_ctrlr_clear_iostat_by_name(nvme_bdev_ctrlr->name); ++ } ++} ++ ++struct spdk_nvme_ns * ++bdev_nvme_get_ns(struct nvme_bdev *nbdev) ++{ ++ return nbdev->nvme_ns->ns; ++} ++ ++void bdev_nvme_update_block_by_nvme_ctrlr(struct spdk_nvme_ctrlr *ctrlr) ++{ ++ uint32_t i; ++ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = NULL; ++ struct nvme_bdev_ns *ns = NULL; ++ struct nvme_bdev *nvme_bdev = NULL, *tmp = NULL; ++ ++ ++ pthread_mutex_lock(&g_bdev_nvme_mutex); ++ TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) { ++ if 
(nvme_bdev_ctrlr->ctrlr != ctrlr) { ++ continue; ++ } ++ ++ pthread_mutex_unlock(&g_bdev_nvme_mutex); ++ for (i = 0; i < nvme_bdev_ctrlr->num_ns; i++) { ++ ns = nvme_bdev_ctrlr->namespaces[i]; ++ TAILQ_FOREACH_SAFE(nvme_bdev, &ns->bdevs, tailq, tmp) { ++ nvme_bdev->disk.blocklen = spdk_nvme_ns_get_sector_size(nvme_bdev->nvme_ns->ns); ++ nvme_bdev->disk.blockcnt = spdk_nvme_ns_get_num_sectors(nvme_bdev->nvme_ns->ns); ++ } ++ } ++ return; ++ } ++ pthread_mutex_unlock(&g_bdev_nvme_mutex); ++} ++ ++int ++bdev_nvme_update_ns(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid) ++{ ++ struct spdk_nvme_ctrlr *ctrlr = nvme_bdev_ctrlr->ctrlr; ++ struct nvme_bdev_ns *ns = NULL; ++ ++ if (nvme_bdev_ctrlr == NULL || nsid > nvme_bdev_ctrlr->num_ns) { ++ SPDK_ERRLOG("Parameter error. nsid[%u], the max nsid is[%u]\n", nsid, nvme_bdev_ctrlr->num_ns); ++ return -1; ++ } ++ ++ ns = nvme_bdev_ctrlr->namespaces[nsid - 1]; ++ ++ if (spdk_nvme_ctrlr_is_ocssd_supported(ctrlr)) { ++ ns->type = NVME_BDEV_NS_OCSSD; ++ } else { ++ ns->type = NVME_BDEV_NS_STANDARD; ++ } ++ ++ if (!ns->populated && spdk_nvme_ctrlr_is_active_ns(nvme_bdev_ctrlr->ctrlr, nsid)) { ++ SPDK_NOTICELOG("NSID %u to be added\n", nsid); ++ ns->id = nsid; ++ ns->ctrlr = nvme_bdev_ctrlr; ++ TAILQ_INIT(&ns->bdevs); ++ /* add a new bdev device in this ns */ ++ nvme_ctrlr_populate_namespace(nvme_bdev_ctrlr, ns, NULL); ++ return 0; ++ } ++ ++ if (ns->populated && !spdk_nvme_ctrlr_is_active_ns(nvme_bdev_ctrlr->ctrlr, nsid)) { ++ SPDK_NOTICELOG("NSID %u is removed\n", nsid); ++ nvme_ctrlr_depopulate_namespace(nvme_bdev_ctrlr, ns); ++ return 0; ++ } ++ return -1; ++} ++ ++bool ++spdk_bdev_can_remove(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid) ++{ ++ struct nvme_bdev_ns *ns = NULL; ++ struct nvme_bdev *bdev = NULL, *tmp = NULL; ++ bool empty = false; ++ ++ ns = nvme_bdev_ctrlr->namespaces[nsid - 1]; ++ if (ns == NULL) { ++ return true; ++ } ++ ++ TAILQ_FOREACH_SAFE(bdev, &ns->bdevs, tailq, tmp) { ++ 
pthread_mutex_lock(&bdev->disk.internal.mutex); ++ empty = TAILQ_EMPTY(&bdev->disk.internal.open_descs); ++ /* for each bdev in ns, we need to check if any descs is in tailq */ ++ if (empty) { ++ /* one bdev is empty, check next until all bdev is checked */ ++ bdev->disk.internal.ns_status = SPDK_BDEV_NS_STATUS_REMOVING; ++ pthread_mutex_unlock(&bdev->disk.internal.mutex); ++ } else { ++ /* means at least one bdev is used, so we just quit this process ++ and mark the status is false. */ ++ pthread_mutex_unlock(&bdev->disk.internal.mutex); ++ break; ++ } ++ } ++ return empty; ++} ++ ++void ++spdk_bdev_set_ns_normal(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid) ++{ ++ struct nvme_bdev_ns *ns = NULL; ++ struct nvme_bdev *bdev = NULL, *tmp = NULL; ++ ++ ns = nvme_bdev_ctrlr->namespaces[nsid - 1]; ++ if (ns == NULL) { ++ return; ++ } ++ ++ TAILQ_FOREACH_SAFE(bdev, &ns->bdevs, tailq, tmp) { ++ pthread_mutex_lock(&bdev->disk.internal.mutex); ++ /* set the ns_status to ready case ns delete fail */ ++ if (bdev->disk.internal.ns_status == SPDK_BDEV_NS_STATUS_REMOVING) { ++ bdev->disk.internal.ns_status = SPDK_BDEV_NS_STATUS_READY; ++ } ++ pthread_mutex_unlock(&bdev->disk.internal.mutex); ++ } ++} ++ ++int ++bdev_nvme_queue_cmd_with_md(struct nvme_bdev *bdev, struct spdk_nvme_qpair *qpair, void *driver_ctx, ++ void *buffer, void *metadata, int direction, uint64_t lba_count, uint64_t lba) ++{ ++ int rc; ++ uint32_t io_flags = 0; ++ uint8_t *bdev_io_action = (uint8_t *)driver_ctx; ++ /* filter bit 0&1 of io->pi_action to get pi_action */ ++ uint8_t pi_action = bdev_io_action[SPDK_BDEV_IO_ACTION_PI] & 0x03; ++ uint8_t dif_flag = bdev_io_action[SPDK_BDEV_IO_ACTION_PI]; ++ uint8_t fua = bdev_io_action[SPDK_BDEV_IO_ACTION_FUA]; ++ uint32_t pi_type; ++ ++ spdk_bdev_set_io_location(driver_ctx, (uint8_t)LOCAL_LIBSTORAGE_BDEV_NVME); ++ ++ if (pi_action > IO_NO_PROTECTION) { ++ pi_type = spdk_nvme_ns_get_pi_type(bdev->nvme_ns->ns); ++ if (dif_flag & FLAG_PRCHK) { ++ io_flags 
|= SPDK_NVME_IO_FLAGS_PRCHK_GUARD; ++ } ++ /* type3 not support ref tag */ ++ if (!(dif_flag & FLAG_NO_REF) && (pi_type != SPDK_NVME_FMT_NVM_PROTECTION_TYPE3)) { ++ io_flags |= SPDK_NVME_IO_FLAGS_PRCHK_REFTAG; ++ } ++ if (pi_action == IO_HALF_WAY_PROTECTION) { ++ io_flags |= SPDK_NVME_IO_FLAGS_PRACT; ++ } ++ } ++ ++ if (fua) { ++ io_flags |= SPDK_NVME_IO_FLAGS_FORCE_UNIT_ACCESS; ++ } ++ ++ if (direction == BDEV_DISK_READ) { ++ rc = spdk_nvme_ns_cmd_read_with_md(bdev->nvme_ns->ns, qpair, buffer, metadata, lba, ++ lba_count, bdev_nvme_queued_done, driver_ctx, io_flags, 0, 0); ++ } else { ++ rc = spdk_nvme_ns_cmd_write_with_md(bdev->nvme_ns->ns, qpair, buffer, metadata, lba, ++ lba_count, bdev_nvme_queued_done, driver_ctx, io_flags, 0, 0); ++ } ++ ++ if (rc != 0) { ++ if (rc == -ENOMEM) { ++ SPDK_NOTICELOG("%s failed: rc = %d\n", direction == BDEV_DISK_READ ? "read" : "write", rc); ++ } else { ++ SPDK_ERRLOG("%s failed: rc = %d, qpair is %p\n", direction == BDEV_DISK_READ ? "read" : "write", ++ rc, qpair); ++ } ++ } ++ return rc; ++} ++ ++int ++bdev_nvme_queue_cmd_v_with_md(struct nvme_bdev *bdev, struct spdk_nvme_qpair *qpair, ++ void *driver_ctx, ++ int direction, struct iovec *iov, int iovcnt, uint64_t lba_count, uint64_t lba) ++{ ++ int rc; ++ struct nvme_bdev_io *bio = NULL; ++ uint32_t io_flags = 0; ++ uint8_t *bdev_io_action = (uint8_t *)driver_ctx; ++ /* filter bit 0&1 of io->pi_action to get pi_action */ ++ uint8_t pi_action = bdev_io_action[SPDK_BDEV_IO_ACTION_PI] & 0x03; ++ uint8_t dif_flag = bdev_io_action[SPDK_BDEV_IO_ACTION_PI]; ++ uint8_t fua = bdev_io_action[SPDK_BDEV_IO_ACTION_FUA]; ++ uint32_t pi_type; ++ ++ spdk_bdev_set_io_location(driver_ctx, (uint8_t)LOCAL_LIBSTORAGE_BDEV_NVME); ++ ++ if (pi_action > IO_NO_PROTECTION) { ++ pi_type = spdk_nvme_ns_get_pi_type(bdev->nvme_ns->ns); ++ if (dif_flag & FLAG_PRCHK) { ++ io_flags |= SPDK_NVME_IO_FLAGS_PRCHK_GUARD; ++ } ++ /* type3 not support ref tag */ ++ if (!(dif_flag & FLAG_NO_REF) && (pi_type != 
SPDK_NVME_FMT_NVM_PROTECTION_TYPE3)) { ++ io_flags |= SPDK_NVME_IO_FLAGS_PRCHK_REFTAG; ++ } ++ if (pi_action == IO_HALF_WAY_PROTECTION) { ++ io_flags |= SPDK_NVME_IO_FLAGS_PRACT; ++ } ++ } ++ ++ if (fua) { ++ io_flags |= SPDK_NVME_IO_FLAGS_FORCE_UNIT_ACCESS; ++ } ++ ++ bio = nvme_bdev_io_update_args((struct nvme_bdev_io *)driver_ctx, iov, iovcnt); ++ ++ if (direction == BDEV_DISK_READ) { ++ rc = spdk_nvme_ns_cmd_readv(bdev->nvme_ns->ns, qpair, lba, ++ lba_count, bdev_nvme_queued_done, bio, io_flags, ++ bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge); ++ } else { ++ rc = spdk_nvme_ns_cmd_writev(bdev->nvme_ns->ns, qpair, lba, lba_count, ++ 0, bdev_nvme_queued_done, bio, io_flags, ++ bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge); ++ } ++ ++ if (rc != 0) { ++ if (rc == -ENOMEM) { ++ SPDK_NOTICELOG("%s failed: rc = %d\n", direction == BDEV_DISK_READ ? "readv" : "writev", rc); ++ } else { ++ SPDK_ERRLOG("%s failed: rc = %d, qpair is %p\n", direction == BDEV_DISK_READ ? "read" : "write", rc, ++ qpair); ++ } ++ } ++ return rc; ++} ++ ++struct nvme_bdev_ctrlr * ++bdev_nvme_get_ctrlr_by_bdev_desc(void *bdev_desc) ++{ ++ struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(bdev_desc); ++ struct nvme_bdev *nbdev = (struct nvme_bdev *)bdev->ctxt; ++ if (nbdev == NULL) { ++ return NULL; ++ } ++ return nbdev->nvme_ns->ctrlr; ++} ++ ++int ++bdev_nvme_unmap_blocks(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, void *driver_ctx, ++ struct spdk_nvme_dsm_range *unmap_d, uint16_t unmap_count) ++{ ++ struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); ++ int i; ++ ++ if (unmap_count == 0 || unmap_count > SPDK_NVME_DATASET_MANAGEMENT_MAX_RANGES) { ++ SPDK_ERRLOG("Invalid parameter, unmap count: %u\n", unmap_count); ++ return -EINVAL; ++ } ++ ++ if (unmap_d == NULL) { ++ return -EINVAL; ++ } ++ ++ for (i = 0; i < unmap_count; i++) { ++ if (unmap_d[i].length > SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS) { ++ SPDK_ERRLOG("Invalid parameter, unmap block count: 
%u\n", unmap_d[i].length); ++ return -EINVAL; ++ } ++ unmap_d[i].attributes.raw = 0; ++ } ++ ++ spdk_bdev_set_io_location(driver_ctx, (uint8_t)LOCAL_LIBSTORAGE_BDEV_NVME); ++ return spdk_nvme_ns_cmd_unmap_blocks(nbdev->nvme_ns->ns, nvme_ch->qpair, ++ SPDK_NVME_DSM_ATTR_DEALLOCATE, ++ unmap_d, unmap_count, ++ bdev_nvme_queued_done, driver_ctx); ++} ++ ++void ++spdk_bdev_nvme_remove_cb(void *cb_ctx, void *ctrlr) ++{ ++ remove_cb(cb_ctx, (struct spdk_nvme_ctrlr *)ctrlr); ++} ++ ++void spdk_bdev_fail_ctrlr(const char *traddr) ++{ ++ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; ++ ++ pthread_mutex_lock(&g_bdev_nvme_mutex); ++ TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) { ++ if (strcmp(nvme_bdev_ctrlr->connected_trid->traddr, traddr) == 0) { ++ spdk_nvme_ctrlr_fail(nvme_bdev_ctrlr->ctrlr); ++ remove_cb(NULL, nvme_bdev_ctrlr->ctrlr); ++ return; ++ } ++ } ++} ++ ++int ++spdk_bdev_nvme_create_self(struct spdk_nvme_transport_id *trid, ++ const char *base_name, ++ const char **names, size_t *count, ++ const char *hostnqn) ++{ ++ struct nvme_probe_ctx *probe_ctx; ++ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; ++ struct nvme_bdev_ns *ns; ++ struct nvme_bdev *nvme_bdev; ++ struct nvme_bdev *tmp = NULL; ++ uint32_t i, nsid; ++ size_t j; ++ ++ if (nvme_bdev_ctrlr_get(trid) != NULL) { ++ SPDK_ERRLOG("A controller with the trid (traddr: %s) already exists.\n", trid->traddr); ++ return -1; ++ } ++ ++ probe_ctx = bdev_nvme_create_probe_ctx(trid, base_name, hostnqn); ++ if (probe_ctx == NULL) { ++ SPDK_ERRLOG("Failed to create probe_ctx\n"); ++ return -1; ++ } ++ ++ if (spdk_nvme_probe(trid, probe_ctx, probe_cb, attach_cb, NULL)) { ++ SPDK_ERRLOG("Failed to probe for new devices\n"); ++ free(probe_ctx); ++ return -1; ++ } ++ ++ nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(trid); ++ if (!nvme_bdev_ctrlr) { ++ SPDK_ERRLOG("Failed to find new NVMe controller\n"); ++ free(probe_ctx); ++ return -1; ++ } ++ ++ /* ++ * Report the new bdevs that were created in this call. 
++ * There can be more than one bdev per NVMe controller since one bdev is created per namespace. ++ */ ++ j = 0; ++ for (i = 0; i < nvme_bdev_ctrlr->num_ns; i++) { ++ nsid = i + 1; ++ ns = nvme_bdev_ctrlr->namespaces[nsid - 1]; ++ ++ if (!ns->populated) { ++ continue; ++ } ++ assert(ns->id == nsid); ++ TAILQ_FOREACH_SAFE(nvme_bdev, &ns->bdevs, tailq, tmp) { ++ if (j < *count) { ++ names[j] = nvme_bdev->disk.name; ++ j++; ++ } else { ++ SPDK_ERRLOG("Maximum number of namespaces is %zu.", *count); ++ free(probe_ctx); ++ return -1; ++ } ++ } ++ } ++ ++ *count = j; ++ ++ free(probe_ctx); ++ return 0; ++} +diff --git a/module/bdev/nvme/bdev_nvme_self.h b/module/bdev/nvme/bdev_nvme_self.h +new file mode 100644 +index 0000000..d7cc587 +--- /dev/null ++++ b/module/bdev/nvme/bdev_nvme_self.h +@@ -0,0 +1,43 @@ ++/* ++ * Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 and ++ * only version 2 as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ */ ++ ++void ++bdev_update_ch_timeout(struct nvme_bdev_poll_group *group); ++ ++int ++_bdev_nvme_submit_request_self(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io); ++ ++int ++bdev_nvme_dump_info_json_self(void *ctx, struct spdk_json_write_ctx *w); ++ ++uint16_t ++bdev_nvme_get_io_channel_id(struct spdk_io_channel *ch); ++ ++uint64_t ++bdev_nvme_get_timeout_count(struct spdk_io_channel *ch); ++ ++int ++bdev_nvme_queue_cmd_with_md(struct nvme_bdev *bdev, struct spdk_nvme_qpair *qpair, void *driver_ctx, ++ void *buffer, void *metadata, int direction, uint64_t lba_count, uint64_t lba); ++ ++int ++bdev_nvme_queue_cmd_v_with_md(struct nvme_bdev *bdev, struct spdk_nvme_qpair *qpair, ++ void *driver_ctx, ++ int direction, struct iovec *iov, int iovcnt, uint64_t lba_count, uint64_t lba); ++ ++struct nvme_bdev_ctrlr * ++bdev_nvme_get_ctrlr_by_bdev_desc(void *bdev_desc); ++ ++int ++bdev_nvme_unmap_blocks(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, void *driver_ctx, ++ struct spdk_nvme_dsm_range *unmap_d, uint16_t unmap_count); +diff --git a/module/bdev/nvme/common.h b/module/bdev/nvme/common.h +index b7555d5..81b4009 100644 +--- a/module/bdev/nvme/common.h ++++ b/module/bdev/nvme/common.h +@@ -127,6 +127,10 @@ struct nvme_bdev_poll_group { + uint64_t spin_ticks; + uint64_t start_ticks; + uint64_t end_ticks; ++#ifdef SPDK_CONFIG_APP_RW ++ uint64_t save_start_ticks; ++ uint64_t num_poll_timeout; ++#endif + }; + + typedef void (*spdk_bdev_create_nvme_fn)(void *ctx, size_t bdev_count, int rc); +diff --git a/scripts/setup_self.sh b/scripts/setup_self.sh +new file mode 100755 +index 0000000..9e77c29 +--- /dev/null ++++ b/scripts/setup_self.sh +@@ -0,0 +1,347 @@ ++#!/usr/bin/env bash ++ ++set -e ++ ++rootdir=$(readlink -f $(dirname $0))/.. ++ ++function linux_iter_pci { ++ # Argument is the class code ++ # TODO: More specifically match against only class codes in the grep ++ # step. 
++ lspci -mm -n -D | grep $1 | tr -d '"' | awk -F " " '{print $1}' ++} ++ ++function linux_bind_driver() { ++ bdf="$1" ++ driver_name="$2" ++ old_driver_name="no driver" ++ ven_dev_id=$(lspci -n -s $bdf | cut -d' ' -f3 | sed 's/:/ /') ++ ++ if [ -e "/sys/bus/pci/devices/$bdf/driver" ]; then ++ old_driver_name=$(basename $(readlink /sys/bus/pci/devices/$bdf/driver)) ++ ++ if [ "$driver_name" = "$old_driver_name" ]; then ++ return 0 ++ fi ++ ++ echo "$ven_dev_id" > "/sys/bus/pci/devices/$bdf/driver/remove_id" 2> /dev/null || true ++ echo "$bdf" > "/sys/bus/pci/devices/$bdf/driver/unbind" ++ fi ++ ++ echo "$bdf ($ven_dev_id): $old_driver_name -> $driver_name" ++ ++ echo "$ven_dev_id" > "/sys/bus/pci/drivers/$driver_name/new_id" 2> /dev/null || true ++ echo "$bdf" > "/sys/bus/pci/drivers/$driver_name/bind" 2> /dev/null || true ++} ++ ++function linux_hugetlbfs_mount() { ++ mount | grep ' type hugetlbfs ' | awk '{ print $3 }' ++} ++ ++function is_device_in_except_device_list() { ++ exists_flag=0 ++ if [ $# -gt 1 ]; then ++ except_dev_list=$2 ++ fi ++ ++ for dev in ${except_dev_list[@]} ++ do ++ if [ "$dev" == "$1" ]; then ++ exists_flag=1 ++ fi ++ done ++ echo ${exists_flag} ++} ++ ++function config_linux_device { ++ if [ $# -gt 0 ]; then ++ configlist=$* ++ echo configure devices $configlist ++ else ++ echo "need to specify at least one device to bind uio driver." ++ exit 1 ++ fi ++ driver_name=uio_pci_generic ++ ++ # NVMe ++ modprobe $driver_name || true ++ for bdf in ${configlist[@]}; do ++ existflag=0 ++ for confbdf in $(linux_iter_pci 0108); do ++ if [ "$bdf" == "$confbdf" ]; then ++ linux_bind_driver "$bdf" "$driver_name" ++ existflag=1 ++ break ++ fi ++ done ++ if [ $existflag -eq 0 ]; then ++ echo "nvme device \"$bdf\" is not in present" ++ fi ++ done ++ config_linux_hugepage ++} ++ ++function configure_linux { ++ if [ $# -gt 0 ]; then ++ exceptdevlist=$* ++ echo configure devices except $exceptdevlist ++ fi ++ # Use uio, Not IOMMU. 
++ driver_name=uio_pci_generic ++ ++ # NVMe ++ modprobe $driver_name || true ++ for bdf in $(linux_iter_pci 0108); do ++ need_configure=`is_device_in_except_device_list ${bdf} "${exceptdevlist}"` ++ if [ $need_configure -ne 0 ]; then ++ continue ++ fi ++ linux_bind_driver "$bdf" "$driver_name" ++ done ++ ++ echo "1" > "/sys/bus/pci/rescan" ++ ++ config_linux_hugepage ++} ++ ++function config_linux_hugepage { ++ hugetlbfs_mount=$(linux_hugetlbfs_mount) ++ ++ if [ -z "$hugetlbfs_mount" ]; then ++ hugetlbfs_mount=/mnt/huge ++ echo "Mounting hugetlbfs at $hugetlbfs_mount" ++ mkdir -p "$hugetlbfs_mount" ++ mount -t hugetlbfs nodev "$hugetlbfs_mount" ++ fi ++ echo "$NRHUGE" > /proc/sys/vm/nr_hugepages ++} ++ ++function reset_linux { ++ # NVMe ++ modprobe nvme || true ++ for bdf in $(linux_iter_pci 0108); do ++ linux_bind_driver "$bdf" nvme ++ done ++ ++ echo "1" > "/sys/bus/pci/rescan" ++ ++ hugetlbfs_mount=$(linux_hugetlbfs_mount) ++ rm -f "$hugetlbfs_mount"/spdk*map_* ++} ++ ++function status_linux { ++ echo "NVMe devices" ++ ++ echo -e "BDF\t\tNuma Node\tDriver name\t\tDevice name" ++ for bdf in $(linux_iter_pci 0108); do ++ driver=`grep DRIVER /sys/bus/pci/devices/$bdf/uevent |awk -F"=" '{print $2}'` ++ node=`cat /sys/bus/pci/devices/$bdf/numa_node`; ++ if [ "$driver" = "nvme" ]; then ++ if [ -d "/sys/bus/pci/devices/$bdf/nvme" ]; then ++ name="\t"`ls /sys/bus/pci/devices/$bdf/nvme`; ++ else ++ name="\t"`ls /sys/bus/pci/devices/$bdf/misc`; ++ fi ++ else ++ name="-"; ++ fi ++ echo -e "$bdf\t$node\t\t$driver\t\t$name"; ++ done ++} ++ ++function reset_device_linux { ++ #NVMe ++ if [ $# -gt 0 ]; then ++ resetdevlist=$* ++ echo reset nvme devices $resetdevlist ++ else ++ echo no devices to reset ++ return ++ fi ++ ++ for bdf in ${resetdevlist[@]}; do ++ exist=0 ++ for existbdf in $(linux_iter_pci 0108); do ++ if [[ "$existbdf" == "$bdf" ]]; then ++ exist=1 ++ fi ++ done ++ ++ if [ $exist -eq 0 ]; then ++ echo nvme device \"$bdf\" is not in present ++ continue ++ fi ++ ++ 
linux_bind_driver "$bdf" nvme ++ done ++} ++ ++function reset_all_linux { ++ # NVMe ++ echo "1" > "/sys/bus/pci/rescan" ++ reset_device_linux $(linux_iter_pci 0108) ++ ++ hugetlbfs_mount=$(linux_hugetlbfs_mount) ++ rm -f "$hugetlbfs_mount"/spdk*map_* ++} ++ ++function help_linux { ++ # NVMe ++ echo "" ++ echo "setup.sh" ++ echo "setup.sh config" ++ echo "setup.sh status" ++ echo "setup.sh reset" ++ echo "setup.sh hugepage" ++ echo "setup.sh config except_device=\"pci_addr\"" ++ echo "setup.sh config except_device=\"pci_addr1,pci_addr2,pci_addr3,...\"" ++ echo "setup.sh config_device \"pci_addr\"" ++ echo "setup.sh config_device \"pci_addr1,pci_addr2,pci_addr3,...\"" ++ echo "setup.sh reset_device \"pci_addr\"" ++ echo "setup.sh reset_device \"pci_addr1,pci_addr2,pci_addr3,...\"" ++ echo "setup.sh reset_all" ++ echo "" ++} ++ ++function configure_freebsd { ++ TMP=`mktemp` ++ ++ # NVMe ++ GREP_STR="class=0x010802" ++ ++ AWK_PROG="{if (count > 0) printf \",\"; printf \"%s:%s:%s\",\$2,\$3,\$4; count++}" ++ echo $AWK_PROG > $TMP ++ ++ BDFS=`pciconf -l | grep "${GREP_STR}" | awk -F: -f $TMP` ++ ++ kldunload nic_uio.ko || true ++ kenv hw.nic_uio.bdfs=$BDFS ++ kldload nic_uio.ko ++ rm $TMP ++ ++ kldunload contigmem.ko || true ++ kenv hw.contigmem.num_buffers=$((NRHUGE * 2 / 256)) ++ kenv hw.contigmem.buffer_size=$((256 * 1024 * 1024)) ++ kldload contigmem.ko ++} ++ ++function reset_freebsd { ++ kldunload contigmem.ko || true ++ kldunload nic_uio.ko || true ++} ++ ++function get_slot_id { ++ pciaddr=$1 ++ ++ return_msg=`lspci -vvv -xxx -s "$pciaddr" | grep -i "Slot:"` ++ slot_id=${return_msg##* } ++ ++ echo $slot_id ++} ++ ++function get_except_device_linux { ++ param=$1 ++ if [[ $param == except_device=* ]]; then ++ devstr=${param#*=} ++ OLD_IFS="$IFS" ++ IFS="," ++ expdev=($devstr) ++ IFS=$OLD_IFS ++ fi ++ if [ ${#expdev[@]} -ne 0 ]; then ++ echo ${expdev[@]} ++ fi ++} ++ ++function get_device_linux { ++ devstr=$1 ++ OLD_IFS="$IFS" ++ IFS="," ++ resetdev=($devstr) ++ 
IFS=$OLD_IFS ++ ++ if [ ${#resetdev[@]} -ne 0 ]; then ++ echo ${resetdev[@]} ++ fi ++} ++ ++: ${NRHUGE:=1024} ++ ++username=$1 ++mode=$2 ++ ++if [ "$username" = "reset" -o "$username" = "config" -o "$username" = "status" ]; then ++ mode="$username" ++ username="" ++fi ++ ++if [ "$username" = "reset_device" -o "$username" = "reset_all" -o "$username" = "help" ]; then ++ mode="$username" ++ username="" ++fi ++ ++if [ "$username" = "config_device" -o "$username" = "hugepage" ]; then ++ mode="$username" ++ username="" ++fi ++ ++if [ "$mode" == "" ]; then ++ mode="config" ++fi ++ ++if [ "$username" = "" ]; then ++ username="$SUDO_USER" ++ if [ "$username" = "" ]; then ++ username=`logname 2>/dev/null` || true ++ fi ++fi ++ ++if [ "$mode" == "config" ]; then ++ paramcnt=$# ++ if [ $paramcnt -eq 2 ]; then ++ paramstr=$2 ++ exceptdev=`get_except_device_linux $paramstr` ++ fi ++fi ++ ++if [ "$mode" == "reset_device" ]; then ++ paramcnt=$# ++ if [ $paramcnt -eq 2 ]; then ++ paramstr=$2 ++ resetdev=`get_device_linux $paramstr` ++ fi ++fi ++ ++if [ "$mode" == "config_device" ]; then ++ paramcnt=$# ++ if [ $paramcnt -eq 2 ]; then ++ paramstr=$2 ++ configdev=`get_device_linux $paramstr` ++ fi ++fi ++ ++if [ `uname` = Linux ]; then ++ if [ "$mode" == "config" ]; then ++ configure_linux $exceptdev ++ elif [ "$mode" == "reset" ]; then ++ reset_linux ++ elif [ "$mode" == "status" ]; then ++ status_linux ++ elif [ "$mode" == "reset_device" ]; then ++ reset_device_linux $resetdev ++ elif [ "$mode" == "reset_all" ]; then ++ reset_all_linux ++ elif [ "$mode" == "help" ]; then ++ help_linux ++ elif [ "$mode" == "config_device" ]; then ++ config_linux_device $configdev ++ elif [ "$mode" == "hugepage" ]; then ++ config_linux_hugepage ++ fi ++else ++ if [ "$mode" == "config" ]; then ++ configure_freebsd ++ elif [ "$mode" == "reset" ]; then ++ reset_freebsd ++ fi ++fi +-- +2.33.0 + diff --git a/0022-use-spdk_nvme_ns_cmd_dataset_management-and-delete-s.patch 
b/0022-use-spdk_nvme_ns_cmd_dataset_management-and-delete-s.patch new file mode 100644 index 0000000..9df73bb --- /dev/null +++ b/0022-use-spdk_nvme_ns_cmd_dataset_management-and-delete-s.patch @@ -0,0 +1,122 @@ +From 1447fa25369f107192be8fa9e5f21ec78f19dcf1 Mon Sep 17 00:00:00 2001 +From: sunshihao +Date: Mon, 1 Mar 2021 09:20:10 +0800 +Subject: [PATCH 22/27] use spdk_nvme_ns_cmd_dataset_management and delete + spdk_nvme_ns_cmd_unmap_blocks + +Signed-off-by: sunshihao520 +--- + include/spdk/nvme.h | 33 ----------------------------- + lib/nvme/nvme_ns_cmd.c | 35 ------------------------------- + module/bdev/nvme/bdev_nvme_self.c | 8 +++---- + 3 files changed, 4 insertions(+), 72 deletions(-) + +diff --git a/include/spdk/nvme.h b/include/spdk/nvme.h +index 6393db3..9acfb89 100644 +--- a/include/spdk/nvme.h ++++ b/include/spdk/nvme.h +@@ -3549,39 +3549,6 @@ bool spdk_nvme_ns_pi_md_start(struct spdk_nvme_ns *ns); + bool spdk_nvme_ns_is_dataset_mng_supported(struct spdk_nvme_ns *ns); + uint16_t spdk_nvme_get_qpair_id(struct spdk_nvme_qpair *qpair); + +-/** +- * Submit a data set management request to the specified NVMe namespace. Data set +- * management operations are designed to optimize interaction with the block +- * translation layer inside the device. The most common type of operation is +- * deallocate, which is often referred to as TRIM or UNMAP. +- * +- * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair(). +- * The user must ensure that only one thread submits I/O on a given qpair at any +- * given time. +- * +- * This is a convenience wrapper that will automatically allocate and construct +- * the correct data buffers. Therefore, ranges does not need to be allocated from +- * pinned memory and can be placed on the stack. If a higher performance, zero-copy +- * version of DSM is required, simply build and submit a raw command using +- * spdk_nvme_ctrlr_cmd_io_raw(). 
+- * +- * \param ns NVMe namespace to submit the DSM request +- * \param type A bit field constructed from \ref spdk_nvme_dsm_attribute. +- * \param qpair I/O queue pair to submit the request +- * \param ranges An array of \ref spdk_nvme_dsm_range elements describing the LBAs +- * to operate on. +- * \param num_ranges The number of elements in the ranges array. +- * \param cb_fn Callback function to invoke when the I/O is completed +- * \param cb_arg Argument to pass to the callback function +- * +- * \return 0 if successfully submitted, negated POSIX errno values otherwise. +- */ +-int spdk_nvme_ns_cmd_unmap_blocks(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, +- uint32_t type, +- const struct spdk_nvme_dsm_range *ranges, +- uint16_t num_ranges, +- spdk_nvme_cmd_cb cb_fn, +- void *cb_arg); + /** + * \brief Submits a write I/O to the specified NVMe namespace. + * +diff --git a/lib/nvme/nvme_ns_cmd.c b/lib/nvme/nvme_ns_cmd.c +index 37dcdc2..9b67b8e 100644 +--- a/lib/nvme/nvme_ns_cmd.c ++++ b/lib/nvme/nvme_ns_cmd.c +@@ -1221,38 +1221,3 @@ spdk_nvme_ns_cmd_reservation_report(struct spdk_nvme_ns *ns, + + return nvme_qpair_submit_request(qpair, req); + } +- +-#ifdef SPDK_CONFIG_APP_RW +-int +-spdk_nvme_ns_cmd_unmap_blocks(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, +- uint32_t type, +- const struct spdk_nvme_dsm_range *ranges, uint16_t num_ranges, +- spdk_nvme_cmd_cb cb_fn, void *cb_arg) +-{ +- struct nvme_request *req = NULL; +- struct spdk_nvme_cmd *cmd = NULL; +- struct nvme_payload payload; +- +- if (ranges == NULL) { +- return -EINVAL; +- } +- +- payload = NVME_PAYLOAD_CONTIG((void *)ranges, NULL); +- +- req = nvme_allocate_request(qpair, &payload, num_ranges * sizeof(struct spdk_nvme_dsm_range), +- 0, cb_fn, cb_arg); +- if (req == NULL) { +- return -ENOMEM; +- } +- +- req->user_cb_arg = cb_arg; +- +- cmd = &req->cmd; +- cmd->opc = SPDK_NVME_OPC_DATASET_MANAGEMENT; +- cmd->nsid = ns->id; +- +- cmd->cdw10 = num_ranges - 1; +- cmd->cdw11 = type; 
+- +- return nvme_qpair_submit_request(qpair, req); +-} +diff --git a/module/bdev/nvme/bdev_nvme_self.c b/module/bdev/nvme/bdev_nvme_self.c +index 7371ecb..1419b1f 100644 +--- a/module/bdev/nvme/bdev_nvme_self.c ++++ b/module/bdev/nvme/bdev_nvme_self.c +@@ -565,10 +565,10 @@ bdev_nvme_unmap_blocks(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, void + } + + spdk_bdev_set_io_location(driver_ctx, (uint8_t)LOCAL_LIBSTORAGE_BDEV_NVME); +- return spdk_nvme_ns_cmd_unmap_blocks(nbdev->nvme_ns->ns, nvme_ch->qpair, +- SPDK_NVME_DSM_ATTR_DEALLOCATE, +- unmap_d, unmap_count, +- bdev_nvme_queued_done, driver_ctx); ++ return spdk_nvme_ns_cmd_dataset_management(nbdev->nvme_ns->ns, nvme_ch->qpair, ++ SPDK_NVME_DSM_ATTR_DEALLOCATE, ++ unmap_d, unmap_count, ++ bdev_nvme_queued_done, driver_ctx); + } + + void +-- +2.33.0 + diff --git a/0023-spdk-add-nvme-support-for-HSAK.patch b/0023-spdk-add-nvme-support-for-HSAK.patch new file mode 100644 index 0000000..8a7ce37 --- /dev/null +++ b/0023-spdk-add-nvme-support-for-HSAK.patch @@ -0,0 +1,1651 @@ +From fa58cf908e76613a4c9ea84bc96c83bd92354822 Mon Sep 17 00:00:00 2001 +From: sunshihao +Date: Mon, 1 Mar 2021 10:59:02 +0800 +Subject: [PATCH 23/27] spdk: add nvme support for HSAK + +Signed-off-by: sunshihao +--- + configure | 2 +- + include/spdk/bdev.h | 7 +- + include/spdk/bdev_module.h | 4 +- + include/spdk/nvme.h | 51 +-------- + include/spdk/thread.h | 2 + + lib/bdev/bdev.c | 53 +++++++--- + lib/bdev/bdev_self.c | 36 +++---- + lib/env_dpdk/env.mk | 2 +- + lib/env_dpdk/init.c | 8 +- + lib/event/reactor.c | 13 +-- + lib/nvme/nvme.c | 35 +++---- + lib/nvme/nvme_ctrlr.c | 33 ++++-- + lib/nvme/nvme_ctrlr_cmd.c | 33 +++--- + lib/nvme/nvme_ctrlr_self.c | 3 + + lib/nvme/nvme_internal.h | 5 +- + lib/nvme/nvme_ns_cmd.c | 2 +- + lib/nvme/nvme_pcie.c | 6 +- + lib/nvme/nvme_pcie_common.c | 12 ++- + lib/nvme/nvme_qpair.c | 13 +-- + lib/nvme/nvme_uevent.h | 2 - + lib/thread/thread.c | 21 ++-- + mk/spdk.common.mk | 2 +- + 
module/bdev/nvme/bdev_nvme.c | 50 +++++---- + module/bdev/nvme/bdev_nvme_self.c | 167 ++++++++++++------------------ + module/bdev/nvme/bdev_nvme_self.h | 2 +- + module/bdev/nvme/common.h | 1 + + scripts/setup_self.sh | 11 +- + 27 files changed, 279 insertions(+), 297 deletions(-) + +diff --git a/configure b/configure +index 964322e..01db27a 100644 +--- a/configure ++++ b/configure +@@ -1,4 +1,4 @@ +-#!/usr/bin/env bash ++#!/bin/bash + + set -e + +diff --git a/include/spdk/bdev.h b/include/spdk/bdev.h +index d0284d9..5f30340 100644 +--- a/include/spdk/bdev.h ++++ b/include/spdk/bdev.h +@@ -862,7 +862,7 @@ int spdk_bdev_read_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *c + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + */ + int spdk_bdev_read_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, +- void *buf, void *md, int64_t offset_blocks, uint64_t num_blocks, ++ void *buf, void *md, uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg); + + /** +@@ -1410,12 +1410,13 @@ int spdk_bdev_unmap(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + * negated errno on failure, in which case the callback will not be called. 
+ */ + int spdk_bdev_unmap_multiblocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, +- void *unmap_d, uint16_t unmap_count, ++ void *unmap_d, uint32_t unmap_count, + spdk_bdev_io_completion_cb cb, void *cb_arg); + + void *spdk_bdev_get_channel_group(struct spdk_io_channel *io_ch); + void *spdk_bdev_io_get_pool(size_t nbytes); +-bool spdk_bdev_have_io_in_channel(struct spdk_io_channel *bdevIoCh); ++bool spdk_bdev_have_io_in_channel(struct spdk_io_channel *io_ch); ++int spdk_bdev_get_channel_state(struct spdk_io_channel *io_ch); + #endif + + /** +diff --git a/include/spdk/bdev_module.h b/include/spdk/bdev_module.h +index 55dc980..e901e14 100644 +--- a/include/spdk/bdev_module.h ++++ b/include/spdk/bdev_module.h +@@ -267,8 +267,10 @@ enum reqLocation_E { + }; + + void spdk_bdev_nvme_remove_cb(void *cb_ctx, void *ctrlr); +-void spdk_bdev_fail_ctrlr(const char *traddr); ++void spdk_bdev_fail_ctrlr(void *cb_ctx, void *ctrlr); ++struct spdk_nvme_ctrlr *spdk_nvme_bdev_ctrlr_get(char *trid); + void *nvme_channel_get_group(void *io_ch); ++int nvme_channel_get_state(void *io_ch); + #endif + + /** bdev I/O completion status */ +diff --git a/include/spdk/nvme.h b/include/spdk/nvme.h +index 9acfb89..fb2e5f9 100644 +--- a/include/spdk/nvme.h ++++ b/include/spdk/nvme.h +@@ -3495,7 +3495,7 @@ bool spdk_nvme_ctrlr_is_format_supported(struct spdk_nvme_ctrlr *ctrlr); + bool spdk_nvme_ctrlr_is_format_all_ns(struct spdk_nvme_ctrlr *ctrlr); + bool spdk_nvme_ctrlr_is_directive_supported(struct spdk_nvme_ctrlr *ctrlr); + bool spdk_nvme_ctrlr_is_streams_supported(struct spdk_nvme_ctrlr *ctrlr); +-int32_t spdk_nvme_ctrlr_identify_directives(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, ++int32_t spdk_nvme_ctrlr_identify_directives(struct spdk_nvme_ctrlr *ctrlr, uint16_t nsid, + void *payload); + int32_t spdk_nvme_ctrlr_enable_streams(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid); + int32_t spdk_nvme_ctrlr_ret_streams_param(struct spdk_nvme_ctrlr *ctrlr, void *payload); +@@ 
-3523,58 +3523,13 @@ int spdk_bdev_nvme_create_self(struct spdk_nvme_transport_id *trid, const char * + const char **names, size_t *count, const char *hostnqn); + + int spdk_nvme_detach_ublock(struct spdk_nvme_ctrlr *ctrlr); +-void spdk_nvme_ctrlr_update_unvmcap(struct spdk_nvme_ctrlr *ctrlr); +- +-#define SPDK_NVME_UEVENT_SUBSYSTEM_UIO 1 +-#define SPDK_NVME_UEVENT_SUBSYSTEM_NVME 2 +- +-enum spdk_nvme_uevent_action { +- SPDK_NVME_UEVENT_ADD = 0, +- SPDK_NVME_UEVENT_REMOVE = 1, +-}; +- +-struct spdk_uevent { +- /* remove or add */ +- enum spdk_nvme_uevent_action action; +- int subsystem; +- /* pci address of device */ +- char traddr[SPDK_NVMF_TRADDR_MAX_LEN + 1]; +-}; +- +-int nvme_uevent_connect(void); +-int nvme_get_uevent(int fd, struct spdk_uevent *uevent); +-int nvme_get_uevent_block(int fd, struct spdk_uevent *uevent); + int32_t spdk_rebind_driver(char *pci_addr, char *driver_name); ++void spdk_nvme_ctrlr_update_unvmcap(struct spdk_nvme_ctrlr *ctrlr); ++void spdk_nvme_ctrlr_fail_hotplug(struct spdk_nvme_ctrlr *ctrlr); + bool spdk_nvme_ns_pi_md_start(struct spdk_nvme_ns *ns); + bool spdk_nvme_ns_is_dataset_mng_supported(struct spdk_nvme_ns *ns); + uint16_t spdk_nvme_get_qpair_id(struct spdk_nvme_qpair *qpair); + +-/** +- * \brief Submits a write I/O to the specified NVMe namespace. 
+- * +- * \param ns NVMe namespace to submit the write I/O +- * \param qpair I/O queue pair to submit the request +- * \param lba starting LBA to write the data +- * \param lba_count length (in sectors) for the write operation +- * \param streamId The stream id for write I/O +- * \param cb_fn callback function to invoke when the I/O is completed +- * \param cb_arg argument to pass to the callback function +- * \param io_flags set flags, defined in nvme_spec.h, for this I/O +- * \param reset_sgl_fn callback function to reset scattered payload +- * \param next_sge_fn callback function to iterate each scattered +- * payload memory segment +- * +- * \return 0 if successfully submitted, ENOMEM if an nvme_request +- * structure cannot be allocated for the I/O request +- * +- * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair(). +- * The user must ensure that only one thread submits I/O on a given qpair at any given time. +- */ +-int spdk_nvme_ns_cmd_writev_stream(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, +- uint64_t lba, uint32_t lba_count, uint16_t streamId, +- spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags, +- spdk_nvme_req_reset_sgl_cb reset_sgl_fn, +- spdk_nvme_req_next_sge_cb next_sge_fn); + #endif + + /* +diff --git a/include/spdk/thread.h b/include/spdk/thread.h +index 7c52433..9fea1bd 100644 +--- a/include/spdk/thread.h ++++ b/include/spdk/thread.h +@@ -73,6 +73,8 @@ void spdk_reactors_use(bool useOrNot); + bool spdk_get_reactor_type(void); + + void spdk_set_thread_exited(struct spdk_thread *thread); ++ ++uint32_t spdk_get_channel_ref(void *io_ch); + #endif + + /** +diff --git a/lib/bdev/bdev.c b/lib/bdev/bdev.c +index 1d8ce99..6daaef9 100644 +--- a/lib/bdev/bdev.c ++++ b/lib/bdev/bdev.c +@@ -1546,7 +1546,11 @@ void + spdk_bdev_module_finish_done(void) + { + if (spdk_get_thread() != g_fini_thread) { ++#ifndef SPDK_CONFIG_APP_RW + spdk_thread_send_msg(g_fini_thread, bdev_module_finish_iter, NULL); ++#else ++ 
bdev_module_finish_iter(NULL); ++#endif + } else { + bdev_module_finish_iter(NULL); + } +@@ -3096,10 +3100,12 @@ bdev_channel_destroy(void *io_device, void *ctx_buf) + SPDK_DEBUGLOG(bdev, "Destroying channel %p for bdev %s on thread %p\n", ch, ch->bdev->name, + spdk_get_thread()); + ++#ifndef SPDK_CONFIG_APP_RW + /* This channel is going away, so add its statistics into the bdev so that they don't get lost. */ + pthread_mutex_lock(&ch->bdev->internal.mutex); + bdev_io_stat_add(&ch->bdev->internal.stat, &ch->stat); + pthread_mutex_unlock(&ch->bdev->internal.mutex); ++#endif + + mgmt_ch = shared_resource->mgmt_ch; + +@@ -3592,7 +3598,7 @@ bdev_build_contig_io(uint8_t type, void *buf, void *md_buf, uint64_t offset_bloc + + static int + bdev_read_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, void *buf, +- void *md_buf, int64_t offset_blocks, uint64_t num_blocks, ++ void *md_buf, uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg) + { + struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc); +@@ -3656,20 +3662,22 @@ spdk_bdev_read_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + + int + spdk_bdev_read_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, +- void *buf, void *md_buf, int64_t offset_blocks, uint64_t num_blocks, ++ void *buf, void *md_buf, uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg) + { ++#ifndef SPDK_CONFIG_APP_RW + struct iovec iov = { + .iov_base = buf, + }; +-#ifndef SPDK_CONFIG_APP_RW ++ + if (!spdk_bdev_is_md_separate(spdk_bdev_desc_get_bdev(desc))) { + return -EINVAL; + } +-#endif ++ + if (!_bdev_io_check_md_buf(&iov, md_buf)) { + return -EINVAL; + } ++#endif + + return bdev_read_blocks_with_md(desc, ch, buf, md_buf, offset_blocks, num_blocks, + cb, cb_arg); +@@ -3835,19 +3843,19 @@ spdk_bdev_write_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_chann + void *buf, void *md_buf, uint64_t 
offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg) + { ++#ifndef SPDK_CONFIG_APP_RW + struct iovec iov = { + .iov_base = buf, + }; + +-#ifndef SPDK_CONFIG_APP_RW + if (!spdk_bdev_is_md_separate(spdk_bdev_desc_get_bdev(desc))) { + return -EINVAL; + } +-#endif ++ + if (!_bdev_io_check_md_buf(&iov, md_buf)) { + return -EINVAL; + } +- ++#endif + return bdev_write_blocks_with_md(desc, ch, buf, md_buf, offset_blocks, num_blocks, + cb, cb_arg); + } +@@ -5158,12 +5166,14 @@ bdev_io_complete(void *ctx) + bdev_io->internal.io_submit_ch = NULL; + } + ++ if (bdev_io->internal.in_submit_request) { ++ bdev_io->internal.in_submit_request = false; ++ } + /* + * Defer completion to avoid potential infinite recursion if the + * user's completion callback issues a new I/O. + */ +- spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io), +- bdev_io_complete, bdev_io); ++ bdev_io_complete(bdev_io); + return; + } + +@@ -5214,7 +5224,7 @@ bdev_io_complete(void *ctx) + break; + } + #ifdef SPDK_CONFIG_APP_RW +- bdev_io_stat_update(bdev_io, tsc, &bdev_io->internal.ch->stat); ++ bdev_io_stat_update(bdev_io, tsc_diff, &bdev_io->internal.ch->stat); + #endif + } + +@@ -5647,10 +5657,12 @@ bdev_start(struct spdk_bdev *bdev) + SPDK_DEBUGLOG(bdev, "Inserting bdev %s into list\n", bdev->name); + TAILQ_INSERT_TAIL(&g_bdev_mgr.bdevs, bdev, internal.link); + ++#ifndef SPDK_CONFIG_APP_RW + /* Examine configuration before initializing I/O */ + bdev_examine(bdev); + + spdk_bdev_wait_for_examine(bdev_start_finished, bdev); ++#endif + } + + int +@@ -5729,7 +5741,14 @@ bdev_unregister_unsafe(struct spdk_bdev *bdev) + */ + desc->refs++; + pthread_mutex_unlock(&desc->mutex); +- spdk_thread_send_msg(desc->thread, _remove_notify, desc); ++ } ++ ++ TAILQ_FOREACH_SAFE(desc, &bdev->internal.open_descs, link, tmp) { ++ pthread_mutex_unlock(&bdev->internal.mutex); ++ pthread_mutex_unlock(&g_bdev_mgr.mutex); ++ _remove_notify(desc); ++ pthread_mutex_lock(&bdev->internal.mutex); ++ 
pthread_mutex_lock(&g_bdev_mgr.mutex); + } + + /* If there are no descriptors, proceed removing the bdev */ +@@ -5794,7 +5813,6 @@ static int + bdev_start_qos(struct spdk_bdev *bdev) + { + struct set_qos_limit_ctx *ctx; +- + /* Enable QoS */ + if (bdev->internal.qos && bdev->internal.qos->thread == NULL) { + ctx = calloc(1, sizeof(*ctx)); +@@ -6010,7 +6028,6 @@ spdk_bdev_close(struct spdk_bdev_desc *desc) + if (bdev->internal.status == SPDK_BDEV_STATUS_REMOVING && TAILQ_EMPTY(&bdev->internal.open_descs)) { + rc = bdev_unregister_unsafe(bdev); + pthread_mutex_unlock(&bdev->internal.mutex); +- + if (rc == 0) { + bdev_fini(bdev); + } +@@ -7054,6 +7071,16 @@ spdk_bdev_get_channel_group(struct spdk_io_channel *io_ch) + return nvme_channel_get_group(nvme_io_ch); + } + ++int ++spdk_bdev_get_channel_state(struct spdk_io_channel *io_ch) ++{ ++ struct spdk_bdev_channel *ch = spdk_io_channel_get_ctx(io_ch); ++ struct spdk_io_channel *under_io_ch = ch->channel; ++ void *nvme_io_ch = spdk_io_channel_get_ctx(under_io_ch); ++ ++ return nvme_channel_get_state(nvme_io_ch); ++} ++ + bool + spdk_bdev_have_io_in_channel(struct spdk_io_channel *io_ch) + { +diff --git a/lib/bdev/bdev_self.c b/lib/bdev/bdev_self.c +index 7050c30..c5b92a3 100644 +--- a/lib/bdev/bdev_self.c ++++ b/lib/bdev/bdev_self.c +@@ -30,10 +30,8 @@ struct libstorage_bdev_io_stat *g_io_stat_map; + int32_t g_libstorage_iostat = 0; + int32_t g_polltime_threshold = 0; + +-void +-spdk_bdev_init_iostat(struct spdk_bdev_channel *ch, struct spdk_bdev *bdev, +- struct spdk_io_channel *io_ch, +- struct spdk_bdev_io_stat *stat) ++void spdk_bdev_init_iostat(struct spdk_bdev_channel *ch, struct spdk_bdev *bdev, ++ struct spdk_io_channel *io_ch, struct spdk_bdev_io_stat *stat) + { + int i = 0; + bool find = false; +@@ -95,9 +93,8 @@ spdk_bdev_init_iostat(struct spdk_bdev_channel *ch, struct spdk_bdev *bdev, + stat->interval_tsc = spdk_get_ticks_hz() / 10; + } + +-void +-spdk_bdev_destroy_iostat(struct spdk_bdev_channel *ch, struct 
spdk_bdev *bdev, +- struct spdk_io_channel *io_ch) ++void spdk_bdev_destroy_iostat(struct spdk_bdev_channel *ch, struct spdk_bdev *bdev, ++ struct spdk_io_channel *io_ch) + { + int i = 0; + uint16_t channel_id; +@@ -127,10 +124,9 @@ spdk_bdev_destroy_iostat(struct spdk_bdev_channel *ch, struct spdk_bdev *bdev, + } + } + +-int +-spdk_bdev_unmap_multiblocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, +- void *unmap_d, uint16_t unmap_count, +- spdk_bdev_io_completion_cb cb, void *cb_arg) ++int spdk_bdev_unmap_multiblocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, ++ void *unmap_d, uint32_t unmap_count, ++ spdk_bdev_io_completion_cb cb, void *cb_arg) + { + struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc); + struct spdk_bdev_io *bdev_io = NULL; +@@ -152,38 +148,38 @@ spdk_bdev_unmap_multiblocks(struct spdk_bdev_desc *desc, struct spdk_io_channel + return 0; + } + +-void +-bdev_io_stat_update(struct spdk_bdev_io *bdev_io, uint64_t tsc, struct spdk_bdev_io_stat *stat) ++void bdev_io_stat_update(struct spdk_bdev_io *bdev_io, uint64_t tsc_diff, ++ struct spdk_bdev_io_stat *stat) + { + switch (bdev_io->type) { + case SPDK_BDEV_IO_TYPE_READ_NVME: + stat->bytes_read += bdev_io->u.contig.nbytes + bdev_io->u.contig.md_len; + stat->num_read_ops++; +- stat->read_latency_ticks += (tsc - bdev_io->internal.submit_tsc); ++ stat->read_latency_ticks += tsc_diff; + break; + case SPDK_BDEV_IO_TYPE_WRITE_NVME: + stat->bytes_written += bdev_io->u.contig.nbytes + bdev_io->u.contig.md_len; + stat->num_write_ops++; +- stat->write_latency_ticks += (tsc - bdev_io->internal.submit_tsc); ++ stat->write_latency_ticks += tsc_diff; + break; + case SPDK_BDEV_IO_TYPE_READV_NVME: + stat->bytes_read += bdev_io->u.bdev.nbytes; + stat->num_read_ops++; +- stat->read_latency_ticks += (tsc - bdev_io->internal.submit_tsc); ++ stat->read_latency_ticks += tsc_diff; + break; + case SPDK_BDEV_IO_TYPE_WRITEV_NVME: + stat->bytes_written += bdev_io->u.bdev.nbytes; + 
stat->num_write_ops++; +- stat->write_latency_ticks += (tsc - bdev_io->internal.submit_tsc); ++ stat->write_latency_ticks += tsc_diff; + break; + default: + break; + } + } + +-void +-bdev_update_iostat_map(struct spdk_bdev_io *bdev_io, uint64_t tsc, struct spdk_bdev_io_stat *stat, +- struct spdk_io_channel *channel, uint64_t io_outstanding) ++void bdev_update_iostat_map(struct spdk_bdev_io *bdev_io, uint64_t tsc, ++ struct spdk_bdev_io_stat *stat, ++ struct spdk_io_channel *channel, uint64_t io_outstanding) + { + uint64_t num_poll_timeout; + +diff --git a/lib/env_dpdk/env.mk b/lib/env_dpdk/env.mk +index 292dd91..1e4f63c 100644 +--- a/lib/env_dpdk/env.mk ++++ b/lib/env_dpdk/env.mk +@@ -120,7 +120,7 @@ endif + endif + + DPDK_SHARED_LIB = $(DPDK_LIB_LIST:%=$(DPDK_ABS_DIR)/lib/lib%.so) +-DPDK_STATIC_LIB = $(DPDK_LIB_LIST:%=$(DPDK_ABS_DIR)/lib/lib%.a) ++DPDK_STATIC_LIB = $(DPDK_LIB_LIST:%=/usr/lib64/lib%.a) + DPDK_SHARED_LIB_LINKER_ARGS = $(call add_no_as_needed,$(DPDK_SHARED_LIB)) + DPDK_STATIC_LIB_LINKER_ARGS = $(call add_whole_archive,$(DPDK_STATIC_LIB)) + +diff --git a/lib/env_dpdk/init.c b/lib/env_dpdk/init.c +index 3bb713d..1c18a8b 100644 +--- a/lib/env_dpdk/init.c ++++ b/lib/env_dpdk/init.c +@@ -561,12 +561,12 @@ spdk_env_init(const struct spdk_env_opts *opts) + return -EINVAL; + } + +- SPDK_PRINTF("Starting %s / %s initialization...\n", SPDK_VERSION_STRING, rte_version()); +- SPDK_PRINTF("[ DPDK EAL parameters: "); ++ printf("Starting %s / %s initialization...\n", SPDK_VERSION_STRING, rte_version()); ++ printf("[ DPDK EAL parameters: "); + for (i = 0; i < g_eal_cmdline_argcount; i++) { +- SPDK_PRINTF("%s ", g_eal_cmdline[i]); ++ printf("%s ", g_eal_cmdline[i]); + } +- SPDK_PRINTF("]\n"); ++ printf("]\n"); + + /* DPDK rearranges the array we pass to it, so make a copy + * before passing so we can still free the individual strings +diff --git a/lib/event/reactor.c b/lib/event/reactor.c +index 3eb8799..9d92875 100644 +--- a/lib/event/reactor.c ++++ 
b/lib/event/reactor.c +@@ -55,6 +55,7 @@ + #endif + + #define SPDK_EVENT_BATCH_SIZE 8 ++#define SPDK_EVENT_MAX_BATCH_SIZE 32 + + #ifdef SPDK_CONFIG_APP_RW + struct spdk_iodev_thread_info lcore_thread_info[RTE_MAX_LCORE]; +@@ -262,7 +263,7 @@ spdk_reactors_init(void) + sp = spdk_conf_find_section(NULL, "Reactor"); + if (sp != 0) { + g_reactor_batch_size = spdk_conf_section_get_intval(sp, "BatchSize"); +- if (g_reactor_batch_size <= 0 || g_reactor_batch_size > SPDK_EVENT_BATCH_SIZE) { ++ if (g_reactor_batch_size <= 0 || g_reactor_batch_size > SPDK_EVENT_MAX_BATCH_SIZE) { + g_reactor_batch_size = SPDK_EVENT_BATCH_SIZE; + } + syslog(LOG_INFO, "BatchSize is set to %d\n", g_reactor_batch_size); +@@ -550,7 +551,7 @@ static inline uint32_t + event_queue_run_batch(struct spdk_reactor *reactor) + { + unsigned count, i; +- void *events[SPDK_EVENT_BATCH_SIZE]; ++ void *events[SPDK_EVENT_MAX_BATCH_SIZE]; + struct spdk_thread *thread; + struct spdk_lw_thread *lw_thread; + +@@ -969,9 +970,6 @@ reactor_run(void *arg) + } + + if (g_reactor_state != SPDK_REACTOR_STATE_RUNNING) { +-#ifdef SPDK_CONFIG_APP_RW +- lcore_thread_info[reactor->lcore].state = SPDK_THREAD_STATE_EXITED; +-#endif + break; + } + } +@@ -1003,7 +1001,10 @@ reactor_run(void *arg) + } + } + } +- ++#ifdef SPDK_CONFIG_APP_RW ++ /* When all thread in reactor is finish, inform libstorage to release resource. 
*/ ++ lcore_thread_info[reactor->lcore].state = SPDK_THREAD_STATE_EXITED; ++#endif + return 0; + } + +diff --git a/lib/nvme/nvme.c b/lib/nvme/nvme.c +index fca2f41..b0cc321 100644 +--- a/lib/nvme/nvme.c ++++ b/lib/nvme/nvme.c +@@ -39,6 +39,7 @@ + #include "spdk/nvme.h" + #include "spdk_internal/debug.h" + #include "spdk/bdev_module.h" ++#include + + #define SPDK_NVME_DRIVER_NAME "spdk_nvme_driver" + +@@ -100,6 +101,9 @@ static void admin_timer_timeout(void) + + static void *nvme_ctrlr_run_admin_timer(void *arg) + { ++#if defined(__linux__) ++ prctl(PR_SET_NAME, "nvme-admin", 0, 0, 0); ++#endif + sleep(20); + + while (1) { +@@ -561,6 +565,11 @@ nvme_request_check_timeout(struct nvme_request *req, uint16_t cid, + struct spdk_nvme_ctrlr_process *active_proc, + uint64_t now_tick) + { ++ if (req == NULL) { ++ SPDK_WARNLOG("Get invalid req from tracker!\n"); ++ return 1; ++ } ++ + struct spdk_nvme_qpair *qpair = req->qpair; + struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; + +@@ -599,26 +608,16 @@ nvme_request_check_timeout(struct nvme_request *req, uint16_t cid, + nvme_qpair_is_admin_queue(qpair) ? NULL : qpair, + cid); + #else +- if (!nvme_qpair_is_admin_queue(qpair) && (req->cmd.opc == SPDK_NVME_OPC_WRITE || +- req->cmd.opc == SPDK_NVME_OPC_READ)) { +- SPDK_WARNLOG("IO timeout, OP[%u] NS[%u] LBA[%lu].\n", req->cmd.opc, req->cmd.nsid, +- *(uint64_t *)&req->cmd.cdw10); +- } else { +- SPDK_WARNLOG("%s Command[%u] timeout.\n", nvme_qpair_is_admin_queue(qpair) ? +- "Admin" : "IO", req->cmd.opc); +- } +- if (req->timed_out) { +- /* Reset the controller if the command was already timed out. */ +- SPDK_WARNLOG("IO Command[%u] timeout again, reset controller.\n", cid); +- active_proc->timeout_cb_fn(active_proc->timeout_cb_arg, ctrlr, NULL, cid); +- } else { ++ if (!req->timed_out) { + req->timed_out = true; +- active_proc->timeout_cb_fn(active_proc->timeout_cb_arg, ctrlr, +- nvme_qpair_is_admin_queue(qpair) ? NULL : qpair, +- cid); +- /* Timing again. 
Reset the controller if it times out again */ +- req->submit_tick = spdk_get_ticks(); ++ SPDK_WARNLOG("%s Command[%u] timeout. ctrlr=%p qpair=%p cid=%u\n", ++ nvme_qpair_is_admin_queue(qpair) ? "Admin" : "IO", ++ req->cmd.opc, ctrlr, qpair, cid); + } ++ active_proc->timeout_cb_fn(active_proc->timeout_cb_arg, ctrlr, ++ nvme_qpair_is_admin_queue(qpair) ? NULL : qpair, cid); ++ /* Update submit tick to reduce timeout num. */ ++ req->submit_tick = spdk_get_ticks(); + #endif + return 0; + } +diff --git a/lib/nvme/nvme_ctrlr.c b/lib/nvme/nvme_ctrlr.c +index fa28f07..27468a7 100644 +--- a/lib/nvme/nvme_ctrlr.c ++++ b/lib/nvme/nvme_ctrlr.c +@@ -34,6 +34,7 @@ + #include "spdk/stdinc.h" + + #include "nvme_internal.h" ++#include "nvme_pcie_internal.h" + #include "nvme_io_msg.h" + + #include "spdk/env.h" +@@ -512,7 +513,6 @@ spdk_nvme_ctrlr_free_io_qpair(struct spdk_nvme_qpair *qpair) + } + + ctrlr = qpair->ctrlr; +- + if (qpair->in_completion_context) { + /* + * There are many cases where it is convenient to delete an io qpair in the context +@@ -543,12 +543,16 @@ spdk_nvme_ctrlr_free_io_qpair(struct spdk_nvme_qpair *qpair) + * exits unexpectedly. In that case, we must not try to abort any reqs associated + * with that qpair, since the callbacks will also be foreign to this process. 
+ */ ++ ++ if (ctrlr == NULL) { ++ return 0; ++ } ++ + if (qpair->active_proc == nvme_ctrlr_get_current_process(ctrlr)) { + nvme_qpair_abort_reqs(qpair, 1); + } + + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); +- + nvme_ctrlr_proc_remove_io_qpair(qpair); + + TAILQ_REMOVE(&ctrlr->active_io_qpairs, qpair, tailq); +@@ -907,6 +911,16 @@ spdk_nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr) + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + } + ++#ifdef SPDK_CONFIG_APP_RW ++void ++spdk_nvme_ctrlr_fail_hotplug(struct spdk_nvme_ctrlr *ctrlr) ++{ ++ nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); ++ nvme_ctrlr_fail(ctrlr, true); ++ nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); ++} ++#endif ++ + static void + nvme_ctrlr_shutdown_async(struct spdk_nvme_ctrlr *ctrlr, + struct nvme_ctrlr_detach_ctx *ctx) +@@ -1102,7 +1116,6 @@ nvme_ctrlr_disable(struct spdk_nvme_ctrlr *ctrlr) + return 0; + } + +-#ifdef DEBUG + static const char * + nvme_ctrlr_state_string(enum nvme_ctrlr_state state) + { +@@ -1182,7 +1195,6 @@ nvme_ctrlr_state_string(enum nvme_ctrlr_state state) + } + return "unknown"; + }; +-#endif /* DEBUG */ + + static void + nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state, +@@ -1209,12 +1221,12 @@ nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state, + } + + ctrlr->state_timeout_tsc = timeout_in_ticks + now_ticks; +- SPDK_DEBUGLOG(nvme, "setting state to %s (timeout %" PRIu64 " ms)\n", +- nvme_ctrlr_state_string(ctrlr->state), timeout_in_ms); ++ SPDK_NOTICELOG("setting state to %s (timeout %" PRIu64 " ms)\n", ++ nvme_ctrlr_state_string(ctrlr->state), timeout_in_ms); + return; + inf: +- SPDK_DEBUGLOG(nvme, "setting state to %s (no timeout)\n", +- nvme_ctrlr_state_string(ctrlr->state)); ++ SPDK_NOTICELOG("setting state to %s (no timeout)\n", ++ nvme_ctrlr_state_string(ctrlr->state)); + ctrlr->state_timeout_tsc = NVME_TIMEOUT_INFINITE; + } + +@@ -2729,7 +2741,6 @@ struct spdk_nvme_ctrlr_process * + 
nvme_ctrlr_get_process(struct spdk_nvme_ctrlr *ctrlr, pid_t pid) + { + struct spdk_nvme_ctrlr_process *active_proc; +- + TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) { + if (active_proc->pid == pid) { + return active_proc; +@@ -4135,6 +4146,10 @@ spdk_nvme_ctrlr_alloc_qid(struct spdk_nvme_ctrlr *ctrlr) + void + spdk_nvme_ctrlr_free_qid(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid) + { ++ if (ctrlr->free_io_qids == NULL) { ++ return; ++ } ++ + assert(qid <= ctrlr->opts.num_io_queues); + + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); +diff --git a/lib/nvme/nvme_ctrlr_cmd.c b/lib/nvme/nvme_ctrlr_cmd.c +index d335bc6..7a7e625 100644 +--- a/lib/nvme/nvme_ctrlr_cmd.c ++++ b/lib/nvme/nvme_ctrlr_cmd.c +@@ -581,16 +581,18 @@ nvme_ctrlr_retry_queued_abort(struct spdk_nvme_ctrlr *ctrlr) + rc = nvme_ctrlr_submit_admin_request(ctrlr, next); + if (rc < 0) { + SPDK_ERRLOG("Failed to submit queued abort.\n"); +-#ifndef SPDK_CONFIG_APP_RW ++#ifdef SPDK_CONFIG_APP_RW ++ /* If submit abort fail, free all req in queued aborts */ ++ ctrlr->outstanding_aborts--; ++ nvme_free_request(next); ++ goto free; ++#else + memset(&next->cpl, 0, sizeof(next->cpl)); + next->cpl.status.sct = SPDK_NVME_SCT_GENERIC; + next->cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; + next->cpl.status.dnr = 1; + nvme_complete_request(next->cb_fn, next->cb_arg, next->qpair, next, &next->cpl); + nvme_free_request(next); +-#else +- nvme_free_request(next); +- break; + #endif + } else { + /* If the first abort succeeds, stop iterating. 
*/ +@@ -598,17 +600,13 @@ nvme_ctrlr_retry_queued_abort(struct spdk_nvme_ctrlr *ctrlr) + } + } + ++ return; + #ifdef SPDK_CONFIG_APP_RW +- nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); +- if (rc < 0) { +- /* If abort fail, free all of the queued abort requests */ +- STAILQ_FOREACH_SAFE(next, &ctrlr->queued_aborts, stailq, tmp) { +- STAILQ_REMOVE_HEAD(&ctrlr->queued_aborts, stailq); +- nvme_free_request(next); +- ctrlr->outstanding_aborts--; +- } ++free: ++ STAILQ_FOREACH_SAFE(next, &ctrlr->queued_aborts, stailq, tmp) { ++ STAILQ_REMOVE_HEAD(&ctrlr->queued_aborts, stailq); ++ nvme_free_request(next); + } +- nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + #endif + } + +@@ -635,9 +633,10 @@ nvme_ctrlr_cmd_abort_cpl(void *ctx, const struct spdk_nvme_cpl *cpl) + ctrlr = req->qpair->ctrlr; + + ctrlr->outstanding_aborts--; +- nvme_ctrlr_retry_queued_abort(ctrlr); +- ++ /* If abort is failed, just reset the ctrlr. */ + req->user_cb_fn(req->user_cb_arg, cpl); ++ ++ nvme_ctrlr_retry_queued_abort(ctrlr); + } + + int +@@ -1006,7 +1005,9 @@ nvme_ctrlr_cmd_directive(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, + cmd->opc = opc_type; + cmd->nsid = nsid; + +- cmd->cdw10 = (payload_size >> 2) - 1; ++ if (payload_size != 0) { ++ cmd->cdw10 = (payload_size >> 2) - 1; ++ } + cmd->cdw11_bits.directive.doper = doper; + cmd->cdw11_bits.directive.dtype = dtype; + cmd->cdw11_bits.directive.dspec = dspec; +diff --git a/lib/nvme/nvme_ctrlr_self.c b/lib/nvme/nvme_ctrlr_self.c +index 4ac1925..8adabfc 100644 +--- a/lib/nvme/nvme_ctrlr_self.c ++++ b/lib/nvme/nvme_ctrlr_self.c +@@ -224,5 +224,8 @@ int32_t spdk_nvme_ctrlr_identify_directives(struct spdk_nvme_ctrlr *ctrlr, uint1 + + uint16_t spdk_nvme_get_qpair_id(struct spdk_nvme_qpair *qpair) + { ++ if (qpair == NULL) { ++ return -1; ++ } + return qpair->id; + } +diff --git a/lib/nvme/nvme_internal.h b/lib/nvme/nvme_internal.h +index 6934f9f..31328f0 100644 +--- a/lib/nvme/nvme_internal.h ++++ b/lib/nvme/nvme_internal.h +@@ -457,6 +457,8 @@ struct 
spdk_nvme_qpair { + const struct spdk_nvme_transport *transport; + + uint8_t transport_failure_reason: 2; ++ ++ uint32_t disconnected_time; + }; + + struct spdk_nvme_poll_group { +@@ -1069,10 +1071,9 @@ typedef int (*spdk_nvme_parse_ana_log_page_cb)( + const struct spdk_nvme_ana_group_descriptor *desc, void *cb_arg); + int nvme_ctrlr_parse_ana_log_page(struct spdk_nvme_ctrlr *ctrlr, + spdk_nvme_parse_ana_log_page_cb cb_fn, void *cb_arg); +- ++bool nvme_qpair_check_enabled(struct spdk_nvme_qpair *qpair); + #ifdef SPDK_CONFIG_APP_RW + void nvme_ctrlr_destruct_ublock(struct spdk_nvme_ctrlr *ctrlr); +-void nvme_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr); + #endif + + static inline struct nvme_request * +diff --git a/lib/nvme/nvme_ns_cmd.c b/lib/nvme/nvme_ns_cmd.c +index 9b67b8e..b8d9a90 100644 +--- a/lib/nvme/nvme_ns_cmd.c ++++ b/lib/nvme/nvme_ns_cmd.c +@@ -462,7 +462,7 @@ _nvme_ns_cmd_rw(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + uint32_t sector_size = _nvme_get_host_buffer_sector_size(ns, io_flags); + uint32_t sectors_per_max_io = ns->sectors_per_max_io; + uint32_t sectors_per_stripe = ns->sectors_per_stripe; +- int rc; ++ int rc = 0; + + req = nvme_allocate_request(qpair, payload, lba_count * sector_size, lba_count * ns->md_size, + cb_fn, cb_arg); +diff --git a/lib/nvme/nvme_pcie.c b/lib/nvme/nvme_pcie.c +index 0e9e24d..1b4b958 100644 +--- a/lib/nvme/nvme_pcie.c ++++ b/lib/nvme/nvme_pcie.c +@@ -755,9 +755,7 @@ nvme_pcie_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr) + spdk_pci_device_unclaim(devhandle); + spdk_pci_device_detach(devhandle); + } +- + spdk_free(pctrlr); +- + return 0; + } + +@@ -1244,6 +1242,10 @@ nvme_pcie_qpair_submit_request(struct spdk_nvme_qpair *qpair, struct nvme_reques + bool sgl_supported; + bool dword_aligned = true; + ++ if (!nvme_qpair_check_enabled(qpair)) { ++ return -EBUSY; ++ } ++ + if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair))) { + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); + } +diff --git 
a/lib/nvme/nvme_pcie_common.c b/lib/nvme/nvme_pcie_common.c +index 564f81b..1dc9c99 100644 +--- a/lib/nvme/nvme_pcie_common.c ++++ b/lib/nvme/nvme_pcie_common.c +@@ -840,7 +840,7 @@ nvme_pcie_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_ + if (tr->req) { + nvme_pcie_qpair_complete_tracker(qpair, tr, cpl, true); + } else { +- SPDK_ERRLOG("cpl does not map to outstanding cmd\n"); ++ SPDK_NOTICELOG("cpl does not map to outstanding cmd\n"); + spdk_nvme_qpair_print_completion(qpair, cpl); + assert(0); + } +@@ -905,7 +905,6 @@ nvme_pcie_qpair_destroy(struct spdk_nvme_qpair *qpair) + nvme_qpair_deinit(qpair); + + spdk_free(pqpair); +- + return 0; + } + +@@ -952,8 +951,6 @@ nvme_pcie_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ + struct nvme_completion_poll_status *status; + int rc; + +- assert(ctrlr != NULL); +- + if (ctrlr->is_removed) { + goto free; + } +@@ -1005,6 +1002,7 @@ free: + nvme_pcie_qpair_abort_trackers(qpair, 1); + } + ++ nvme_qpair_abort_reqs(qpair, 1); + nvme_pcie_qpair_destroy(qpair); + return 0; + } +@@ -1064,6 +1062,11 @@ nvme_pcie_poll_group_process_completions(struct spdk_nvme_transport_poll_group * + local_completions = spdk_nvme_qpair_process_completions(qpair, completions_per_qpair); + if (local_completions < 0) { + disconnected_qpair_cb(qpair, tgroup->group->ctx); ++ qpair->disconnected_time++; ++ if (qpair->disconnected_time > 50) { ++ qpair->poll_group->num_qpairs_to_delete++; ++ qpair->delete_after_completion_context = 1; ++ } + local_completions = 0; + } + total_completions += local_completions; +@@ -1078,7 +1081,6 @@ nvme_pcie_poll_group_destroy(struct spdk_nvme_transport_poll_group *tgroup) + if (!STAILQ_EMPTY(&tgroup->connected_qpairs) || !STAILQ_EMPTY(&tgroup->disconnected_qpairs)) { + return -EBUSY; + } +- + free(tgroup); + + return 0; +diff --git a/lib/nvme/nvme_qpair.c b/lib/nvme/nvme_qpair.c +index 3aabd63..f11d070 100644 +--- a/lib/nvme/nvme_qpair.c ++++ b/lib/nvme/nvme_qpair.c +@@ 
-600,7 +600,7 @@ nvme_qpair_abort_queued_reqs(struct spdk_nvme_qpair *qpair, void *cmd_cb_arg) + return aborting; + } + +-static inline bool ++bool + nvme_qpair_check_enabled(struct spdk_nvme_qpair *qpair) + { + struct nvme_request *req; +@@ -612,8 +612,7 @@ nvme_qpair_check_enabled(struct spdk_nvme_qpair *qpair) + * from the old transport connection and encourage the application to retry them. We also need + * to submit any queued requests that built up while we were in the connected or enabling state. + */ +- if (nvme_qpair_get_state(qpair) == NVME_QPAIR_CONNECTED && !qpair->ctrlr->is_resetting +- && !qpair->ctrlr->is_removed && !qpair->ctrlr->is_destructed) { ++ if (nvme_qpair_get_state(qpair) == NVME_QPAIR_CONNECTED && !qpair->ctrlr->is_resetting) { + nvme_qpair_set_state(qpair, NVME_QPAIR_ENABLING); + /* + * PCIe is special, for fabrics transports, we can abort requests before disconnect during reset +@@ -857,13 +856,7 @@ _nvme_qpair_submit_request(struct spdk_nvme_qpair *qpair, struct nvme_request *r + rc = nvme_qpair_submit_request(qpair, child_req); + if (spdk_unlikely(rc != 0)) { + child_req_failed = true; +-#ifdef SPDK_CONFIG_APP_RW +- if (rc == -ENXIO && child_req->num_children == 0) { +- SPDK_WARNLOG("Warning: child req submit failed.\n"); +- nvme_request_remove_child(req, child_req); +- nvme_free_request(child_req); +- } +-#endif ++ SPDK_WARNLOG("Warning: child req submit failed.\n"); + } + } else { /* free remaining child_reqs since one child_req fails */ + nvme_request_remove_child(req, child_req); +diff --git a/lib/nvme/nvme_uevent.h b/lib/nvme/nvme_uevent.h +index 1921801..94f6710 100644 +--- a/lib/nvme/nvme_uevent.h ++++ b/lib/nvme/nvme_uevent.h +@@ -41,7 +41,6 @@ + #ifndef SPDK_UEVENT_H_ + #define SPDK_UEVENT_H_ + +-#ifndef SPDK_CONFIG_APP_RW + #define SPDK_NVME_UEVENT_SUBSYSTEM_UNRECOGNIZED 0 + #define SPDK_NVME_UEVENT_SUBSYSTEM_UIO 1 + #define SPDK_NVME_UEVENT_SUBSYSTEM_VFIO 2 +@@ -59,6 +58,5 @@ struct spdk_uevent { + + int 
nvme_uevent_connect(void); + int nvme_get_uevent(int fd, struct spdk_uevent *uevent); +-#endif + + #endif /* SPDK_UEVENT_H_ */ +diff --git a/lib/thread/thread.c b/lib/thread/thread.c +index 1ab822b..a3d342e 100644 +--- a/lib/thread/thread.c ++++ b/lib/thread/thread.c +@@ -57,6 +57,12 @@ void spdk_set_thread_exited(struct spdk_thread *thread) + { + thread->state = SPDK_THREAD_STATE_EXITED; + } ++ ++uint32_t spdk_get_channel_ref(void *io_ch) ++{ ++ struct spdk_io_channel *ch = io_ch; ++ return ch->ref; ++} + #endif + + static pthread_mutex_t g_devlist_mutex = PTHREAD_MUTEX_INITIALIZER; +@@ -1147,11 +1153,6 @@ spdk_poller_unregister(struct spdk_poller **ppoller) + struct spdk_thread *thread; + struct spdk_poller *poller; + +- if (!g_bRunReactor) { +- *ppoller = NULL; +- return; +- } +- + poller = *ppoller; + if (poller == NULL) { + return; +@@ -1413,12 +1414,10 @@ _finish_unregister(void *arg) + struct spdk_thread *thread; + + thread = spdk_get_thread(); +- assert(thread == dev->unregister_thread); + + SPDK_DEBUGLOG(thread, "Finishing unregistration of io_device %s (%p) on thread %s\n", + dev->name, dev->io_device, thread->name); + +- assert(thread->pending_unregister_count > 0); + thread->pending_unregister_count--; + + dev->unregister_cb(dev->io_device); +@@ -1468,7 +1467,6 @@ spdk_io_device_unregister(void *io_device, spdk_io_device_unregister_cb unregist + + if (!dev) { + SPDK_ERRLOG("io_device %p not found\n", io_device); +- assert(false); + pthread_mutex_unlock(&g_devlist_mutex); + return; + } +@@ -1545,8 +1543,8 @@ spdk_get_io_channel(void *io_device) + if (ch->dev == dev) { + ch->ref++; + +- SPDK_DEBUGLOG(thread, "Get io_channel %p for io_device %s (%p) on thread %s refcnt %u\n", +- ch, dev->name, dev->io_device, thread->name, ch->ref); ++ SPDK_NOTICELOG("Get io_channel %p for io_device %s (%p) on thread %s refcnt %u\n", ++ ch, dev->name, dev->io_device, thread->name, ch->ref); + + /* + * An I/O channel already exists for this device on this +@@ -1798,7 
+1796,6 @@ spdk_for_each_channel(void *io_device, spdk_channel_msg fn, void *ctx, + #else + _call_channel(i); + #endif +- assert(rc == 0); + return; + } + } +@@ -1821,7 +1818,7 @@ spdk_for_each_channel_continue(struct spdk_io_channel_iter *i, int status) + struct spdk_io_channel *ch; + int rc __attribute__((unused)); + +- assert(i->cur_thread == spdk_get_thread()); ++ /* assert(i->cur_thread == spdk_get_thread()); */ + + i->status = status; + +diff --git a/mk/spdk.common.mk b/mk/spdk.common.mk +index 6bdc1dd..da214c8 100644 +--- a/mk/spdk.common.mk ++++ b/mk/spdk.common.mk +@@ -254,7 +254,7 @@ CXXFLAGS += $(COMMON_CFLAGS) + SYS_LIBS += -lrt + SYS_LIBS += -luuid + SYS_LIBS += -lcrypto +-SYS_LIBS += -lsecurec ++SYS_LIBS += -lboundscheck + + ifneq ($(CONFIG_NVME_CUSE)$(CONFIG_FUSE),nn) + SYS_LIBS += -lfuse3 +diff --git a/module/bdev/nvme/bdev_nvme.c b/module/bdev/nvme/bdev_nvme.c +index 01d0238..d291646 100644 +--- a/module/bdev/nvme/bdev_nvme.c ++++ b/module/bdev/nvme/bdev_nvme.c +@@ -280,9 +280,6 @@ bdev_nvme_poll(void *arg) + } + } + +- if (!spdk_get_reactor_type()) { +- return num_completions; +- } + return num_completions > 0 ? 
SPDK_POLLER_BUSY : SPDK_POLLER_IDLE; + } + +@@ -315,7 +312,6 @@ bdev_nvme_destruct(void *ctx) + pthread_mutex_unlock(&g_bdev_nvme_mutex); + + nvme_bdev_ns_detach(nvme_ns); +- + free(nvme_disk->disk.name); + free(nvme_disk); + +@@ -930,6 +926,7 @@ bdev_nvme_create_cb(void *io_device, void *ctx_buf) + goto err_qpair; + } + ++ nvme_ch->state = 0; + return 0; + + err_qpair: +@@ -956,6 +953,7 @@ bdev_nvme_destroy_cb(void *io_device, void *ctx_buf) + spdk_nvme_ctrlr_free_io_qpair(nvme_ch->qpair); + + spdk_put_io_channel(spdk_io_channel_from_ctx(nvme_ch->group)); ++ nvme_ch->state = 1; + } + + static int +@@ -970,7 +968,7 @@ bdev_nvme_poll_group_create_cb(void *io_device, void *ctx_buf) + + group->poller = SPDK_POLLER_REGISTER(bdev_nvme_poll, group, g_opts.nvme_ioq_poll_period_us); + +- if (group->poller == NULL && spdk_get_reactor_type()) { ++ if (group->poller == NULL) { + spdk_nvme_poll_group_destroy(group->group); + return -1; + } +@@ -985,7 +983,7 @@ bdev_nvme_poll_group_destroy_cb(void *io_device, void *ctx_buf) + + spdk_poller_unregister(&group->poller); + if (spdk_nvme_poll_group_destroy(group->group)) { +- SPDK_ERRLOG("Unable to destroy a poll group for the NVMe bdev module."); ++ SPDK_ERRLOG("Unable to destroy a poll group for the NVMe bdev module.\n"); + assert(false); + } + } +@@ -1320,8 +1318,9 @@ nvme_abort_cpl(void *ctx, const struct spdk_nvme_cpl *cpl) + struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = ctx; + + if (spdk_nvme_cpl_is_error(cpl)) { +- SPDK_WARNLOG("Abort failed. Resetting controller.\n"); +- _bdev_nvme_reset(nvme_bdev_ctrlr, NULL); ++ SPDK_WARNLOG("Abort failed, sc is %u, sct is %u. Resetting controller.\n", cpl->status.sc, ++ cpl->status.sct); ++ spdk_nvme_ctrlr_reset(nvme_bdev_ctrlr->ctrlr); + } + } + +@@ -1335,8 +1334,6 @@ timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr, + + assert(nvme_bdev_ctrlr->ctrlr == ctrlr); + +- SPDK_WARNLOG("Warning: Detected a timeout. 
ctrlr=%p qpair=%p cid=%u\n", ctrlr, qpair, cid); +- + /* Only try to read CSTS if it's a PCIe controller or we have a timeout on an I/O + * queue. (Note: qpair == NULL when there's an admin cmd timeout.) Otherwise we + * would submit another fabrics cmd on the admin queue to read CSTS and check for its +@@ -1359,8 +1356,7 @@ timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr, + if (rc == 0) { + return; + } +- +- SPDK_ERRLOG("Unable to send abort. Resetting.\n"); ++ SPDK_ERRLOG("Unable to send abort. Resetting, rc is %d.\n", rc); + } + + /* FALLTHROUGH */ +@@ -2245,9 +2241,14 @@ bdev_nvme_library_fini(void) + continue; + } + nvme_bdev_ctrlr->destruct = true; +- ++#ifndef SPDK_CONFIG_APP_RW + spdk_thread_send_msg(nvme_bdev_ctrlr->thread, _nvme_bdev_ctrlr_destruct, + nvme_bdev_ctrlr); ++#else ++ pthread_mutex_unlock(&g_bdev_nvme_mutex); ++ _nvme_bdev_ctrlr_destruct(nvme_bdev_ctrlr); ++ pthread_mutex_lock(&g_bdev_nvme_mutex); ++#endif + } + + g_bdev_nvme_module_finish = true; +@@ -3028,12 +3029,18 @@ bdev_nvme_get_ctrlr(struct spdk_bdev *bdev) + } + + #ifdef SPDK_CONFIG_APP_RW +-void * +-nvme_channel_get_group(void *io_ch) ++void *nvme_channel_get_group(void *io_ch) ++{ ++ struct nvme_io_channel *nvme_ch = io_ch; ++ return nvme_ch->group; ++} ++ ++int nvme_channel_get_state(void *io_ch) + { +- struct nvme_io_channel *nvme_io_ch = io_ch; +- return nvme_io_ch->group; ++ struct nvme_io_channel *nvme_ch = io_ch; ++ return nvme_ch->state; + } ++ + struct nvme_bdev_io *nvme_bdev_io_update_args(struct nvme_bdev_io *bio, struct iovec *iov, + int iovcnt) + { +@@ -3128,14 +3135,15 @@ int bdev_probe_ctrlr(void) + } + + retry_count = spdk_conf_section_get_intval(sp, "RetryCount"); +- if (retry_count >= 0) { +- g_opts.retry_count = retry_count; +- } +- if (retry_count > 255) { ++ if (retry_count < 0) { ++ retry_count = 4; ++ } else if (retry_count > 255) { + SPDK_WARNLOG("RetryCount:%d should not be greater than 255, set it to 255 this time\n", + retry_count); + retry_count = 
255; + } ++ ++ g_opts.retry_count = retry_count; + syslog(LOG_INFO, "RetryCount is set to %d\n", retry_count); + + val = spdk_conf_section_get_val(sp, "TimeoutUsec"); +diff --git a/module/bdev/nvme/bdev_nvme_self.c b/module/bdev/nvme/bdev_nvme_self.c +index 1419b1f..dc480ff 100644 +--- a/module/bdev/nvme/bdev_nvme_self.c ++++ b/module/bdev/nvme/bdev_nvme_self.c +@@ -15,7 +15,6 @@ + #include "spdk/json.h" + #include "spdk/likely.h" + #include "spdk/bdev_module.h" +-#include "spdk/nvme_ocssd.h" + #include "spdk/nvme.h" + + #include "spdk_internal/bdev_stat.h" +@@ -23,11 +22,6 @@ + #include "common.h" + #include + +-enum data_direction { +- BDEV_DISK_READ = 0, +- BDEV_DISK_WRITE = 1 +-}; +- + void bdev_update_ch_timeout(struct nvme_bdev_poll_group *group) + { + uint64_t current_ticks = 0; +@@ -49,8 +43,7 @@ void bdev_update_ch_timeout(struct nvme_bdev_poll_group *group) + } + } + +-int +-_bdev_nvme_submit_request_self(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) ++int _bdev_nvme_submit_request_self(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) + { + struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); + +@@ -65,27 +58,27 @@ _bdev_nvme_submit_request_self(struct spdk_io_channel *ch, struct spdk_bdev_io * + bdev_io->u.contig.offset_blocks); + return bdev_nvme_queue_cmd_with_md((struct nvme_bdev *)bdev_io->bdev->ctxt, nvme_ch->qpair, + bdev_io->driver_ctx, bdev_io->u.contig.buf, +- bdev_io->u.contig.md_buf, BDEV_DISK_READ, ++ bdev_io->u.contig.md_buf, SPDK_BDEV_IO_TYPE_READ, + bdev_io->u.contig.num_blocks, bdev_io->u.contig.offset_blocks); + case SPDK_BDEV_IO_TYPE_WRITE_NVME: + SPDK_DEBUGLOG(bdev_nvme, "write %lu lbas with offset %#lx\n", bdev_io->u.contig.num_blocks, + bdev_io->u.contig.offset_blocks); + return bdev_nvme_queue_cmd_with_md((struct nvme_bdev *)bdev_io->bdev->ctxt, nvme_ch->qpair, + bdev_io->driver_ctx, bdev_io->u.contig.buf, +- bdev_io->u.contig.md_buf, BDEV_DISK_WRITE, ++ bdev_io->u.contig.md_buf, 
SPDK_BDEV_IO_TYPE_WRITE, + bdev_io->u.contig.num_blocks, bdev_io->u.contig.offset_blocks); + case SPDK_BDEV_IO_TYPE_READV_NVME: + SPDK_DEBUGLOG(bdev_nvme, "readv %lu lbas with offset %#lx\n", bdev_io->u.bdev.num_blocks, + bdev_io->u.bdev.offset_blocks); + return bdev_nvme_queue_cmd_v_with_md((struct nvme_bdev *)bdev_io->bdev->ctxt, nvme_ch->qpair, +- bdev_io->driver_ctx, BDEV_DISK_READ, ++ bdev_io->driver_ctx, SPDK_BDEV_IO_TYPE_READ, + bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, + bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.offset_blocks); + case SPDK_BDEV_IO_TYPE_WRITEV_NVME: + SPDK_DEBUGLOG(bdev_nvme, "writev %lu lbas with offset %#lx\n", bdev_io->u.bdev.num_blocks, + bdev_io->u.bdev.offset_blocks); + return bdev_nvme_queue_cmd_v_with_md((struct nvme_bdev *)bdev_io->bdev->ctxt, nvme_ch->qpair, +- bdev_io->driver_ctx, BDEV_DISK_WRITE, ++ bdev_io->driver_ctx, SPDK_BDEV_IO_TYPE_WRITE, + bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, + bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.offset_blocks); + case SPDK_BDEV_IO_TYPE_UNMAP_BLOCKS: +@@ -100,14 +93,12 @@ _bdev_nvme_submit_request_self(struct spdk_io_channel *ch, struct spdk_bdev_io * + return 0; + } + +-int +-bdev_nvme_dump_info_json_self(void *ctx, struct spdk_json_write_ctx *w) ++int bdev_nvme_dump_info_json_self(void *ctx, struct spdk_json_write_ctx *w) + { + return 0; + } + +-uint16_t +-bdev_nvme_get_io_channel_id(struct spdk_io_channel *ch) ++uint16_t bdev_nvme_get_io_channel_id(struct spdk_io_channel *ch) + { + struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); + uint16_t channel_id; +@@ -116,15 +107,13 @@ bdev_nvme_get_io_channel_id(struct spdk_io_channel *ch) + return channel_id; + } + +-uint64_t +-bdev_nvme_get_timeout_count(struct spdk_io_channel *ch) ++uint64_t bdev_nvme_get_timeout_count(struct spdk_io_channel *ch) + { + struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); + return nvme_ch->group->num_poll_timeout; + } + +-int32_t +-nvme_ctrlr_get_info(const char *ctrlName, struct 
nvme_ctrlr_info **ppCtrlr) ++int32_t nvme_ctrlr_get_info(const char *ctrlName, struct nvme_ctrlr_info **ppCtrlr) + { + uint32_t num_ctrlr = 0, i = 0; + struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = NULL; +@@ -221,8 +210,7 @@ nvme_ctrlr_get_info(const char *ctrlName, struct nvme_ctrlr_info **ppCtrlr) + return num_ctrlr; + } + +-struct nvme_bdev_ctrlr * +-nvme_ctrlr_get_by_name(const char *name) ++struct nvme_bdev_ctrlr *nvme_ctrlr_get_by_name(const char *name) + { + struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = NULL; + +@@ -239,8 +227,7 @@ nvme_ctrlr_get_by_name(const char *name) + return NULL; + } + +-struct spdk_nvme_ctrlr * +-spdk_nvme_ctrlr_get_by_name(const char *ctrlname) ++struct spdk_nvme_ctrlr *spdk_nvme_ctrlr_get_by_name(const char *ctrlname) + { + struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = NULL; + +@@ -253,8 +240,7 @@ spdk_nvme_ctrlr_get_by_name(const char *ctrlname) + return NULL; + } + +-struct spdk_nvme_ctrlr * +-spdk_nvme_ctrlr_get_by_ctrlr(const struct nvme_bdev_ctrlr *nvme_bdev_ctrlr) ++struct spdk_nvme_ctrlr *spdk_nvme_ctrlr_get_by_ctrlr(const struct nvme_bdev_ctrlr *nvme_bdev_ctrlr) + { + if (nvme_bdev_ctrlr == NULL) { + return NULL; +@@ -262,8 +248,7 @@ spdk_nvme_ctrlr_get_by_ctrlr(const struct nvme_bdev_ctrlr *nvme_bdev_ctrlr) + return nvme_bdev_ctrlr->ctrlr; + } + +-void +-nvme_ctrlr_clear_iostat_by_name(const char *ctrlname) ++void nvme_ctrlr_clear_iostat_by_name(const char *ctrlname) + { + int i; + size_t size = strnlen(ctrlname, 24); +@@ -288,8 +273,7 @@ nvme_ctrlr_clear_iostat_by_name(const char *ctrlname) + } + } + +-void +-nvme_ctrlr_clear_iostat_all(void) ++void nvme_ctrlr_clear_iostat_all(void) + { + struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = NULL; + +@@ -298,8 +282,7 @@ nvme_ctrlr_clear_iostat_all(void) + } + } + +-struct spdk_nvme_ns * +-bdev_nvme_get_ns(struct nvme_bdev *nbdev) ++struct spdk_nvme_ns *bdev_nvme_get_ns(struct nvme_bdev *nbdev) + { + return nbdev->nvme_ns->ns; + } +@@ -331,11 +314,10 @@ void 
bdev_nvme_update_block_by_nvme_ctrlr(struct spdk_nvme_ctrlr *ctrlr) + pthread_mutex_unlock(&g_bdev_nvme_mutex); + } + +-int +-bdev_nvme_update_ns(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid) ++int bdev_nvme_update_ns(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid) + { +- struct spdk_nvme_ctrlr *ctrlr = nvme_bdev_ctrlr->ctrlr; + struct nvme_bdev_ns *ns = NULL; ++ bool ns_active = spdk_nvme_ctrlr_is_active_ns(nvme_bdev_ctrlr->ctrlr, nsid); + + if (nvme_bdev_ctrlr == NULL || nsid > nvme_bdev_ctrlr->num_ns) { + SPDK_ERRLOG("Parameter error. nsid[%u], the max nsid is[%u]\n", nsid, nvme_bdev_ctrlr->num_ns); +@@ -343,14 +325,9 @@ bdev_nvme_update_ns(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid) + } + + ns = nvme_bdev_ctrlr->namespaces[nsid - 1]; ++ ns->type = NVME_BDEV_NS_STANDARD; + +- if (spdk_nvme_ctrlr_is_ocssd_supported(ctrlr)) { +- ns->type = NVME_BDEV_NS_OCSSD; +- } else { +- ns->type = NVME_BDEV_NS_STANDARD; +- } +- +- if (!ns->populated && spdk_nvme_ctrlr_is_active_ns(nvme_bdev_ctrlr->ctrlr, nsid)) { ++ if (!ns->populated && ns_active) { + SPDK_NOTICELOG("NSID %u to be added\n", nsid); + ns->id = nsid; + ns->ctrlr = nvme_bdev_ctrlr; +@@ -360,16 +337,16 @@ bdev_nvme_update_ns(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid) + return 0; + } + +- if (ns->populated && !spdk_nvme_ctrlr_is_active_ns(nvme_bdev_ctrlr->ctrlr, nsid)) { ++ if (ns->populated && !ns_active) { + SPDK_NOTICELOG("NSID %u is removed\n", nsid); + nvme_ctrlr_depopulate_namespace(nvme_bdev_ctrlr, ns); + return 0; + } +- return -1; ++ ++ return 0; + } + +-bool +-spdk_bdev_can_remove(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid) ++bool spdk_bdev_can_remove(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid) + { + struct nvme_bdev_ns *ns = NULL; + struct nvme_bdev *bdev = NULL, *tmp = NULL; +@@ -398,8 +375,7 @@ spdk_bdev_can_remove(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid) + return empty; + } + +-void +-spdk_bdev_set_ns_normal(struct 
nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid) ++void spdk_bdev_set_ns_normal(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid) + { + struct nvme_bdev_ns *ns = NULL; + struct nvme_bdev *bdev = NULL, *tmp = NULL; +@@ -419,9 +395,18 @@ spdk_bdev_set_ns_normal(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid) + } + } + +-int +-bdev_nvme_queue_cmd_with_md(struct nvme_bdev *bdev, struct spdk_nvme_qpair *qpair, void *driver_ctx, +- void *buffer, void *metadata, int direction, uint64_t lba_count, uint64_t lba) ++static void check_error_type(int rc, bool read, void *qpair) ++{ ++ if (rc == -ENOMEM) { ++ SPDK_NOTICELOG("%s failed: rc = %d\n", read ? "read" : "write", rc); ++ } else if (rc < 0) { ++ SPDK_ERRLOG("%s failed: rc = %d, qpair is %p\n", read ? "read" : "write", rc, qpair); ++ } ++} ++ ++int bdev_nvme_queue_cmd_with_md(struct nvme_bdev *bdev, struct spdk_nvme_qpair *qpair, ++ void *driver_ctx, ++ void *buffer, void *metadata, int direction, uint64_t lba_count, uint64_t lba) + { + int rc; + uint32_t io_flags = 0; +@@ -452,7 +437,7 @@ bdev_nvme_queue_cmd_with_md(struct nvme_bdev *bdev, struct spdk_nvme_qpair *qpai + io_flags |= SPDK_NVME_IO_FLAGS_FORCE_UNIT_ACCESS; + } + +- if (direction == BDEV_DISK_READ) { ++ if (direction == SPDK_BDEV_IO_TYPE_READ) { + rc = spdk_nvme_ns_cmd_read_with_md(bdev->nvme_ns->ns, qpair, buffer, metadata, lba, + lba_count, bdev_nvme_queued_done, driver_ctx, io_flags, 0, 0); + } else { +@@ -460,21 +445,13 @@ bdev_nvme_queue_cmd_with_md(struct nvme_bdev *bdev, struct spdk_nvme_qpair *qpai + lba_count, bdev_nvme_queued_done, driver_ctx, io_flags, 0, 0); + } + +- if (rc != 0) { +- if (rc == -ENOMEM) { +- SPDK_NOTICELOG("%s failed: rc = %d\n", direction == BDEV_DISK_READ ? "read" : "write", rc); +- } else { +- SPDK_ERRLOG("%s failed: rc = %d, qpair is %p\n", direction == BDEV_DISK_READ ? 
"read" : "write", +- rc, qpair); +- } +- } ++ check_error_type(rc, direction == SPDK_BDEV_IO_TYPE_READ, qpair); + return rc; + } + +-int +-bdev_nvme_queue_cmd_v_with_md(struct nvme_bdev *bdev, struct spdk_nvme_qpair *qpair, +- void *driver_ctx, +- int direction, struct iovec *iov, int iovcnt, uint64_t lba_count, uint64_t lba) ++int bdev_nvme_queue_cmd_v_with_md(struct nvme_bdev *bdev, struct spdk_nvme_qpair *qpair, ++ void *driver_ctx, ++ int direction, struct iovec *iov, int iovcnt, uint64_t lba_count, uint64_t lba) + { + int rc; + struct nvme_bdev_io *bio = NULL; +@@ -508,29 +485,21 @@ bdev_nvme_queue_cmd_v_with_md(struct nvme_bdev *bdev, struct spdk_nvme_qpair *qp + + bio = nvme_bdev_io_update_args((struct nvme_bdev_io *)driver_ctx, iov, iovcnt); + +- if (direction == BDEV_DISK_READ) { ++ if (direction == SPDK_BDEV_IO_TYPE_READ) { + rc = spdk_nvme_ns_cmd_readv(bdev->nvme_ns->ns, qpair, lba, + lba_count, bdev_nvme_queued_done, bio, io_flags, + bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge); + } else { + rc = spdk_nvme_ns_cmd_writev(bdev->nvme_ns->ns, qpair, lba, lba_count, +- 0, bdev_nvme_queued_done, bio, io_flags, ++ bdev_nvme_queued_done, bio, io_flags, + bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge); + } + +- if (rc != 0) { +- if (rc == -ENOMEM) { +- SPDK_NOTICELOG("%s failed: rc = %d\n", direction == BDEV_DISK_READ ? "readv" : "writev", rc); +- } else { +- SPDK_ERRLOG("%s failed: rc = %d, qpair is %p\n", direction == BDEV_DISK_READ ? 
"read" : "write", rc, +- qpair); +- } +- } ++ check_error_type(rc, direction == SPDK_BDEV_IO_TYPE_READ, qpair); + return rc; + } + +-struct nvme_bdev_ctrlr * +-bdev_nvme_get_ctrlr_by_bdev_desc(void *bdev_desc) ++struct nvme_bdev_ctrlr *bdev_nvme_get_ctrlr_by_bdev_desc(void *bdev_desc) + { + struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(bdev_desc); + struct nvme_bdev *nbdev = (struct nvme_bdev *)bdev->ctxt; +@@ -540,12 +509,11 @@ bdev_nvme_get_ctrlr_by_bdev_desc(void *bdev_desc) + return nbdev->nvme_ns->ctrlr; + } + +-int +-bdev_nvme_unmap_blocks(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, void *driver_ctx, +- struct spdk_nvme_dsm_range *unmap_d, uint16_t unmap_count) ++int bdev_nvme_unmap_blocks(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, void *driver_ctx, ++ struct spdk_nvme_dsm_range *unmap_d, uint32_t unmap_count) + { + struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); +- int i; ++ uint32_t i; + + if (unmap_count == 0 || unmap_count > SPDK_NVME_DATASET_MANAGEMENT_MAX_RANGES) { + SPDK_ERRLOG("Invalid parameter, unmap count: %u\n", unmap_count); +@@ -566,36 +534,39 @@ bdev_nvme_unmap_blocks(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, void + + spdk_bdev_set_io_location(driver_ctx, (uint8_t)LOCAL_LIBSTORAGE_BDEV_NVME); + return spdk_nvme_ns_cmd_dataset_management(nbdev->nvme_ns->ns, nvme_ch->qpair, +- SPDK_NVME_DSM_ATTR_DEALLOCATE, +- unmap_d, unmap_count, +- bdev_nvme_queued_done, driver_ctx); ++ SPDK_NVME_DSM_ATTR_DEALLOCATE, ++ unmap_d, unmap_count, ++ bdev_nvme_queued_done, driver_ctx); + } + +-void +-spdk_bdev_nvme_remove_cb(void *cb_ctx, void *ctrlr) ++void spdk_bdev_nvme_remove_cb(void *cb_ctx, void *ctrlr) + { + remove_cb(cb_ctx, (struct spdk_nvme_ctrlr *)ctrlr); + } + +-void spdk_bdev_fail_ctrlr(const char *traddr) ++void spdk_bdev_fail_ctrlr(void *cb_ctx, void *ctrlr) + { +- struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; ++ spdk_nvme_ctrlr_fail_hotplug((struct spdk_nvme_ctrlr *)ctrlr); ++ remove_cb(cb_ctx, (struct 
spdk_nvme_ctrlr *)ctrlr); ++} ++ ++struct spdk_nvme_ctrlr *spdk_nvme_bdev_ctrlr_get(char *pci_trid) ++{ ++ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; + +- pthread_mutex_lock(&g_bdev_nvme_mutex); + TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) { +- if (strcmp(nvme_bdev_ctrlr->connected_trid->traddr, traddr) == 0) { +- spdk_nvme_ctrlr_fail(nvme_bdev_ctrlr->ctrlr); +- remove_cb(NULL, nvme_bdev_ctrlr->ctrlr); +- return; ++ if (strcmp(nvme_bdev_ctrlr->connected_trid->traddr, pci_trid) == 0) { ++ return nvme_bdev_ctrlr->ctrlr; + } + } ++ ++ return NULL; + } + +-int +-spdk_bdev_nvme_create_self(struct spdk_nvme_transport_id *trid, +- const char *base_name, +- const char **names, size_t *count, +- const char *hostnqn) ++int spdk_bdev_nvme_create_self(struct spdk_nvme_transport_id *trid, ++ const char *base_name, ++ const char **names, size_t *count, ++ const char *hostnqn) + { + struct nvme_probe_ctx *probe_ctx; + struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; +@@ -616,7 +587,7 @@ spdk_bdev_nvme_create_self(struct spdk_nvme_transport_id *trid, + return -1; + } + +- if (spdk_nvme_probe(trid, probe_ctx, probe_cb, attach_cb, NULL)) { ++ if (spdk_nvme_probe(trid, probe_ctx, probe_cb, attach_cb, remove_cb)) { + SPDK_ERRLOG("Failed to probe for new devices\n"); + free(probe_ctx); + return -1; +diff --git a/module/bdev/nvme/bdev_nvme_self.h b/module/bdev/nvme/bdev_nvme_self.h +index d7cc587..43ad7ee 100644 +--- a/module/bdev/nvme/bdev_nvme_self.h ++++ b/module/bdev/nvme/bdev_nvme_self.h +@@ -40,4 +40,4 @@ bdev_nvme_get_ctrlr_by_bdev_desc(void *bdev_desc); + + int + bdev_nvme_unmap_blocks(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, void *driver_ctx, +- struct spdk_nvme_dsm_range *unmap_d, uint16_t unmap_count); ++ struct spdk_nvme_dsm_range *unmap_d, uint32_t unmap_count); +diff --git a/module/bdev/nvme/common.h b/module/bdev/nvme/common.h +index 81b4009..8dbcd87 100644 +--- a/module/bdev/nvme/common.h ++++ b/module/bdev/nvme/common.h +@@ -160,6 +160,7 @@ struct 
nvme_io_channel { + struct nvme_bdev_poll_group *group; + TAILQ_HEAD(, spdk_bdev_io) pending_resets; + struct ocssd_io_channel *ocssd_ch; ++ int state; + }; + + void nvme_ctrlr_populate_namespace_done(struct nvme_async_probe_ctx *ctx, +diff --git a/scripts/setup_self.sh b/scripts/setup_self.sh +index 9e77c29..90b7f86 100755 +--- a/scripts/setup_self.sh ++++ b/scripts/setup_self.sh +@@ -30,8 +30,15 @@ function linux_bind_driver() { + + echo "$bdf ($ven_dev_id): $old_driver_name -> $driver_name" + +- echo "$ven_dev_id" > "/sys/bus/pci/drivers/$driver_name/new_id" 2> /dev/null || true +- echo "$bdf" > "/sys/bus/pci/drivers/$driver_name/bind" 2> /dev/null || true ++ if [ "$driver_name" = "nvme" ] ++ then ++ echo 1 > /sys/bus/pci/devices/$bdf/remove ++ sleep 1 ++ echo 1 > /sys/bus/pci/rescan ++ else ++ echo "$ven_dev_id" > "/sys/bus/pci/drivers/$driver_name/new_id" 2> /dev/null || true ++ echo "$bdf" > "/sys/bus/pci/drivers/$driver_name/bind" 2> /dev/null || true ++ fi + } + + function linux_hugetlbfs_mount() { +-- +2.33.0 + diff --git a/0024-Add-CUSE-switch-for-nvme-ctrlr.patch b/0024-Add-CUSE-switch-for-nvme-ctrlr.patch new file mode 100644 index 0000000..650e3f7 --- /dev/null +++ b/0024-Add-CUSE-switch-for-nvme-ctrlr.patch @@ -0,0 +1,55 @@ +From 86162fca6435c4b5d98356f63ae32519fe485f02 Mon Sep 17 00:00:00 2001 +From: suweifeng +Date: Mon, 17 May 2021 16:05:40 +0800 +Subject: [PATCH 24/27] Add CUSE switch for nvme ctrlr + +Signed-off-by: suweifeng +--- + module/bdev/nvme/bdev_nvme.c | 17 +++++++++++++++++ + 1 file changed, 17 insertions(+) + +diff --git a/module/bdev/nvme/bdev_nvme.c b/module/bdev/nvme/bdev_nvme.c +index d291646..4f88e4e 100644 +--- a/module/bdev/nvme/bdev_nvme.c ++++ b/module/bdev/nvme/bdev_nvme.c +@@ -137,6 +137,9 @@ static struct spdk_thread *g_bdev_nvme_init_thread; + static struct spdk_poller *g_hotplug_poller; + static struct spdk_poller *g_hotplug_probe_poller; + static struct spdk_nvme_probe_ctx *g_hotplug_probe_ctx; ++#ifdef 
SPDK_CONFIG_APP_RW ++bool g_useCUSE = false; ++#endif + + static void nvme_ctrlr_populate_namespaces(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, + struct nvme_async_probe_ctx *ctx); +@@ -1694,6 +1697,12 @@ attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + } + + nvme_ctrlr_populate_namespaces(nvme_bdev_ctrlr, NULL); ++#ifdef SPDK_CONFIG_APP_RW ++ /* register CUSE */ ++ if (g_useCUSE) { ++ spdk_nvme_cuse_register(ctrlr); ++ } ++#endif + + free(name); + } +@@ -1720,6 +1729,14 @@ remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr) + return; + } + nvme_bdev_ctrlr->destruct = true; ++ ++#ifdef SPDK_CONFIG_APP_RW ++ /* remove CUSE */ ++ if (g_useCUSE) { ++ spdk_nvme_cuse_unregister(ctrlr); ++ } ++#endif ++ + pthread_mutex_unlock(&g_bdev_nvme_mutex); + _nvme_bdev_ctrlr_destruct(nvme_bdev_ctrlr); + } +-- +2.33.0 + diff --git a/0025-Adapt-for-ES3000-serial-vendor-special-opcode-in-CUS.patch b/0025-Adapt-for-ES3000-serial-vendor-special-opcode-in-CUS.patch new file mode 100644 index 0000000..9526287 --- /dev/null +++ b/0025-Adapt-for-ES3000-serial-vendor-special-opcode-in-CUS.patch @@ -0,0 +1,45 @@ +From c6239a3dc45a7cb3fa245cdcb5f0641959159714 Mon Sep 17 00:00:00 2001 +From: suweifeng +Date: Thu, 20 May 2021 16:41:01 +0800 +Subject: [PATCH 25/27] Adapt for ES3000 serial vendor special opcode in CUSE + +With Huawei ES3000 series NVMe PCIe SSDs, a special opcode 0xC0 is sent +to get self-defined vendor logs; the data transfer field of this opcode does not +follow the NVMe 1.3/1.4 spec, so treat the opcode as bidirectional. +All self-defined opcodes start with 0xC0. 
+ +Signed-off-by: suweifeng +--- + include/spdk/nvme_spec.h | 1 + + lib/nvme/nvme_cuse.c | 3 +++ + 2 files changed, 4 insertions(+) + +diff --git a/include/spdk/nvme_spec.h b/include/spdk/nvme_spec.h +index ca91c8b..8058ea0 100644 +--- a/include/spdk/nvme_spec.h ++++ b/include/spdk/nvme_spec.h +@@ -1345,6 +1345,7 @@ enum spdk_nvme_admin_opcode { + SPDK_NVME_OPC_SANITIZE = 0x84, + + SPDK_NVME_OPC_GET_LBA_STATUS = 0x86, ++ SPDK_NVME_OPC_VENDOR = 0xC0, + }; + + /** +diff --git a/lib/nvme/nvme_cuse.c b/lib/nvme/nvme_cuse.c +index 62d1422..3eccfd0 100644 +--- a/lib/nvme/nvme_cuse.c ++++ b/lib/nvme/nvme_cuse.c +@@ -154,6 +154,9 @@ cuse_nvme_admin_cmd_send(fuse_req_t req, struct nvme_admin_cmd *admin_cmd, + + ctx->req = req; + ctx->data_transfer = spdk_nvme_opc_get_data_transfer(admin_cmd->opcode); ++ if (admin_cmd->opcode >= SPDK_NVME_OPC_VENDOR) { ++ ctx->data_transfer = SPDK_NVME_DATA_BIDIRECTIONAL; ++ } + + memset(&ctx->nvme_cmd, 0, sizeof(ctx->nvme_cmd)); + ctx->nvme_cmd.opc = admin_cmd->opcode; +-- +2.33.0 + diff --git a/0026-Fix-race-condition-in-continuous-setup-and-teardown-.patch b/0026-Fix-race-condition-in-continuous-setup-and-teardown-.patch new file mode 100644 index 0000000..5cb8265 --- /dev/null +++ b/0026-Fix-race-condition-in-continuous-setup-and-teardown-.patch @@ -0,0 +1,124 @@ +From 34555d211c58ac7615d41547f56756ae02d22957 Mon Sep 17 00:00:00 2001 +From: suweifeng +Date: Tue, 8 Jun 2021 22:11:53 +0800 +Subject: [PATCH 26/27] Fix race condition in continuous setup and teardown + cuse session + +If we continuously set up and tear down the cuse session, it may tear down an +uninitialized cuse session and cause a segmentation fault, so wait until the +session has been created. 
+ +Signed-off-by: suweifeng +--- + lib/nvme/nvme_cuse.c | 41 +++++++++++++++++++++++++++++++++++++---- + 1 file changed, 37 insertions(+), 4 deletions(-) + +diff --git a/lib/nvme/nvme_cuse.c b/lib/nvme/nvme_cuse.c +index 3eccfd0..8f0be31 100644 +--- a/lib/nvme/nvme_cuse.c ++++ b/lib/nvme/nvme_cuse.c +@@ -55,6 +55,8 @@ struct cuse_device { + + pthread_t tid; + struct fuse_session *session; ++ pthread_cond_t session_cond; /* session condition variable */ ++ pthread_mutex_t session_mtx; /* session mutex variable */ + + struct cuse_device *ctrlr_device; + struct cuse_device *ns_devices; /**< Array of cuse ns devices */ +@@ -666,11 +668,17 @@ cuse_thread(void *arg) + cuse_device->session = cuse_lowlevel_setup(cuse_argc, cuse_argv, &ci, &cuse_ctrlr_clop, + &multithreaded, cuse_device); + } ++ + if (!cuse_device->session) { + SPDK_ERRLOG("Cannot create cuse session\n"); ++ pthread_mutex_lock(&cuse_device->session_mtx); ++ pthread_cond_signal(&cuse_device->session_cond); ++ pthread_mutex_unlock(&cuse_device->session_mtx); + goto err; + } +- ++ pthread_mutex_lock(&cuse_device->session_mtx); ++ pthread_cond_signal(&cuse_device->session_cond); ++ pthread_mutex_unlock(&cuse_device->session_mtx); + SPDK_NOTICELOG("fuse session for device %s created\n", cuse_device->dev_name); + + /* Receive and process fuse requests */ +@@ -718,13 +726,20 @@ cuse_nvme_ns_start(struct cuse_device *ctrlr_device, uint32_t nsid) + free(ns_device); + return -ENAMETOOLONG; + } +- ++ pthread_cond_init(&ns_device->session_cond, NULL); ++ pthread_mutex_init(&ns_device->session_mtx, NULL); + rv = pthread_create(&ns_device->tid, NULL, cuse_thread, ns_device); + if (rv != 0) { + SPDK_ERRLOG("pthread_create failed\n"); + return -rv; + } +- ++ pthread_mutex_lock(&ns_device->session_mtx); ++ pthread_cond_wait(&ns_device->session_cond, &ns_device->session_mtx); ++ pthread_mutex_unlock(&ns_device->session_mtx); ++ if (!ns_device->session) { ++ SPDK_ERRLOG("create namespace session failed\n"); ++ return -1; ++ } 
+ ns_device->is_started = true; + + return 0; +@@ -739,9 +754,10 @@ cuse_nvme_ns_stop(struct cuse_device *ctrlr_device, uint32_t nsid) + if (!ns_device->is_started) { + return; + } +- + fuse_session_exit(ns_device->session); + pthread_join(ns_device->tid, NULL); ++ pthread_cond_destroy(&ns_device->session_cond); ++ pthread_mutex_destroy(&ns_device->session_mtx); + ns_device->is_started = false; + } + +@@ -817,8 +833,14 @@ cuse_nvme_ctrlr_stop(struct cuse_device *ctrlr_device) + cuse_nvme_ns_stop(ctrlr_device, i); + } + ++ if (!ctrlr_device->is_started) { ++ return; ++ } + fuse_session_exit(ctrlr_device->session); + pthread_join(ctrlr_device->tid, NULL); ++ pthread_cond_destroy(&ctrlr_device->session_cond); ++ pthread_mutex_destroy(&ctrlr_device->session_mtx); ++ ctrlr_device->is_started = false; + TAILQ_REMOVE(&g_ctrlr_ctx_head, ctrlr_device, tailq); + spdk_bit_array_clear(g_ctrlr_started, ctrlr_device->index); + if (spdk_bit_array_count_set(g_ctrlr_started) == 0) { +@@ -894,12 +916,23 @@ nvme_cuse_start(struct spdk_nvme_ctrlr *ctrlr) + snprintf(ctrlr_device->dev_name, sizeof(ctrlr_device->dev_name), "spdk/nvme%d", + ctrlr_device->index); + ++ pthread_cond_init(&ctrlr_device->session_cond, NULL); ++ pthread_mutex_init(&ctrlr_device->session_mtx, NULL); + rv = pthread_create(&ctrlr_device->tid, NULL, cuse_thread, ctrlr_device); + if (rv != 0) { + SPDK_ERRLOG("pthread_create failed\n"); + rv = -rv; + goto err3; + } ++ pthread_mutex_lock(&ctrlr_device->session_mtx); ++ pthread_cond_wait(&ctrlr_device->session_cond, &ctrlr_device->session_mtx); ++ pthread_mutex_unlock(&ctrlr_device->session_mtx); ++ if (!ctrlr_device->session) { ++ SPDK_ERRLOG("cuse session create failed\n"); ++ rv = -1; ++ goto err3; ++ } ++ ctrlr_device->is_started = true; + TAILQ_INSERT_TAIL(&g_ctrlr_ctx_head, ctrlr_device, tailq); + + ctrlr_device->ns_devices = (struct cuse_device *)calloc(num_ns, sizeof(struct cuse_device)); +-- +2.33.0 + diff --git a/0027-Change-log-level-in-poll-timeout.patch 
b/0027-Change-log-level-in-poll-timeout.patch new file mode 100644 index 0000000..1c123ea --- /dev/null +++ b/0027-Change-log-level-in-poll-timeout.patch @@ -0,0 +1,28 @@ +From 5f8b5846741c965b1b5ad7a8ca2960b20565d192 Mon Sep 17 00:00:00 2001 +From: suweifeng +Date: Thu, 10 Jun 2021 11:25:17 +0800 +Subject: [PATCH 27/27] Change log level in poll timeout + +Change to 'NOTICE' log level in poll timeout + +Signed-off-by: suweifeng +--- + module/bdev/nvme/bdev_nvme_self.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/module/bdev/nvme/bdev_nvme_self.c b/module/bdev/nvme/bdev_nvme_self.c +index dc480ff..cba129e 100644 +--- a/module/bdev/nvme/bdev_nvme_self.c ++++ b/module/bdev/nvme/bdev_nvme_self.c +@@ -36,7 +36,7 @@ void bdev_update_ch_timeout(struct nvme_bdev_poll_group *group) + poll_time = (poll_ticks * 1000ULL) / spdk_get_ticks_hz(); + if (poll_time >= g_polltime_threshold) { + group->num_poll_timeout++; +- SPDK_WARNLOG("group[%p] poll timeout in %ldms", group, poll_time); ++ SPDK_NOTICELOG("group[%p] poll timeout in %ldms", group, poll_time); + } + } + group->save_start_ticks = current_ticks; +-- +2.33.0 + diff --git a/spdk.spec b/spdk.spec index 193a4a0..63e639d 100644 --- a/spdk.spec +++ b/spdk.spec @@ -3,7 +3,7 @@ Name: spdk Version: 21.01.1 -Release: 4 +Release: 5 Summary: Set of libraries and utilities for high performance user-mode storage License: BSD and MIT URL: http://spdk.io @@ -24,6 +24,17 @@ Patch13: 0013-lib-vhost-Fix-compilation-with-dpdk-21.11.patch Patch14: 0014-mk-Fix-debug-build-error-on-ARM-ThunderX2-and-neoverse_N1_platform.patch Patch15: 0015-configure-add-gcc-version-check-for-ARM-Neoverse-N1_platform.patch Patch16: 0016-Enhance-security-for-share-library.patch +Patch17: 0017-add-HSAK-needed-head-file-and-API-to-spdk.patch +Patch18: 0018-lib-bdev-Add-bdev-support-for-HSAK.patch +Patch19: 0019-lib-env_dpdk-Add-config-args-for-HSAK.patch +Patch20: 0020-lib-nvme-Add-nvme-support-for-HSAK.patch +Patch21: 
0021-module-bdev-Add-bdev-module-support-for-HSAK.patch +Patch22: 0022-use-spdk_nvme_ns_cmd_dataset_management-and-delete-s.patch +Patch23: 0023-spdk-add-nvme-support-for-HSAK.patch +Patch24: 0024-Add-CUSE-switch-for-nvme-ctrlr.patch +Patch25: 0025-Adapt-for-ES3000-serial-vendor-special-opcode-in-CUS.patch +Patch26: 0026-Fix-race-condition-in-continuous-setup-and-teardown-.patch +Patch27: 0027-Change-log-level-in-poll-timeout.patch %define package_version %{version}-%{release} @@ -44,6 +55,8 @@ BuildRequires: gcc gcc-c++ make BuildRequires: dpdk-devel, numactl-devel, ncurses-devel BuildRequires: libiscsi-devel, libaio-devel, openssl-devel, libuuid-devel BuildRequires: libibverbs-devel, librdmacm-devel +BuildRequires: fuse3, fuse3-devel +BuildRequires: libboundscheck %if %{with doc} BuildRequires: doxygen mscgen graphviz %endif @@ -51,6 +64,7 @@ BuildRequires: doxygen mscgen graphviz # Install dependencies Requires: dpdk >= 21.11, numactl-libs, openssl-libs Requires: libiscsi, libaio, libuuid +Requires: fuse3, libboundscheck # NVMe over Fabrics Requires: librdmacm, librdmacm Requires(post): /sbin/ldconfig @@ -113,7 +127,9 @@ BuildArch: noarch --with-rdma \ --with-shared \ --with-iscsi-initiator \ - --without-vtune + --without-vtune \ + --enable-raw \ + --with-nvme-cuse make -j`nproc` all @@ -123,6 +139,13 @@ make -C doc %install %make_install -j`nproc` prefix=%{_usr} libdir=%{_libdir} datadir=%{_datadir} +install -d $RPM_BUILD_ROOT%{_sysconfdir}/spdk +install -d $RPM_BUILD_ROOT/opt/spdk +install -d $RPM_BUILD_ROOT/usr/include/spdk_internal +install -m 0744 ./scripts/setup_self.sh $RPM_BUILD_ROOT/opt/spdk/setup.sh +install -m 0644 ./etc/spdk/nvme.conf.in $RPM_BUILD_ROOT%{_sysconfdir}/spdk +install -m 0644 include/spdk_internal/*.h $RPM_BUILD_ROOT/usr/include/spdk_internal +install -m 0644 lib/nvme/nvme_internal.h $RPM_BUILD_ROOT/usr/include/spdk_internal # Install tools mkdir -p %{install_datadir} @@ -157,12 +180,18 @@ mv doc/output/html/ %{install_docdir} %files 
%{_bindir}/spdk_* %{_libdir}/*.so.* +%dir %{_sysconfdir}/spdk +%{_sysconfdir}/spdk/nvme.conf.in +%dir /opt/spdk +/opt/spdk/setup.sh %files devel %{_includedir}/%{name} %{_libdir}/*.a %{_libdir}/*.so +%dir /usr/include/spdk_internal +/usr/include/spdk_internal/*.h %files tools @@ -177,6 +206,9 @@ mv doc/output/html/ %{install_docdir} %changelog +* Tue May 24 2022 Weifeng Su - 21.01.1-5 +- Add support for HSAK + * Tue Mar 15 2022 Weifeng Su - 21.01.1-4 - Remove rpath link option, Due to it's easy for attacher to construct 'rpath' attacks -- Gitee