From b898ee646ad955a50be2beee5cc672969f26e156 Mon Sep 17 00:00:00 2001 From: Dengdui Huang Date: Fri, 27 Oct 2023 16:23:27 +0800 Subject: [PATCH] sync some patchs from upstreaming Sync some patchs from upstreaming and modifies are as follow: - maintainers: update for hns3 driver - app/testpmd: add command to flush multicast MAC addresses - app/testpmd: fix help string - app/testpmd: fix multicast address pool leak - net/hns3: optimize SVE Rx performance - net/hns3: optimize rearm mbuf for SVE Rx - net/hns3: optimize free mbuf for SVE Tx - net/hns3: fix order in NEON Rx - net/hns3: fix traffic management dump text alignment - net/hns3: fix traffic management thread safety - net/hns3: fix flushing multicast MAC address - net/hns3: fix error code for multicast resource - net/hns3: fix VF default MAC modified when set failed - net/hns3: fix index to look up table in NEON Rx - net/hns3: fix non-zero weight for disabled TC - config/arm: add HiSilicon HIP10 Signed-off-by: Dengdui Huang --- 0351-config-arm-add-HiSilicon-HIP10.patch | 93 ++++++ ...-fix-non-zero-weight-for-disabled-TC.patch | 56 ++++ ...ix-index-to-look-up-table-in-NEON-Rx.patch | 40 +++ ...default-MAC-modified-when-set-failed.patch | 35 +++ ...ix-error-code-for-multicast-resource.patch | 35 +++ ...3-fix-flushing-multicast-MAC-address.patch | 51 ++++ ...fix-traffic-management-thread-safety.patch | 273 ++++++++++++++++++ ...affic-management-dump-text-alignment.patch | 107 +++++++ 0359-net-hns3-fix-order-in-NEON-Rx.patch | 155 ++++++++++ ...t-hns3-optimize-free-mbuf-for-SVE-Tx.patch | 89 ++++++ ...-hns3-optimize-rearm-mbuf-for-SVE-Rx.patch | 223 ++++++++++++++ ...net-hns3-optimize-SVE-Rx-performance.patch | 242 ++++++++++++++++ ...tpmd-fix-multicast-address-pool-leak.patch | 97 +++++++ 0364-app-testpmd-fix-help-string.patch | 40 +++ ...command-to-flush-multicast-MAC-addre.patch | 152 ++++++++++ 0366-maintainers-update-for-hns3-driver.patch | 33 +++ dpdk.spec | 37 ++- 17 files changed, 1757 insertions(+), 1 deletion(-) create mode 100644 0351-config-arm-add-HiSilicon-HIP10.patch create mode 100644 0352-net-hns3-fix-non-zero-weight-for-disabled-TC.patch create mode 100644 0353-net-hns3-fix-index-to-look-up-table-in-NEON-Rx.patch create mode 100644 0354-net-hns3-fix-VF-default-MAC-modified-when-set-failed.patch create mode 100644 0355-net-hns3-fix-error-code-for-multicast-resource.patch create mode 100644 0356-net-hns3-fix-flushing-multicast-MAC-address.patch create mode 100644 0357-net-hns3-fix-traffic-management-thread-safety.patch create mode 100644 0358-net-hns3-fix-traffic-management-dump-text-alignment.patch create mode 100644 0359-net-hns3-fix-order-in-NEON-Rx.patch create mode 100644 0360-net-hns3-optimize-free-mbuf-for-SVE-Tx.patch create mode 100644 0361-net-hns3-optimize-rearm-mbuf-for-SVE-Rx.patch create mode 100644 0362-net-hns3-optimize-SVE-Rx-performance.patch create mode 100644 0363-app-testpmd-fix-multicast-address-pool-leak.patch create mode 100644 0364-app-testpmd-fix-help-string.patch create mode 100644 0365-app-testpmd-add-command-to-flush-multicast-MAC-addre.patch create mode 100644 0366-maintainers-update-for-hns3-driver.patch diff --git a/0351-config-arm-add-HiSilicon-HIP10.patch b/0351-config-arm-add-HiSilicon-HIP10.patch new file mode 100644 index 0000000..28182c4 --- /dev/null +++ b/0351-config-arm-add-HiSilicon-HIP10.patch @@ -0,0 +1,93 @@ +From b3e2b303f964e5ad17af01a498ef8c1cdc32fbd6 Mon Sep 17 00:00:00 2001 +From: Dongdong Liu +Date: Mon, 26 Jun 2023 20:43:04 +0800 +Subject: [PATCH 351/366] config/arm: add HiSilicon HIP10 + +[ upstream commit 5b2a7f12edcaba0daab0154c9ab03430083cfd80 ] + +Adding support for HiSilicon HIP10 platform. + +Signed-off-by: Dongdong Liu +Acked-by: Ruifeng Wang +--- + config/arm/arm64_hip10_linux_gcc | 16 ++++++++++++++++ + config/arm/meson.build | 19 +++++++++++++++++++ + 2 files changed, 35 insertions(+) + create mode 100644 config/arm/arm64_hip10_linux_gcc + +diff --git a/config/arm/arm64_hip10_linux_gcc b/config/arm/arm64_hip10_linux_gcc +new file mode 100644 +index 0000000..2943e4a +--- /dev/null ++++ b/config/arm/arm64_hip10_linux_gcc +@@ -0,0 +1,16 @@ ++[binaries] ++c = ['ccache', 'aarch64-linux-gnu-gcc'] ++cpp = ['ccache', 'aarch64-linux-gnu-g++'] ++ar = 'aarch64-linux-gnu-gcc-ar' ++strip = 'aarch64-linux-gnu-strip' ++pkgconfig = 'aarch64-linux-gnu-pkg-config' ++pcap-config = '' ++ ++[host_machine] ++system = 'linux' ++cpu_family = 'aarch64' ++cpu = 'armv8-a' ++endian = 'little' ++ ++[properties] ++platform = 'hip10' +diff --git a/config/arm/meson.build b/config/arm/meson.build +index 213324d..ef047e9 100644 +--- a/config/arm/meson.build ++++ b/config/arm/meson.build +@@ -193,6 +193,16 @@ implementer_hisilicon = { + ['RTE_MAX_LCORE', 1280], + ['RTE_MAX_NUMA_NODES', 16] + ] ++ }, ++ '0xd03': { ++ 'march': 'armv8.5-a', ++ 'march_features': ['crypto', 'sve'], ++ 'flags': [ ++ ['RTE_MACHINE', '"hip10"'], ++ ['RTE_ARM_FEATURE_ATOMICS', true], ++ ['RTE_MAX_LCORE', 1280], ++ ['RTE_MAX_NUMA_NODES', 16] ++ ] + } + } + } +@@ -309,6 +319,13 @@ soc_graviton2 = { + 'numa': false + } + ++soc_hip10 = { ++ 'description': 'HiSilicon HIP10', ++ 'implementer': '0x48', ++ 'part_number': '0xd03', ++ 'numa': true ++} ++ + soc_kunpeng920 = { + 'description': 'HiSilicon Kunpeng 920', + 'implementer': '0x48', +@@ -381,6 +398,7 @@ cn10k: Marvell OCTEON 10 + dpaa: NXP DPAA + emag: Ampere eMAG + graviton2: AWS Graviton2 ++hip10: HiSilicon HIP10 + kunpeng920: HiSilicon Kunpeng 920 + kunpeng930: HiSilicon Kunpeng 930 + n1sdp: Arm Neoverse N1SDP +@@ -403,6 +421,7 @@ socs = { + 'dpaa': soc_dpaa, + 'emag': soc_emag, + 'graviton2': soc_graviton2, ++ 'hip10': soc_hip10, + 'kunpeng920': soc_kunpeng920, + 'kunpeng930': soc_kunpeng930, + 'n1sdp': soc_n1sdp, +-- +2.41.0.windows.2 + diff --git a/0352-net-hns3-fix-non-zero-weight-for-disabled-TC.patch b/0352-net-hns3-fix-non-zero-weight-for-disabled-TC.patch new file mode 100644 index 0000000..a5da010 --- /dev/null +++ b/0352-net-hns3-fix-non-zero-weight-for-disabled-TC.patch @@ -0,0 +1,56 @@ +From af30b78f204788a5a82cc637b813a3b8bb66ae6b Mon Sep 17 00:00:00 2001 +From: Huisong Li +Date: Fri, 7 Jul 2023 18:40:53 +0800 +Subject: [PATCH 352/366] net/hns3: fix non-zero weight for disabled TC + +[ upstream commit 1abcdb3f247393a04703071452b560a77ab23c04 ] + +hns3 PF driver enables one TC, allocates to 100% weight for this +TC and 0% for other disabled TC by default. But driver modifies +the weight to 1% for disabled TC and then set to hardware to make +all TC work in DWRR mode. As a result, the total percent of all TC +is more than 100%. Actually, this operation is also redundant, +because these disabled TC will never be used. So this patch sets +the weight of all TC based on user's configuration. + +Fixes: 62e3ccc2b94c ("net/hns3: support flow control") +Cc: stable@dpdk.org + +Signed-off-by: Huisong Li +Signed-off-by: Dongdong Liu +--- + drivers/net/hns3/hns3_dcb.c | 9 +-------- + 1 file changed, 1 insertion(+), 8 deletions(-) + +diff --git a/drivers/net/hns3/hns3_dcb.c b/drivers/net/hns3/hns3_dcb.c +index af045b2..07b8c46 100644 +--- a/drivers/net/hns3/hns3_dcb.c ++++ b/drivers/net/hns3/hns3_dcb.c +@@ -237,9 +237,9 @@ hns3_dcb_qs_weight_cfg(struct hns3_hw *hw, uint16_t qs_id, uint8_t dwrr) + static int + hns3_dcb_ets_tc_dwrr_cfg(struct hns3_hw *hw) + { +-#define DEFAULT_TC_WEIGHT 1 + #define DEFAULT_TC_OFFSET 14 + struct hns3_ets_tc_weight_cmd *ets_weight; ++ struct hns3_pg_info *pg_info; + struct hns3_cmd_desc desc; + uint8_t i; + +@@ -247,13 +247,6 @@ hns3_dcb_ets_tc_dwrr_cfg(struct hns3_hw *hw) + ets_weight = (struct hns3_ets_tc_weight_cmd *)desc.data; + + for (i = 0; i < HNS3_MAX_TC_NUM; i++) { +- struct hns3_pg_info *pg_info; +- +- ets_weight->tc_weight[i] = DEFAULT_TC_WEIGHT; +- +- if (!(hw->hw_tc_map & BIT(i))) +- continue; +- + pg_info = &hw->dcb_info.pg_info[hw->dcb_info.tc_info[i].pgid]; + ets_weight->tc_weight[i] = pg_info->tc_dwrr[i]; + } +-- +2.41.0.windows.2 + diff --git a/0353-net-hns3-fix-index-to-look-up-table-in-NEON-Rx.patch b/0353-net-hns3-fix-index-to-look-up-table-in-NEON-Rx.patch new file mode 100644 index 0000000..77ec50a --- /dev/null +++ b/0353-net-hns3-fix-index-to-look-up-table-in-NEON-Rx.patch @@ -0,0 +1,40 @@ +From c7f8daafe6ec2cfde7af46e446c227f15b0eec7f Mon Sep 17 00:00:00 2001 +From: Huisong Li +Date: Tue, 11 Jul 2023 18:24:44 +0800 +Subject: [PATCH 353/366] net/hns3: fix index to look up table in NEON Rx + +[ upstream commit 6bec7c50be7a38c114680481f285976142df40d0 ] + +In hns3_recv_burst_vec(), the index to get packet length and data +size are reversed. Fortunately, this doesn't affect functionality +because the NEON Rx only supports single BD in which the packet +length is equal to the date size. Now this patch fixes it to get +back to the truth. + +Fixes: a3d4f4d291d7 ("net/hns3: support NEON Rx") +Cc: stable@dpdk.org + +Signed-off-by: Huisong Li +Signed-off-by: Dongdong Liu +--- + drivers/net/hns3/hns3_rxtx_vec_neon.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/hns3/hns3_rxtx_vec_neon.h b/drivers/net/hns3/hns3_rxtx_vec_neon.h +index 55d9bf8..a20a6b6 100644 +--- a/drivers/net/hns3/hns3_rxtx_vec_neon.h ++++ b/drivers/net/hns3/hns3_rxtx_vec_neon.h +@@ -142,8 +142,8 @@ hns3_recv_burst_vec(struct hns3_rx_queue *__restrict rxq, + /* mask to shuffle from desc to mbuf's rx_descriptor_fields1 */ + uint8x16_t shuf_desc_fields_msk = { + 0xff, 0xff, 0xff, 0xff, /* packet type init zero */ +- 22, 23, 0xff, 0xff, /* rx.pkt_len to rte_mbuf.pkt_len */ +- 20, 21, /* size to rte_mbuf.data_len */ ++ 20, 21, 0xff, 0xff, /* rx.pkt_len to rte_mbuf.pkt_len */ ++ 22, 23, /* size to rte_mbuf.data_len */ + 0xff, 0xff, /* rte_mbuf.vlan_tci init zero */ + 8, 9, 10, 11, /* rx.rss_hash to rte_mbuf.hash.rss */ + }; +-- +2.41.0.windows.2 + diff --git a/0354-net-hns3-fix-VF-default-MAC-modified-when-set-failed.patch b/0354-net-hns3-fix-VF-default-MAC-modified-when-set-failed.patch new file mode 100644 index 0000000..90f4001 --- /dev/null +++ b/0354-net-hns3-fix-VF-default-MAC-modified-when-set-failed.patch @@ -0,0 +1,35 @@ +From f2d94f67f97a92cd142f1e7e6fa5106766acd08a Mon Sep 17 00:00:00 2001 +From: Dengdui Huang +Date: Sat, 5 Aug 2023 16:36:23 +0800 +Subject: [PATCH 354/366] net/hns3: fix VF default MAC modified when set failed + +[ upstream commit ed7faab2a717347077d9e657fba010bb145a2b54 ] + +When the VF fail to set the default MAC address, +"hw->mac.mac_addr" should not be updated. + +Fixes: a5475d61fa34 ("net/hns3: support VF") +Cc: stable@dpdk.org + +Signed-off-by: Dengdui Huang +Signed-off-by: Dongdong Liu +--- + drivers/net/hns3/hns3_ethdev_vf.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/net/hns3/hns3_ethdev_vf.c b/drivers/net/hns3/hns3_ethdev_vf.c +index 6898a77..02fb4a8 100644 +--- a/drivers/net/hns3/hns3_ethdev_vf.c ++++ b/drivers/net/hns3/hns3_ethdev_vf.c +@@ -250,6 +250,8 @@ hns3vf_set_default_mac_addr(struct rte_eth_dev *dev, + hns3_err(hw, "Failed to set mac addr(%s) for vf: %d", + mac_str, ret); + } ++ rte_spinlock_unlock(&hw->lock); ++ return ret; + } + + rte_ether_addr_copy(mac_addr, +-- +2.41.0.windows.2 + diff --git a/0355-net-hns3-fix-error-code-for-multicast-resource.patch b/0355-net-hns3-fix-error-code-for-multicast-resource.patch new file mode 100644 index 0000000..4f2c99f --- /dev/null +++ b/0355-net-hns3-fix-error-code-for-multicast-resource.patch @@ -0,0 +1,35 @@ +From 81f221e0c7e43eb37eda6e4ea8765a159fae9b08 Mon Sep 17 00:00:00 2001 +From: Dengdui Huang +Date: Sat, 5 Aug 2023 16:36:24 +0800 +Subject: [PATCH 355/366] net/hns3: fix error code for multicast resource + +[ upstream commit c8cd885352d58bcfcc514770cb6068dd689d0dc3 ] + +Return ENOSPC instead of EINVAL when the hardware +has not enough multicast filtering resources. + +Fixes: 7d7f9f80bbfb ("net/hns3: support MAC address related operations") +Cc: stable@dpdk.org + +Signed-off-by: Dengdui Huang +Signed-off-by: Dongdong Liu +--- + drivers/net/hns3/hns3_common.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/hns3/hns3_common.c b/drivers/net/hns3/hns3_common.c +index a7b576a..51a1c68 100644 +--- a/drivers/net/hns3/hns3_common.c ++++ b/drivers/net/hns3/hns3_common.c +@@ -384,7 +384,7 @@ hns3_set_mc_addr_chk_param(struct hns3_hw *hw, + hns3_err(hw, "failed to set mc mac addr, nb_mc_addr(%u) " + "invalid. valid range: 0~%d", + nb_mc_addr, HNS3_MC_MACADDR_NUM); +- return -EINVAL; ++ return -ENOSPC; + } + + /* Check if input mac addresses are valid */ +-- +2.41.0.windows.2 + diff --git a/0356-net-hns3-fix-flushing-multicast-MAC-address.patch b/0356-net-hns3-fix-flushing-multicast-MAC-address.patch new file mode 100644 index 0000000..6242127 --- /dev/null +++ b/0356-net-hns3-fix-flushing-multicast-MAC-address.patch @@ -0,0 +1,51 @@ +From 526759b4f78ecd42b217285c892a2e2e664192a2 Mon Sep 17 00:00:00 2001 +From: Dengdui Huang +Date: Sat, 5 Aug 2023 16:36:25 +0800 +Subject: [PATCH 356/366] net/hns3: fix flushing multicast MAC address + +[ upstream commit 49d1ab205b033b6131fb895b5e4d9ebc14081e51 ] + +According rte_eth_dev_set_mc_addr_list() API definition, +support flush multicast MAC address if mc_addr_set is NULL +or nb_mc_addr is zero. + +Fixes: 7d7f9f80bbfb ("net/hns3: support MAC address related operations") +Cc: stable@dpdk.org + +Signed-off-by: Dengdui Huang +Signed-off-by: Dongdong Liu +--- + drivers/net/hns3/hns3_common.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/drivers/net/hns3/hns3_common.c b/drivers/net/hns3/hns3_common.c +index 51a1c68..5dec62c 100644 +--- a/drivers/net/hns3/hns3_common.c ++++ b/drivers/net/hns3/hns3_common.c +@@ -442,6 +442,7 @@ hns3_set_mc_mac_addr_list(struct rte_eth_dev *dev, + uint32_t nb_mc_addr) + { + struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); ++ struct hns3_adapter *hns = HNS3_DEV_HW_TO_ADAPTER(hw); + struct rte_ether_addr *addr; + int cur_addr_num; + int set_addr_num; +@@ -449,6 +450,15 @@ hns3_set_mc_mac_addr_list(struct rte_eth_dev *dev, + int ret; + int i; + ++ if (mc_addr_set == NULL || nb_mc_addr == 0) { ++ rte_spinlock_lock(&hw->lock); ++ ret = hns3_configure_all_mc_mac_addr(hns, true); ++ if (ret == 0) ++ hw->mc_addrs_num = 0; ++ rte_spinlock_unlock(&hw->lock); ++ return ret; ++ } ++ + /* Check if input parameters are valid */ + ret = hns3_set_mc_addr_chk_param(hw, mc_addr_set, nb_mc_addr); + if (ret) +-- +2.41.0.windows.2 + diff --git a/0357-net-hns3-fix-traffic-management-thread-safety.patch b/0357-net-hns3-fix-traffic-management-thread-safety.patch new file mode 100644 index 0000000..1b3527a --- /dev/null +++ b/0357-net-hns3-fix-traffic-management-thread-safety.patch @@ -0,0 +1,273 @@ +From a5b54a960acbdd2c55f60577f7801af096ee84ba Mon Sep 17 00:00:00 2001 +From: Chengwen Feng +Date: Sat, 5 Aug 2023 16:36:26 +0800 +Subject: [PATCH 357/366] net/hns3: fix traffic management thread safety + +[ upstream commit 69901040975bff8a38edfc47aee727cadc87d356 ] + +The driver-related TM (traffic management) info is implemented through +the linked list. The following threads are involved in the read and +write of the TM info: + +1. main thread: invokes the rte_tm_xxx() API family to modify or read. +2. interrupt thread: will read TM info in reset recover process. +3. telemetry/proc-info thread: invoke rte_eth_dev_priv_dump() API to + read TM info. + +Currently, thread safety protection of TM info is implemented only in +the following operations: +1. some of the rte_tm_xxx() API's implementation. +2. reset recover process. + +Thread safety risks may exist in other scenarios, so fix by: +1. make sure all the rte_tm_xxx() API's implementations protected by + hw.lock. +2. make sure rte_eth_dev_priv_dump() API's implementation protected + by hw.lock. + +Fixes: c09c7847d892 ("net/hns3: support traffic management") +Fixes: e4cfe6bb9114 ("net/hns3: dump TM configuration info") +Cc: stable@dpdk.org + +Signed-off-by: Chengwen Feng +Signed-off-by: Dongdong Liu +--- + drivers/net/hns3/hns3_dump.c | 8 +- + drivers/net/hns3/hns3_tm.c | 173 ++++++++++++++++++++++++++++++----- + 2 files changed, 157 insertions(+), 24 deletions(-) + +diff --git a/drivers/net/hns3/hns3_dump.c b/drivers/net/hns3/hns3_dump.c +index 7ecfca8..2dc44f2 100644 +--- a/drivers/net/hns3/hns3_dump.c ++++ b/drivers/net/hns3/hns3_dump.c +@@ -918,6 +918,8 @@ hns3_eth_dev_priv_dump(struct rte_eth_dev *dev, FILE *file) + struct hns3_adapter *hns = dev->data->dev_private; + struct hns3_hw *hw = &hns->hw; + ++ rte_spinlock_lock(&hw->lock); ++ + hns3_get_device_basic_info(file, dev); + hns3_get_dev_feature_capability(file, hw); + hns3_get_rxtx_queue_info(file, dev); +@@ -927,8 +929,10 @@ hns3_eth_dev_priv_dump(struct rte_eth_dev *dev, FILE *file) + * VF only supports dumping basic info, feaure capability and queue + * info. + */ +- if (hns->is_vf) ++ if (hns->is_vf) { ++ rte_spinlock_unlock(&hw->lock); + return 0; ++ } + + hns3_get_dev_mac_info(file, hns); + hns3_get_vlan_config_info(file, hw); +@@ -936,6 +940,8 @@ hns3_eth_dev_priv_dump(struct rte_eth_dev *dev, FILE *file) + hns3_get_tm_conf_info(file, dev); + hns3_get_flow_ctrl_info(file, dev); + ++ rte_spinlock_unlock(&hw->lock); ++ + return 0; + } + +diff --git a/drivers/net/hns3/hns3_tm.c b/drivers/net/hns3/hns3_tm.c +index e1089b6..67402a7 100644 +--- a/drivers/net/hns3/hns3_tm.c ++++ b/drivers/net/hns3/hns3_tm.c +@@ -1081,21 +1081,6 @@ hns3_tm_hierarchy_commit(struct rte_eth_dev *dev, + return -EINVAL; + } + +-static int +-hns3_tm_hierarchy_commit_wrap(struct rte_eth_dev *dev, +- int clear_on_fail, +- struct rte_tm_error *error) +-{ +- struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); +- int ret; +- +- rte_spinlock_lock(&hw->lock); +- ret = hns3_tm_hierarchy_commit(dev, clear_on_fail, error); +- rte_spinlock_unlock(&hw->lock); +- +- return ret; +-} +- + static int + hns3_tm_node_shaper_do_update(struct hns3_hw *hw, + uint32_t node_id, +@@ -1195,6 +1180,148 @@ hns3_tm_node_shaper_update(struct rte_eth_dev *dev, + return 0; + } + ++static int ++hns3_tm_capabilities_get_wrap(struct rte_eth_dev *dev, ++ struct rte_tm_capabilities *cap, ++ struct rte_tm_error *error) ++{ ++ struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); ++ int ret; ++ ++ rte_spinlock_lock(&hw->lock); ++ ret = hns3_tm_capabilities_get(dev, cap, error); ++ rte_spinlock_unlock(&hw->lock); ++ ++ return ret; ++} ++ ++static int ++hns3_tm_shaper_profile_add_wrap(struct rte_eth_dev *dev, ++ uint32_t shaper_profile_id, ++ struct rte_tm_shaper_params *profile, ++ struct rte_tm_error *error) ++{ ++ struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); ++ int ret; ++ ++ rte_spinlock_lock(&hw->lock); ++ ret = hns3_tm_shaper_profile_add(dev, shaper_profile_id, profile, error); ++ rte_spinlock_unlock(&hw->lock); ++ ++ return ret; ++} ++ ++static int ++hns3_tm_shaper_profile_del_wrap(struct rte_eth_dev *dev, ++ uint32_t shaper_profile_id, ++ struct rte_tm_error *error) ++{ ++ struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); ++ int ret; ++ ++ rte_spinlock_lock(&hw->lock); ++ ret = hns3_tm_shaper_profile_del(dev, shaper_profile_id, error); ++ rte_spinlock_unlock(&hw->lock); ++ ++ return ret; ++} ++ ++static int ++hns3_tm_node_add_wrap(struct rte_eth_dev *dev, uint32_t node_id, ++ uint32_t parent_node_id, uint32_t priority, ++ uint32_t weight, uint32_t level_id, ++ struct rte_tm_node_params *params, ++ struct rte_tm_error *error) ++{ ++ struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); ++ int ret; ++ ++ rte_spinlock_lock(&hw->lock); ++ ret = hns3_tm_node_add(dev, node_id, parent_node_id, priority, ++ weight, level_id, params, error); ++ rte_spinlock_unlock(&hw->lock); ++ ++ return ret; ++} ++ ++static int ++hns3_tm_node_delete_wrap(struct rte_eth_dev *dev, ++ uint32_t node_id, ++ struct rte_tm_error *error) ++{ ++ struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); ++ int ret; ++ ++ rte_spinlock_lock(&hw->lock); ++ ret = hns3_tm_node_delete(dev, node_id, error); ++ rte_spinlock_unlock(&hw->lock); ++ ++ return ret; ++} ++ ++static int ++hns3_tm_node_type_get_wrap(struct rte_eth_dev *dev, ++ uint32_t node_id, ++ int *is_leaf, ++ struct rte_tm_error *error) ++{ ++ struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); ++ int ret; ++ ++ rte_spinlock_lock(&hw->lock); ++ ret = hns3_tm_node_type_get(dev, node_id, is_leaf, error); ++ rte_spinlock_unlock(&hw->lock); ++ ++ return ret; ++} ++ ++static int ++hns3_tm_level_capabilities_get_wrap(struct rte_eth_dev *dev, ++ uint32_t level_id, ++ struct rte_tm_level_capabilities *cap, ++ struct rte_tm_error *error) ++{ ++ struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); ++ int ret; ++ ++ rte_spinlock_lock(&hw->lock); ++ ret = hns3_tm_level_capabilities_get(dev, level_id, cap, error); ++ rte_spinlock_unlock(&hw->lock); ++ ++ return ret; ++} ++ ++static int ++hns3_tm_node_capabilities_get_wrap(struct rte_eth_dev *dev, ++ uint32_t node_id, ++ struct rte_tm_node_capabilities *cap, ++ struct rte_tm_error *error) ++{ ++ struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); ++ int ret; ++ ++ rte_spinlock_lock(&hw->lock); ++ ret = hns3_tm_node_capabilities_get(dev, node_id, cap, error); ++ rte_spinlock_unlock(&hw->lock); ++ ++ return ret; ++} ++ ++static int ++hns3_tm_hierarchy_commit_wrap(struct rte_eth_dev *dev, ++ int clear_on_fail, ++ struct rte_tm_error *error) ++{ ++ struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); ++ int ret; ++ ++ rte_spinlock_lock(&hw->lock); ++ ret = hns3_tm_hierarchy_commit(dev, clear_on_fail, error); ++ rte_spinlock_unlock(&hw->lock); ++ ++ return ret; ++} ++ + static int + hns3_tm_node_shaper_update_wrap(struct rte_eth_dev *dev, + uint32_t node_id, +@@ -1213,14 +1340,14 @@ hns3_tm_node_shaper_update_wrap(struct rte_eth_dev *dev, + } + + static const struct rte_tm_ops hns3_tm_ops = { +- .capabilities_get = hns3_tm_capabilities_get, +- .shaper_profile_add = hns3_tm_shaper_profile_add, +- .shaper_profile_delete = hns3_tm_shaper_profile_del, +- .node_add = hns3_tm_node_add, +- .node_delete = hns3_tm_node_delete, +- .node_type_get = hns3_tm_node_type_get, +- .level_capabilities_get = hns3_tm_level_capabilities_get, +- .node_capabilities_get = hns3_tm_node_capabilities_get, ++ .capabilities_get = hns3_tm_capabilities_get_wrap, ++ .shaper_profile_add = hns3_tm_shaper_profile_add_wrap, ++ .shaper_profile_delete = hns3_tm_shaper_profile_del_wrap, ++ .node_add = hns3_tm_node_add_wrap, ++ .node_delete = hns3_tm_node_delete_wrap, ++ .node_type_get = hns3_tm_node_type_get_wrap, ++ .level_capabilities_get = hns3_tm_level_capabilities_get_wrap, ++ .node_capabilities_get = hns3_tm_node_capabilities_get_wrap, + .hierarchy_commit = hns3_tm_hierarchy_commit_wrap, + .node_shaper_update = hns3_tm_node_shaper_update_wrap, + }; +-- +2.41.0.windows.2 + diff --git a/0358-net-hns3-fix-traffic-management-dump-text-alignment.patch b/0358-net-hns3-fix-traffic-management-dump-text-alignment.patch new file mode 100644 index 0000000..3129ad9 --- /dev/null +++ b/0358-net-hns3-fix-traffic-management-dump-text-alignment.patch @@ -0,0 +1,107 @@ +From c813bce4dfa2c99ec1ddc06cce3adff7b5f5fdef Mon Sep 17 00:00:00 2001 +From: Chengwen Feng +Date: Sat, 5 Aug 2023 16:36:27 +0800 +Subject: [PATCH 358/366] net/hns3: fix traffic management dump text alignment + +[ upstream commit a73065bfea87385aa86d8ec2e7b65f68494c4f06 ] + +Currently the dumped TM info is un-align, which are: + - TM config info: + -- nb_leaf_nodes_max=64 nb_nodes_max=73 + -- nb_shaper_profile=2 nb_tc_node=1 nb_queue_node=1 + -- committed=0 + shaper_profile: + id=800 reference_count=1 peak_rate=4000000Bps + id=801 reference_count=1 peak_rate=12000000Bps + port_node: + ... + +This patch fix it, the new formatting: + - TM config info: + -- nb_leaf_nodes_max=256 nb_nodes_max=265 + -- nb_shaper_profile=2 nb_tc_node=1 nb_queue_node=1 + -- committed=1 + -- shaper_profile: + id=800 reference_count=0 peak_rate=4000000Bps + id=801 reference_count=0 peak_rate=12000000Bps + -- port_node: + ... + +Fixes: e4cfe6bb9114 ("net/hns3: dump TM configuration info") +Cc: stable@dpdk.org + +Signed-off-by: Chengwen Feng +Signed-off-by: Dongdong Liu +--- + drivers/net/hns3/hns3_dump.c | 18 +++++++++--------- + 1 file changed, 9 insertions(+), 9 deletions(-) + +diff --git a/drivers/net/hns3/hns3_dump.c b/drivers/net/hns3/hns3_dump.c +index 2dc44f2..b6e8b62 100644 +--- a/drivers/net/hns3/hns3_dump.c ++++ b/drivers/net/hns3/hns3_dump.c +@@ -664,10 +664,10 @@ hns3_get_tm_conf_shaper_info(FILE *file, struct hns3_tm_conf *conf) + if (conf->nb_shaper_profile == 0) + return; + +- fprintf(file, " shaper_profile:\n"); ++ fprintf(file, "\t -- shaper_profile:\n"); + TAILQ_FOREACH(shaper_profile, shaper_profile_list, node) { + fprintf(file, +- " id=%u reference_count=%u peak_rate=%" PRIu64 "Bps\n", ++ "\t id=%u reference_count=%u peak_rate=%" PRIu64 "Bps\n", + shaper_profile->shaper_profile_id, + shaper_profile->reference_count, + shaper_profile->profile.peak.rate); +@@ -681,8 +681,8 @@ hns3_get_tm_conf_port_node_info(FILE *file, struct hns3_tm_conf *conf) + return; + + fprintf(file, +- " port_node:\n" +- " node_id=%u reference_count=%u shaper_profile_id=%d\n", ++ "\t -- port_node:\n" ++ "\t node_id=%u reference_count=%u shaper_profile_id=%d\n", + conf->root->id, conf->root->reference_count, + conf->root->shaper_profile ? + (int)conf->root->shaper_profile->shaper_profile_id : -1); +@@ -699,7 +699,7 @@ hns3_get_tm_conf_tc_node_info(FILE *file, struct hns3_tm_conf *conf) + if (conf->nb_tc_node == 0) + return; + +- fprintf(file, " tc_node:\n"); ++ fprintf(file, "\t -- tc_node:\n"); + memset(tc_node, 0, sizeof(tc_node)); + TAILQ_FOREACH(tm_node, tc_list, node) { + tidx = hns3_tm_calc_node_tc_no(conf, tm_node->id); +@@ -712,7 +712,7 @@ hns3_get_tm_conf_tc_node_info(FILE *file, struct hns3_tm_conf *conf) + if (tm_node == NULL) + continue; + fprintf(file, +- " id=%u TC%u reference_count=%u parent_id=%d " ++ "\t id=%u TC%u reference_count=%u parent_id=%d " + "shaper_profile_id=%d\n", + tm_node->id, hns3_tm_calc_node_tc_no(conf, tm_node->id), + tm_node->reference_count, +@@ -738,7 +738,7 @@ hns3_get_tm_conf_queue_format_info(FILE *file, struct hns3_tm_node **queue_node, + end_queue_id = (i + 1) * HNS3_PERLINE_QUEUES - 1; + if (end_queue_id > nb_tx_queues - 1) + end_queue_id = nb_tx_queues - 1; +- fprintf(file, " %04u - %04u | ", start_queue_id, ++ fprintf(file, "\t %04u - %04u | ", start_queue_id, + end_queue_id); + for (j = start_queue_id; j < nb_tx_queues; j++) { + if (j >= end_queue_id + 1) +@@ -767,8 +767,8 @@ hns3_get_tm_conf_queue_node_info(FILE *file, struct hns3_tm_conf *conf, + return; + + fprintf(file, +- " queue_node:\n" +- " tx queue id | mapped tc (8 mean node not exist)\n"); ++ "\t -- queue_node:\n" ++ "\t tx queue id | mapped tc (8 mean node not exist)\n"); + + memset(queue_node, 0, sizeof(queue_node)); + memset(queue_node_tc, 0, sizeof(queue_node_tc)); +-- +2.41.0.windows.2 + diff --git a/0359-net-hns3-fix-order-in-NEON-Rx.patch b/0359-net-hns3-fix-order-in-NEON-Rx.patch new file mode 100644 index 0000000..859e48e --- /dev/null +++ b/0359-net-hns3-fix-order-in-NEON-Rx.patch @@ -0,0 +1,155 @@ +From 7739ae6472f1dc986ce72d24ff3fcdd1a1eccc3f Mon Sep 17 00:00:00 2001 +From: Huisong Li +Date: Tue, 11 Jul 2023 18:24:45 +0800 +Subject: [PATCH 359/366] net/hns3: fix order in NEON Rx + +[ upstream commit 7dd439ed998c36c8d0204c436cc656af08cfa5fc ] + +This patch reorders the order of the NEON Rx for better maintenance +and easier understanding. + +Fixes: a3d4f4d291d7 ("net/hns3: support NEON Rx") +Cc: stable@dpdk.org + +Signed-off-by: Huisong Li +Signed-off-by: Dongdong Liu +--- + drivers/net/hns3/hns3_rxtx_vec_neon.h | 78 +++++++++++---------------- + 1 file changed, 31 insertions(+), 47 deletions(-) + +diff --git a/drivers/net/hns3/hns3_rxtx_vec_neon.h b/drivers/net/hns3/hns3_rxtx_vec_neon.h +index a20a6b6..1048b9d 100644 +--- a/drivers/net/hns3/hns3_rxtx_vec_neon.h ++++ b/drivers/net/hns3/hns3_rxtx_vec_neon.h +@@ -180,19 +180,12 @@ hns3_recv_burst_vec(struct hns3_rx_queue *__restrict rxq, + bd_vld = vset_lane_u16(rxdp[2].rx.bdtype_vld_udp0, bd_vld, 2); + bd_vld = vset_lane_u16(rxdp[3].rx.bdtype_vld_udp0, bd_vld, 3); + +- /* load 2 mbuf pointer */ +- mbp1 = vld1q_u64((uint64_t *)&sw_ring[pos]); +- + bd_vld = vshl_n_u16(bd_vld, + HNS3_UINT16_BIT - 1 - HNS3_RXD_VLD_B); + bd_vld = vreinterpret_u16_s16( + vshr_n_s16(vreinterpret_s16_u16(bd_vld), + HNS3_UINT16_BIT - 1)); + stat = ~vget_lane_u64(vreinterpret_u64_u16(bd_vld), 0); +- +- /* load 2 mbuf pointer again */ +- mbp2 = vld1q_u64((uint64_t *)&sw_ring[pos + 2]); +- + if (likely(stat == 0)) + bd_valid_num = HNS3_DEFAULT_DESCS_PER_LOOP; + else +@@ -200,20 +193,20 @@ hns3_recv_burst_vec(struct hns3_rx_queue *__restrict rxq, + if (bd_valid_num == 0) + break; + +- /* use offset to control below data load oper ordering */ +- offset = rxq->offset_table[bd_valid_num]; ++ /* load 4 mbuf pointer */ ++ mbp1 = vld1q_u64((uint64_t *)&sw_ring[pos]); ++ mbp2 = vld1q_u64((uint64_t *)&sw_ring[pos + 2]); + +- /* store 2 mbuf pointer into rx_pkts */ ++ /* store 4 mbuf pointer into rx_pkts */ + vst1q_u64((uint64_t *)&rx_pkts[pos], mbp1); ++ vst1q_u64((uint64_t *)&rx_pkts[pos + 2], mbp2); + +- /* read first two descs */ ++ /* use offset to control below data load oper ordering */ ++ offset = rxq->offset_table[bd_valid_num]; ++ ++ /* read 4 descs */ + descs[0] = vld2q_u64((uint64_t *)(rxdp + offset)); + descs[1] = vld2q_u64((uint64_t *)(rxdp + offset + 1)); +- +- /* store 2 mbuf pointer into rx_pkts again */ +- vst1q_u64((uint64_t *)&rx_pkts[pos + 2], mbp2); +- +- /* read remains two descs */ + descs[2] = vld2q_u64((uint64_t *)(rxdp + offset + 2)); + descs[3] = vld2q_u64((uint64_t *)(rxdp + offset + 3)); + +@@ -221,56 +214,47 @@ hns3_recv_burst_vec(struct hns3_rx_queue *__restrict rxq, + pkt_mbuf1.val[1] = vreinterpretq_u8_u64(descs[0].val[1]); + pkt_mbuf2.val[0] = vreinterpretq_u8_u64(descs[1].val[0]); + pkt_mbuf2.val[1] = vreinterpretq_u8_u64(descs[1].val[1]); ++ pkt_mbuf3.val[0] = vreinterpretq_u8_u64(descs[2].val[0]); ++ pkt_mbuf3.val[1] = vreinterpretq_u8_u64(descs[2].val[1]); ++ pkt_mbuf4.val[0] = vreinterpretq_u8_u64(descs[3].val[0]); ++ pkt_mbuf4.val[1] = vreinterpretq_u8_u64(descs[3].val[1]); + +- /* pkt 1,2 convert format from desc to pktmbuf */ ++ /* 4 packets convert format from desc to pktmbuf */ + pkt_mb1 = vqtbl2q_u8(pkt_mbuf1, shuf_desc_fields_msk); + pkt_mb2 = vqtbl2q_u8(pkt_mbuf2, shuf_desc_fields_msk); ++ pkt_mb3 = vqtbl2q_u8(pkt_mbuf3, shuf_desc_fields_msk); ++ pkt_mb4 = vqtbl2q_u8(pkt_mbuf4, shuf_desc_fields_msk); + +- /* store the first 8 bytes of pkt 1,2 mbuf's rearm_data */ +- *(uint64_t *)&sw_ring[pos + 0].mbuf->rearm_data = +- rxq->mbuf_initializer; +- *(uint64_t *)&sw_ring[pos + 1].mbuf->rearm_data = +- rxq->mbuf_initializer; +- +- /* pkt 1,2 remove crc */ ++ /* 4 packets remove crc */ + tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb1), crc_adjust); + pkt_mb1 = vreinterpretq_u8_u16(tmp); + tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb2), crc_adjust); + pkt_mb2 = vreinterpretq_u8_u16(tmp); ++ tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb3), crc_adjust); ++ pkt_mb3 = vreinterpretq_u8_u16(tmp); ++ tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb4), crc_adjust); ++ pkt_mb4 = vreinterpretq_u8_u16(tmp); + +- pkt_mbuf3.val[0] = vreinterpretq_u8_u64(descs[2].val[0]); +- pkt_mbuf3.val[1] = vreinterpretq_u8_u64(descs[2].val[1]); +- pkt_mbuf4.val[0] = vreinterpretq_u8_u64(descs[3].val[0]); +- pkt_mbuf4.val[1] = vreinterpretq_u8_u64(descs[3].val[1]); +- +- /* pkt 3,4 convert format from desc to pktmbuf */ +- pkt_mb3 = vqtbl2q_u8(pkt_mbuf3, shuf_desc_fields_msk); +- pkt_mb4 = vqtbl2q_u8(pkt_mbuf4, shuf_desc_fields_msk); +- +- /* pkt 1,2 save to rx_pkts mbuf */ ++ /* save packet info to rx_pkts mbuf */ + vst1q_u8((void *)&sw_ring[pos + 0].mbuf->rx_descriptor_fields1, + pkt_mb1); + vst1q_u8((void *)&sw_ring[pos + 1].mbuf->rx_descriptor_fields1, + pkt_mb2); ++ vst1q_u8((void *)&sw_ring[pos + 2].mbuf->rx_descriptor_fields1, ++ pkt_mb3); ++ vst1q_u8((void *)&sw_ring[pos + 3].mbuf->rx_descriptor_fields1, ++ pkt_mb4); + +- /* pkt 3,4 remove crc */ +- tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb3), crc_adjust); +- pkt_mb3 = vreinterpretq_u8_u16(tmp); +- tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb4), crc_adjust); +- pkt_mb4 = vreinterpretq_u8_u16(tmp); +- +- /* store the first 8 bytes of pkt 3,4 mbuf's rearm_data */ ++ /* store the first 8 bytes of packets mbuf's rearm_data */ ++ *(uint64_t *)&sw_ring[pos + 0].mbuf->rearm_data = ++ rxq->mbuf_initializer; ++ *(uint64_t *)&sw_ring[pos + 1].mbuf->rearm_data = ++ rxq->mbuf_initializer; + *(uint64_t *)&sw_ring[pos + 2].mbuf->rearm_data = + rxq->mbuf_initializer; + *(uint64_t *)&sw_ring[pos + 3].mbuf->rearm_data = + rxq->mbuf_initializer; + +- /* pkt 3,4 save to rx_pkts mbuf */ +- vst1q_u8((void *)&sw_ring[pos + 2].mbuf->rx_descriptor_fields1, +- pkt_mb3); +- vst1q_u8((void *)&sw_ring[pos + 3].mbuf->rx_descriptor_fields1, +- pkt_mb4); +- + rte_prefetch_non_temporal(rxdp + HNS3_DEFAULT_DESCS_PER_LOOP); + + parse_retcode = hns3_desc_parse_field(rxq, &sw_ring[pos], +-- +2.41.0.windows.2 + diff --git a/0360-net-hns3-optimize-free-mbuf-for-SVE-Tx.patch b/0360-net-hns3-optimize-free-mbuf-for-SVE-Tx.patch new file mode 100644 index 0000000..05aefb1 --- /dev/null +++ b/0360-net-hns3-optimize-free-mbuf-for-SVE-Tx.patch @@ -0,0 +1,89 @@ +From d967db92088afcb06e7b245109ff35288c8cd3fe Mon Sep 17 00:00:00 2001 +From: Huisong Li +Date: Tue, 11 Jul 2023 18:24:46 +0800 +Subject: [PATCH 360/366] net/hns3: optimize free mbuf for SVE Tx + +[ upstream commit 01a295b741603b9366366a665402a2667a29fcc3 ] + +Currently, hns3 SVE Tx checks the valid bits of all descriptors +in a batch and then determines whether to release the corresponding +mbufs. Actually, once the valid bit of any descriptor in a batch +isn't cleared, driver does not need to scan the rest of descriptors. + +If we optimize SVE codes algorithm about this function, the performance +of a single queue for 64B packet is improved by ~2% on txonly forwarding +mode. And if use C code to scan all descriptors, the performance is +improved by ~8%. + +So this patch selects C code to optimize this code to improve the SVE +Tx performance. + +Signed-off-by: Huisong Li +Signed-off-by: Dongdong Liu +--- + drivers/net/hns3/hns3_rxtx_vec_sve.c | 42 +--------------------------- + 1 file changed, 1 insertion(+), 41 deletions(-) + +diff --git a/drivers/net/hns3/hns3_rxtx_vec_sve.c b/drivers/net/hns3/hns3_rxtx_vec_sve.c +index 6f23ba6..51d4bf3 100644 +--- a/drivers/net/hns3/hns3_rxtx_vec_sve.c ++++ b/drivers/net/hns3/hns3_rxtx_vec_sve.c +@@ -337,46 +337,6 @@ hns3_recv_pkts_vec_sve(void *__restrict rx_queue, + return nb_rx; + } + +-static inline void +-hns3_tx_free_buffers_sve(struct hns3_tx_queue *txq) +-{ +-#define HNS3_SVE_CHECK_DESCS_PER_LOOP 8 +-#define TX_VLD_U8_ZIP_INDEX svindex_u8(0, 4) +- svbool_t pg32 = svwhilelt_b32(0, HNS3_SVE_CHECK_DESCS_PER_LOOP); +- svuint32_t vld, vld2; +- svuint8_t vld_u8; +- uint64_t vld_all; +- struct hns3_desc *tx_desc; +- int i; +- +- /* +- * All mbufs can be released only when the VLD bits of all +- * descriptors in a batch are cleared. +- */ +- /* do logical OR operation for all desc's valid field */ +- vld = svdup_n_u32(0); +- tx_desc = &txq->tx_ring[txq->next_to_clean]; +- for (i = 0; i < txq->tx_rs_thresh; i += HNS3_SVE_CHECK_DESCS_PER_LOOP, +- tx_desc += HNS3_SVE_CHECK_DESCS_PER_LOOP) { +- vld2 = svld1_gather_u32offset_u32(pg32, (uint32_t *)tx_desc, +- svindex_u32(BD_FIELD_VALID_OFFSET, BD_SIZE)); +- vld = svorr_u32_z(pg32, vld, vld2); +- } +- /* shift left and then right to get all valid bit */ +- vld = svlsl_n_u32_z(pg32, vld, +- HNS3_UINT32_BIT - 1 - HNS3_TXD_VLD_B); +- vld = svreinterpret_u32_s32(svasr_n_s32_z(pg32, +- svreinterpret_s32_u32(vld), HNS3_UINT32_BIT - 1)); +- /* use tbl to compress 32bit-lane to 8bit-lane */ +- vld_u8 = svtbl_u8(svreinterpret_u8_u32(vld), TX_VLD_U8_ZIP_INDEX); +- /* dump compressed 64bit to variable */ +- svst1_u64(PG64_64BIT, &vld_all, svreinterpret_u64_u8(vld_u8)); +- if (vld_all > 0) +- return; +- +- hns3_tx_bulk_free_buffers(txq); +-} +- + static inline void + hns3_tx_fill_hw_ring_sve(struct hns3_tx_queue *txq, + struct rte_mbuf **pkts, +@@ -457,7 +417,7 @@ hns3_xmit_fixed_burst_vec_sve(void *__restrict tx_queue, + uint16_t nb_tx = 0; + + if (txq->tx_bd_ready < txq->tx_free_thresh) +- hns3_tx_free_buffers_sve(txq); ++ hns3_tx_free_buffers(txq); + + nb_pkts = RTE_MIN(txq->tx_bd_ready, nb_pkts); + if (unlikely(nb_pkts == 0)) { +-- +2.41.0.windows.2 + diff --git a/0361-net-hns3-optimize-rearm-mbuf-for-SVE-Rx.patch b/0361-net-hns3-optimize-rearm-mbuf-for-SVE-Rx.patch new file mode 100644 index 0000000..34faf2e --- /dev/null +++ b/0361-net-hns3-optimize-rearm-mbuf-for-SVE-Rx.patch @@ -0,0 +1,223 @@ +From 133dbfed220120724a60a2b7deae5ec7d4c38301 Mon Sep 17 00:00:00 2001 +From: Huisong Li +Date: Tue, 11 Jul 2023 18:24:47 +0800 +Subject: [PATCH 361/366] net/hns3: optimize rearm mbuf for SVE Rx + +[ upstream commit d49b64477f246e53210488825fdd92ccf53fa184 ] + +Use hns3_rxq_rearm_mbuf() to replace the hns3_rxq_rearm_mbuf_sve() +to optimize the performance of SVE Rx. + +On the rxonly forwarding mode, the performance of a single queue +for 64B packet is improved by ~15%. + +Signed-off-by: Huisong Li +Signed-off-by: Dongdong Liu +--- + drivers/net/hns3/hns3_rxtx_vec.c | 51 --------------------------- + drivers/net/hns3/hns3_rxtx_vec.h | 51 +++++++++++++++++++++++++++ + drivers/net/hns3/hns3_rxtx_vec_sve.c | 52 ++-------------------------- + 3 files changed, 53 insertions(+), 101 deletions(-) + +diff --git a/drivers/net/hns3/hns3_rxtx_vec.c b/drivers/net/hns3/hns3_rxtx_vec.c +index 153866c..5cdfa60 100644 +--- a/drivers/net/hns3/hns3_rxtx_vec.c ++++ b/drivers/net/hns3/hns3_rxtx_vec.c +@@ -55,57 +55,6 @@ hns3_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) + return nb_tx; + } + +-static inline void +-hns3_rxq_rearm_mbuf(struct hns3_rx_queue *rxq) +-{ +-#define REARM_LOOP_STEP_NUM 4 +- struct hns3_entry *rxep = &rxq->sw_ring[rxq->rx_rearm_start]; +- struct hns3_desc *rxdp = rxq->rx_ring + rxq->rx_rearm_start; +- uint64_t dma_addr; +- int i; +- +- if (unlikely(rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep, +- HNS3_DEFAULT_RXQ_REARM_THRESH) < 0)) { +- rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++; +- return; +- } +- +- for (i = 0; i < HNS3_DEFAULT_RXQ_REARM_THRESH; i += REARM_LOOP_STEP_NUM, +- rxep += REARM_LOOP_STEP_NUM, rxdp += REARM_LOOP_STEP_NUM) { +- if (likely(i < +- HNS3_DEFAULT_RXQ_REARM_THRESH - REARM_LOOP_STEP_NUM)) { +- rte_prefetch_non_temporal(rxep[4].mbuf); +- rte_prefetch_non_temporal(rxep[5].mbuf); +- rte_prefetch_non_temporal(rxep[6].mbuf); +- rte_prefetch_non_temporal(rxep[7].mbuf); +- } +- +- dma_addr = rte_mbuf_data_iova_default(rxep[0].mbuf); +- rxdp[0].addr = rte_cpu_to_le_64(dma_addr); +- rxdp[0].rx.bd_base_info = 0; +- +- dma_addr = rte_mbuf_data_iova_default(rxep[1].mbuf); +- rxdp[1].addr = rte_cpu_to_le_64(dma_addr); +- rxdp[1].rx.bd_base_info = 0; +- +- dma_addr = rte_mbuf_data_iova_default(rxep[2].mbuf); +- rxdp[2].addr = rte_cpu_to_le_64(dma_addr); +- rxdp[2].rx.bd_base_info = 0; +- +- dma_addr = rte_mbuf_data_iova_default(rxep[3].mbuf); +- rxdp[3].addr = rte_cpu_to_le_64(dma_addr); +- rxdp[3].rx.bd_base_info = 0; +- } +- +- rxq->rx_rearm_start += HNS3_DEFAULT_RXQ_REARM_THRESH; +- if (rxq->rx_rearm_start >= rxq->nb_rx_desc) +- rxq->rx_rearm_start = 0; +- +- rxq->rx_rearm_nb -= HNS3_DEFAULT_RXQ_REARM_THRESH; +- +- hns3_write_reg_opt(rxq->io_head_reg, HNS3_DEFAULT_RXQ_REARM_THRESH); +-} +- + uint16_t + hns3_recv_pkts_vec(void *__restrict rx_queue, + struct rte_mbuf **__restrict rx_pkts, +diff --git a/drivers/net/hns3/hns3_rxtx_vec.h b/drivers/net/hns3/hns3_rxtx_vec.h +index 2c8a919..a9a6774 100644 +--- a/drivers/net/hns3/hns3_rxtx_vec.h ++++ b/drivers/net/hns3/hns3_rxtx_vec.h +@@ -94,4 +94,55 @@ hns3_rx_reassemble_pkts(struct rte_mbuf **rx_pkts, + + return count; + } ++ ++static inline void ++hns3_rxq_rearm_mbuf(struct hns3_rx_queue *rxq) ++{ ++#define REARM_LOOP_STEP_NUM 4 ++ struct hns3_entry *rxep = &rxq->sw_ring[rxq->rx_rearm_start]; ++ struct hns3_desc *rxdp = rxq->rx_ring + rxq->rx_rearm_start; ++ uint64_t dma_addr; ++ int i; ++ ++ if (unlikely(rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep, ++ HNS3_DEFAULT_RXQ_REARM_THRESH) < 0)) { ++ rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++; ++ return; ++ } ++ ++ for (i = 0; i < HNS3_DEFAULT_RXQ_REARM_THRESH; i += REARM_LOOP_STEP_NUM, ++ rxep += REARM_LOOP_STEP_NUM, rxdp += REARM_LOOP_STEP_NUM) { ++ if (likely(i < ++ HNS3_DEFAULT_RXQ_REARM_THRESH - REARM_LOOP_STEP_NUM)) { ++ rte_prefetch_non_temporal(rxep[4].mbuf); ++ rte_prefetch_non_temporal(rxep[5].mbuf); ++ rte_prefetch_non_temporal(rxep[6].mbuf); ++ rte_prefetch_non_temporal(rxep[7].mbuf); ++ } ++ ++ dma_addr = rte_mbuf_data_iova_default(rxep[0].mbuf); ++ rxdp[0].addr = rte_cpu_to_le_64(dma_addr); ++ rxdp[0].rx.bd_base_info = 0; ++ ++ dma_addr = rte_mbuf_data_iova_default(rxep[1].mbuf); ++ rxdp[1].addr = rte_cpu_to_le_64(dma_addr); ++ rxdp[1].rx.bd_base_info = 0; ++ ++ dma_addr = rte_mbuf_data_iova_default(rxep[2].mbuf); ++ rxdp[2].addr = rte_cpu_to_le_64(dma_addr); ++ rxdp[2].rx.bd_base_info = 0; ++ ++ dma_addr = rte_mbuf_data_iova_default(rxep[3].mbuf); ++ rxdp[3].addr = rte_cpu_to_le_64(dma_addr); ++ rxdp[3].rx.bd_base_info = 0; ++ } ++ ++ rxq->rx_rearm_start += HNS3_DEFAULT_RXQ_REARM_THRESH; ++ if (rxq->rx_rearm_start >= rxq->nb_rx_desc) ++ rxq->rx_rearm_start = 0; ++ ++ rxq->rx_rearm_nb -= HNS3_DEFAULT_RXQ_REARM_THRESH; ++ ++ hns3_write_reg_opt(rxq->io_head_reg, HNS3_DEFAULT_RXQ_REARM_THRESH); ++} + #endif /* HNS3_RXTX_VEC_H */ +diff --git a/drivers/net/hns3/hns3_rxtx_vec_sve.c b/drivers/net/hns3/hns3_rxtx_vec_sve.c +index 51d4bf3..1251939 100644 +--- a/drivers/net/hns3/hns3_rxtx_vec_sve.c ++++ b/drivers/net/hns3/hns3_rxtx_vec_sve.c +@@ -237,54 +237,6 @@ hns3_recv_burst_vec_sve(struct hns3_rx_queue *__restrict rxq, + return nb_rx; + } + +-static inline void +-hns3_rxq_rearm_mbuf_sve(struct hns3_rx_queue *rxq) +-{ +-#define REARM_LOOP_STEP_NUM 4 +- struct hns3_entry *rxep = &rxq->sw_ring[rxq->rx_rearm_start]; +- struct hns3_desc *rxdp = rxq->rx_ring + rxq->rx_rearm_start; +- struct hns3_entry *rxep_tmp = rxep; +- int i; +- +- if (unlikely(rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep, +- HNS3_DEFAULT_RXQ_REARM_THRESH) < 0)) { +- rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++; +- return; +- } +- +- for (i = 0; i < HNS3_DEFAULT_RXQ_REARM_THRESH; i += REARM_LOOP_STEP_NUM, +- rxep_tmp += REARM_LOOP_STEP_NUM) { +- svuint64_t prf = svld1_u64(PG64_256BIT, (uint64_t *)rxep_tmp); +- svprfd_gather_u64base(PG64_256BIT, prf, SV_PLDL1STRM); +- } +- +- for (i = 0; i < HNS3_DEFAULT_RXQ_REARM_THRESH; i += REARM_LOOP_STEP_NUM, +- rxep += REARM_LOOP_STEP_NUM, rxdp += REARM_LOOP_STEP_NUM) { +- uint64_t iova[REARM_LOOP_STEP_NUM]; +- iova[0] = rxep[0].mbuf->buf_iova; +- iova[1] = rxep[1].mbuf->buf_iova; +- iova[2] = rxep[2].mbuf->buf_iova; +- iova[3] = rxep[3].mbuf->buf_iova; +- svuint64_t siova = svld1_u64(PG64_256BIT, iova); +- siova = svadd_n_u64_z(PG64_256BIT, siova, RTE_PKTMBUF_HEADROOM); +- svuint64_t ol_base = svdup_n_u64(0); +- svst1_scatter_u64offset_u64(PG64_256BIT, +- (uint64_t *)&rxdp[0].addr, +- svindex_u64(BD_FIELD_ADDR_OFFSET, BD_SIZE), siova); +- svst1_scatter_u64offset_u64(PG64_256BIT, +- (uint64_t *)&rxdp[0].addr, +- svindex_u64(BD_FIELD_OL_OFFSET, BD_SIZE), ol_base); +- } +- +- rxq->rx_rearm_start += HNS3_DEFAULT_RXQ_REARM_THRESH; +- if (rxq->rx_rearm_start >= rxq->nb_rx_desc) +- rxq->rx_rearm_start = 0; +- +- rxq->rx_rearm_nb -= HNS3_DEFAULT_RXQ_REARM_THRESH; +- +- hns3_write_reg_opt(rxq->io_head_reg, HNS3_DEFAULT_RXQ_REARM_THRESH); +-} +- + uint16_t + hns3_recv_pkts_vec_sve(void *__restrict rx_queue, + struct rte_mbuf **__restrict rx_pkts, +@@ -300,7 +252,7 @@ hns3_recv_pkts_vec_sve(void *__restrict rx_queue, + nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, HNS3_SVE_DEFAULT_DESCS_PER_LOOP); + + if (rxq->rx_rearm_nb > HNS3_DEFAULT_RXQ_REARM_THRESH) +- hns3_rxq_rearm_mbuf_sve(rxq); ++ hns3_rxq_rearm_mbuf(rxq); + + if (unlikely(!(rxdp->rx.bd_base_info & + rte_cpu_to_le_32(1u << HNS3_RXD_VLD_B)))) +@@ -331,7 +283,7 @@ hns3_recv_pkts_vec_sve(void *__restrict rx_queue, + break; + + if (rxq->rx_rearm_nb > HNS3_DEFAULT_RXQ_REARM_THRESH) +- hns3_rxq_rearm_mbuf_sve(rxq); ++ hns3_rxq_rearm_mbuf(rxq); + } + + return nb_rx; +-- +2.41.0.windows.2 + diff --git a/0362-net-hns3-optimize-SVE-Rx-performance.patch b/0362-net-hns3-optimize-SVE-Rx-performance.patch new file mode 100644 index 0000000..c786317 --- /dev/null +++ b/0362-net-hns3-optimize-SVE-Rx-performance.patch @@ -0,0 +1,242 @@ +From 5e6c0f58eff79c06edf3638108c096e792b81a3b Mon Sep 17 00:00:00 2001 +From: Huisong Li +Date: Tue, 11 Jul 2023 18:24:48 +0800 +Subject: [PATCH 362/366] net/hns3: optimize SVE Rx performance + +[ upstream commit f1ad6decfbd44c3dc2d73dcda3fa8fb37b140186 ] + +This patch optimizes SVE Rx performance by the following ways: +1> optimize the calculation of valid BD number. +2> remove a temporary variable (key_fields) +3> use C language to parse some descriptor fields, instead of + SVE instruction. +4> small step prefetch descriptor. + +On the rxonly forwarding mode, the performance of a single queue +or 64B packet is improved by ~40%. + +Signed-off-by: Huisong Li +Signed-off-by: Dongdong Liu +--- + drivers/net/hns3/hns3_rxtx_vec_sve.c | 137 ++++++--------------------- + 1 file changed, 27 insertions(+), 110 deletions(-) + +diff --git a/drivers/net/hns3/hns3_rxtx_vec_sve.c b/drivers/net/hns3/hns3_rxtx_vec_sve.c +index 1251939..88b484d 100644 +--- a/drivers/net/hns3/hns3_rxtx_vec_sve.c ++++ b/drivers/net/hns3/hns3_rxtx_vec_sve.c +@@ -20,40 +20,36 @@ + + #define BD_SIZE 32 + #define BD_FIELD_ADDR_OFFSET 0 +-#define BD_FIELD_L234_OFFSET 8 +-#define BD_FIELD_XLEN_OFFSET 12 +-#define BD_FIELD_RSS_OFFSET 16 +-#define BD_FIELD_OL_OFFSET 24 + #define BD_FIELD_VALID_OFFSET 28 + +-typedef struct { +- uint32_t l234_info[HNS3_SVE_DEFAULT_DESCS_PER_LOOP]; +- uint32_t ol_info[HNS3_SVE_DEFAULT_DESCS_PER_LOOP]; +- uint32_t bd_base_info[HNS3_SVE_DEFAULT_DESCS_PER_LOOP]; +-} HNS3_SVE_KEY_FIELD_S; +- + static inline uint32_t + hns3_desc_parse_field_sve(struct hns3_rx_queue *rxq, + struct rte_mbuf **rx_pkts, +- HNS3_SVE_KEY_FIELD_S *key, ++ struct hns3_desc *rxdp, + uint32_t bd_vld_num) + { ++ uint32_t l234_info, ol_info, bd_base_info; + uint32_t retcode = 0; + int ret, i; + + for (i = 0; i < (int)bd_vld_num; i++) { + /* init rte_mbuf.rearm_data last 64-bit */ + rx_pkts[i]->ol_flags = RTE_MBUF_F_RX_RSS_HASH; +- +- ret = hns3_handle_bdinfo(rxq, rx_pkts[i], key->bd_base_info[i], +- key->l234_info[i]); ++ rx_pkts[i]->hash.rss = rxdp[i].rx.rss_hash; ++ rx_pkts[i]->pkt_len = rte_le_to_cpu_16(rxdp[i].rx.pkt_len) - ++ rxq->crc_len; ++ rx_pkts[i]->data_len = rx_pkts[i]->pkt_len; ++ ++ l234_info = rxdp[i].rx.l234_info; ++ ol_info = rxdp[i].rx.ol_info; ++ bd_base_info = rxdp[i].rx.bd_base_info; ++ ret = hns3_handle_bdinfo(rxq, rx_pkts[i], bd_base_info, l234_info); + if (unlikely(ret)) { + retcode |= 1u << i; + continue; + } + +- rx_pkts[i]->packet_type = hns3_rx_calc_ptype(rxq, +- key->l234_info[i], key->ol_info[i]); ++ rx_pkts[i]->packet_type = hns3_rx_calc_ptype(rxq, l234_info, ol_info); + + /* Increment bytes counter */ + rxq->basic_stats.bytes += rx_pkts[i]->pkt_len; +@@ -77,46 +73,16 @@ hns3_recv_burst_vec_sve(struct hns3_rx_queue *__restrict rxq, + uint16_t nb_pkts, + uint64_t *bd_err_mask) + { +-#define XLEN_ADJUST_LEN 32 +-#define RSS_ADJUST_LEN 16 +-#define GEN_VLD_U8_ZIP_INDEX svindex_s8(28, -4) + uint16_t rx_id = rxq->next_to_use; + struct hns3_entry *sw_ring = &rxq->sw_ring[rx_id]; + struct hns3_desc *rxdp = &rxq->rx_ring[rx_id]; +- struct hns3_desc *rxdp2; +- HNS3_SVE_KEY_FIELD_S key_field; ++ struct hns3_desc *rxdp2, *next_rxdp; + uint64_t bd_valid_num; + uint32_t parse_retcode; + uint16_t nb_rx = 0; + int pos, offset; + +- uint16_t xlen_adjust[XLEN_ADJUST_LEN] = { +- 0, 0xffff, 1, 0xffff, /* 1st mbuf: pkt_len and dat_len */ +- 2, 0xffff, 3, 0xffff, /* 2st mbuf: pkt_len and dat_len */ +- 4, 0xffff, 5, 0xffff, /* 3st mbuf: pkt_len and dat_len */ +- 6, 0xffff, 7, 0xffff, /* 4st mbuf: pkt_len and dat_len */ +- 8, 0xffff, 9, 0xffff, /* 5st mbuf: pkt_len and dat_len */ +- 10, 0xffff, 11, 0xffff, /* 6st mbuf: pkt_len and dat_len */ +- 12, 0xffff, 13, 0xffff, /* 7st mbuf: pkt_len and dat_len */ +- 14, 0xffff, 15, 0xffff, /* 8st mbuf: pkt_len and dat_len */ +- }; +- +- uint32_t rss_adjust[RSS_ADJUST_LEN] = { +- 0, 0xffff, /* 1st mbuf: rss */ +- 1, 0xffff, /* 2st mbuf: rss */ +- 2, 0xffff, /* 3st mbuf: rss */ +- 3, 0xffff, /* 4st mbuf: rss */ +- 4, 0xffff, /* 5st mbuf: rss */ +- 5, 0xffff, /* 6st mbuf: rss */ +- 6, 0xffff, /* 7st mbuf: rss */ +- 7, 0xffff, /* 8st mbuf: rss */ +- }; +- + svbool_t pg32 = svwhilelt_b32(0, HNS3_SVE_DEFAULT_DESCS_PER_LOOP); +- svuint16_t xlen_tbl1 = svld1_u16(PG16_256BIT, xlen_adjust); +- svuint16_t xlen_tbl2 = svld1_u16(PG16_256BIT, &xlen_adjust[16]); +- svuint32_t rss_tbl1 = svld1_u32(PG32_256BIT, rss_adjust); +- svuint32_t rss_tbl2 = svld1_u32(PG32_256BIT, &rss_adjust[8]); + + /* compile-time verifies the xlen_adjust mask */ + RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) != +@@ -126,30 +92,21 @@ hns3_recv_burst_vec_sve(struct hns3_rx_queue *__restrict rxq, + + for (pos = 0; pos < nb_pkts; pos += HNS3_SVE_DEFAULT_DESCS_PER_LOOP, + rxdp += HNS3_SVE_DEFAULT_DESCS_PER_LOOP) { +- svuint64_t vld_clz, mbp1st, mbp2st, mbuf_init; +- svuint64_t xlen1st, xlen2st, rss1st, rss2st; +- svuint32_t l234, ol, vld, vld2, xlen, rss; +- svuint8_t vld_u8; ++ svuint64_t mbp1st, mbp2st, mbuf_init; ++ svuint32_t vld; ++ svbool_t vld_op; + + /* calc how many bd valid: part 1 */ + vld = svld1_gather_u32offset_u32(pg32, (uint32_t *)rxdp, + svindex_u32(BD_FIELD_VALID_OFFSET, BD_SIZE)); +- vld2 = svlsl_n_u32_z(pg32, vld, +- HNS3_UINT32_BIT - 1 - HNS3_RXD_VLD_B); +- vld2 = svreinterpret_u32_s32(svasr_n_s32_z(pg32, +- svreinterpret_s32_u32(vld2), HNS3_UINT32_BIT - 1)); ++ vld = svand_n_u32_z(pg32, vld, BIT(HNS3_RXD_VLD_B)); ++ vld_op = svcmpne_n_u32(pg32, vld, BIT(HNS3_RXD_VLD_B)); ++ bd_valid_num = svcntp_b32(pg32, svbrkb_b_z(pg32, vld_op)); ++ if (bd_valid_num == 0) ++ break; + + /* load 4 mbuf pointer */ + mbp1st = svld1_u64(PG64_256BIT, (uint64_t *)&sw_ring[pos]); +- +- /* calc how many bd valid: part 2 */ +- vld_u8 = svtbl_u8(svreinterpret_u8_u32(vld2), +- svreinterpret_u8_s8(GEN_VLD_U8_ZIP_INDEX)); +- vld_clz = svnot_u64_z(PG64_64BIT, svreinterpret_u64_u8(vld_u8)); +- vld_clz = svclz_u64_z(PG64_64BIT, vld_clz); +- svst1_u64(PG64_64BIT, &bd_valid_num, vld_clz); +- bd_valid_num /= HNS3_UINT8_BIT; +- + /* load 4 more mbuf pointer */ + mbp2st = svld1_u64(PG64_256BIT, (uint64_t *)&sw_ring[pos + 4]); + +@@ -159,65 +116,25 @@ hns3_recv_burst_vec_sve(struct hns3_rx_queue *__restrict rxq, + + /* store 4 mbuf pointer into rx_pkts */ + svst1_u64(PG64_256BIT, (uint64_t *)&rx_pkts[pos], mbp1st); +- +- /* load key field to vector reg */ +- l234 = svld1_gather_u32offset_u32(pg32, (uint32_t *)rxdp2, +- svindex_u32(BD_FIELD_L234_OFFSET, BD_SIZE)); +- ol = svld1_gather_u32offset_u32(pg32, (uint32_t *)rxdp2, +- svindex_u32(BD_FIELD_OL_OFFSET, BD_SIZE)); +- + /* store 4 mbuf pointer into rx_pkts again */ + svst1_u64(PG64_256BIT, (uint64_t *)&rx_pkts[pos + 4], mbp2st); + +- /* load datalen, pktlen and rss_hash */ +- xlen = svld1_gather_u32offset_u32(pg32, (uint32_t *)rxdp2, +- svindex_u32(BD_FIELD_XLEN_OFFSET, BD_SIZE)); +- rss = svld1_gather_u32offset_u32(pg32, (uint32_t *)rxdp2, +- svindex_u32(BD_FIELD_RSS_OFFSET, BD_SIZE)); +- +- /* store key field to stash buffer */ +- svst1_u32(pg32, (uint32_t *)key_field.l234_info, l234); +- svst1_u32(pg32, (uint32_t *)key_field.bd_base_info, vld); +- svst1_u32(pg32, (uint32_t *)key_field.ol_info, ol); +- +- /* sub crc_len for pkt_len and data_len */ +- xlen = svreinterpret_u32_u16(svsub_n_u16_z(PG16_256BIT, +- svreinterpret_u16_u32(xlen), rxq->crc_len)); +- + /* init mbuf_initializer */ + mbuf_init = svdup_n_u64(rxq->mbuf_initializer); +- +- /* extract datalen, pktlen and rss from xlen and rss */ +- xlen1st = svreinterpret_u64_u16( +- svtbl_u16(svreinterpret_u16_u32(xlen), xlen_tbl1)); +- xlen2st = svreinterpret_u64_u16( +- svtbl_u16(svreinterpret_u16_u32(xlen), xlen_tbl2)); +- rss1st = svreinterpret_u64_u32( +- svtbl_u32(svreinterpret_u32_u32(rss), rss_tbl1)); +- rss2st = svreinterpret_u64_u32( +- svtbl_u32(svreinterpret_u32_u32(rss), rss_tbl2)); +- + /* save mbuf_initializer */ + svst1_scatter_u64base_offset_u64(PG64_256BIT, mbp1st, + offsetof(struct rte_mbuf, rearm_data), mbuf_init); + svst1_scatter_u64base_offset_u64(PG64_256BIT, mbp2st, + offsetof(struct rte_mbuf, rearm_data), mbuf_init); + +- /* save datalen and pktlen and rss */ +- svst1_scatter_u64base_offset_u64(PG64_256BIT, mbp1st, +- offsetof(struct rte_mbuf, pkt_len), xlen1st); +- svst1_scatter_u64base_offset_u64(PG64_256BIT, mbp1st, +- offsetof(struct rte_mbuf, hash.rss), rss1st); +- svst1_scatter_u64base_offset_u64(PG64_256BIT, mbp2st, +- offsetof(struct rte_mbuf, pkt_len), xlen2st); +- svst1_scatter_u64base_offset_u64(PG64_256BIT, mbp2st, +- offsetof(struct rte_mbuf, hash.rss), rss2st); +- +- rte_prefetch_non_temporal(rxdp + +- HNS3_SVE_DEFAULT_DESCS_PER_LOOP); ++ next_rxdp = rxdp + HNS3_SVE_DEFAULT_DESCS_PER_LOOP; ++ rte_prefetch_non_temporal(next_rxdp); ++ rte_prefetch_non_temporal(next_rxdp + 2); ++ rte_prefetch_non_temporal(next_rxdp + 4); ++ rte_prefetch_non_temporal(next_rxdp + 6); + + parse_retcode = hns3_desc_parse_field_sve(rxq, &rx_pkts[pos], +- &key_field, bd_valid_num); ++ &rxdp2[offset], bd_valid_num); + if (unlikely(parse_retcode)) + (*bd_err_mask) |= ((uint64_t)parse_retcode) << pos; + +-- +2.41.0.windows.2 + diff --git a/0363-app-testpmd-fix-multicast-address-pool-leak.patch b/0363-app-testpmd-fix-multicast-address-pool-leak.patch new file mode 100644 index 0000000..73a9cf3 --- /dev/null +++ b/0363-app-testpmd-fix-multicast-address-pool-leak.patch @@ -0,0 +1,97 @@ +From 9b13302cec30ec70d2aedcd024bde4db57bc8eaa Mon Sep 17 00:00:00 2001 +From: Ke Zhang +Date: Fri, 25 Mar 2022 08:35:55 +0000 +Subject: [PATCH 363/366] app/testpmd: fix multicast address pool leak + +[ upstream commit 68629be3a622ee53cd5b40c8447ae9b083ff3f6c ] + +A multicast address pool is allocated for a port when +using mcast_addr testpmd commands. + +When closing a port or stopping testpmd, this pool was +not freed, resulting in a leak. +This issue has been caught using ASan. + +Free this pool when closing the port. + +Error info as following: +ERROR: LeakSanitizer: detected memory leaksDirect leak of + 192 byte(s) +0 0x7f6a2e0aeffe in __interceptor_realloc + (/lib/x86_64-linux-gnu/libasan.so.5+0x10dffe) +1 0x565361eb340f in mcast_addr_pool_extend + ../app/test-pmd/config.c:5162 +2 0x565361eb3556 in mcast_addr_pool_append + ../app/test-pmd/config.c:5180 +3 0x565361eb3aae in mcast_addr_add + ../app/test-pmd/config.c:5243 + +Fixes: 8fff667578a7 ("app/testpmd: new command to add/remove multicast MAC addresses") +Cc: stable@dpdk.org + +Signed-off-by: Ke Zhang +Acked-by: Yuying Zhang +Acked-by: Ferruh Yigit +--- + app/test-pmd/config.c | 19 +++++++++++++++++++ + app/test-pmd/testpmd.c | 1 + + app/test-pmd/testpmd.h | 1 + + 3 files changed, 21 insertions(+) + +diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c +index 22c63e2..61dc56f 100644 +--- a/app/test-pmd/config.c ++++ b/app/test-pmd/config.c +@@ -5364,6 +5364,25 @@ mcast_addr_pool_remove(struct rte_port *port, uint32_t addr_idx) + sizeof(struct rte_ether_addr) * (port->mc_addr_nb - addr_idx)); + } + ++int ++mcast_addr_pool_destroy(portid_t port_id) ++{ ++ struct rte_port *port; ++ ++ if (port_id_is_invalid(port_id, ENABLED_WARN) || ++ port_id == (portid_t)RTE_PORT_ALL) ++ return -EINVAL; ++ port = &ports[port_id]; ++ ++ if (port->mc_addr_nb != 0) { ++ /* free the pool of multicast addresses. */ ++ free(port->mc_addr_pool); ++ port->mc_addr_pool = NULL; ++ port->mc_addr_nb = 0; ++ } ++ return 0; ++} ++ + static int + eth_port_multicast_addr_list_set(portid_t port_id) + { +diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c +index 20134c5..6f59bd2 100644 +--- a/app/test-pmd/testpmd.c ++++ b/app/test-pmd/testpmd.c +@@ -3284,6 +3284,7 @@ close_port(portid_t pid) + } + + if (is_proc_primary()) { ++ mcast_addr_pool_destroy(pi); + port_flow_flush(pi); + port_flex_item_flush(pi); + port_action_handle_flush(pi); +diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h +index be7454a..54d3112 100644 +--- a/app/test-pmd/testpmd.h ++++ b/app/test-pmd/testpmd.h +@@ -906,6 +906,7 @@ int port_flow_create(portid_t port_id, + int port_action_handle_query(portid_t port_id, uint32_t id); + void update_age_action_context(const struct rte_flow_action *actions, + struct port_flow *pf); ++int mcast_addr_pool_destroy(portid_t port_id); + int port_flow_destroy(portid_t port_id, uint32_t n, const uint32_t *rule); + int port_flow_flush(portid_t port_id); + int port_flow_dump(portid_t port_id, bool dump_all, +-- +2.41.0.windows.2 + diff --git a/0364-app-testpmd-fix-help-string.patch b/0364-app-testpmd-fix-help-string.patch new file mode 100644 index 0000000..d27024e --- /dev/null +++ b/0364-app-testpmd-fix-help-string.patch @@ -0,0 +1,40 @@ +From 21f694a2c28879a863dc255e7800ee31aac5c068 Mon Sep 17 00:00:00 2001 +From: Dengdui Huang +Date: Sun, 8 Oct 2023 14:46:19 +0800 +Subject: [PATCH 364/366] app/testpmd: fix help string + +[ upstream commit 42661fb8f18e52684d0d9f0d376017082fca45e0 ] + +Command help string is missing 'mcast_addr add|remove'. +This patch add it. + +Fixes: 8fff667578a7 ("app/testpmd: new command to add/remove multicast MAC addresses") +Cc: stable@dpdk.org + +Signed-off-by: Dengdui Huang +Acked-by: Chengwen Feng +Acked-by: Ferruh Yigit +--- + app/test-pmd/cmdline.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c +index bc770f3..ec8f385 100644 +--- a/app/test-pmd/cmdline.c ++++ b/app/test-pmd/cmdline.c +@@ -504,6 +504,12 @@ static void cmd_help_long_parsed(void *parsed_result, + "mac_addr add port (port_id) vf (vf_id) (mac_address)\n" + " Add a MAC address for a VF on the port.\n\n" + ++ "mcast_addr add (port_id) (mcast_addr)\n" ++ " Add a multicast MAC addresses on port_id.\n\n" ++ ++ "mcast_addr remove (port_id) (mcast_addr)\n" ++ " Remove a multicast MAC address from port_id.\n\n" ++ + "set vf mac addr (port_id) (vf_id) (XX:XX:XX:XX:XX:XX)\n" + " Set the MAC address for a VF from the PF.\n\n" + +-- +2.41.0.windows.2 + diff --git a/0365-app-testpmd-add-command-to-flush-multicast-MAC-addre.patch b/0365-app-testpmd-add-command-to-flush-multicast-MAC-addre.patch new file mode 100644 index 0000000..56b2a4a --- /dev/null +++ b/0365-app-testpmd-add-command-to-flush-multicast-MAC-addre.patch @@ -0,0 +1,152 @@ +From c2f8baf727df5d43ba3e1366037d31bd6185b77d Mon Sep 17 00:00:00 2001 +From: Dengdui Huang +Date: Sun, 8 Oct 2023 14:46:20 +0800 +Subject: [PATCH 365/366] app/testpmd: add command to flush multicast MAC addresses + +[ upstream commit ef8bd7d0b25abdcc425d4a7e399c66957b15b935 ] + +Add command to flush all multicast MAC address +Usage: + mcast_addr flush : + flush all multicast MAC address on port_id + +Signed-off-by: Dengdui Huang +Acked-by: Chengwen Feng +Acked-by: Ferruh Yigit +--- + app/test-pmd/cmdline.c | 43 +++++++++++++++++++++ + app/test-pmd/config.c | 18 +++++++++ + app/test-pmd/testpmd.h | 1 + + doc/guides/testpmd_app_ug/testpmd_funcs.rst | 7 ++++ + 4 files changed, 69 insertions(+) + +diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c +index ec8f385..8facca3 100644 +--- a/app/test-pmd/cmdline.c ++++ b/app/test-pmd/cmdline.c +@@ -510,6 +510,9 @@ static void cmd_help_long_parsed(void *parsed_result, + "mcast_addr remove (port_id) (mcast_addr)\n" + " Remove a multicast MAC address from port_id.\n\n" + ++ "mcast_addr flush (port_id)\n" ++ " Flush all multicast MAC addresses on port_id.\n\n" ++ + "set vf mac addr (port_id) (vf_id) (XX:XX:XX:XX:XX:XX)\n" + " Set the MAC address for a VF from the PF.\n\n" + +@@ -11004,6 +11007,45 @@ cmdline_parse_inst_t cmd_mcast_addr = { + }, + }; + ++/* *** FLUSH MULTICAST MAC ADDRESS ON PORT *** */ ++struct cmd_mcast_addr_flush_result { ++ cmdline_fixed_string_t mcast_addr_cmd; ++ cmdline_fixed_string_t what; ++ uint16_t port_num; ++}; ++ ++static void cmd_mcast_addr_flush_parsed(void *parsed_result, ++ __rte_unused struct cmdline *cl, ++ __rte_unused void *data) ++{ ++ struct cmd_mcast_addr_flush_result *res = parsed_result; ++ ++ mcast_addr_flush(res->port_num); ++} ++ ++static cmdline_parse_token_string_t cmd_mcast_addr_flush_cmd = ++ TOKEN_STRING_INITIALIZER(struct cmd_mcast_addr_result, ++ mcast_addr_cmd, "mcast_addr"); ++static cmdline_parse_token_string_t cmd_mcast_addr_flush_what = ++ TOKEN_STRING_INITIALIZER(struct cmd_mcast_addr_result, what, ++ "flush"); ++static cmdline_parse_token_num_t cmd_mcast_addr_flush_portnum = ++ TOKEN_NUM_INITIALIZER(struct cmd_mcast_addr_result, port_num, ++ RTE_UINT16); ++ ++static cmdline_parse_inst_t cmd_mcast_addr_flush = { ++ .f = cmd_mcast_addr_flush_parsed, ++ .data = (void *)0, ++ .help_str = "mcast_addr flush : " ++ "flush all multicast MAC addresses on port_id", ++ .tokens = { ++ (void *)&cmd_mcast_addr_flush_cmd, ++ (void *)&cmd_mcast_addr_flush_what, ++ (void *)&cmd_mcast_addr_flush_portnum, ++ NULL, ++ }, ++}; ++ + /* vf vlan anti spoof configuration */ + + /* Common result structure for vf vlan anti spoof */ +@@ -17867,6 +17909,7 @@ cmdline_parse_ctx_t main_ctx[] = { + (cmdline_parse_inst_t *)&cmd_set_port_meter_stats_mask, + (cmdline_parse_inst_t *)&cmd_show_port_meter_stats, + (cmdline_parse_inst_t *)&cmd_mcast_addr, ++ (cmdline_parse_inst_t *)&cmd_mcast_addr_flush, + (cmdline_parse_inst_t *)&cmd_set_vf_vlan_anti_spoof, + (cmdline_parse_inst_t *)&cmd_set_vf_mac_anti_spoof, + (cmdline_parse_inst_t *)&cmd_set_vf_vlan_stripq, +diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c +index 61dc56f..af00078 100644 +--- a/app/test-pmd/config.c ++++ b/app/test-pmd/config.c +@@ -5459,6 +5459,24 @@ mcast_addr_remove(portid_t port_id, struct rte_ether_addr *mc_addr) + mcast_addr_pool_append(port, mc_addr); + } + ++void ++mcast_addr_flush(portid_t port_id) ++{ ++ int ret; ++ ++ if (port_id_is_invalid(port_id, ENABLED_WARN)) ++ return; ++ ++ ret = rte_eth_dev_set_mc_addr_list(port_id, NULL, 0); ++ if (ret != 0) { ++ fprintf(stderr, ++ "Failed to flush all multicast MAC addresses on port_id %u\n", ++ port_id); ++ return; ++ } ++ mcast_addr_pool_destroy(port_id); ++} ++ + void + port_dcb_info_display(portid_t port_id) + { +diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h +index 54d3112..30c7177 100644 +--- a/app/test-pmd/testpmd.h ++++ b/app/test-pmd/testpmd.h +@@ -1051,6 +1051,7 @@ void show_mcast_macs(portid_t port_id); + /* Functions to manage the set of filtered Multicast MAC addresses */ + void mcast_addr_add(portid_t port_id, struct rte_ether_addr *mc_addr); + void mcast_addr_remove(portid_t port_id, struct rte_ether_addr *mc_addr); ++void mcast_addr_flush(portid_t port_id); + void port_dcb_info_display(portid_t port_id); + + uint8_t *open_file(const char *file_path, uint32_t *size); +diff --git a/doc/guides/testpmd_app_ug/testpmd_funcs.rst b/doc/guides/testpmd_app_ug/testpmd_funcs.rst +index ecf89aa..c33c845 100644 +--- a/doc/guides/testpmd_app_ug/testpmd_funcs.rst ++++ b/doc/guides/testpmd_app_ug/testpmd_funcs.rst +@@ -1406,6 +1406,13 @@ filtered by port:: + + testpmd> mcast_addr remove (port_id) (mcast_addr) + ++mcast_addr flush ++~~~~~~~~~~~~~~~~ ++ ++Flush all multicast MAC addresses on port_id:: ++ ++ testpmd> mcast_addr flush (port_id) ++ + mac_addr add (for VF) + ~~~~~~~~~~~~~~~~~~~~~ + +-- +2.41.0.windows.2 + diff --git a/0366-maintainers-update-for-hns3-driver.patch b/0366-maintainers-update-for-hns3-driver.patch new file mode 100644 index 0000000..12fc38e --- /dev/null +++ b/0366-maintainers-update-for-hns3-driver.patch @@ -0,0 +1,33 @@ +From d743e25356ecfda7dcfc029c4e6a5d46fd80bce1 Mon Sep 17 00:00:00 2001 +From: Jie Hai +Date: Tue, 26 Sep 2023 18:04:05 +0800 +Subject: [PATCH] maintainers: update for hns3 driver + +[ upstream commit 5e4b7cad5119956df1ca9f0d22d1429399a5c818 ] + +Dongdong Liu currently do not work for the hns3 PMD. +I will do the work, so update the hns3 maintainers. + +Signed-off-by: Jie Hai +--- + MAINTAINERS | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/MAINTAINERS b/MAINTAINERS +index 7a28fec..7db6d4e 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -695,9 +695,8 @@ F: doc/guides/nics/enic.rst + F: doc/guides/nics/features/enic.ini + + Hisilicon hns3 +-M: Min Hu (Connor) ++M: Jie Hai + M: Yisen Zhuang +-M: Lijun Ou + F: drivers/net/hns3/ + F: doc/guides/nics/hns3.rst + F: doc/guides/nics/features/hns3.ini +-- +2.41.0.windows.2 + diff --git a/dpdk.spec b/dpdk.spec index af64575..0a162e7 100644 --- a/dpdk.spec +++ b/dpdk.spec @@ -1,6 +1,6 @@ Name: dpdk Version: 21.11 -Release: 56 +Release: 57 Packager: packaging@6wind.com URL: http://dpdk.org %global source_version 21.11 @@ -377,6 +377,22 @@ Patch6347: 0347-net-hns3-add-FDIR-VLAN-match-mode-runtime-config.patch Patch6348: 0348-doc-fix-kernel-patch-link-in-hns3-guide.patch Patch6349: 0349-doc-fix-syntax-in-hns3-guide.patch Patch6350: 0350-doc-fix-number-of-leading-spaces-in-hns3-guide.patch +Patch6351: 0351-config-arm-add-HiSilicon-HIP10.patch +Patch6352: 0352-net-hns3-fix-non-zero-weight-for-disabled-TC.patch +Patch6353: 0353-net-hns3-fix-index-to-look-up-table-in-NEON-Rx.patch +Patch6354: 0354-net-hns3-fix-VF-default-MAC-modified-when-set-failed.patch +Patch6355: 0355-net-hns3-fix-error-code-for-multicast-resource.patch +Patch6356: 0356-net-hns3-fix-flushing-multicast-MAC-address.patch +Patch6357: 0357-net-hns3-fix-traffic-management-thread-safety.patch +Patch6358: 0358-net-hns3-fix-traffic-management-dump-text-alignment.patch +Patch6359: 0359-net-hns3-fix-order-in-NEON-Rx.patch +Patch6360: 0360-net-hns3-optimize-free-mbuf-for-SVE-Tx.patch +Patch6361: 0361-net-hns3-optimize-rearm-mbuf-for-SVE-Rx.patch +Patch6362: 0362-net-hns3-optimize-SVE-Rx-performance.patch +Patch6363: 0363-app-testpmd-fix-multicast-address-pool-leak.patch +Patch6364: 0364-app-testpmd-fix-help-string.patch +Patch6365: 0365-app-testpmd-add-command-to-flush-multicast-MAC-addre.patch +Patch6366: 0366-maintainers-update-for-hns3-driver.patch Summary: Data Plane Development Kit core Group: System Environment/Libraries @@ -522,6 +538,25 @@ strip -g $RPM_BUILD_ROOT/lib/modules/%{kern_devel_ver}/extra/dpdk/igb_uio.ko /usr/sbin/depmod %changelog +* Fri Oct 27 2023 huangdengdui - 21.11-57 + Sync some patchs from upstreaming and modifies are as follow: + - maintainers: update for hns3 driver + - app/testpmd: add command to flush multicast MAC addresses + - app/testpmd: fix help string + - app/testpmd: fix multicast address pool leak + - net/hns3: optimize SVE Rx performance + - net/hns3: optimize rearm mbuf for SVE Rx + - net/hns3: optimize free mbuf for SVE Tx + - net/hns3: fix order in NEON Rx + - net/hns3: fix traffic management dump text alignment + - net/hns3: fix traffic management thread safety + - net/hns3: fix flushing multicast MAC address + - net/hns3: fix error code for multicast resource + - net/hns3: fix VF default MAC modified when set failed + - net/hns3: fix index to look up table in NEON Rx + - net/hns3: fix non-zero weight for disabled TC + - config/arm: add HiSilicon HIP10 + * Mon Aug 21 2023 huangdengdui - 21.11-56 replace patch-287 to solve the duplicate setting for MAC address. -- Gitee