diff --git a/tests/st/baseline_results/chatglm3_gqa_cp4.json b/tests/st/baseline_results/chatglm3_gqa_cp4.json index ea182bd14ed3082602e47961ebfd07a8d6e459b9..610af92bb41ae2d50f3bc6a6244ac9012d09ff69 100644 --- a/tests/st/baseline_results/chatglm3_gqa_cp4.json +++ b/tests/st/baseline_results/chatglm3_gqa_cp4.json @@ -33,23 +33,6 @@ 1813.6, 1812.8 ], - "throughput": [ - 90.1, - 191.7, - 192.0, - 192.2, - 191.4, - 190.9, - 191.1, - 190.2, - 190.8, - 190.2, - 189.9, - 189.4, - 189.8, - 189.0, - 188.8 - ], "memo info": [ { "rank": 0, diff --git a/tests/st/baseline_results/deepseek_500b_tp1_pp2_ep2_cp2_overlap.json b/tests/st/baseline_results/deepseek_500b_tp1_pp2_ep2_cp2_overlap.json index e1cfb8f9e29ea81810ef0e0e9bd3dee5ec76bbde..49f750c4f698b05988b8ffa4ac431d854c8bb715 100644 --- a/tests/st/baseline_results/deepseek_500b_tp1_pp2_ep2_cp2_overlap.json +++ b/tests/st/baseline_results/deepseek_500b_tp1_pp2_ep2_cp2_overlap.json @@ -33,23 +33,6 @@ 7972.2, 8005.5 ], - "throughput": [ - 50.8, - 119.1, - 118.4, - 117.0, - 118.3, - 116.3, - 114.9, - 114.5, - 111.5, - 111.3, - 112.3, - 111.6, - 110.4, - 110.6, - 110.4 - ], "memo info": [ { "rank": 0, diff --git a/tests/st/baseline_results/deepseek_v2_mcore_tp1_pp1_ep8.json b/tests/st/baseline_results/deepseek_v2_mcore_tp1_pp1_ep8.json index 5c282f5095e525039f7529e9a96040a69077d22b..9058140b1b10e0812735254e8c295ed166d54fbf 100644 --- a/tests/st/baseline_results/deepseek_v2_mcore_tp1_pp1_ep8.json +++ b/tests/st/baseline_results/deepseek_v2_mcore_tp1_pp1_ep8.json @@ -33,23 +33,6 @@ 4588.8, 4592.8 ], - "throughput": [ - 12, - 256, - 256, - 257, - 257, - 258, - 258, - 258, - 259, - 258, - 259, - 259, - 259, - 259, - 259 - ], "memo info": [ { "rank": 0, diff --git a/tests/st/baseline_results/deepseek_v3_mcore_tp1_pp2_ep4.json b/tests/st/baseline_results/deepseek_v3_mcore_tp1_pp2_ep4.json index dcc8219733d927aa9521afa3be6fcc37241f4ae0..a35b54306f593504125fa991a4c735ded737abd6 100644 --- a/tests/st/baseline_results/deepseek_v3_mcore_tp1_pp2_ep4.json +++ b/tests/st/baseline_results/deepseek_v3_mcore_tp1_pp2_ep4.json @@ -33,40 +33,6 @@ 2830.1, 2818.3 ], - "time info": [ - 9476.1, - 2816.5, - 2826.5, - 2819.2, - 2820.9, - 2831.1, - 2823.6, - 2828.2, - 2821.2, - 2825.2, - 2832.9, - 2829.3, - 2828.4, - 2833.0, - 2834.7 - ], - "throughput": [ - 35.0, - 129.3, - 128.9, - 129.3, - 128.4, - 129.3, - 129.3, - 129.3, - 128.4, - 129.1, - 129.5, - 129.3, - 128.8, - 129.1, - 129.0 - ], "memo info": [ { "rank": 0, diff --git a/tests/st/baseline_results/llama2_tp2_cp4_general_double_ring.json b/tests/st/baseline_results/llama2_tp2_cp4_general_double_ring.json index 108fe657669526f95d81aeb45edff978328c207e..6a96719dedd6d9511bfa2e6df85164729c2502f9 100644 --- a/tests/st/baseline_results/llama2_tp2_cp4_general_double_ring.json +++ b/tests/st/baseline_results/llama2_tp2_cp4_general_double_ring.json @@ -33,22 +33,22 @@ 16.649, 16.239 ], - "throughput": [ - 2.1, - 29.7, - 30.1, - 30.2, - 29.8, - 29.5, - 28.7, - 29.8, - 29.3, - 28.7, - 27.7, - 28.5, - 27.7, - 29.5, - 29.3 + "time info": [ + 43396.5, + 5191.7, + 5175.4, + 5240.7, + 5161.1, + 5194.3, + 5077.1, + 5153.1, + 5244.8, + 5221.0, + 5175.4, + 5287.4, + 5214.0, + 5275.8, + 5223.3 ], "memo info": [ { diff --git a/tests/st/baseline_results/llama2_tp2_pp4_vpp2_ptd.json b/tests/st/baseline_results/llama2_tp2_pp4_vpp2_ptd.json index 37248a06891db7005ca24f99a9352879de48ec81..d287dedc14801181566f853cf802165dae871374 100644 --- a/tests/st/baseline_results/llama2_tp2_pp4_vpp2_ptd.json +++ b/tests/st/baseline_results/llama2_tp2_pp4_vpp2_ptd.json @@ -16,22 +16,22 @@ 1.542952, 1.528176 ], - "throughput": [ - 38.5, - 128.8, - 129.1, - 129.3, - 129.2, - 129.4, - 129.4, - 129.2, - 129.2, - 129.4, - 129.3, - 129.2, - 129.1, - 129.1, - 129.4 + "time info": [ + 12326.2, + 5653.2, + 5646.0, + 5645.7, + 5633.3, + 5647.6, + 5639.1, + 5631.7, + 5634.1, + 5627.8, + 5633.2, + 5642.8, + 5636.0, + 5630.2, + 5634.0 ], "memo info": [ { diff --git a/tests/st/baseline_results/llama2_tp2_pp4_vpp2_swap.json b/tests/st/baseline_results/llama2_tp2_pp4_vpp2_swap.json index c346defdf74762a5651fbb2d8a5570b79580bab7..cb51c20ca2ba4810220e9783add7d0f496dbd62d 100644 --- a/tests/st/baseline_results/llama2_tp2_pp4_vpp2_swap.json +++ b/tests/st/baseline_results/llama2_tp2_pp4_vpp2_swap.json @@ -19,22 +19,22 @@ 1.286705, 1.269324 ], - "throughput": [ - 49.1, - 92.8, - 93.7, - 93.2, - 95.4, - 93.8, - 93.3, - 93.3, - 94.9, - 93.7, - 95.5, - 95.1, - 92.5, - 92.5, - 93.8 + "time info": [ + 11579.3, + 6038.9, + 6028.7, + 6000.3, + 6017.7, + 6024.4, + 6004.6, + 6001.1, + 6032.6, + 6016.7, + 6025.0, + 6032.2, + 6051.8, + 6031.1, + 5996.1 ], "memo info": [ { diff --git a/tests/st/baseline_results/llama3_mcore_tp2_pp2_vpp2_noop_layer.json b/tests/st/baseline_results/llama3_mcore_tp2_pp2_vpp2_noop_layer.json index 2e9404f8efc81e4e89c5f61459186c2dd6363cd4..e52f0315321a74224bbc8ab6db07b7d3fc4340f1 100644 --- a/tests/st/baseline_results/llama3_mcore_tp2_pp2_vpp2_noop_layer.json +++ b/tests/st/baseline_results/llama3_mcore_tp2_pp2_vpp2_noop_layer.json @@ -16,22 +16,22 @@ 9.112225, 8.973358 ], - "throughput": [ - 23.8, - 131.2, - 131.9, - 131.9, - 131.9, - 132.0, - 131.8, - 131.9, - 131.6, - 131.8, - 131.7, - 132.1, - 131.6, - 132.0, - 132.3 + "time info": [ + 5501.4, + 901.3, + 893.3, + 894.9, + 892.6, + 894.7, + 895.4, + 895.8, + 897.5, + 895.5, + 894.5, + 898.4, + 896.7, + 897.5, + 893.5 ], "memo info": [ { diff --git a/tests/st/baseline_results/llama3_tp2_pp2_vpp1.json b/tests/st/baseline_results/llama3_tp2_pp2_vpp1.json index 48a5c561bf7d86411c8a5d940af363cfa6ccdb97..5deacb34d45b8ee4fdc71ee0993e2bc4eab9d10e 100644 --- a/tests/st/baseline_results/llama3_tp2_pp2_vpp1.json +++ b/tests/st/baseline_results/llama3_tp2_pp2_vpp1.json @@ -16,22 +16,22 @@ 7.73499, 7.66312 ], - "throughput": [ - 6.3, - 97.5, - 97.5, - 97.3, - 97.5, - 97.4, - 97.5, - 97.5, - 97.5, - 97.3, - 97.4, - 97.4, - 97.3, - 97.4, - 97.3 + "time info": [ + 11410.4, + 4718.4, + 4716.5, + 4715.1, + 4716.5, + 4714.9, + 4715.7, + 4717.9, + 4718.3, + 4717.9, + 4715.2, + 4715.3, + 4714.4, + 4714.2, + 4714.9 ], "memo info": [ { diff --git a/tests/st/baseline_results/mamba2_8b_tp8_pp1_4k_ptd.json b/tests/st/baseline_results/mamba2_8b_tp8_pp1_4k_ptd.json index 076c9c3b1027958b28257429922392b44e5eb248..a9ea0fca215239e8e71836134fffddb4d6866151 100644 --- a/tests/st/baseline_results/mamba2_8b_tp8_pp1_4k_ptd.json +++ b/tests/st/baseline_results/mamba2_8b_tp8_pp1_4k_ptd.json @@ -16,22 +16,22 @@ 9.642986, 9.380438 ], - "throughput": [ - 31.6, - 67.5, - 63.7, - 67.8, - 66.0, - 68.1, - 68.7, - 68.2, - 68.6, - 68.6, - 68.6, - 68.7, - 68.8, - 68.8, - 68.4 + "time info": [ + 9999.0, + 4043.6, + 3985.9, + 3831.2, + 3758.7, + 3772.9, + 3731.0, + 3751.6, + 3693.8, + 3690.2, + 3701.7, + 3692.2, + 3696.2, + 3777.7, + 3710.7 ], "memo info": [ { diff --git a/tests/st/baseline_results/mixtral_mcore_tp4_cp2_ep2_ptd.json b/tests/st/baseline_results/mixtral_mcore_tp4_cp2_ep2_ptd.json index 8900a521cf95e173209a56e4e7041ed2b7e86801..40c2e4ebf440c64a02a63141707b817982f5b4bd 100644 --- a/tests/st/baseline_results/mixtral_mcore_tp4_cp2_ep2_ptd.json +++ b/tests/st/baseline_results/mixtral_mcore_tp4_cp2_ep2_ptd.json @@ -33,23 +33,6 @@ 3142.3, 3095.9 ], - "throughput": [ - 13.1, - 42.6, - 41.4, - 42.7, - 42.5, - 41.3, - 41.3, - 41.1, - 41.9, - 41.6, - 42.2, - 42.8, - 42.8, - 41.7, - 43.3 - ], "memo info": [ { "rank": 0, diff --git a/tests/st/baseline_results/qwen2_moe_tp1_pp2_ep2_cp2_32k.json b/tests/st/baseline_results/qwen2_moe_tp1_pp2_ep2_cp2_32k.json index b0d2bdd71b7345b3e06722b7be134ccf9a8c57f9..173f5ebae3c1ed198cf17d2413a25781c2c3d13f 100644 --- a/tests/st/baseline_results/qwen2_moe_tp1_pp2_ep2_cp2_32k.json +++ b/tests/st/baseline_results/qwen2_moe_tp1_pp2_ep2_cp2_32k.json @@ -33,23 +33,6 @@ 7136.8, 7133.7 ], - "throughput": [ - 13.0, - 27.8, - 27.9, - 27.8, - 27.9, - 27.9, - 27.9, - 28.0, - 28.0, - 27.9, - 28.0, - 28.0, - 28.0, - 27.9, - 27.8 - ], "memo info": [ { "rank": 0, diff --git a/tests/st/baseline_results/tune_llama2_tp1_pp1_qlora_ptd.json b/tests/st/baseline_results/tune_llama2_tp1_pp1_qlora_ptd.json index aee543cfce20231035c1d2f1f97790cfcd2e893a..67b4ff55c07813e7d205bbba8954ec246d5b0f22 100644 --- a/tests/st/baseline_results/tune_llama2_tp1_pp1_qlora_ptd.json +++ b/tests/st/baseline_results/tune_llama2_tp1_pp1_qlora_ptd.json @@ -16,22 +16,22 @@ 1.470815, 1.141417 ], - "throughput": [ - 284.9, - 1067.6, - 1066.3, - 1093.9, - 1075.6, - 1048.2, - 1056.2, - 1065.1, - 1061.2, - 1046.4, - 1052.4, - 1024.5, - 1060.5, - 1049.1, - 1058.3 + "time info": [ + 2240.9, + 497.9, + 495.9, + 483.1, + 491.1, + 508.9, + 509.9, + 495.9, + 495.6, + 506.3, + 499.8, + 504.5, + 492.3, + 507.5, + 490.8 ], "memo info": [ { diff --git a/tests/st/baseline_results/tune_llama2_tp2_cp2_adaptive_cp.json b/tests/st/baseline_results/tune_llama2_tp2_cp2_adaptive_cp.json index b783015003b64b73540dc17988054681b36b70c2..f1889a1fa169dab78b0152872d7e4f34ad4ffda8 100644 --- a/tests/st/baseline_results/tune_llama2_tp2_cp2_adaptive_cp.json +++ b/tests/st/baseline_results/tune_llama2_tp2_cp2_adaptive_cp.json @@ -33,23 +33,6 @@ 1562.8, 1572.6 ], - "throughput": [ - 19.7, - 61.3, - 61.9, - 61.9, - 60.8, - 61.3, - 61.1, - 62.0, - 61.9, - 61.7, - 62.1, - 61.8, - 61.9, - 61.7, - 61.6 - ], "memo info": [ { "rank": 0,