From 6567df490cdd54ca0bb731fa431efa98339d5bf3 Mon Sep 17 00:00:00 2001 From: "wenfeng.zhang" Date: Wed, 14 Aug 2024 16:26:38 +0800 Subject: [PATCH 1/2] fix bert base nv accurary --- .../language_model/bert_base_squad/ixrt/CMakeLists.txt | 2 +- .../bert_base_squad/ixrt/src/gelu/geluPlugin.cpp | 8 ++++---- .../language_model/bert_large_squad/ixrt/CMakeLists.txt | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/models/nlp/language_model/bert_base_squad/ixrt/CMakeLists.txt b/models/nlp/language_model/bert_base_squad/ixrt/CMakeLists.txt index 4b1d5075..44b66104 100644 --- a/models/nlp/language_model/bert_base_squad/ixrt/CMakeLists.txt +++ b/models/nlp/language_model/bert_base_squad/ixrt/CMakeLists.txt @@ -20,7 +20,7 @@ if(DEFINED USE_TENSORRT) message(STATUS "Plugin lib use TRT 8.6.1") set(TRT_INC_PATH /usr/include/x86_64-linux-gnu/) - set(TRT_LIB_PATH /usr/lib/x86_64-linux-gnu/ /usr/local/cuda-12.1/targets/x86_64-linux/lib) + set(TRT_LIB_PATH /usr/lib/x86_64-linux-gnu/ /usr/local/cuda-12.4/targets/x86_64-linux/lib) set(TRT_LIBRARY nvinfer cublasLt) message(STATUS "cuda_libs = ${CUDA_LIBRARIES}") diff --git a/models/nlp/language_model/bert_base_squad/ixrt/src/gelu/geluPlugin.cpp b/models/nlp/language_model/bert_base_squad/ixrt/src/gelu/geluPlugin.cpp index b9ae5177..fe9ab334 100644 --- a/models/nlp/language_model/bert_base_squad/ixrt/src/gelu/geluPlugin.cpp +++ b/models/nlp/language_model/bert_base_squad/ixrt/src/gelu/geluPlugin.cpp @@ -85,6 +85,9 @@ IPluginV2* GeluPluginDynamicCreator::createPlugin(char const* name, PluginFieldC bias.values = fc->fields[i].data; bias.count = fc->fields[i].length; bias.type = fieldTypeToDataType(fc->fields[i].type); + if (ld == 0) { + ld = bias.count; + } } if (fieldName.compare("ld") == 0) { ld = *static_cast(fc->fields[i].data); @@ -344,10 +347,7 @@ int32_t GeluPluginDynamic::enqueue(nvinfer1::PluginTensorDesc const* inputDesc, case DataType::kINT8: { int8_t* input = (int8_t*)(inputs[0]); int8_t* output = (int8_t*)(outputs[0]); - IxinferBiasGeluI8II8O(batch_token_num, stream, (int8_t*)input, (int8_t*)output, - static_cast(mBiasDev.get()), mLd, inputDesc[0].scale, - 1.0/outputDesc[0].scale); - return STATUS_SUCCESS; + return enqueueInt8(input, output, inputDesc[0].scale, 1.0/outputDesc[0].scale, inputVolume, stream);` } default: return STATUS_FAILURE; diff --git a/models/nlp/language_model/bert_large_squad/ixrt/CMakeLists.txt b/models/nlp/language_model/bert_large_squad/ixrt/CMakeLists.txt index 4b1d5075..44b66104 100644 --- a/models/nlp/language_model/bert_large_squad/ixrt/CMakeLists.txt +++ b/models/nlp/language_model/bert_large_squad/ixrt/CMakeLists.txt @@ -20,7 +20,7 @@ if(DEFINED USE_TENSORRT) message(STATUS "Plugin lib use TRT 8.6.1") set(TRT_INC_PATH /usr/include/x86_64-linux-gnu/) - set(TRT_LIB_PATH /usr/lib/x86_64-linux-gnu/ /usr/local/cuda-12.1/targets/x86_64-linux/lib) + set(TRT_LIB_PATH /usr/lib/x86_64-linux-gnu/ /usr/local/cuda-12.4/targets/x86_64-linux/lib) set(TRT_LIBRARY nvinfer cublasLt) message(STATUS "cuda_libs = ${CUDA_LIBRARIES}") -- Gitee From 6307d9cdb61fc691195a644469dc38dffcda6c6d Mon Sep 17 00:00:00 2001 From: "wenfeng.zhang" Date: Wed, 28 Aug 2024 02:35:19 +0000 Subject: [PATCH 2/2] fix bert base nv accurary --- models/nlp/language_model/bert_base_squad/ixrt/CMakeLists.txt | 2 +- .../bert_base_squad/ixrt/python/script/inference_squad.sh | 4 ++-- .../bert_base_squad/ixrt/src/gelu/geluPlugin.cpp | 2 +- .../nlp/language_model/bert_large_squad/ixrt/CMakeLists.txt | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/models/nlp/language_model/bert_base_squad/ixrt/CMakeLists.txt b/models/nlp/language_model/bert_base_squad/ixrt/CMakeLists.txt index 44b66104..9a0e7a12 100644 --- a/models/nlp/language_model/bert_base_squad/ixrt/CMakeLists.txt +++ b/models/nlp/language_model/bert_base_squad/ixrt/CMakeLists.txt @@ -20,7 +20,7 @@ if(DEFINED USE_TENSORRT) message(STATUS "Plugin lib use TRT 8.6.1") set(TRT_INC_PATH /usr/include/x86_64-linux-gnu/) - set(TRT_LIB_PATH /usr/lib/x86_64-linux-gnu/ /usr/local/cuda-12.4/targets/x86_64-linux/lib) + set(TRT_LIB_PATH /usr/lib/x86_64-linux-gnu/ /usr/local/cuda/targets/x86_64-linux/lib) set(TRT_LIBRARY nvinfer cublasLt) message(STATUS "cuda_libs = ${CUDA_LIBRARIES}") diff --git a/models/nlp/language_model/bert_base_squad/ixrt/python/script/inference_squad.sh b/models/nlp/language_model/bert_base_squad/ixrt/python/script/inference_squad.sh index a40354b5..9473ee77 100644 --- a/models/nlp/language_model/bert_base_squad/ixrt/python/script/inference_squad.sh +++ b/models/nlp/language_model/bert_base_squad/ixrt/python/script/inference_squad.sh @@ -37,7 +37,7 @@ if [ "$USE_FP16" = "True" ]; then -sq ./data/squad/dev-v1.1.json \ -v ./data/bert_base_uncased_squad/vocab.txt \ -o ./data/predictions-bert_base_384.json - python3 evaluate-v1.1.py ./data/squad/dev-v1.1.json ./data/predictions-bert_base_384.json 88 + python3 evaluate-v1.1.py ./data/squad/dev-v1.1.json ./data/predictions-bert_base_384.json 87 else echo 'USE_INT8=True' UMD_ENABLEDCPRINGNUM=16 python3 inference.py -e ./data/bert_base_384_int8.engine \ @@ -47,5 +47,5 @@ else -v ./data/bert_base_uncased_squad/vocab.txt \ -o ./data/predictions-bert_base_384_int8.json \ -i - python3 evaluate-v1.1.py ./data/squad/dev-v1.1.json ./data/predictions-bert_base_384_int8.json 88 + python3 evaluate-v1.1.py ./data/squad/dev-v1.1.json ./data/predictions-bert_base_384_int8.json 86 fi \ No newline at end of file diff --git a/models/nlp/language_model/bert_base_squad/ixrt/src/gelu/geluPlugin.cpp b/models/nlp/language_model/bert_base_squad/ixrt/src/gelu/geluPlugin.cpp index fe9ab334..897052de 100644 --- a/models/nlp/language_model/bert_base_squad/ixrt/src/gelu/geluPlugin.cpp +++ b/models/nlp/language_model/bert_base_squad/ixrt/src/gelu/geluPlugin.cpp @@ -347,7 +347,7 @@ int32_t GeluPluginDynamic::enqueue(nvinfer1::PluginTensorDesc const* inputDesc, case DataType::kINT8: { int8_t* input = (int8_t*)(inputs[0]); int8_t* output = (int8_t*)(outputs[0]); - return enqueueInt8(input, output, inputDesc[0].scale, 1.0/outputDesc[0].scale, inputVolume, stream);` + return enqueueInt8(input, output, inputDesc[0].scale, 1.0/outputDesc[0].scale, inputVolume, stream); } default: return STATUS_FAILURE; diff --git a/models/nlp/language_model/bert_large_squad/ixrt/CMakeLists.txt b/models/nlp/language_model/bert_large_squad/ixrt/CMakeLists.txt index 44b66104..9a0e7a12 100644 --- a/models/nlp/language_model/bert_large_squad/ixrt/CMakeLists.txt +++ b/models/nlp/language_model/bert_large_squad/ixrt/CMakeLists.txt @@ -20,7 +20,7 @@ if(DEFINED USE_TENSORRT) message(STATUS "Plugin lib use TRT 8.6.1") set(TRT_INC_PATH /usr/include/x86_64-linux-gnu/) - set(TRT_LIB_PATH /usr/lib/x86_64-linux-gnu/ /usr/local/cuda-12.4/targets/x86_64-linux/lib) + set(TRT_LIB_PATH /usr/lib/x86_64-linux-gnu/ /usr/local/cuda/targets/x86_64-linux/lib) set(TRT_LIBRARY nvinfer cublasLt) message(STATUS "cuda_libs = ${CUDA_LIBRARIES}") -- Gitee