diff --git a/models/nlp/language_model/bert_base_squad/ixrt/README.md b/models/nlp/language_model/bert_base_squad/ixrt/README.md
index acc3592b59533fcfd6334628977c78b05916de02..6629b384e8a90d7cdc83598649ee59d75c3a501a 100644
--- a/models/nlp/language_model/bert_base_squad/ixrt/README.md
+++ b/models/nlp/language_model/bert_base_squad/ixrt/README.md
@@ -14,6 +14,11 @@ docker pull nvcr.io/nvidia/tensorrt:23.04-py3
 
 ## Install
 
+```bash
+pip install onnx
+pip install pycuda
+```
+
 ### Install on Iluvatar
 
 ```bash
@@ -37,39 +42,31 @@ bash script/prepare.sh v1_1
 
 ## Inference
 
-```bash
-# INT8
-cd python
-pip install onnx pycuda
-bash script/build_engine.sh --bs 32 --int8
-bash script/inference_squad.sh --bs 32 --int8
-```
-
 ### On Iluvatar
 
 #### FP16
 
 ```bash
-cd python/script
-bash infer_bert_base_squad_fp16_ixrt.sh
-```
+cd script
 
-#### INT8
+# FP16
+bash infer_bert_base_squad_fp16_ixrt.sh
 
-```bash
-cd python/script
+# INT8
 bash infer_bert_base_squad_int8_ixrt.sh
 ```
 
-### On T4
+### On NV
 
 ```bash
 # FP16
-cd python
-pip install onnx pycuda
 # use --bs to set max_batch_size (dynamic)
 bash script/build_engine.sh --bs 32
 bash script/inference_squad.sh --bs 32
+
+# INT8
+bash script/build_engine.sh --bs 32 --int8
+bash script/inference_squad.sh --bs 32 --int8
 ```
 
 ## Results
diff --git a/models/nlp/language_model/bert_base_squad/ixrt/python/script/prepare.sh b/models/nlp/language_model/bert_base_squad/ixrt/python/script/prepare.sh
index 843166dec9d30224e818649f61868e9b968a2f37..5bd750c46738ac3489fadd81c730fff0eabfe5eb 100644
--- a/models/nlp/language_model/bert_base_squad/ixrt/python/script/prepare.sh
+++ b/models/nlp/language_model/bert_base_squad/ixrt/python/script/prepare.sh
@@ -51,7 +51,7 @@ else
     echo 'squad directory existed'
 fi
 
-echo "Step 2: Downloading model file and config to ./data/bert-large-uncased"
+echo "Step 2: Downloading model file and config to ./data/bert_base_uncased_squad"
 if [ ! -d "./bert_base_uncased_squad" ]; then
     wget https://drive.google.com/file/d/1_q7SaiZjwysJ3jWAIQT2Ne-duFdgWivR/view?usp=drive_link
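
One caveat about the `prepare.sh` hunk above: plain `wget` on a Google Drive `file/d/<id>/view?usp=drive_link` URL fetches the HTML preview page, not the shared file itself. A minimal sketch of one common workaround is below, assuming the file is publicly shared; `gdown` is a third-party tool used here for illustration and is not part of this patch.

```bash
# Sketch only: gdown resolves a Drive sharing link to a direct download.
pip install gdown

# --fuzzy lets gdown extract the file id from the full sharing URL;
# the downloaded file keeps its original Drive filename.
gdown --fuzzy "https://drive.google.com/file/d/1_q7SaiZjwysJ3jWAIQT2Ne-duFdgWivR/view?usp=drive_link"
```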