diff --git a/PyTorch/built-in/mlm/PLLaVA/README.md b/PyTorch/built-in/mlm/PLLaVA/README.md index a856ec3fc38e9b84fdcec70cdd3bb048559f4356..c4734f0daada9dc9f0c7410bbcae3b115e090f73 100644 --- a/PyTorch/built-in/mlm/PLLaVA/README.md +++ b/PyTorch/built-in/mlm/PLLaVA/README.md @@ -61,7 +61,7 @@ PLLaVA是一种新颖的端到端训练的大型多模态模型,它结合了 - 创建Python环境并且安装Python三方包: ```shell - conda create -n llava python=3.10 -y + conda create -n pllava python=3.10 -y conda activate pllava pip install --upgrade pip # enable PEP 660 support pip3 install torch==2.1.0+cpu --index-url https://download.pytorch.org/whl/cpu #For X86 @@ -122,10 +122,12 @@ PLLaVA是一种新颖的端到端训练的大型多模态模型,它结合了 **表 2** 训练结果展示: -| 芯片 | 卡数 | second per step | batch_size | AMP_Type | Torch_Version | -|:-------------:|:---:|:---------------:|:----------:|:---:|:---:| -| 竞品A | 8p | 0.84s | 1 | bf16 | 2.1 | -| Atlas 800T A2 | 8p | 0.85s | 1 | bf16 | 2.1 | +| 芯片 | 卡数 | second per step | batch_size | AMP_Type | Torch_Version | +|:------------------:|:---:|:---------------:|:----------:|:--------:|:---:| +| 竞品A | 8p | 0.9352s | 1 | bf16 | 2.1 | +| Atlas 200 A2 Box16 | 8p | 0.8411s | 1 | bf16 | 2.1 | +| 竞品A | 8p | 1.0760s | 1 | fp32 | 2.1 | +| Atlas 200 A2 Box16 | 8p | 0.9347s | 1 | fp32 | 2.1 | ## 模型推理 diff --git a/PyTorch/built-in/mlm/PLLaVA/requirements.txt b/PyTorch/built-in/mlm/PLLaVA/requirements.txt index 6feac297633bfd29eb3a6b0970e9aaad456a2603..48218321d8f5b0731f386e3c36d7ed8a16a74dac 100644 --- a/PyTorch/built-in/mlm/PLLaVA/requirements.txt +++ b/PyTorch/built-in/mlm/PLLaVA/requirements.txt @@ -72,7 +72,7 @@ hjson==3.1.0 hpack==4.0.0 httpcore==1.0.4 httpx==0.27.0 -huggingface-hub==0.21.4 +huggingface-hub==0.24.5 humanize==4.9.0 hupper==1.12.1 Hypercorn==0.16.0 @@ -204,7 +204,7 @@ texttable==1.7.0 threadpoolctl==3.3.0 tifffile==2024.2.12 timm==0.6.12 -tokenizers==0.15.2 +tokenizers==0.19.1 tomli==2.0.1 tomlkit==0.12.0 toolz==0.12.1 diff --git a/PyTorch/built-in/mlm/PLLaVA/scripts/eval_single.sh b/PyTorch/built-in/mlm/PLLaVA/scripts/eval_single.sh index 4aeaab90ea59f3625aaecdd21f2bd4ecf3b00bae..ae3d957c437b25b67be371ccff32787e1a3be364 100644 --- a/PyTorch/built-in/mlm/PLLaVA/scripts/eval_single.sh +++ b/PyTorch/built-in/mlm/PLLaVA/scripts/eval_single.sh @@ -12,7 +12,7 @@ lora_alpha=4 video_path=/path_to_PLLaVA/example/cooking.mp4 conv_mode=eval_videoqabench -python -m tasks.eval.videoqabench.pllava_eval_videoqabench \ +python -m tasks.eval.videoqabench.pllava_eval_single \ --pretrained_model_name_or_path ${model_dir} \ --save_path ${SAVE_DIR}/videoqabench \ --num_frames ${num_frames} \ diff --git a/PyTorch/built-in/mlm/PLLaVA/tasks/eval/videoqabench/pllava_eval_single.py b/PyTorch/built-in/mlm/PLLaVA/tasks/eval/videoqabench/pllava_eval_single.py index 59872424304392be192e476cb0ebe26c4c6a3ad6..a6778c73db67fd95860896440951cae9f9875226 100644 --- a/PyTorch/built-in/mlm/PLLaVA/tasks/eval/videoqabench/pllava_eval_single.py +++ b/PyTorch/built-in/mlm/PLLaVA/tasks/eval/videoqabench/pllava_eval_single.py @@ -21,6 +21,8 @@ from decord import VideoReader, cpu from tasks.eval.model_utils import load_pllava, pllava_answer from tasks.eval.eval_utils import conv_templates +from torch import nn, Tensor + logging.basicConfig() logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -165,6 +167,21 @@ def single_test(model, processor, vid_path, num_frames=4, conv_mode="plain", eva conv.user_query("Describe the video in details.", is_mm=True) llm_response, conv = pllava_answer(conv=conv, model=model, processor=processor, do_sample=False, img_list=img_list, max_new_tokens=256, print_res=True) +def adaptive_avg_pool3d(input: Tensor, output_size) -> Tensor: + input_dtype = input.dtype + input_shape = input.shape + input = input.to(dtype=torch.float32) + pool = nn.AvgPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, ceil_mode=False) + output = pool( + input.reshape(input_shape[0] * input_shape[1], input_shape[2], input_shape[3], input_shape[4])).reshape( + input_shape[0], input_shape[1], output_size[0], output_size[1], output_size[2]) + output = output.to(dtype=input_dtype) + return output + +def replace_with_adaptive_avg_pool3d(): + torch.nn.functional.adaptive_avg_pool3d = adaptive_avg_pool3d + + def main(): multiprocess=True mp.set_start_method('spawn',force=True) @@ -189,4 +206,5 @@ def main(): logger.info('single test done...') if __name__ == "__main__": + replace_with_adaptive_avg_pool3d() main() \ No newline at end of file diff --git a/PyTorch/built-in/mlm/PLLaVA/utils/basic_utils.py b/PyTorch/built-in/mlm/PLLaVA/utils/basic_utils.py index d2ca1ca21193decacead3e71f9d7bd45a77f45c4..0bd8f173a14818d74d866be5bb1d8c9cb2c60fe4 100644 --- a/PyTorch/built-in/mlm/PLLaVA/utils/basic_utils.py +++ b/PyTorch/built-in/mlm/PLLaVA/utils/basic_utils.py @@ -185,8 +185,8 @@ class MetricLogger(object): end = time.time() total_time = time.time() - start_time total_time_str = str(datetime.timedelta(seconds=int(total_time))) - logger.info('{} Total time: {} ({:.4f} s / it)'.format( - header, total_time_str, total_time / len(iterable))) + logger.info('Total time: {}'.format(total_time_str)) + logger.info('Averaged time: {:.4f} s / it'.format(total_time / len(iterable))) class AttrDict(dict):