diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000000000000000000000000000000000000..5f1957ca18f8db75a1e8f1157d90fe7def3e2255
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,3 @@
+include *
+recursive-include mindspeed_mm *
+recursive-include examples *
diff --git a/pyproject.toml b/pyproject.toml
index 5b85edc36107ffe30e84be02dab2589bf2d47113..8e6e2e534e31facf11b3acbedb8244501c348000 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -31,8 +31,11 @@ dependencies = [
 requires = ["setuptools >= 65.0"]
 build-backend = "setuptools.build_meta"
 
+[tool.setuptools]
+include-package-data = true
+
 [tool.setuptools.packages.find]
-exclude = ["ci*", "docs", "sources", "tests"]
+exclude = ["ci*", "docs", "sources", "tests*"]
 
 [project.optional-dependencies]
 test = [
diff --git a/tests/st/baseline_results/finetune_internvl2_8B.json b/tests/st/baseline_results/finetune_internvl2_8B.json
new file mode 100644
index 0000000000000000000000000000000000000000..4313829ce17bf53e5aeb2759b9e36a6b493c7250
--- /dev/null
+++ b/tests/st/baseline_results/finetune_internvl2_8B.json
@@ -0,0 +1,33 @@
+{
+    "loss": [
+        2.864627E-01,
+        4.773775E-01
+    ],
+    "time": [
+        20954.1,
+        11843.9
+    ],
+    "memo info": [
+        {
+            "rank": 0,
+            "allocated memory": 20283.76953125,
+            "max allocated memory": 24900.033203125
+        },
+        {
+            "rank": 2,
+            "allocated memory": 22471.15185546875,
+            "max allocated memory": 27331.3740234375
+        },
+        {
+            "rank": 4,
+            "allocated memory": 22470.65185546875,
+            "max allocated memory": 23292.36181640625
+        },
+        {
+            "rank": 6,
+            "allocated memory": 24313.50390625,
+            "max allocated memory": 25759.67529296875
+        }
+    ],
+    "warm_up": 1
+}
\ No newline at end of file
diff --git a/tests/st/run_configs/finetune_internvl2_8B/data_8B.json b/tests/st/run_configs/finetune_internvl2_8B/data_8B.json
new file mode 100644
index 0000000000000000000000000000000000000000..7562cdb361baf423d943a6a834ca1550696ee24a
--- /dev/null
+++ b/tests/st/run_configs/finetune_internvl2_8B/data_8B.json
@@ -0,0 +1,54 @@
+{
+    "dataset_param": {
+        "dataset_type": "image",
+        "basic_parameters": {
+            "data_path": "/home/ci_resource/data/internvlv1-2-sft/opensource/ai2d_train_12k.jsonl",
+            "data_folder": "/home/ci_resource/data/internvlv1-2-sft/data/ai2d"
+        },
+        "preprocess_parameters": {
+            "image_reader_type": "torchvision",
+            "image_processer_type": "image2pixel",
+            "train_pipeline": {
+                "image":[
+                    {"trans_type": "Pad2Square", "param": {"mean": [0.485, 0.456, 0.406]}},
+                    {"trans_type": "Resize", "param": {"size": [448, 448], "interpolation": "BICUBIC"}},
+                    {"trans_type": "ToTensor"},
+                    {"trans_type": "norm_fun", "param": {"mean":[0.485, 0.456, 0.406], "std": [0.229, 0.224, 0.225]}}
+                ]
+            }
+        },
+        "tokenizer_config": {
+            "hub_backend": "hf",
+            "autotokenizer_name": "AutoTokenizer",
+            "from_pretrained": "/home/ci_resource/models/InternVL2-8B/pretrained/raw_ckpt/InternVL2-8B",
+            "model_max_length": 4096,
+            "add_eos_token": false,
+            "trust_remote_code": true,
+            "use_fast": false
+        },
+        "use_text_processer": true,
+        "template_name": "internlm2-chat",
+        "repeat_time": 1,
+        "patch_size": 14,
+        "image_size": 448,
+        "down_sample_ratio": 0.5,
+        "group_by_length": true,
+        "dynamic_image_size": true,
+        "use_thumbnail": true,
+        "min_dynamic_patch": 1,
+        "max_dynamic_patch": 6
+    },
+    "dataloader_param": {
+        "dataloader_mode": "sampler",
+        "batch_size": 1,
+        "num_workers": 4,
+        "shuffle": false,
+        "drop_last": true,
+        "pin_memory": true,
+        "sampler_type": "BaseRandomBatchSampler",
+        "collate_param": {
+            "model_name": "internvl",
+            "pad_id": 2
+        }
+    }
+}
\ No newline at end of file
diff --git a/tests/st/run_configs/finetune_internvl2_8B/model_8B.json b/tests/st/run_configs/finetune_internvl2_8B/model_8B.json
new file mode 100644
index 0000000000000000000000000000000000000000..62b1332ac6cd26a2170dc2e5218355bc61ac254d
--- /dev/null
+++ b/tests/st/run_configs/finetune_internvl2_8B/model_8B.json
@@ -0,0 +1,90 @@
+{
+    "model_id": "InternVL",
+    "pre_process": true,
+    "post_process": true,
+    "add_text_encoder": false,
+    "img_embedding_idx": 1,
+    "downsample_ratio": 0.5,
+    "select_layer": -1,
+    "ps_version": "v2",
+    "add_rmsnorm_offset": false,
+    "img_context_token_id": 92546,
+    "text_decoder": {
+        "num_layers": 32,
+        "pipeline_layer_index": [0, 6, 15, 24],
+        "hidden_size": 4096,
+        "num_attention_heads": 32,
+        "num_query_groups": 8,
+        "ffn_hidden_size": 14336,
+        "kv_channels": 128,
+        "hidden_dropout": 0.0,
+        "attention_dropout": 0.0,
+        "layernorm_epsilon": 1e-05,
+        "normalization": "RMSNorm",
+        "qk_layernorm": false,
+        "add_bias_linear": false,
+        "add_qkv_bias": false,
+        "bias_activation_fusion": false,
+        "gated_linear_unit": true,
+        "init_method_std": 0.01,
+        "apply_query_key_layer_scaling": false,
+        "attention_softmax_in_fp32": true,
+        "masked_softmax_fusion": false,
+        "layernorm_zero_centered_gamma": false,
+        "bias_dropout_fusion": false,
+        "apply_rope_fusion": true,
+        "memory_efficient_layer_norm": false,
+        "max_position_embeddings": 4096,
+        "fp16": false,
+        "bf16": true,
+        "params_dtype": "bf16",
+        "fp16_lm_cross_entropy": false,
+        "rotary_percent": 1.0,
+        "position_embedding_type": "rope",
+        "parallel_output": true,
+        "initializer_factor": 0.1,
+        "persist_layer_norm": true,
+        "activation_func": "silu",
+        "vocab_size": 92553,
+        "rotary_base": 1000000
+    },
+    "image_encoder": {
+        "vision_encoder": {
+            "model_id": "InternViT",
+            "num_layers": 24,
+            "hidden_size": 1024,
+            "ffn_hidden_size": 4096,
+            "num_attention_heads": 16,
+            "num_channels": 3,
+            "patch_size": 14,
+            "image_size": 448,
+            "add_qkv_bias": true,
+            "qk_layernorm": false,
+            "activation_func": "gelu",
+            "normalization": "LayerNorm",
+            "layernorm_epsilon": 1e-6,
+            "hidden_dropout": 0.0,
+            "drop_path_rate": 0.0,
+            "attention_dropout": 0.0,
+            "init_method_std": 0.02,
+            "initializer_factor": 1.0,
+            "output_hidden_states": false,
+            "use_return_dict": false,
+            "recompute_granularity": "full",
+            "recompute_method": "uniform",
+            "recompute_num_layers": 1,
+            "params_dtype": "bf16",
+            "post_layer_norm": false,
+            "downsample_ratio": 0.5,
+            "fp16": false,
+            "bf16": true,
+            "attention_softmax_in_fp32": false,
+            "select_layer": -1,
+            "ps_version": "v2",
+            "is_freeze": true
+        },
+        "vision_projector": null
+    },
+    "text_encoder": null,
+    "video_encoder": null
+}
\ No newline at end of file
diff --git a/tests/st/shell_scripts/finetune_internvl2_8B.sh b/tests/st/shell_scripts/finetune_internvl2_8B.sh
new file mode 100644
index 0000000000000000000000000000000000000000..8ebd7125f73b203d7cb2bfbd50ff28d93bbd3bbc
--- /dev/null
+++ b/tests/st/shell_scripts/finetune_internvl2_8B.sh
@@ -0,0 +1,103 @@
+#!/bin/bash
+export ASCEND_SLOG_PRINT_TO_STDOUT=0
+export ASCEND_GLOBAL_LOG_LEVEL=3
+export TASK_QUEUE_ENABLE=2
+export COMBINED_ENABLE=1
+export CPU_AFFINITY_CONF=1
+export HCCL_CONNECT_TIMEOUT=1200
+export CUDA_DEVICE_MAX_CONNECTIONS=1
+export HOST_CACHE_CAPACITY=20
+export ACLNN_CACHE_LIMIT=100000
+
+GPUS_PER_NODE=8
+MASTER_ADDR=localhost
+MASTER_PORT=6000
+NNODES=1
+NODE_RANK=0
+WORLD_SIZE=$(($GPUS_PER_NODE*$NNODES))
+
+MBS=1
+GRAD_ACC_STEP=64
+TP=1
+PP=4
+CP=1
+DP=$(($WORLD_SIZE/$TP/$PP/$CP))
+GBS=$(($MBS*$GRAD_ACC_STEP*$DP))
+
+BASEPATH=$(cd `dirname $0`; cd ../../../; pwd)
+
+MM_DATA="$BASEPATH/tests/st/run_configs/finetune_internvl2_8B/data_8B.json"
+MM_MODEL="$BASEPATH/tests/st/run_configs/finetune_internvl2_8B/model_8B.json"
+MM_TOOL="$BASEPATH/mindspeed_mm/tools/tools.json"
+LOAD_PATH="/home/ci_resource/models/InternVL2-8B/pretrained/ckpt_pp4"
+
+MM_ARGS="
+    --mm-data ${MM_DATA} \
+    --mm-model ${MM_MODEL} \
+    --mm-tool ${MM_TOOL}
+"
+
+DISTRIBUTED_ARGS="
+    --nproc_per_node $GPUS_PER_NODE \
+    --nnodes $NNODES \
+    --node_rank $NODE_RANK \
+    --master_addr $MASTER_ADDR \
+    --master_port $MASTER_PORT
+"
+
+GPT_ARGS="
+    --tensor-model-parallel-size ${TP} \
+    --pipeline-model-parallel-size ${PP} \
+    --context-parallel-size ${CP} \
+    --micro-batch-size ${MBS} \
+    --global-batch-size ${GBS} \
+    --num-layers 32 \
+    --hidden-size 4096 \
+    --num-attention-heads 16 \
+    --seq-length 4096 \
+    --max-position-embeddings 4096 \
+    --attention-dropout 0.0 \
+    --hidden-dropout 0.0 \
+    --tokenizer-type NullTokenizer \
+    --vocab-size 92553 \
+    --position-embedding-type rope \
+    --rotary-base 1000000 \
+    --swiglu \
+    --no-masked-softmax-fusion \
+    --lr 4e-5 \
+    --min-lr 0.0 \
+    --train-iters 2 \
+    --lr-decay-style cosine \
+    --weight-decay 0.05 \
+    --clip-grad 1.0 \
+    --adam-beta1 0.9 \
+    --adam-beta2 0.999 \
+    --no-gradient-accumulation-fusion \
+    --no-load-optim \
+    --no-load-rng \
+    --no-save-optim \
+    --no-save-rng \
+    --use-distributed-optimizer \
+    --bf16 \
+    --load $LOAD_PATH \
+    --use-flash-attn \
+    --use-fused-rotary-pos-emb \
+    --variable-seq-lengths \
+    --normalization RMSNorm \
+    --use-fused-rmsnorm \
+"
+
+OUTPUT_ARGS="
+    --log-interval 1 \
+    --save-interval 5000 \
+    --eval-interval 5000 \
+    --eval-iters 5000 \
+"
+
+
+torchrun $DISTRIBUTED_ARGS \
+    $BASEPATH/pretrain_internvl.py \
+    $GPT_ARGS \
+    $MM_ARGS \
+    $OUTPUT_ARGS \
+    --distributed-backend nccl
\ No newline at end of file
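
Note: with the settings above, the derived layout is DP = WORLD_SIZE/TP/PP/CP = 8/(1*4*1) = 2, so GBS = MBS*GRAD_ACC_STEP*DP = 1*64*2 = 128. A minimal sketch of exercising the new ST case by hand, assuming an 8-NPU Ascend host on which the /home/ci_resource data and checkpoint paths referenced by data_8B.json and LOAD_PATH are available (the harness invocation shown here is an assumption, not part of the patch):

    # hedged sketch: run the new system-test script directly; BASEPATH inside
    # the script resolves the repo root relative to the script's own location
    bash tests/st/shell_scripts/finetune_internvl2_8B.sh

The script trains for only 2 iterations (--train-iters 2); the ST harness is then presumed to compare the logged loss/time/memory values against tests/st/baseline_results/finetune_internvl2_8B.json, where "warm_up": 1 suggests the first iteration is excluded from the step-time comparison.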