diff --git a/.gitignore b/.gitignore index 7aa4dbb41a336c2220eef18e997b4f100270b77c..1977d146276e7d823f8b67e8b8671b7036547fb3 100644 --- a/.gitignore +++ b/.gitignore @@ -151,9 +151,9 @@ cython_debug/ /ci/kernel*/ # mindspeed core -/mindspeed/ +mindspeed -# test +# test /tests/st/run_jsons/ /tests/st/run_logs/ diff --git a/checkpoint/internvl2_hf_to_mm.py b/checkpoint/internvl2_hf_to_mm.py index 62f037aa6a5bd22e5e14f42438fdf7cd076979f1..35f25e6e714e2b2e2611dc56bbe44ce8f2165999 100644 --- a/checkpoint/internvl2_hf_to_mm.py +++ b/checkpoint/internvl2_hf_to_mm.py @@ -17,8 +17,10 @@ from checkpoint.utils import ConvertVppMMConfig def load_from_hf(load_dir, trust_remote_code): # Load Huggingface model. - hf_model = AutoModelForCausalLM.from_pretrained(load_dir, device_map='cpu', trust_remote_code=trust_remote_code, - local_files_only=True) + hf_model = AutoModelForCausalLM.from_pretrained( + load_dir, device_map='cpu', + trust_remote_code=trust_remote_code, + local_files_only=True) print(hf_model) return hf_model @@ -274,7 +276,7 @@ def main(convert_config: ConvertVppMMConfig): pp_size = parallel_config.pp_size vp_size = parallel_config.vpp_size - + pp_split = merge_pp_index( vit_pipeline_num_layers=parallel_config.vit_pp_layers, llm_pipeline_num_layers=parallel_config.llm_pp_layers @@ -288,7 +290,7 @@ def main(convert_config: ConvertVppMMConfig): if len(remains) > 0: print(remains) raise RuntimeWarning("There are some weights ungrouped.") - + for rank, pipeline_state_dict in enumerate(pipeline_state_dicts): print(20 * '#', f'stage {rank}', 20 * '#') for key, value in pipeline_state_dict.items(): diff --git a/checkpoint/utils.py b/checkpoint/utils.py index ee702b840a2a4d0b4cffa2c5170d5b305951fb9b..14f2061877f717abd853d4f3841e10279c8a6882 100644 --- a/checkpoint/utils.py +++ b/checkpoint/utils.py @@ -42,7 +42,7 @@ class ParallelConfig(BaseModel): if len(self.vit_pp_layers) < 1: raise ValueError("pp layers长度至少为1") return self - + class VppParallelConfig(BaseModel): """权模型切分配置,包括tp的size,以及pp切分时vit和llm在pp域每张卡上切分的层数""" @@ -59,7 +59,7 @@ class VppParallelConfig(BaseModel): @computed_field def pp_size(self) -> PositiveInt: return len(self.llm_pp_layers[0]) - + @computed_field def vpp_size(self) -> PositiveInt: return len(self.llm_pp_layers) @@ -71,7 +71,7 @@ class VppParallelConfig(BaseModel): if len(self.vit_pp_layers) < 1: raise ValueError("pp layers长度至少为1") return self - + @model_validator(mode='after') def validate_vpp_layers(self) -> "VppParallelConfig": pp_size = self.pp_size @@ -91,7 +91,7 @@ class HfConfig(BaseModel): @cached_property def config(self) -> PretrainedConfig: - return AutoConfig.from_pretrained(self.hf_dir) + return AutoConfig.from_pretrained(self.hf_dir, local_files_only=True) @model_validator(mode='after') def validate_hf_dir(self) -> "HfConfig": @@ -147,7 +147,7 @@ class ConvertResplitConfig(BaseModel): if sum(self.source_parallel_config.llm_pp_layers) != sum(self.target_parallel_config.llm_pp_layers): raise ValueError("llm pp layers not equal!") return self - + # BaseModel/dataclasses注意要在field的下一行添加描述说明 class ConvertVppMMConfig(BaseModel): diff --git a/evaluate_vlm.py b/evaluate_vlm.py index 0d4127588c242605e92d07ec7e311147bbe6123f..38fd0d7436e1d389e1ac618714675c6ee2eaf06d 100644 --- a/evaluate_vlm.py +++ b/evaluate_vlm.py @@ -3,13 +3,14 @@ from megatron.training import get_args from megatron.training.initialize import initialize_megatron from mindspeed_mm.configs.config import merge_mm_args from mindspeed_mm.configs.config import mm_extra_args_provider +from 
mindspeed_mm.arguments import extra_args_provider_decorator from mindspeed_mm.tasks.evaluation.eval_datasets import eval_dataset_dict from mindspeed_mm.tasks.evaluation.eval_impl import eval_impl_dict, eval_pipeline_dict from mindspeed_mm.tasks.evaluation.eval_prompt import eval_model_prompt_dict def main(): - initialize_megatron(extra_args_provider=mm_extra_args_provider) + initialize_megatron(extra_args_provider=extra_args_provider_decorator(mm_extra_args_provider)) args = get_args() merge_mm_args(args) args = args.mm.model diff --git a/examples/diffusers/sana/patch_sana.py b/examples/diffusers/sana/patch_sana.py index 7f943b60ab4bf2ed98e5ac6f3421e49f2e98fcba..362cd5fc89493b9c8124ca3c395cd85146281461 100644 --- a/examples/diffusers/sana/patch_sana.py +++ b/examples/diffusers/sana/patch_sana.py @@ -72,7 +72,8 @@ def create_load_model_hook( raise ValueError(f"unexpected save model: {model.__class__}") else: transformer_ = SanaTransformer2DModel.from_pretrained( - args.pretrained_model_name_or_path, subfolder="transformer" + args.pretrained_model_name_or_path, subfolder="transformer", + local_files_only=True ) # Make sure the trainable params are in float32. This is again needed since the base models diff --git a/examples/hunyuanvideo/convert_ckpt_to_mm.py b/examples/hunyuanvideo/convert_ckpt_to_mm.py index c12ee1b8cf8c7f2cc0efb46411b9fa93f460212c..2824b2bf8addd4e3018b8bbdae7cdd398830e18d 100644 --- a/examples/hunyuanvideo/convert_ckpt_to_mm.py +++ b/examples/hunyuanvideo/convert_ckpt_to_mm.py @@ -40,7 +40,8 @@ def preprocess_text_encoder_tokenizer(source_dir, save_dir): model = LlavaForConditionalGeneration.from_pretrained( source_dir, torch_dtype=torch.float16, - low_cpu_mem_usage=True + low_cpu_mem_usage=True, + local_files_only=True ) model.language_model.save_pretrained(save_dir) processor.tokenizer.save_pretrained(save_dir) @@ -122,7 +123,7 @@ def get_tp_split_layer_names( f"single_blocks.{index}.linear1.weight", f"single_blocks.{index}.linear1.bias", ] - + return ( column_parallel_linears, row_parallel_linears, @@ -165,7 +166,7 @@ def split_by_tp( new_state_dict[name] = torch.chunk(state_dict[name], tp_size, dim=0)[tp_rank] for name in row_parallel_linears: new_state_dict[name] = torch.chunk(state_dict[name], tp_size, dim=1)[tp_rank] - + for name in qkv_fused_projs: wq, wk, wv = torch.chunk(state_dict[name], 3, dim=0) wq = torch.chunk(wq, tp_size, dim=0)[tp_rank] @@ -210,7 +211,7 @@ def merge_by_tp( if tp_size == 1: return state_dicts - + merged_state_dict = copy.deepcopy(state_dicts[0]) ( column_parallel_linears, @@ -234,7 +235,7 @@ def merge_by_tp( [state_dicts[tp_rank][name] for tp_rank in range(tp_size)], dim=1 ) - + for name in qkv_fused_projs: wq = torch.cat( [torch.chunk(state_dicts[tp_rank][name], 3, dim=0)[0] for tp_rank in range(tp_size)], @@ -270,7 +271,7 @@ def merge_by_tp( dim=0 ) merged_state_dict[name] = torch.cat([wq, wk, wv, wmlp], dim=0) - + for name in x_mlp_fused_row_parallel_linear: wx = torch.cat( [state_dicts[tp_rank][name][:, :hidden_size // tp_size] for tp_rank in range(tp_size)], @@ -281,7 +282,7 @@ def merge_by_tp( dim=1, ) merged_state_dict[name] = torch.cat([wx, wmlp], dim=1) - + return merged_state_dict @@ -301,7 +302,7 @@ def load_state_dicts_by_tp(load_dir: str, tp_size: int = 2) -> List[Dict[str, An state_dict_path = os.path.join(load_dir, directory, f"mp_rank_{tp_rank:02d}", "model_optim_rng.pt") tp_state_dicts.append(torch.load(state_dict_path)['model']) - return tp_state_dicts + return tp_state_dicts def save(state_dicts: List[Dict], save_dir: str, 
latest_checkpointed_iteration="release"): @@ -316,7 +317,7 @@ def save(state_dicts: List[Dict], save_dir: str, latest_checkpointed_iteration=" directory = 'release' else: directory = 'iter_{:07d}'.format(latest_checkpointed_iteration) - + for tp_rank, state_dict in enumerate(state_dicts): os.makedirs(os.path.join(save_dir, directory, f"mp_rank_{tp_rank:02d}")) save_path = os.path.join(save_dir, directory, f"mp_rank_{tp_rank:02d}", "model_optim_rng.pt") @@ -331,7 +332,7 @@ def get_args(): parser.add_argument("--source_path", type=str, default="./transformers/mp_rank_00/model_states.pt", help="Source path of checkpoint") parser.add_argument("--target_path", type=str, default="./ckpt/hunyuanvideo/", help="Save path of MM checkpoint") parser.add_argument("--tp_size", type=int, default=2, help="Tensor model parallel world size") - parser.add_argument("--mode", type=str, default="split", choices=["split", "merge"], + parser.add_argument("--mode", type=str, default="split", choices=["split", "merge"], help="Split mode is used to split the pretrained weights according to tp_size before training, \ and Merge mode is used to merge weights based on tp_size after training is completed") @@ -345,7 +346,7 @@ if __name__ == "__main__": if args.module == "text_encoder": preprocess_text_encoder_tokenizer(args.source_path, args.target_path) else: - if args.mode == "split": + if args.mode == "split": source_state_dict = torch.load(args.source_path, map_location='cpu')['module'] state_dict = replace_state_dict(source_state_dict, convert_mapping=DIT_CONVERT_MAPPING) state_dicts = split_by_tp( diff --git a/examples/internvl2.5/data_4B.json b/examples/internvl2.5/data_4B.json index f7b0e8e761b99d6a3070e7649773ed9c3081925d..a134628c493bf59d958eeee4bd9005925fbe2a58 100644 --- a/examples/internvl2.5/data_4B.json +++ b/examples/internvl2.5/data_4B.json @@ -24,7 +24,6 @@ "from_pretrained": "OpenGVLab/InternVL2_5-4B", "model_max_length": 4096, "add_eos_token": false, - "trust_remote_code": true, "use_fast": false }, "use_text_processer": true, diff --git a/examples/internvl2.5/data_78B.json b/examples/internvl2.5/data_78B.json index bd69b1116576a91f20c81e89f14bd13e87b75f4b..4cd0145de5880d9e9cbca1d6a01faaf4178d1868 100644 --- a/examples/internvl2.5/data_78B.json +++ b/examples/internvl2.5/data_78B.json @@ -24,7 +24,6 @@ "from_pretrained": "OpenGVLab/InternVL2_5-78B", "model_max_length": 4096, "add_eos_token": false, - "trust_remote_code": true, "use_fast": false }, "use_text_processer": true, diff --git a/examples/internvl2.5/finetune_internvl2.5_4B.sh b/examples/internvl2.5/finetune_internvl2.5_4B.sh index 2256b696c48429a92f551684a03049a2b02abb61..aa4abd968ddf5530e7790b0dc5613aee349b62e8 100644 --- a/examples/internvl2.5/finetune_internvl2.5_4B.sh +++ b/examples/internvl2.5/finetune_internvl2.5_4B.sh @@ -79,7 +79,8 @@ GPT_ARGS=" --load $LOAD_PATH \ --variable-seq-lengths \ --normalization RMSNorm \ - --num-workers 4 + --num-workers 4 \ + --trust-remote-code \ " OUTPUT_ARGS=" diff --git a/examples/internvl2.5/finetune_internvl2.5_78B.sh b/examples/internvl2.5/finetune_internvl2.5_78B.sh index 2453d4c765c70a18274998a11037380f0dc74453..d4283d059f987ad9744c235cfcedb81956d46280 100644 --- a/examples/internvl2.5/finetune_internvl2.5_78B.sh +++ b/examples/internvl2.5/finetune_internvl2.5_78B.sh @@ -88,7 +88,8 @@ GPT_ARGS=" --load $LOAD_PATH \ --variable-seq-lengths \ --normalization RMSNorm \ - --num-workers 4 + --num-workers 4 \ + --trust-remote-code \ " OUTPUT_ARGS=" diff --git a/examples/internvl2.5/inference_4B.json 
b/examples/internvl2.5/inference_4B.json index 9d4988bc52e8a4e64ffe250f67eb0288e274ec25..e0f07a0aeae4bcd46295becb0c4a4316abb839a6 100644 --- a/examples/internvl2.5/inference_4B.json +++ b/examples/internvl2.5/inference_4B.json @@ -1,4 +1,4 @@ -{ +{ "infer_data_type": "image", "file_path": "./examples/internvl2.5/view.jpg", "prompts": "Please describe the image shortly.", @@ -102,7 +102,6 @@ "from_pretrained": "OpenGVLab/InternVL2_5-4B", "model_max_length": 4096, "add_eos_token": false, - "trust_remote_code": true, "use_fast": false }, "generation_config":{ diff --git a/examples/internvl2.5/inference_internvl.sh b/examples/internvl2.5/inference_internvl.sh index 45fa37c310e1226a38b7cca7c8c2577993bcfd1d..0240ddf415b68fa380fa3a30168ea8a9aa3f649d 100644 --- a/examples/internvl2.5/inference_internvl.sh +++ b/examples/internvl2.5/inference_internvl.sh @@ -53,6 +53,7 @@ GPT_ARGS=" --no-masked-softmax-fusion \ --use-distributed-optimizer \ --bf16 \ + --trust-remote-code \ " OUTPUT_ARGS=" diff --git a/examples/internvl2.5/internvl2.5_convert_to_mm_ckpt.py b/examples/internvl2.5/internvl2.5_convert_to_mm_ckpt.py index a7be244a845ee2d1044f4e14fbd4fa7135811126..d6f12037f6c1ba1e4691ce42b9c53e696027131f 100644 --- a/examples/internvl2.5/internvl2.5_convert_to_mm_ckpt.py +++ b/examples/internvl2.5/internvl2.5_convert_to_mm_ckpt.py @@ -3,7 +3,7 @@ import os from copy import deepcopy from dataclasses import dataclass import stat -import re +import re import torch from transformers import AutoModelForCausalLM, AutoConfig @@ -78,10 +78,13 @@ model_config_dict = { def load_from_hf(load_dir, trust_remote_code): # Load Huggingface model. - hf_model = AutoModelForCausalLM.from_pretrained(load_dir, device_map='cpu', trust_remote_code=trust_remote_code, - local_files_only=True) + hf_model = AutoModelForCausalLM.from_pretrained( + load_dir, device_map='cpu', + trust_remote_code=trust_remote_code, + local_files_only=True) print(hf_model) - config = AutoConfig.from_pretrained(load_dir, trust_remote_code=trust_remote_code) + config = AutoConfig.from_pretrained( + load_dir, trust_remote_code=trust_remote_code, local_files_only=True) global llm_arch llm_arch = config.llm_config.architectures[0] return hf_model, config @@ -187,7 +190,7 @@ def convert_hg_to_mm(_state_dict, model_config, num_key_value_heads): new_key = new_key.replace('post_attention_layernorm', 'pre_mlp_layernorm') new_key = new_key.replace('gate_proj', 'linear_fc1_gate') new_key = new_key.replace('up_proj', 'linear_fc1_up') - new_key = new_key.replace('down_proj', 'linear_fc2') + new_key = new_key.replace('down_proj', 'linear_fc2') new_key = new_key.replace('model.norm', 'decoder.final_layernorm') new_key = new_key.replace('model.embed_tokens', 'embedding.word_embeddings') @@ -222,10 +225,10 @@ def convert_hg_to_mm(_state_dict, model_config, num_key_value_heads): wv = new_dict[v_name] else: raise AssertionError(f'Missing key {v_name}') - + q_chunks = torch.chunk(wq, num_key_value_heads, dim=0) k_chunks = torch.chunk(wk, num_key_value_heads, dim=0) - v_chunks = torch.chunk(wv, num_key_value_heads, dim=0) + v_chunks = torch.chunk(wv, num_key_value_heads, dim=0) all_chunks = [] for j in range(num_key_value_heads): all_chunks.append(q_chunks[j]) @@ -260,10 +263,10 @@ def convert_hg_to_mm(_state_dict, model_config, num_key_value_heads): wv = new_dict[v_name] else: raise AssertionError(f'Missing key {v_name}') - + q_chunks = torch.chunk(wq, num_key_value_heads, dim=0) k_chunks = torch.chunk(wk, num_key_value_heads, dim=0) - v_chunks = torch.chunk(wv, 
num_key_value_heads, dim=0) + v_chunks = torch.chunk(wv, num_key_value_heads, dim=0) all_chunks = [] for j in range(num_key_value_heads): all_chunks.append(q_chunks[j]) @@ -276,7 +279,7 @@ def convert_hg_to_mm(_state_dict, model_config, num_key_value_heads): if k_name in new_dict: new_dict.pop(k_name) if v_name in new_dict: - new_dict.pop(v_name) + new_dict.pop(v_name) # 合并mlp的gate和up权重 @@ -433,17 +436,17 @@ if __name__ == '__main__': model_config = get_model_config( args.model_size, args.vpp) pp_split = merge_pp_index(model_config) - + for key, value in state_dict.items(): print(key, value.shape) state_dict = convert_hg_to_mm(state_dict, model_config, num_key_value_heads) pipeline_state_dicts, remains = split_model_by_pipeline(state_dict, pp_split) - + if len(remains) > 0: print(remains) raise RuntimeWarning("There are some weights ungrouped.") - + for rank, pipeline_state_dict in enumerate(pipeline_state_dicts): print(20 * '#', f'stage {rank}', 20 * '#') for key, value in pipeline_state_dict.items(): diff --git a/examples/internvl2/data_26B.json b/examples/internvl2/data_26B.json index 2f450d7642332f6c390fa9acbcebaeeae48c165d..1c2f2a59ce426cc6f6850d023f5f652652ce1284 100644 --- a/examples/internvl2/data_26B.json +++ b/examples/internvl2/data_26B.json @@ -14,7 +14,7 @@ {"trans_type": "Resize", "param": {"size": [448, 448], "interpolation": "BICUBIC"}}, {"trans_type": "ToTensor"}, {"trans_type": "norm_fun", "param": {"mean":[0.485, 0.456, 0.406], "std": [0.229, 0.224, 0.225]}} - ] + ] } }, "tokenizer_config": { @@ -23,7 +23,6 @@ "from_pretrained": "OpenGVLab/InternVL2-26B", "model_max_length": 4096, "add_eos_token": false, - "trust_remote_code": true, "use_fast": false }, "use_text_processer": true, diff --git a/examples/internvl2/data_2B.json b/examples/internvl2/data_2B.json index 25a649cc46dbc6e734c262f80b27f5b433537986..7f6cd0b112eafef03e1e83de6bcbe57a739e7b19 100644 --- a/examples/internvl2/data_2B.json +++ b/examples/internvl2/data_2B.json @@ -24,7 +24,6 @@ "from_pretrained": "OpenGVLab/InternVL2-2B", "model_max_length": 4096, "add_eos_token": false, - "trust_remote_code": true, "use_fast": false }, "use_text_processer": true, diff --git a/examples/internvl2/data_76B.json b/examples/internvl2/data_76B.json index aa9c60140d49315f905535aee5a65ba07508c488..21476838a6bf02aafa98d61867d7558f181d5ec5 100644 --- a/examples/internvl2/data_76B.json +++ b/examples/internvl2/data_76B.json @@ -23,7 +23,6 @@ "from_pretrained": "OpenGVLab/InternVL2-Llama3-76B", "model_max_length": 4096, "add_eos_token": false, - "trust_remote_code": false, "use_fast": false }, "use_text_processer": true, diff --git a/examples/internvl2/data_8B.json b/examples/internvl2/data_8B.json index 94898a668476f157c2cce90d5e1bbd95a75d531d..e5bd9f0b05fd7349a982030eeab7e34aebf15a6d 100644 --- a/examples/internvl2/data_8B.json +++ b/examples/internvl2/data_8B.json @@ -23,7 +23,6 @@ "from_pretrained": "OpenGVLab/InternVL2-8B", "model_max_length": 4096, "add_eos_token": false, - "trust_remote_code": true, "use_fast": false }, "use_text_processer": true, diff --git a/examples/internvl2/evaluate_internvl2_8B.json b/examples/internvl2/evaluate_internvl2_8B.json index fcab66f3dbcb20abc3dc773b5a8620a89fd0164c..9002918e2f3ee72e8ec7bc87fdb907eca2ff0eb3 100644 --- a/examples/internvl2/evaluate_internvl2_8B.json +++ b/examples/internvl2/evaluate_internvl2_8B.json @@ -104,7 +104,6 @@ "from_pretrained": "./InternVL2-8B", "model_max_length": 4096, "add_eos_token": false, - "trust_remote_code": true, "use_fast": false }, "generation_config":{ 
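For reference, the recurring pattern in the hunks above and below is sketched here; this block is illustrative only and is not part of the patch. The change gates custom Hub code behind a --trust-remote-code flag that defaults to off, and pins every Hugging Face load to the local cache with local_files_only=True. The standalone argparse parser below is an assumption for the sketch; in the patch the flag is registered on Megatron's parser via _add_security_args and read back through get_args().

# Illustrative sketch only -- mirrors the pattern applied throughout this diff,
# not code taken from the repository.
import argparse
from transformers import AutoConfig, AutoModelForCausalLM

parser = argparse.ArgumentParser()
parser.add_argument('--trust-remote-code', action='store_true', default=False,
                    help='Whether or not to allow for custom models defined on '
                         'the Hub in their own modeling files.')
args = parser.parse_args()

# Loads never fall back to a network download; custom modeling code from the
# Hub is executed only when the flag is passed explicitly.
hf_model = AutoModelForCausalLM.from_pretrained(
    './InternVL2-8B',                       # assumed local snapshot path
    device_map='cpu',
    trust_remote_code=args.trust_remote_code,
    local_files_only=True)
config = AutoConfig.from_pretrained(
    './InternVL2-8B',
    trust_remote_code=args.trust_remote_code,
    local_files_only=True)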
diff --git a/examples/internvl2/evaluate_internvl2_8B.sh b/examples/internvl2/evaluate_internvl2_8B.sh index 659bfaa90cf48217d1391586ed39914dc1a2886f..cd6de99a75332c635561ec676b30c526b68817ca 100644 --- a/examples/internvl2/evaluate_internvl2_8B.sh +++ b/examples/internvl2/evaluate_internvl2_8B.sh @@ -76,6 +76,7 @@ GPT_ARGS=" --bf16 \ --distributed-timeout-minutes 1000 \ --use-flash-attn \ + --trust-remote-code \ " OUTPUT_ARGS=" diff --git a/examples/internvl2/finetune_internvl2_26B.sh b/examples/internvl2/finetune_internvl2_26B.sh index 0a0e7a56a108400adf40fedeb015f518d5a9d280..bee97ab89f742d15bfc2ca8ed4a5c229487a9161 100644 --- a/examples/internvl2/finetune_internvl2_26B.sh +++ b/examples/internvl2/finetune_internvl2_26B.sh @@ -82,6 +82,7 @@ GPT_ARGS=" --no-save-rng \ --num-workers 4 \ --enable-dummy-optimizer \ + --trust-remote-code \ " OUTPUT_ARGS=" diff --git a/examples/internvl2/finetune_internvl2_2B.sh b/examples/internvl2/finetune_internvl2_2B.sh index 09d068ad50a8053a5397d50b5c02788eb1306196..6bed8ad00e84b22521f01934908d8f2c213551fe 100644 --- a/examples/internvl2/finetune_internvl2_2B.sh +++ b/examples/internvl2/finetune_internvl2_2B.sh @@ -81,6 +81,7 @@ GPT_ARGS=" --no-save-optim \ --no-save-rng \ --num-workers 4 \ + --trust-remote-code \ " OUTPUT_ARGS=" diff --git a/examples/internvl2/finetune_internvl2_8B.sh b/examples/internvl2/finetune_internvl2_8B.sh index c1fc022974a49e0e7a80409e2adfdf5cc814e713..57d10a1b47109add0048f91a131f2a78dc5b0bc9 100644 --- a/examples/internvl2/finetune_internvl2_8B.sh +++ b/examples/internvl2/finetune_internvl2_8B.sh @@ -81,6 +81,7 @@ GPT_ARGS=" --no-save-optim \ --no-save-rng \ --num-workers 4 \ + --trust-remote-code \ " OUTPUT_ARGS=" diff --git a/examples/internvl2/finetune_internvl2_8B_vpp.sh b/examples/internvl2/finetune_internvl2_8B_vpp.sh index c84611b5f752371a12b7b13de8dd3cded7bd15e5..2091707a8ba407b09c169077042530b3cb2c68af 100644 --- a/examples/internvl2/finetune_internvl2_8B_vpp.sh +++ b/examples/internvl2/finetune_internvl2_8B_vpp.sh @@ -82,6 +82,7 @@ GPT_ARGS=" --no-save-optim \ --no-save-rng \ --num-workers 4 \ + --trust-remote-code \ " OUTPUT_ARGS=" diff --git a/examples/internvl2/inference_2B.json b/examples/internvl2/inference_2B.json index 67ecf8fbe617dc4bb6c8bc1b56b7d9130f5878f5..0dd767ac7a089ab8c214f65684b4c8a33009dbc0 100644 --- a/examples/internvl2/inference_2B.json +++ b/examples/internvl2/inference_2B.json @@ -1,4 +1,4 @@ -{ +{ "infer_data_type": "image", "file_path": "./examples/internvl2/view.jpg", "prompts": "Please describe the image shortly.", @@ -102,7 +102,6 @@ "from_pretrained": "OpenGVLab/InternVL2-2B", "model_max_length": 4096, "add_eos_token": false, - "trust_remote_code": true, "use_fast": false }, "generation_config":{ diff --git a/examples/internvl2/inference_8B.json b/examples/internvl2/inference_8B.json index e79d6836f36e9947e8ecf4493e7906fba4f9a963..3f32d0e2f125586b8e6bdfe7a96a2a4b5e07c981 100644 --- a/examples/internvl2/inference_8B.json +++ b/examples/internvl2/inference_8B.json @@ -1,4 +1,4 @@ -{ +{ "infer_data_type": "image", "file_path": "./examples/internvl2/view.jpg", "prompts": "Please describe the image shortly.", @@ -102,7 +102,6 @@ "from_pretrained": "OpenGVLab/InternVL2-8B", "model_max_length": 4096, "add_eos_token": false, - "trust_remote_code": true, "use_fast": false }, "generation_config":{ diff --git a/examples/internvl2/inference_internvl.sh b/examples/internvl2/inference_internvl.sh index 4d945790437fb90fb8bb89f062baf8828c1948ff..f7c0c11a4736c3aa4a361cea98292688dc1fc980 100644 --- 
a/examples/internvl2/inference_internvl.sh +++ b/examples/internvl2/inference_internvl.sh @@ -54,6 +54,7 @@ GPT_ARGS=" --use-distributed-optimizer \ --bf16 \ --use-flash-attn \ + --trust-remote-code \ " OUTPUT_ARGS=" diff --git a/examples/internvl2/internvl2_convert_to_mm_ckpt.py b/examples/internvl2/internvl2_convert_to_mm_ckpt.py index 6c6379d22e17ff30df3443c572c3a8b64ce8e45f..cbc2b920782a1d3ef497ed476811eb07ba4481bd 100644 --- a/examples/internvl2/internvl2_convert_to_mm_ckpt.py +++ b/examples/internvl2/internvl2_convert_to_mm_ckpt.py @@ -13,10 +13,13 @@ llm_arch = '' def load_from_hf(load_dir, trust_remote_code): # Load Huggingface model. - hf_model = AutoModelForCausalLM.from_pretrained(load_dir, device_map='cpu', trust_remote_code=trust_remote_code, - local_files_only=True) + hf_model = AutoModelForCausalLM.from_pretrained( + load_dir, device_map='cpu', + trust_remote_code=trust_remote_code, + local_files_only=True) print(hf_model) - config = AutoConfig.from_pretrained(load_dir, trust_remote_code=trust_remote_code) + config = AutoConfig.from_pretrained( + load_dir, trust_remote_code=trust_remote_code, local_files_only=True) global llm_arch llm_arch = config.llm_config.architectures[0] return hf_model diff --git a/examples/llava1.5/evaluate_llava1_5.json b/examples/llava1.5/evaluate_llava1_5.json index deba5834ac54a2fa88fb31a5ec2467cb8dbf3017..a93a82c100537e38651674b19b347cd63eb07cf9 100644 --- a/examples/llava1.5/evaluate_llava1_5.json +++ b/examples/llava1.5/evaluate_llava1_5.json @@ -87,7 +87,6 @@ "hub_backend": "hf", "autotokenizer_name": "AutoTokenizer", "from_pretrained": "./llava_7b", - "local_files_only": false, "use_fast": false }, "generation_config": { diff --git a/examples/llava1.5/inference_llava.json b/examples/llava1.5/inference_llava.json index 38ba3b3c26c603968bc1a000fd06580330430965..c18e068cb918d77633db1bb237aabc357df61a13 100644 --- a/examples/llava1.5/inference_llava.json +++ b/examples/llava1.5/inference_llava.json @@ -89,8 +89,7 @@ "tokenizer":{ "hub_backend": "hf", "autotokenizer_name": "AutoTokenizer", - "from_pretrained": "llava_weights/vicuna-7b-v1.5", - "local_files_only": false + "from_pretrained": "llava_weights/vicuna-7b-v1.5" }, "generation_config":{ "bos_token_id": 1, diff --git a/examples/llava1.5/vicuna_converter.py b/examples/llava1.5/vicuna_converter.py index ab95b13e6eac9b88d90e63f76fd25298aa2e81fc..3e464140f99909857860a8befa522b87ebc4ff46 100644 --- a/examples/llava1.5/vicuna_converter.py +++ b/examples/llava1.5/vicuna_converter.py @@ -7,11 +7,14 @@ from transformers import AutoModelForCausalLM, AutoConfig def load_from_hf(load_dir, trust_remote_code): # Load Huggingface model. 
- hf_model = AutoModelForCausalLM.from_pretrained(load_dir, device_map='cpu', trust_remote_code=trust_remote_code, - torch_dtype=torch.bfloat16, local_files_only=True) + hf_model = AutoModelForCausalLM.from_pretrained( + load_dir, device_map='cpu', + trust_remote_code=trust_remote_code, + torch_dtype=torch.bfloat16, local_files_only=True) print(hf_model) - config = AutoConfig.from_pretrained(load_dir, trust_remote_code=trust_remote_code) - + config = AutoConfig.from_pretrained( + load_dir, trust_remote_code=trust_remote_code, local_files_only=True) + return hf_model, config @@ -26,7 +29,7 @@ def merge_qkv(wq, wk, wv, ng=32): qkv[j * d : j * d + dq, :] = wq[j * dq : (j + 1) * dq, :] qkv[j * d + dq : j * d + dq + dkv, :] = wk[j * dkv : (j + 1) * dkv, :] qkv[j * d + dq + dkv : j * d + dq + dkv * 2, :] = wv[j * dkv : (j + 1) * dkv, :] - + return qkv diff --git a/examples/opensora1.0/inference_model_120x256x256.json b/examples/opensora1.0/inference_model_120x256x256.json index 31fe16d5502a0d403b9d9c027862e7406ce1f519..e8d7ddc32bf4272bf17db399a7c279ac44eeef39 100644 --- a/examples/opensora1.0/inference_model_120x256x256.json +++ b/examples/opensora1.0/inference_model_120x256x256.json @@ -10,7 +10,6 @@ "hub_backend": "hf", "model_id": "T5", "from_pretrained": "DeepFloyd/t5-v1_1-xxl", - "local_files_only": false, "low_cpu_mem_usage": true, "dtype": "fp32" }, @@ -18,7 +17,6 @@ "hub_backend": "hf", "autotokenizer_name": "AutoTokenizer", "from_pretrained": "DeepFloyd/t5-v1_1-xxl", - "local_files_only": false, "model_max_length": 120 }, "predictor": { diff --git a/examples/opensora1.0/inference_model_16x512x512.json b/examples/opensora1.0/inference_model_16x512x512.json index d382f2665d7863b2dada9fb1d632fef639d63b0b..5965d4b0096223ecfdab9decffce4c32707cd19b 100644 --- a/examples/opensora1.0/inference_model_16x512x512.json +++ b/examples/opensora1.0/inference_model_16x512x512.json @@ -10,7 +10,6 @@ "hub_backend": "hf", "model_id": "T5", "from_pretrained": "DeepFloyd/t5-v1_1-xxl", - "local_files_only": false, "low_cpu_mem_usage": true, "dtype": "fp32" }, @@ -18,7 +17,6 @@ "hub_backend": "hf", "autotokenizer_name": "AutoTokenizer", "from_pretrained": "DeepFloyd/t5-v1_1-xxl", - "local_files_only": false, "model_max_length": 120 }, "predictor": { diff --git a/examples/opensora1.2/inference_model_102x720x1280.json b/examples/opensora1.2/inference_model_102x720x1280.json index 928ada67672256ba0dde2c8949ec65d60dad6007..469e945a685647e9428b86ec5b6ce74197dad08d 100644 --- a/examples/opensora1.2/inference_model_102x720x1280.json +++ b/examples/opensora1.2/inference_model_102x720x1280.json @@ -15,7 +15,6 @@ "hub_backend": "hf", "model_id": "T5", "from_pretrained": "DeepFloyd/t5-v1_1-xxl", - "local_files_only": false, "low_cpu_mem_usage": true, "dtype": "fp32" }, @@ -23,7 +22,6 @@ "hub_backend": "hf", "autotokenizer_name": "AutoTokenizer", "from_pretrained": "DeepFloyd/t5-v1_1-xxl", - "local_files_only": false, "model_max_length":300 }, "predictor": { diff --git a/examples/opensoraplan1.2/inference_model_29x480x640.json b/examples/opensoraplan1.2/inference_model_29x480x640.json index 4c5548d46c30601eae9470b0d00c969596edab10..8cc08231bb1f06393b85d5a19f6fdb046f3c0105 100644 --- a/examples/opensoraplan1.2/inference_model_29x480x640.json +++ b/examples/opensoraplan1.2/inference_model_29x480x640.json @@ -47,7 +47,6 @@ "hub_backend": "hf", "model_id": "MT5", "from_pretrained": "./weights/google/mt5-xxl", - "local_files_only": false, "low_cpu_mem_usage": true, "dtype": "fp16" }, @@ -55,7 +54,6 @@ "hub_backend": "hf", 
"autotokenizer_name": "AutoTokenizer", "from_pretrained": "./opensoraplanv12/weights/mt5", - "local_files_only": false, "model_max_length": 512 }, "predictor": { diff --git a/examples/opensoraplan1.3/i2v/inference_i2v_model.json b/examples/opensoraplan1.3/i2v/inference_i2v_model.json index cf26436c99984f916c764ac4229e0ba6ec95e7e7..a2050288b1fc39150333d272e4a7e09817526a4a 100644 --- a/examples/opensoraplan1.3/i2v/inference_i2v_model.json +++ b/examples/opensoraplan1.3/i2v/inference_i2v_model.json @@ -1,4 +1,4 @@ -{ +{ "predictor": { "model_id": "videoditsparsei2v", "from_pretrained": "./weights/sparsedit/sparsediti2v_mm.pth", @@ -49,10 +49,9 @@ "hub_backend": "hf", "autotokenizer_name": "AutoTokenizer", "from_pretrained": "./weights/google/mt5-xxl", - "local_files_only": false, "model_max_length": 512 }, - + "diffusion": { "model_id": "EulerAncestralDiscrete", "num_inference_steps":100, diff --git a/examples/opensoraplan1.3/t2v/inference_t2v_model.json b/examples/opensoraplan1.3/t2v/inference_t2v_model.json index bcda965eb6c82f54d948df87cb09a515f9cb4617..249dce8fd8951e39645b09dc3557fd7e0f6bb592 100644 --- a/examples/opensoraplan1.3/t2v/inference_t2v_model.json +++ b/examples/opensoraplan1.3/t2v/inference_t2v_model.json @@ -1,4 +1,4 @@ -{ +{ "predictor": { "model_id": "videoditsparse", "from_pretrained": "./weights/sparsedit/sparsedit_mm.pth", @@ -49,10 +49,9 @@ "hub_backend": "hf", "autotokenizer_name": "AutoTokenizer", "from_pretrained": "./weights/google/mt5-xxl", - "local_files_only": false, "model_max_length": 512 }, - + "diffusion": { "model_id": "EulerAncestralDiscrete", "num_inference_steps":100, diff --git a/examples/qihoo_t2x/inference_model_image.json b/examples/qihoo_t2x/inference_model_image.json index 9878e1ed7beb7d8dce775217b297da6c5b1cb39e..0494baad1bad45f4e1399f25033bd3775ab1515d 100644 --- a/examples/qihoo_t2x/inference_model_image.json +++ b/examples/qihoo_t2x/inference_model_image.json @@ -47,15 +47,13 @@ "hub_backend": "hf", "model_id": "T5", "from_pretrained": "./pretrain_models/text_encoder", - "local_files_only": false, "low_cpu_mem_usage": true, "dtype": "fp32" }, "tokenizer":{ "hub_backend": "hf", "autotokenizer_name": "AutoTokenizer", - "from_pretrained": "./pretrain_models/tokenizer", - "local_files_only": false + "from_pretrained": "./pretrain_models/tokenizer" }, "predictor": { "dtype": "bf16", diff --git a/examples/qwen2vl/evaluate_qwen2vl_7b.json b/examples/qwen2vl/evaluate_qwen2vl_7b.json index fba234e0c994e0bee0003e10ddf29a5132729ad1..8c77e68ad40de54515febe3aa506091e181f6d7c 100644 --- a/examples/qwen2vl/evaluate_qwen2vl_7b.json +++ b/examples/qwen2vl/evaluate_qwen2vl_7b.json @@ -87,8 +87,7 @@ "tokenizer": { "hub_backend": "hf", "autotokenizer_name": "AutoTokenizer", - "from_pretrained": "./Qwen2-VL-7B-Instruct", - "local_files_only":false + "from_pretrained": "./Qwen2-VL-7B-Instruct" }, "generation_config": { "bos_token_id": 151643, diff --git a/examples/qwen2vl/inference_qwen2vl_2b.json b/examples/qwen2vl/inference_qwen2vl_2b.json index 0f9aa05201a263ed1e571513069308a1f3d5e426..8e5fdcdd32d651bf155bda688bceca7f212129c1 100644 --- a/examples/qwen2vl/inference_qwen2vl_2b.json +++ b/examples/qwen2vl/inference_qwen2vl_2b.json @@ -85,8 +85,7 @@ "tokenizer": { "hub_backend": "hf", "autotokenizer_name": "AutoTokenizer", - "from_pretrained": "ckpt/hf_path/Qwen2-VL-2B-Instruct", - "local_files_only":false + "from_pretrained": "ckpt/hf_path/Qwen2-VL-2B-Instruct" }, "generation_config": { "bos_token_id": 151643, diff --git a/examples/qwen2vl/inference_qwen2vl_72b.json 
b/examples/qwen2vl/inference_qwen2vl_72b.json index 64e23985e7fc8c8075024ad3a8b4b6f30cc8a6c3..5a39ca49a6bf6cf626daddc7174c2b1d30441746 100644 --- a/examples/qwen2vl/inference_qwen2vl_72b.json +++ b/examples/qwen2vl/inference_qwen2vl_72b.json @@ -84,8 +84,7 @@ "tokenizer": { "hub_backend": "hf", "autotokenizer_name": "AutoTokenizer", - "from_pretrained": "ckpt/hf_path/Qwen2-VL-72B-Instruct", - "local_files_only":false + "from_pretrained": "ckpt/hf_path/Qwen2-VL-72B-Instruct" }, "generation_config": { "bos_token_id": 151643, diff --git a/examples/qwen2vl/inference_qwen2vl_7b.json b/examples/qwen2vl/inference_qwen2vl_7b.json index 4fc01c72f8eaf2568eba0d35f9559cad03f05886..6421d771896d15a7a7c27cace31c6d373b1ebc8c 100644 --- a/examples/qwen2vl/inference_qwen2vl_7b.json +++ b/examples/qwen2vl/inference_qwen2vl_7b.json @@ -85,8 +85,7 @@ "tokenizer": { "hub_backend": "hf", "autotokenizer_name": "AutoTokenizer", - "from_pretrained": "ckpt/hf_path/Qwen2-VL-7B-Instruct", - "local_files_only":false + "from_pretrained": "ckpt/hf_path/Qwen2-VL-7B-Instruct" }, "generation_config": { "bos_token_id": 151643, diff --git a/examples/whisper/pretrain_whisper.sh b/examples/whisper/pretrain_whisper.sh index 4fa7cb12324b86fe59fa1fdcdf6f42a2d9eebc63..6927fa330750b1b4f44af5c47c7f92dd502a7ad9 100644 --- a/examples/whisper/pretrain_whisper.sh +++ b/examples/whisper/pretrain_whisper.sh @@ -65,6 +65,7 @@ GPT_ARGS=" --overlap-grad-reduce \ --overlap-param-gather \ --num-workers 4 \ + --trust-remote-code \ " MM_ARGS=" diff --git a/inference_qihoo.py b/inference_qihoo.py index cf1ab3c543ac5c42bf954e45a99a37a4a268958b..a331df3f1fa19b97b99a2ba7e0b80b114a72853d 100644 --- a/inference_qihoo.py +++ b/inference_qihoo.py @@ -6,6 +6,7 @@ from megatron.training.initialize import initialize_megatron from megatron.training import get_args from mindspeed_mm.configs.config import merge_mm_args, mm_extra_args_provider +from mindspeed_mm.arguments import extra_args_provider_decorator from mindspeed_mm.tasks.inference.pipeline import sora_pipeline_dict from mindspeed_mm.tasks.inference.pipeline.utils.sora_utils import save_videos, load_prompts, save_image_or_videos from mindspeed_mm.models.predictor import PredictModel @@ -37,7 +38,7 @@ def prepare_pipeline(args, device): def main(): - initialize_megatron(extra_args_provider=mm_extra_args_provider, args_defaults={}) + initialize_megatron(extra_args_provider=extra_args_provider_decorator(mm_extra_args_provider), args_defaults={}) args = get_args() merge_mm_args(args) args = args.mm.model diff --git a/inference_vlm.py b/inference_vlm.py index d82b97df0af7a157f81f8eae6eeaa7b02b110fa8..245609c6133003bcf49d9e903e0ae3e229919d26 100644 --- a/inference_vlm.py +++ b/inference_vlm.py @@ -4,6 +4,7 @@ import mindspeed.megatron_adaptor from megatron.training import get_args from mindspeed_mm.tasks.inference.pipeline import vlm_pipeline_dict from mindspeed_mm.configs.config import mm_extra_args_provider +from mindspeed_mm.arguments import extra_args_provider_decorator def main(): @@ -14,7 +15,7 @@ def main(): torch.set_grad_enabled(False) initialize_megatron( - extra_args_provider=mm_extra_args_provider, args_defaults={'tokenizer_type': 'GPT2BPETokenizer'} + extra_args_provider=extra_args_provider_decorator(mm_extra_args_provider), args_defaults={'tokenizer_type': 'GPT2BPETokenizer'} ) args = get_args() merge_mm_args(args) diff --git a/mindspeed_mm/arguments.py b/mindspeed_mm/arguments.py index 51d17339ab2995a4152d1e8d4e75fb282d7d5f52..47fa1b2434fd9daadbb04d840577ecd2aba38dc2 100644 --- 
a/mindspeed_mm/arguments.py +++ b/mindspeed_mm/arguments.py @@ -34,6 +34,7 @@ def process_args(parser): parser = _add_network_size_args(parser) parser = _add_dummy_optimizer_args(parser) parser = _add_logging_args(parser) + parser = _add_security_args(parser) return parser @@ -75,7 +76,9 @@ def _add_training_args(parser): action='store_true', default=False, help='Use internal format to train') - group.add_argument('--virtual-pipeline-model-parallel-size', type=int, default=None, + group.add_argument('--virtual-pipeline-model-parallel-size', + type=int, + default=None, help='vpp size') group.add_argument('--encoder-dp-balance', action='store_true', @@ -115,11 +118,22 @@ def _add_dummy_optimizer_args(parser): def _add_logging_args(parser): - group = parser.add_argument_group(title='mm_logging') + group = parser.add_argument_group(title='logging') group.add_argument('--log-tps', action='store_true', default=False, help='calculate and log average tokens per sample') - - return parser \ No newline at end of file + + return parser + + +def _add_security_args(parser): + group = parser.add_argument_group(title='security configuration') + + group.add_argument('--trust-remote-code', + action='store_true', + default=False, + help='Whether or not to allow for custom models defined on the Hub in their own modeling files.') + + return parser diff --git a/mindspeed_mm/data/datasets/audio_dataset.py b/mindspeed_mm/data/datasets/audio_dataset.py index c53bf8a033ebc17feca6c75f0b3ee5a15d855673..d9265b2ac0d9359552c4f1c41af466f9ec8d82ac 100644 --- a/mindspeed_mm/data/datasets/audio_dataset.py +++ b/mindspeed_mm/data/datasets/audio_dataset.py @@ -14,6 +14,7 @@ from datasets import Audio, load_dataset from torch.utils.data import Dataset from transformers import WhisperProcessor +from megatron.training import get_args class AudioDataset(Dataset): @@ -40,7 +41,7 @@ class AudioDataset(Dataset): dataset_name_or_path, language, split="train+validation", - trust_remote_code=True, + trust_remote_code=get_args().trust_remote_code, ) train_dataset = train_dataset.remove_columns( [ @@ -59,7 +60,7 @@ class AudioDataset(Dataset): processor = WhisperProcessor.from_pretrained( processor_name_or_path, language=processor_language, - task=task, + task=task, local_files_only=True, ) feature_extractor = processor.feature_extractor diff --git a/mindspeed_mm/models/text_encoder/text_encoder.py b/mindspeed_mm/models/text_encoder/text_encoder.py index b6c150de8d772a712eb44bd25990bcf3fcd8ac50..2aad5aefa15f55a2d3796901d3715db4be5f1f81 100644 --- a/mindspeed_mm/models/text_encoder/text_encoder.py +++ b/mindspeed_mm/models/text_encoder/text_encoder.py @@ -1,6 +1,7 @@ import importlib import torch import torch.nn as nn + from mindspeed_mm.utils.utils import get_dtype @@ -25,18 +26,18 @@ class TextEncoder(nn.Module): "backend": type-str, "hf" or "om", "model_id": type-str, "AutoModel" or other automodel name, "dtype": type-str, dtype of text encoder - + (2) args for automodel.from_pretrained() of transformers or openmind "pretrained_model_name_or_path": type-str, local path or hub path, "local_files_only": type-bool, ... } - - If `config` is a list of dictionaries, each dictionary in the list will be used to instantiate a separate Text Encoder Model instance, + - If `config` is a list of dictionaries, each dictionary in the list will be used to instantiate a separate Text Encoder Model instance, effectively allowing the creation of multiple Text Encoder based on different configurations. 
""" def __init__(self, config): super().__init__() - + if isinstance(config, list) or isinstance(config, tuple): self.text_encoders = nn.ModuleList() for config_i in config: @@ -59,7 +60,7 @@ class TextEncoder(nn.Module): else: outputs = self._single_encode(self.text_encoders, input_ids, mask) return outputs - + def _single_encode(self, text_encoder, input_ids, attention_mask, **kwargs): *BN, L = input_ids.shape input_ids = input_ids.to(text_encoder.device).view(-1, L) @@ -90,16 +91,16 @@ class TextEncoder(nn.Module): ) * emb ) - + if text_encoder.output_key in ["last_hidden_state", "hidden_states"]: emb = emb.view(*BN, L, -1) elif text_encoder.output_key in ["pooler_output"]: emb = emb.view(*BN, -1) else: raise NotImplementedError(f"Text encoder output_key: {text_encoder.output_key} is not implenmented! ") - - return emb - + + return emb + def _init_text_encoder(self, config): if not isinstance(config, dict): config = config.to_dict() @@ -116,6 +117,12 @@ class TextEncoder(nn.Module): self.automodel_name = TEXT_ENCODER_MAPPING[model_id] config["pretrained_model_name_or_path"] = config.pop("from_pretrained") config["torch_dtype"] = get_dtype(config.pop("dtype")) + config["local_files_only"] = True + try: + from megatron.training import get_args + config["trust_remote_code"] = get_args().trust_remote_code + except (ImportError, AssertionError): + config["trust_remote_code"] = False # Only huggingface backend is supported, OpenMind backend will be supported soon. module = importlib.import_module("transformers") diff --git a/mindspeed_mm/models/text_encoder/tokenizer.py b/mindspeed_mm/models/text_encoder/tokenizer.py index 3a8754196666d6459ff24fce88da394a98b1067c..2e9ec259dfdb1a8a7761569dcce48a7b82e17e6f 100644 --- a/mindspeed_mm/models/text_encoder/tokenizer.py +++ b/mindspeed_mm/models/text_encoder/tokenizer.py @@ -1,5 +1,4 @@ import importlib -from torch import nn class Tokenizer: @@ -20,7 +19,7 @@ class Tokenizer: "local_files_only": type-bool, ... } - - If `config` is a list of dictionaries, each dictionary in the list will be used to instantiate a separate Tokenizer instance, + - If `config` is a list of dictionaries, each dictionary in the list will be used to instantiate a separate Tokenizer instance, effectively allowing the creation of multiple tokenizers based on different configurations. 
""" @@ -32,11 +31,11 @@ class Tokenizer: tokenizer_i = self._init_tokenizer(module, config_i) self.tokenizers.append(tokenizer_i) else: - self.tokenizers = self._init_tokenizer(module, config) + self.tokenizers = self._init_tokenizer(module, config) def get_tokenizer(self): return self.tokenizers - + def _init_tokenizer(self, module, config): if not isinstance(config, dict): config = config.to_dict() @@ -45,5 +44,11 @@ class Tokenizer: self.backend = config.pop("hub_backend") tokenizer_name = config.pop("autotokenizer_name") config["pretrained_model_name_or_path"] = config.pop("from_pretrained") + config["local_files_only"] = True + try: + from megatron.training import get_args + config["trust_remote_code"] = get_args().trust_remote_code + except (ImportError, AssertionError): + config["trust_remote_code"] = False tokenizer_cls = getattr(module, tokenizer_name) return tokenizer_cls.from_pretrained(**config) \ No newline at end of file diff --git a/tests/st/run_configs/finetune_internvl2_8B/data_8B.json b/tests/st/run_configs/finetune_internvl2_8B/data_8B.json index 8cc38468ee84fdc3f558c16a60e8d008ca0b8310..bf61000cca1522bf3f5325058ce7098d3b4f5297 100644 --- a/tests/st/run_configs/finetune_internvl2_8B/data_8B.json +++ b/tests/st/run_configs/finetune_internvl2_8B/data_8B.json @@ -23,7 +23,6 @@ "from_pretrained": "/home/ci_resource/models/InternVL2-8B/pretrained/raw_ckpt/InternVL2-8B", "model_max_length": 4096, "add_eos_token": false, - "trust_remote_code": true, "use_fast": false }, "use_text_processer": true, diff --git a/tests/st/run_configs/inference_qwen2vl_7B_pp1/inference_qwen2vl_7b.json b/tests/st/run_configs/inference_qwen2vl_7B_pp1/inference_qwen2vl_7b.json index 7df1c60b45b64857d1e4fc984229b207270ee26a..4bf6067b204c9c6e1f45aae04e60ef22039cef02 100644 --- a/tests/st/run_configs/inference_qwen2vl_7B_pp1/inference_qwen2vl_7b.json +++ b/tests/st/run_configs/inference_qwen2vl_7B_pp1/inference_qwen2vl_7b.json @@ -85,8 +85,7 @@ "tokenizer": { "hub_backend": "hf", "autotokenizer_name": "AutoTokenizer", - "from_pretrained": "/home/ci_resource/models/qwen2vl_7b/qwen2vl7b", - "local_files_only":false + "from_pretrained": "/home/ci_resource/models/qwen2vl_7b/qwen2vl7b" }, "generation_config": { "bos_token_id": 151643, diff --git a/tests/st/run_configs/inference_qwen2vl_7B_pp4/inference_qwen2vl_7b.json b/tests/st/run_configs/inference_qwen2vl_7B_pp4/inference_qwen2vl_7b.json index 7df1c60b45b64857d1e4fc984229b207270ee26a..4bf6067b204c9c6e1f45aae04e60ef22039cef02 100644 --- a/tests/st/run_configs/inference_qwen2vl_7B_pp4/inference_qwen2vl_7b.json +++ b/tests/st/run_configs/inference_qwen2vl_7B_pp4/inference_qwen2vl_7b.json @@ -85,8 +85,7 @@ "tokenizer": { "hub_backend": "hf", "autotokenizer_name": "AutoTokenizer", - "from_pretrained": "/home/ci_resource/models/qwen2vl_7b/qwen2vl7b", - "local_files_only":false + "from_pretrained": "/home/ci_resource/models/qwen2vl_7b/qwen2vl7b" }, "generation_config": { "bos_token_id": 151643, diff --git a/tests/st/shell_scripts/finetune_internvl2_8B.sh b/tests/st/shell_scripts/finetune_internvl2_8B.sh index 16fb69b38c7bfce5b97f3fc357d36e7c4fd8ccd4..e006622aaecc0eedeb3806ec46369e7df45ddce1 100644 --- a/tests/st/shell_scripts/finetune_internvl2_8B.sh +++ b/tests/st/shell_scripts/finetune_internvl2_8B.sh @@ -84,6 +84,7 @@ GPT_ARGS=" --normalization RMSNorm \ --use-fused-rmsnorm \ --num-workers 4 \ + --trust-remote-code \ " OUTPUT_ARGS=" diff --git a/tests/ut/models/text_encoder/test_text_encoder_processor.py 
b/tests/ut/models/text_encoder/test_text_encoder_processor.py index fd19421552a5a25ead4153938508dc172c2a12b8..2ae206c617b624857d3f6367a4a07b685572d9b8 100644 --- a/tests/ut/models/text_encoder/test_text_encoder_processor.py +++ b/tests/ut/models/text_encoder/test_text_encoder_processor.py @@ -26,7 +26,7 @@ class TestTextEncoder: text_encoder_dict = { "hub_backend": "hf", "model_id": "T5", - "dtype": "bf16", + "dtype": "bf16", "from_pretrained": T5_MODEL_PATH, } tokenizer_dict = {