From 1eaa2b174e2097b1a55bc81d6491a8019749b8da Mon Sep 17 00:00:00 2001 From: htwang Date: Tue, 25 Feb 2025 10:04:49 +0800 Subject: [PATCH 1/4] support local files only --- checkpoint/internvl2_hf_to_mm.py | 10 ++++--- checkpoint/utils.py | 8 +++--- examples/diffusers/sana/patch_sana.py | 3 ++- examples/hunyuanvideo/convert_ckpt_to_mm.py | 23 ++++++++-------- .../internvl2.5_convert_to_mm_ckpt.py | 26 ++++++++++--------- .../internvl2/internvl2_convert_to_mm_ckpt.py | 6 +++-- examples/llava1.5/vicuna_converter.py | 10 ++++--- mindspeed | 1 + mindspeed_mm/arguments.py | 18 ++++++++++--- .../models/text_encoder/text_encoder.py | 18 ++++++------- mindspeed_mm/models/text_encoder/tokenizer.py | 6 ++--- .../test_text_encoder_processor.py | 2 +- 12 files changed, 77 insertions(+), 54 deletions(-) create mode 120000 mindspeed diff --git a/checkpoint/internvl2_hf_to_mm.py b/checkpoint/internvl2_hf_to_mm.py index 62f037aa..35f25e6e 100644 --- a/checkpoint/internvl2_hf_to_mm.py +++ b/checkpoint/internvl2_hf_to_mm.py @@ -17,8 +17,10 @@ from checkpoint.utils import ConvertVppMMConfig def load_from_hf(load_dir, trust_remote_code): # Load Huggingface model. - hf_model = AutoModelForCausalLM.from_pretrained(load_dir, device_map='cpu', trust_remote_code=trust_remote_code, - local_files_only=True) + hf_model = AutoModelForCausalLM.from_pretrained( + load_dir, device_map='cpu', + trust_remote_code=trust_remote_code, + local_files_only=True) print(hf_model) return hf_model @@ -274,7 +276,7 @@ def main(convert_config: ConvertVppMMConfig): pp_size = parallel_config.pp_size vp_size = parallel_config.vpp_size - + pp_split = merge_pp_index( vit_pipeline_num_layers=parallel_config.vit_pp_layers, llm_pipeline_num_layers=parallel_config.llm_pp_layers @@ -288,7 +290,7 @@ def main(convert_config: ConvertVppMMConfig): if len(remains) > 0: print(remains) raise RuntimeWarning("There are some weights ungrouped.") - + for rank, pipeline_state_dict in enumerate(pipeline_state_dicts): print(20 * '#', f'stage {rank}', 20 * '#') for key, value in pipeline_state_dict.items(): diff --git a/checkpoint/utils.py b/checkpoint/utils.py index ee702b84..b995beaa 100644 --- a/checkpoint/utils.py +++ b/checkpoint/utils.py @@ -42,7 +42,7 @@ class ParallelConfig(BaseModel): if len(self.vit_pp_layers) < 1: raise ValueError("pp layers长度至少为1") return self - + class VppParallelConfig(BaseModel): """权模型切分配置,包括tp的size,以及pp切分时vit和llm在pp域每张卡上切分的层数""" @@ -59,7 +59,7 @@ class VppParallelConfig(BaseModel): @computed_field def pp_size(self) -> PositiveInt: return len(self.llm_pp_layers[0]) - + @computed_field def vpp_size(self) -> PositiveInt: return len(self.llm_pp_layers) @@ -71,7 +71,7 @@ class VppParallelConfig(BaseModel): if len(self.vit_pp_layers) < 1: raise ValueError("pp layers长度至少为1") return self - + @model_validator(mode='after') def validate_vpp_layers(self) -> "VppParallelConfig": pp_size = self.pp_size @@ -147,7 +147,7 @@ class ConvertResplitConfig(BaseModel): if sum(self.source_parallel_config.llm_pp_layers) != sum(self.target_parallel_config.llm_pp_layers): raise ValueError("llm pp layers not equal!") return self - + # BaseModel/dataclasses注意要在field的下一行添加描述说明 class ConvertVppMMConfig(BaseModel): diff --git a/examples/diffusers/sana/patch_sana.py b/examples/diffusers/sana/patch_sana.py index 7f943b60..362cd5fc 100644 --- a/examples/diffusers/sana/patch_sana.py +++ b/examples/diffusers/sana/patch_sana.py @@ -72,7 +72,8 @@ def create_load_model_hook( raise ValueError(f"unexpected save model: {model.__class__}") else: transformer_ = SanaTransformer2DModel.from_pretrained( - args.pretrained_model_name_or_path, subfolder="transformer" + args.pretrained_model_name_or_path, subfolder="transformer", + local_files_only=True ) # Make sure the trainable params are in float32. This is again needed since the base models diff --git a/examples/hunyuanvideo/convert_ckpt_to_mm.py b/examples/hunyuanvideo/convert_ckpt_to_mm.py index c12ee1b8..2824b2bf 100644 --- a/examples/hunyuanvideo/convert_ckpt_to_mm.py +++ b/examples/hunyuanvideo/convert_ckpt_to_mm.py @@ -40,7 +40,8 @@ def preprocess_text_encoder_tokenizer(source_dir, save_dir): model = LlavaForConditionalGeneration.from_pretrained( source_dir, torch_dtype=torch.float16, - low_cpu_mem_usage=True + low_cpu_mem_usage=True, + local_files_only=True ) model.language_model.save_pretrained(save_dir) processor.tokenizer.save_pretrained(save_dir) @@ -122,7 +123,7 @@ def get_tp_split_layer_names( f"single_blocks.{index}.linear1.weight", f"single_blocks.{index}.linear1.bias", ] - + return ( column_parallel_linears, row_parallel_linears, @@ -165,7 +166,7 @@ def split_by_tp( new_state_dict[name] = torch.chunk(state_dict[name], tp_size, dim=0)[tp_rank] for name in row_parallel_linears: new_state_dict[name] = torch.chunk(state_dict[name], tp_size, dim=1)[tp_rank] - + for name in qkv_fused_projs: wq, wk, wv = torch.chunk(state_dict[name], 3, dim=0) wq = torch.chunk(wq, tp_size, dim=0)[tp_rank] @@ -210,7 +211,7 @@ def merge_by_tp( if tp_size == 1: return state_dicts - + merged_state_dict = copy.deepcopy(state_dicts[0]) ( column_parallel_linears, @@ -234,7 +235,7 @@ def merge_by_tp( [state_dicts[tp_rank][name] for tp_rank in range(tp_size)], dim=1 ) - + for name in qkv_fused_projs: wq = torch.cat( [torch.chunk(state_dicts[tp_rank][name], 3, dim=0)[0] for tp_rank in range(tp_size)], @@ -270,7 +271,7 @@ def merge_by_tp( dim=0 ) merged_state_dict[name] = torch.cat([wq, wk, wv, wmlp], dim=0) - + for name in x_mlp_fused_row_parallel_linear: wx = torch.cat( [state_dicts[tp_rank][name][:, :hidden_size // tp_size] for tp_rank in range(tp_size)], @@ -281,7 +282,7 @@ def merge_by_tp( dim=1, ) merged_state_dict[name] = torch.cat([wx, wmlp], dim=1) - + return merged_state_dict @@ -301,7 +302,7 @@ def load_state_dicts_by_tp(load_dir: str, tp_size: int = 2) -> List[Dict[str, An state_dict_path = os.path.join(load_dir, directory, f"mp_rank_{tp_rank:02d}", "model_optim_rng.pt") tp_state_dicts.append(torch.load(state_dict_path)['model']) - return tp_state_dicts + return tp_state_dicts def save(state_dicts: List[Dict], save_dir: str, latest_checkpointed_iteration="release"): @@ -316,7 +317,7 @@ def save(state_dicts: List[Dict], save_dir: str, latest_checkpointed_iteration=" directory = 'release' else: directory = 'iter_{:07d}'.format(latest_checkpointed_iteration) - + for tp_rank, state_dict in enumerate(state_dicts): os.makedirs(os.path.join(save_dir, directory, f"mp_rank_{tp_rank:02d}")) save_path = os.path.join(save_dir, directory, f"mp_rank_{tp_rank:02d}", "model_optim_rng.pt") @@ -331,7 +332,7 @@ def get_args(): parser.add_argument("--source_path", type=str, default="./transformers/mp_rank_00/model_states.pt", help="Source path of checkpoint") parser.add_argument("--target_path", type=str, default="./ckpt/hunyuanvideo/", help="Save path of MM checkpoint") parser.add_argument("--tp_size", type=int, default=2, help="Tensor model parallel world size") - parser.add_argument("--mode", type=str, default="split", choices=["split", "merge"], + parser.add_argument("--mode", type=str, default="split", choices=["split", "merge"], help="Split mode is used to split the pretrained weights according to tp_size before training, \ and Merge mode is used to merge weights based on tp_size after training is completed") @@ -345,7 +346,7 @@ if __name__ == "__main__": if args.module == "text_encoder": preprocess_text_encoder_tokenizer(args.source_path, args.target_path) else: - if args.mode == "split": + if args.mode == "split": source_state_dict = torch.load(args.source_path, map_location='cpu')['module'] state_dict = replace_state_dict(source_state_dict, convert_mapping=DIT_CONVERT_MAPPING) state_dicts = split_by_tp( diff --git a/examples/internvl2.5/internvl2.5_convert_to_mm_ckpt.py b/examples/internvl2.5/internvl2.5_convert_to_mm_ckpt.py index a7be244a..31b5113d 100644 --- a/examples/internvl2.5/internvl2.5_convert_to_mm_ckpt.py +++ b/examples/internvl2.5/internvl2.5_convert_to_mm_ckpt.py @@ -3,7 +3,7 @@ import os from copy import deepcopy from dataclasses import dataclass import stat -import re +import re import torch from transformers import AutoModelForCausalLM, AutoConfig @@ -78,8 +78,10 @@ model_config_dict = { def load_from_hf(load_dir, trust_remote_code): # Load Huggingface model. - hf_model = AutoModelForCausalLM.from_pretrained(load_dir, device_map='cpu', trust_remote_code=trust_remote_code, - local_files_only=True) + hf_model = AutoModelForCausalLM.from_pretrained( + load_dir, device_map='cpu', + trust_remote_code=trust_remote_code, + local_files_only=True) print(hf_model) config = AutoConfig.from_pretrained(load_dir, trust_remote_code=trust_remote_code) global llm_arch @@ -187,7 +189,7 @@ def convert_hg_to_mm(_state_dict, model_config, num_key_value_heads): new_key = new_key.replace('post_attention_layernorm', 'pre_mlp_layernorm') new_key = new_key.replace('gate_proj', 'linear_fc1_gate') new_key = new_key.replace('up_proj', 'linear_fc1_up') - new_key = new_key.replace('down_proj', 'linear_fc2') + new_key = new_key.replace('down_proj', 'linear_fc2') new_key = new_key.replace('model.norm', 'decoder.final_layernorm') new_key = new_key.replace('model.embed_tokens', 'embedding.word_embeddings') @@ -222,10 +224,10 @@ def convert_hg_to_mm(_state_dict, model_config, num_key_value_heads): wv = new_dict[v_name] else: raise AssertionError(f'Missing key {v_name}') - + q_chunks = torch.chunk(wq, num_key_value_heads, dim=0) k_chunks = torch.chunk(wk, num_key_value_heads, dim=0) - v_chunks = torch.chunk(wv, num_key_value_heads, dim=0) + v_chunks = torch.chunk(wv, num_key_value_heads, dim=0) all_chunks = [] for j in range(num_key_value_heads): all_chunks.append(q_chunks[j]) @@ -260,10 +262,10 @@ def convert_hg_to_mm(_state_dict, model_config, num_key_value_heads): wv = new_dict[v_name] else: raise AssertionError(f'Missing key {v_name}') - + q_chunks = torch.chunk(wq, num_key_value_heads, dim=0) k_chunks = torch.chunk(wk, num_key_value_heads, dim=0) - v_chunks = torch.chunk(wv, num_key_value_heads, dim=0) + v_chunks = torch.chunk(wv, num_key_value_heads, dim=0) all_chunks = [] for j in range(num_key_value_heads): all_chunks.append(q_chunks[j]) @@ -276,7 +278,7 @@ def convert_hg_to_mm(_state_dict, model_config, num_key_value_heads): if k_name in new_dict: new_dict.pop(k_name) if v_name in new_dict: - new_dict.pop(v_name) + new_dict.pop(v_name) # 合并mlp的gate和up权重 @@ -433,17 +435,17 @@ if __name__ == '__main__': model_config = get_model_config( args.model_size, args.vpp) pp_split = merge_pp_index(model_config) - + for key, value in state_dict.items(): print(key, value.shape) state_dict = convert_hg_to_mm(state_dict, model_config, num_key_value_heads) pipeline_state_dicts, remains = split_model_by_pipeline(state_dict, pp_split) - + if len(remains) > 0: print(remains) raise RuntimeWarning("There are some weights ungrouped.") - + for rank, pipeline_state_dict in enumerate(pipeline_state_dicts): print(20 * '#', f'stage {rank}', 20 * '#') for key, value in pipeline_state_dict.items(): diff --git a/examples/internvl2/internvl2_convert_to_mm_ckpt.py b/examples/internvl2/internvl2_convert_to_mm_ckpt.py index 6c6379d2..b2346ab2 100644 --- a/examples/internvl2/internvl2_convert_to_mm_ckpt.py +++ b/examples/internvl2/internvl2_convert_to_mm_ckpt.py @@ -13,8 +13,10 @@ llm_arch = '' def load_from_hf(load_dir, trust_remote_code): # Load Huggingface model. - hf_model = AutoModelForCausalLM.from_pretrained(load_dir, device_map='cpu', trust_remote_code=trust_remote_code, - local_files_only=True) + hf_model = AutoModelForCausalLM.from_pretrained( + load_dir, device_map='cpu', + trust_remote_code=trust_remote_code, + local_files_only=True) print(hf_model) config = AutoConfig.from_pretrained(load_dir, trust_remote_code=trust_remote_code) global llm_arch diff --git a/examples/llava1.5/vicuna_converter.py b/examples/llava1.5/vicuna_converter.py index ab95b13e..04727556 100644 --- a/examples/llava1.5/vicuna_converter.py +++ b/examples/llava1.5/vicuna_converter.py @@ -7,11 +7,13 @@ from transformers import AutoModelForCausalLM, AutoConfig def load_from_hf(load_dir, trust_remote_code): # Load Huggingface model. - hf_model = AutoModelForCausalLM.from_pretrained(load_dir, device_map='cpu', trust_remote_code=trust_remote_code, - torch_dtype=torch.bfloat16, local_files_only=True) + hf_model = AutoModelForCausalLM.from_pretrained( + load_dir, device_map='cpu', + trust_remote_code=trust_remote_code, + torch_dtype=torch.bfloat16, local_files_only=True) print(hf_model) config = AutoConfig.from_pretrained(load_dir, trust_remote_code=trust_remote_code) - + return hf_model, config @@ -26,7 +28,7 @@ def merge_qkv(wq, wk, wv, ng=32): qkv[j * d : j * d + dq, :] = wq[j * dq : (j + 1) * dq, :] qkv[j * d + dq : j * d + dq + dkv, :] = wk[j * dkv : (j + 1) * dkv, :] qkv[j * d + dq + dkv : j * d + dq + dkv * 2, :] = wv[j * dkv : (j + 1) * dkv, :] - + return qkv diff --git a/mindspeed b/mindspeed new file mode 120000 index 00000000..22f84c16 --- /dev/null +++ b/mindspeed @@ -0,0 +1 @@ +../MindSpeed-3f09d67/mindspeed/ \ No newline at end of file diff --git a/mindspeed_mm/arguments.py b/mindspeed_mm/arguments.py index 51d17339..8a280412 100644 --- a/mindspeed_mm/arguments.py +++ b/mindspeed_mm/arguments.py @@ -34,6 +34,7 @@ def process_args(parser): parser = _add_network_size_args(parser) parser = _add_dummy_optimizer_args(parser) parser = _add_logging_args(parser) + parser = _add_security_args(parser) return parser @@ -115,11 +116,22 @@ def _add_dummy_optimizer_args(parser): def _add_logging_args(parser): - group = parser.add_argument_group(title='mm_logging') + group = parser.add_argument_group(title='logging') group.add_argument('--log-tps', action='store_true', default=False, help='calculate and log average tokens per sample') - - return parser \ No newline at end of file + + return parser + + +def _add_security_args(parser): + group = parser.add_argument_group(title='security configuration') + + group.add_argument('--trust-remote-files', + action='store_true', + default=False, + help='Whether or not to allow for custom models defined on the Hub in their own modeling files.') + + return parser diff --git a/mindspeed_mm/models/text_encoder/text_encoder.py b/mindspeed_mm/models/text_encoder/text_encoder.py index b6c150de..10d26bd5 100644 --- a/mindspeed_mm/models/text_encoder/text_encoder.py +++ b/mindspeed_mm/models/text_encoder/text_encoder.py @@ -25,18 +25,18 @@ class TextEncoder(nn.Module): "backend": type-str, "hf" or "om", "model_id": type-str, "AutoModel" or other automodel name, "dtype": type-str, dtype of text encoder - + (2) args for automodel.from_pretrained() of transformers or openmind "pretrained_model_name_or_path": type-str, local path or hub path, "local_files_only": type-bool, ... } - - If `config` is a list of dictionaries, each dictionary in the list will be used to instantiate a separate Text Encoder Model instance, + - If `config` is a list of dictionaries, each dictionary in the list will be used to instantiate a separate Text Encoder Model instance, effectively allowing the creation of multiple Text Encoder based on different configurations. """ def __init__(self, config): super().__init__() - + if isinstance(config, list) or isinstance(config, tuple): self.text_encoders = nn.ModuleList() for config_i in config: @@ -59,7 +59,7 @@ class TextEncoder(nn.Module): else: outputs = self._single_encode(self.text_encoders, input_ids, mask) return outputs - + def _single_encode(self, text_encoder, input_ids, attention_mask, **kwargs): *BN, L = input_ids.shape input_ids = input_ids.to(text_encoder.device).view(-1, L) @@ -90,16 +90,16 @@ class TextEncoder(nn.Module): ) * emb ) - + if text_encoder.output_key in ["last_hidden_state", "hidden_states"]: emb = emb.view(*BN, L, -1) elif text_encoder.output_key in ["pooler_output"]: emb = emb.view(*BN, -1) else: raise NotImplementedError(f"Text encoder output_key: {text_encoder.output_key} is not implenmented! ") - - return emb - + + return emb + def _init_text_encoder(self, config): if not isinstance(config, dict): config = config.to_dict() @@ -116,7 +116,7 @@ class TextEncoder(nn.Module): self.automodel_name = TEXT_ENCODER_MAPPING[model_id] config["pretrained_model_name_or_path"] = config.pop("from_pretrained") config["torch_dtype"] = get_dtype(config.pop("dtype")) - + config["local_files_only"] = True # Only huggingface backend is supported, OpenMind backend will be supported soon. module = importlib.import_module("transformers") automodel = getattr(module, self.automodel_name) diff --git a/mindspeed_mm/models/text_encoder/tokenizer.py b/mindspeed_mm/models/text_encoder/tokenizer.py index 3a875419..6600ed6c 100644 --- a/mindspeed_mm/models/text_encoder/tokenizer.py +++ b/mindspeed_mm/models/text_encoder/tokenizer.py @@ -20,7 +20,7 @@ class Tokenizer: "local_files_only": type-bool, ... } - - If `config` is a list of dictionaries, each dictionary in the list will be used to instantiate a separate Tokenizer instance, + - If `config` is a list of dictionaries, each dictionary in the list will be used to instantiate a separate Tokenizer instance, effectively allowing the creation of multiple tokenizers based on different configurations. """ @@ -32,11 +32,11 @@ class Tokenizer: tokenizer_i = self._init_tokenizer(module, config_i) self.tokenizers.append(tokenizer_i) else: - self.tokenizers = self._init_tokenizer(module, config) + self.tokenizers = self._init_tokenizer(module, config) def get_tokenizer(self): return self.tokenizers - + def _init_tokenizer(self, module, config): if not isinstance(config, dict): config = config.to_dict() diff --git a/tests/ut/models/text_encoder/test_text_encoder_processor.py b/tests/ut/models/text_encoder/test_text_encoder_processor.py index fd194215..2ae206c6 100644 --- a/tests/ut/models/text_encoder/test_text_encoder_processor.py +++ b/tests/ut/models/text_encoder/test_text_encoder_processor.py @@ -26,7 +26,7 @@ class TestTextEncoder: text_encoder_dict = { "hub_backend": "hf", "model_id": "T5", - "dtype": "bf16", + "dtype": "bf16", "from_pretrained": T5_MODEL_PATH, } tokenizer_dict = { -- Gitee From 3f86bc843b3b4002e42986a4b32aa6df1213b4b1 Mon Sep 17 00:00:00 2001 From: htwang Date: Tue, 25 Feb 2025 10:57:25 +0800 Subject: [PATCH 2/4] add trust remote code --- evaluate_vlm.py | 3 ++- examples/internvl2.5/data_4B.json | 1 - examples/internvl2.5/data_78B.json | 1 - examples/internvl2.5/finetune_internvl2.5_4B.sh | 3 ++- examples/internvl2.5/finetune_internvl2.5_78B.sh | 3 ++- examples/internvl2.5/inference_4B.json | 3 +-- examples/internvl2.5/inference_internvl.sh | 1 + examples/internvl2/data_26B.json | 3 +-- examples/internvl2/data_2B.json | 1 - examples/internvl2/data_76B.json | 1 - examples/internvl2/data_8B.json | 1 - examples/internvl2/evaluate_internvl2_8B.json | 1 - examples/internvl2/evaluate_internvl2_8B.sh | 1 + examples/internvl2/finetune_internvl2_26B.sh | 1 + examples/internvl2/finetune_internvl2_2B.sh | 1 + examples/internvl2/finetune_internvl2_8B.sh | 1 + examples/internvl2/finetune_internvl2_8B_vpp.sh | 1 + examples/internvl2/inference_2B.json | 3 +-- examples/internvl2/inference_8B.json | 3 +-- examples/internvl2/inference_internvl.sh | 1 + inference_qihoo.py | 3 ++- inference_vlm.py | 3 ++- mindspeed_mm/arguments.py | 6 ++++-- mindspeed_mm/data/datasets/audio_dataset.py | 5 +++-- mindspeed_mm/models/text_encoder/text_encoder.py | 2 ++ mindspeed_mm/models/text_encoder/tokenizer.py | 3 +++ tests/st/run_configs/finetune_internvl2_8B/data_8B.json | 1 - tests/st/shell_scripts/finetune_internvl2_8B.sh | 1 + 28 files changed, 34 insertions(+), 24 deletions(-) diff --git a/evaluate_vlm.py b/evaluate_vlm.py index 0d412758..38fd0d74 100644 --- a/evaluate_vlm.py +++ b/evaluate_vlm.py @@ -3,13 +3,14 @@ from megatron.training import get_args from megatron.training.initialize import initialize_megatron from mindspeed_mm.configs.config import merge_mm_args from mindspeed_mm.configs.config import mm_extra_args_provider +from mindspeed_mm.arguments import extra_args_provider_decorator from mindspeed_mm.tasks.evaluation.eval_datasets import eval_dataset_dict from mindspeed_mm.tasks.evaluation.eval_impl import eval_impl_dict, eval_pipeline_dict from mindspeed_mm.tasks.evaluation.eval_prompt import eval_model_prompt_dict def main(): - initialize_megatron(extra_args_provider=mm_extra_args_provider) + initialize_megatron(extra_args_provider=extra_args_provider_decorator(mm_extra_args_provider)) args = get_args() merge_mm_args(args) args = args.mm.model diff --git a/examples/internvl2.5/data_4B.json b/examples/internvl2.5/data_4B.json index f7b0e8e7..a134628c 100644 --- a/examples/internvl2.5/data_4B.json +++ b/examples/internvl2.5/data_4B.json @@ -24,7 +24,6 @@ "from_pretrained": "OpenGVLab/InternVL2_5-4B", "model_max_length": 4096, "add_eos_token": false, - "trust_remote_code": true, "use_fast": false }, "use_text_processer": true, diff --git a/examples/internvl2.5/data_78B.json b/examples/internvl2.5/data_78B.json index bd69b111..4cd0145d 100644 --- a/examples/internvl2.5/data_78B.json +++ b/examples/internvl2.5/data_78B.json @@ -24,7 +24,6 @@ "from_pretrained": "OpenGVLab/InternVL2_5-78B", "model_max_length": 4096, "add_eos_token": false, - "trust_remote_code": true, "use_fast": false }, "use_text_processer": true, diff --git a/examples/internvl2.5/finetune_internvl2.5_4B.sh b/examples/internvl2.5/finetune_internvl2.5_4B.sh index 2256b696..aa4abd96 100644 --- a/examples/internvl2.5/finetune_internvl2.5_4B.sh +++ b/examples/internvl2.5/finetune_internvl2.5_4B.sh @@ -79,7 +79,8 @@ GPT_ARGS=" --load $LOAD_PATH \ --variable-seq-lengths \ --normalization RMSNorm \ - --num-workers 4 + --num-workers 4 \ + --trust-remote-code \ " OUTPUT_ARGS=" diff --git a/examples/internvl2.5/finetune_internvl2.5_78B.sh b/examples/internvl2.5/finetune_internvl2.5_78B.sh index 2453d4c7..d4283d05 100644 --- a/examples/internvl2.5/finetune_internvl2.5_78B.sh +++ b/examples/internvl2.5/finetune_internvl2.5_78B.sh @@ -88,7 +88,8 @@ GPT_ARGS=" --load $LOAD_PATH \ --variable-seq-lengths \ --normalization RMSNorm \ - --num-workers 4 + --num-workers 4 \ + --trust-remote-code \ " OUTPUT_ARGS=" diff --git a/examples/internvl2.5/inference_4B.json b/examples/internvl2.5/inference_4B.json index 9d4988bc..e0f07a0a 100644 --- a/examples/internvl2.5/inference_4B.json +++ b/examples/internvl2.5/inference_4B.json @@ -1,4 +1,4 @@ -{ +{ "infer_data_type": "image", "file_path": "./examples/internvl2.5/view.jpg", "prompts": "Please describe the image shortly.", @@ -102,7 +102,6 @@ "from_pretrained": "OpenGVLab/InternVL2_5-4B", "model_max_length": 4096, "add_eos_token": false, - "trust_remote_code": true, "use_fast": false }, "generation_config":{ diff --git a/examples/internvl2.5/inference_internvl.sh b/examples/internvl2.5/inference_internvl.sh index 45fa37c3..0240ddf4 100644 --- a/examples/internvl2.5/inference_internvl.sh +++ b/examples/internvl2.5/inference_internvl.sh @@ -53,6 +53,7 @@ GPT_ARGS=" --no-masked-softmax-fusion \ --use-distributed-optimizer \ --bf16 \ + --trust-remote-code \ " OUTPUT_ARGS=" diff --git a/examples/internvl2/data_26B.json b/examples/internvl2/data_26B.json index 2f450d76..1c2f2a59 100644 --- a/examples/internvl2/data_26B.json +++ b/examples/internvl2/data_26B.json @@ -14,7 +14,7 @@ {"trans_type": "Resize", "param": {"size": [448, 448], "interpolation": "BICUBIC"}}, {"trans_type": "ToTensor"}, {"trans_type": "norm_fun", "param": {"mean":[0.485, 0.456, 0.406], "std": [0.229, 0.224, 0.225]}} - ] + ] } }, "tokenizer_config": { @@ -23,7 +23,6 @@ "from_pretrained": "OpenGVLab/InternVL2-26B", "model_max_length": 4096, "add_eos_token": false, - "trust_remote_code": true, "use_fast": false }, "use_text_processer": true, diff --git a/examples/internvl2/data_2B.json b/examples/internvl2/data_2B.json index 25a649cc..7f6cd0b1 100644 --- a/examples/internvl2/data_2B.json +++ b/examples/internvl2/data_2B.json @@ -24,7 +24,6 @@ "from_pretrained": "OpenGVLab/InternVL2-2B", "model_max_length": 4096, "add_eos_token": false, - "trust_remote_code": true, "use_fast": false }, "use_text_processer": true, diff --git a/examples/internvl2/data_76B.json b/examples/internvl2/data_76B.json index aa9c6014..21476838 100644 --- a/examples/internvl2/data_76B.json +++ b/examples/internvl2/data_76B.json @@ -23,7 +23,6 @@ "from_pretrained": "OpenGVLab/InternVL2-Llama3-76B", "model_max_length": 4096, "add_eos_token": false, - "trust_remote_code": false, "use_fast": false }, "use_text_processer": true, diff --git a/examples/internvl2/data_8B.json b/examples/internvl2/data_8B.json index 94898a66..e5bd9f0b 100644 --- a/examples/internvl2/data_8B.json +++ b/examples/internvl2/data_8B.json @@ -23,7 +23,6 @@ "from_pretrained": "OpenGVLab/InternVL2-8B", "model_max_length": 4096, "add_eos_token": false, - "trust_remote_code": true, "use_fast": false }, "use_text_processer": true, diff --git a/examples/internvl2/evaluate_internvl2_8B.json b/examples/internvl2/evaluate_internvl2_8B.json index fcab66f3..9002918e 100644 --- a/examples/internvl2/evaluate_internvl2_8B.json +++ b/examples/internvl2/evaluate_internvl2_8B.json @@ -104,7 +104,6 @@ "from_pretrained": "./InternVL2-8B", "model_max_length": 4096, "add_eos_token": false, - "trust_remote_code": true, "use_fast": false }, "generation_config":{ diff --git a/examples/internvl2/evaluate_internvl2_8B.sh b/examples/internvl2/evaluate_internvl2_8B.sh index 659bfaa9..cd6de99a 100644 --- a/examples/internvl2/evaluate_internvl2_8B.sh +++ b/examples/internvl2/evaluate_internvl2_8B.sh @@ -76,6 +76,7 @@ GPT_ARGS=" --bf16 \ --distributed-timeout-minutes 1000 \ --use-flash-attn \ + --trust-remote-code \ " OUTPUT_ARGS=" diff --git a/examples/internvl2/finetune_internvl2_26B.sh b/examples/internvl2/finetune_internvl2_26B.sh index 0a0e7a56..bee97ab8 100644 --- a/examples/internvl2/finetune_internvl2_26B.sh +++ b/examples/internvl2/finetune_internvl2_26B.sh @@ -82,6 +82,7 @@ GPT_ARGS=" --no-save-rng \ --num-workers 4 \ --enable-dummy-optimizer \ + --trust-remote-code \ " OUTPUT_ARGS=" diff --git a/examples/internvl2/finetune_internvl2_2B.sh b/examples/internvl2/finetune_internvl2_2B.sh index 09d068ad..6bed8ad0 100644 --- a/examples/internvl2/finetune_internvl2_2B.sh +++ b/examples/internvl2/finetune_internvl2_2B.sh @@ -81,6 +81,7 @@ GPT_ARGS=" --no-save-optim \ --no-save-rng \ --num-workers 4 \ + --trust-remote-code \ " OUTPUT_ARGS=" diff --git a/examples/internvl2/finetune_internvl2_8B.sh b/examples/internvl2/finetune_internvl2_8B.sh index c1fc0229..57d10a1b 100644 --- a/examples/internvl2/finetune_internvl2_8B.sh +++ b/examples/internvl2/finetune_internvl2_8B.sh @@ -81,6 +81,7 @@ GPT_ARGS=" --no-save-optim \ --no-save-rng \ --num-workers 4 \ + --trust-remote-code \ " OUTPUT_ARGS=" diff --git a/examples/internvl2/finetune_internvl2_8B_vpp.sh b/examples/internvl2/finetune_internvl2_8B_vpp.sh index c84611b5..2091707a 100644 --- a/examples/internvl2/finetune_internvl2_8B_vpp.sh +++ b/examples/internvl2/finetune_internvl2_8B_vpp.sh @@ -82,6 +82,7 @@ GPT_ARGS=" --no-save-optim \ --no-save-rng \ --num-workers 4 \ + --trust-remote-code \ " OUTPUT_ARGS=" diff --git a/examples/internvl2/inference_2B.json b/examples/internvl2/inference_2B.json index 67ecf8fb..0dd767ac 100644 --- a/examples/internvl2/inference_2B.json +++ b/examples/internvl2/inference_2B.json @@ -1,4 +1,4 @@ -{ +{ "infer_data_type": "image", "file_path": "./examples/internvl2/view.jpg", "prompts": "Please describe the image shortly.", @@ -102,7 +102,6 @@ "from_pretrained": "OpenGVLab/InternVL2-2B", "model_max_length": 4096, "add_eos_token": false, - "trust_remote_code": true, "use_fast": false }, "generation_config":{ diff --git a/examples/internvl2/inference_8B.json b/examples/internvl2/inference_8B.json index e79d6836..3f32d0e2 100644 --- a/examples/internvl2/inference_8B.json +++ b/examples/internvl2/inference_8B.json @@ -1,4 +1,4 @@ -{ +{ "infer_data_type": "image", "file_path": "./examples/internvl2/view.jpg", "prompts": "Please describe the image shortly.", @@ -102,7 +102,6 @@ "from_pretrained": "OpenGVLab/InternVL2-8B", "model_max_length": 4096, "add_eos_token": false, - "trust_remote_code": true, "use_fast": false }, "generation_config":{ diff --git a/examples/internvl2/inference_internvl.sh b/examples/internvl2/inference_internvl.sh index 4d945790..f7c0c11a 100644 --- a/examples/internvl2/inference_internvl.sh +++ b/examples/internvl2/inference_internvl.sh @@ -54,6 +54,7 @@ GPT_ARGS=" --use-distributed-optimizer \ --bf16 \ --use-flash-attn \ + --trust-remote-code \ " OUTPUT_ARGS=" diff --git a/inference_qihoo.py b/inference_qihoo.py index cf1ab3c5..a331df3f 100644 --- a/inference_qihoo.py +++ b/inference_qihoo.py @@ -6,6 +6,7 @@ from megatron.training.initialize import initialize_megatron from megatron.training import get_args from mindspeed_mm.configs.config import merge_mm_args, mm_extra_args_provider +from mindspeed_mm.arguments import extra_args_provider_decorator from mindspeed_mm.tasks.inference.pipeline import sora_pipeline_dict from mindspeed_mm.tasks.inference.pipeline.utils.sora_utils import save_videos, load_prompts, save_image_or_videos from mindspeed_mm.models.predictor import PredictModel @@ -37,7 +38,7 @@ def prepare_pipeline(args, device): def main(): - initialize_megatron(extra_args_provider=mm_extra_args_provider, args_defaults={}) + initialize_megatron(extra_args_provider=extra_args_provider_decorator(mm_extra_args_provider), args_defaults={}) args = get_args() merge_mm_args(args) args = args.mm.model diff --git a/inference_vlm.py b/inference_vlm.py index d82b97df..245609c6 100644 --- a/inference_vlm.py +++ b/inference_vlm.py @@ -4,6 +4,7 @@ import mindspeed.megatron_adaptor from megatron.training import get_args from mindspeed_mm.tasks.inference.pipeline import vlm_pipeline_dict from mindspeed_mm.configs.config import mm_extra_args_provider +from mindspeed_mm.arguments import extra_args_provider_decorator def main(): @@ -14,7 +15,7 @@ def main(): torch.set_grad_enabled(False) initialize_megatron( - extra_args_provider=mm_extra_args_provider, args_defaults={'tokenizer_type': 'GPT2BPETokenizer'} + extra_args_provider=extra_args_provider_decorator(mm_extra_args_provider), args_defaults={'tokenizer_type': 'GPT2BPETokenizer'} ) args = get_args() merge_mm_args(args) diff --git a/mindspeed_mm/arguments.py b/mindspeed_mm/arguments.py index 8a280412..47fa1b24 100644 --- a/mindspeed_mm/arguments.py +++ b/mindspeed_mm/arguments.py @@ -76,7 +76,9 @@ def _add_training_args(parser): action='store_true', default=False, help='Use internal format to train') - group.add_argument('--virtual-pipeline-model-parallel-size', type=int, default=None, + group.add_argument('--virtual-pipeline-model-parallel-size', + type=int, + default=None, help='vpp size') group.add_argument('--encoder-dp-balance', action='store_true', @@ -129,7 +131,7 @@ def _add_logging_args(parser): def _add_security_args(parser): group = parser.add_argument_group(title='security configuration') - group.add_argument('--trust-remote-files', + group.add_argument('--trust-remote-code', action='store_true', default=False, help='Whether or not to allow for custom models defined on the Hub in their own modeling files.') diff --git a/mindspeed_mm/data/datasets/audio_dataset.py b/mindspeed_mm/data/datasets/audio_dataset.py index c53bf8a0..d9265b2a 100644 --- a/mindspeed_mm/data/datasets/audio_dataset.py +++ b/mindspeed_mm/data/datasets/audio_dataset.py @@ -14,6 +14,7 @@ from datasets import Audio, load_dataset from torch.utils.data import Dataset from transformers import WhisperProcessor +from megatron.training import get_args class AudioDataset(Dataset): @@ -40,7 +41,7 @@ class AudioDataset(Dataset): dataset_name_or_path, language, split="train+validation", - trust_remote_code=True, + trust_remote_code=get_args().trust_remote_code, ) train_dataset = train_dataset.remove_columns( [ @@ -59,7 +60,7 @@ class AudioDataset(Dataset): processor = WhisperProcessor.from_pretrained( processor_name_or_path, language=processor_language, - task=task, + task=task, local_files_only=True, ) feature_extractor = processor.feature_extractor diff --git a/mindspeed_mm/models/text_encoder/text_encoder.py b/mindspeed_mm/models/text_encoder/text_encoder.py index 10d26bd5..505a8edd 100644 --- a/mindspeed_mm/models/text_encoder/text_encoder.py +++ b/mindspeed_mm/models/text_encoder/text_encoder.py @@ -2,6 +2,7 @@ import importlib import torch import torch.nn as nn from mindspeed_mm.utils.utils import get_dtype +from megatron.training import get_args TEXT_ENCODER_MAPPING = { @@ -116,6 +117,7 @@ class TextEncoder(nn.Module): self.automodel_name = TEXT_ENCODER_MAPPING[model_id] config["pretrained_model_name_or_path"] = config.pop("from_pretrained") config["torch_dtype"] = get_dtype(config.pop("dtype")) + config["trust_remote_code"] = get_args().trust_remote_code config["local_files_only"] = True # Only huggingface backend is supported, OpenMind backend will be supported soon. module = importlib.import_module("transformers") diff --git a/mindspeed_mm/models/text_encoder/tokenizer.py b/mindspeed_mm/models/text_encoder/tokenizer.py index 6600ed6c..7f72c9e7 100644 --- a/mindspeed_mm/models/text_encoder/tokenizer.py +++ b/mindspeed_mm/models/text_encoder/tokenizer.py @@ -1,5 +1,6 @@ import importlib from torch import nn +from megatron.training import get_args class Tokenizer: @@ -45,5 +46,7 @@ class Tokenizer: self.backend = config.pop("hub_backend") tokenizer_name = config.pop("autotokenizer_name") config["pretrained_model_name_or_path"] = config.pop("from_pretrained") + config["trust_remote_code"] = get_args().trust_remote_code + config["local_files_only"] = True tokenizer_cls = getattr(module, tokenizer_name) return tokenizer_cls.from_pretrained(**config) \ No newline at end of file diff --git a/tests/st/run_configs/finetune_internvl2_8B/data_8B.json b/tests/st/run_configs/finetune_internvl2_8B/data_8B.json index 8cc38468..bf61000c 100644 --- a/tests/st/run_configs/finetune_internvl2_8B/data_8B.json +++ b/tests/st/run_configs/finetune_internvl2_8B/data_8B.json @@ -23,7 +23,6 @@ "from_pretrained": "/home/ci_resource/models/InternVL2-8B/pretrained/raw_ckpt/InternVL2-8B", "model_max_length": 4096, "add_eos_token": false, - "trust_remote_code": true, "use_fast": false }, "use_text_processer": true, diff --git a/tests/st/shell_scripts/finetune_internvl2_8B.sh b/tests/st/shell_scripts/finetune_internvl2_8B.sh index 16fb69b3..e006622a 100644 --- a/tests/st/shell_scripts/finetune_internvl2_8B.sh +++ b/tests/st/shell_scripts/finetune_internvl2_8B.sh @@ -84,6 +84,7 @@ GPT_ARGS=" --normalization RMSNorm \ --use-fused-rmsnorm \ --num-workers 4 \ + --trust-remote-code \ " OUTPUT_ARGS=" -- Gitee From 59ecf3033371e5b77492e5c8d72948124fe27767 Mon Sep 17 00:00:00 2001 From: htwang Date: Tue, 25 Feb 2025 11:21:16 +0800 Subject: [PATCH 3/4] update --- .gitignore | 4 ++-- checkpoint/utils.py | 2 +- examples/internvl2.5/internvl2.5_convert_to_mm_ckpt.py | 3 ++- examples/internvl2/internvl2_convert_to_mm_ckpt.py | 3 ++- examples/llava1.5/evaluate_llava1_5.json | 1 - examples/llava1.5/inference_llava.json | 3 +-- examples/llava1.5/vicuna_converter.py | 3 ++- examples/opensora1.0/inference_model_120x256x256.json | 2 -- examples/opensora1.0/inference_model_16x512x512.json | 2 -- examples/opensora1.2/inference_model_102x720x1280.json | 2 -- examples/opensoraplan1.2/inference_model_29x480x640.json | 2 -- examples/opensoraplan1.3/i2v/inference_i2v_model.json | 5 ++--- examples/opensoraplan1.3/t2v/inference_t2v_model.json | 5 ++--- examples/qihoo_t2x/inference_model_image.json | 4 +--- examples/qwen2vl/evaluate_qwen2vl_7b.json | 3 +-- examples/qwen2vl/inference_qwen2vl_2b.json | 3 +-- examples/qwen2vl/inference_qwen2vl_72b.json | 3 +-- examples/qwen2vl/inference_qwen2vl_7b.json | 3 +-- examples/whisper/pretrain_whisper.sh | 1 + mindspeed | 1 - mindspeed_mm/models/text_encoder/text_encoder.py | 3 ++- mindspeed_mm/models/text_encoder/tokenizer.py | 1 - .../inference_qwen2vl_7B_pp1/inference_qwen2vl_7b.json | 3 +-- .../inference_qwen2vl_7B_pp4/inference_qwen2vl_7b.json | 3 +-- 24 files changed, 24 insertions(+), 41 deletions(-) delete mode 120000 mindspeed diff --git a/.gitignore b/.gitignore index 7aa4dbb4..1977d146 100644 --- a/.gitignore +++ b/.gitignore @@ -151,9 +151,9 @@ cython_debug/ /ci/kernel*/ # mindspeed core -/mindspeed/ +mindspeed -# test +# test /tests/st/run_jsons/ /tests/st/run_logs/ diff --git a/checkpoint/utils.py b/checkpoint/utils.py index b995beaa..14f20618 100644 --- a/checkpoint/utils.py +++ b/checkpoint/utils.py @@ -91,7 +91,7 @@ class HfConfig(BaseModel): @cached_property def config(self) -> PretrainedConfig: - return AutoConfig.from_pretrained(self.hf_dir) + return AutoConfig.from_pretrained(self.hf_dir, local_files_only=True) @model_validator(mode='after') def validate_hf_dir(self) -> "HfConfig": diff --git a/examples/internvl2.5/internvl2.5_convert_to_mm_ckpt.py b/examples/internvl2.5/internvl2.5_convert_to_mm_ckpt.py index 31b5113d..d6f12037 100644 --- a/examples/internvl2.5/internvl2.5_convert_to_mm_ckpt.py +++ b/examples/internvl2.5/internvl2.5_convert_to_mm_ckpt.py @@ -83,7 +83,8 @@ def load_from_hf(load_dir, trust_remote_code): trust_remote_code=trust_remote_code, local_files_only=True) print(hf_model) - config = AutoConfig.from_pretrained(load_dir, trust_remote_code=trust_remote_code) + config = AutoConfig.from_pretrained( + load_dir, trust_remote_code=trust_remote_code, local_files_only=True) global llm_arch llm_arch = config.llm_config.architectures[0] return hf_model, config diff --git a/examples/internvl2/internvl2_convert_to_mm_ckpt.py b/examples/internvl2/internvl2_convert_to_mm_ckpt.py index b2346ab2..cbc2b920 100644 --- a/examples/internvl2/internvl2_convert_to_mm_ckpt.py +++ b/examples/internvl2/internvl2_convert_to_mm_ckpt.py @@ -18,7 +18,8 @@ def load_from_hf(load_dir, trust_remote_code): trust_remote_code=trust_remote_code, local_files_only=True) print(hf_model) - config = AutoConfig.from_pretrained(load_dir, trust_remote_code=trust_remote_code) + config = AutoConfig.from_pretrained( + load_dir, trust_remote_code=trust_remote_code, local_files_only=True) global llm_arch llm_arch = config.llm_config.architectures[0] return hf_model diff --git a/examples/llava1.5/evaluate_llava1_5.json b/examples/llava1.5/evaluate_llava1_5.json index deba5834..a93a82c1 100644 --- a/examples/llava1.5/evaluate_llava1_5.json +++ b/examples/llava1.5/evaluate_llava1_5.json @@ -87,7 +87,6 @@ "hub_backend": "hf", "autotokenizer_name": "AutoTokenizer", "from_pretrained": "./llava_7b", - "local_files_only": false, "use_fast": false }, "generation_config": { diff --git a/examples/llava1.5/inference_llava.json b/examples/llava1.5/inference_llava.json index 38ba3b3c..c18e068c 100644 --- a/examples/llava1.5/inference_llava.json +++ b/examples/llava1.5/inference_llava.json @@ -89,8 +89,7 @@ "tokenizer":{ "hub_backend": "hf", "autotokenizer_name": "AutoTokenizer", - "from_pretrained": "llava_weights/vicuna-7b-v1.5", - "local_files_only": false + "from_pretrained": "llava_weights/vicuna-7b-v1.5" }, "generation_config":{ "bos_token_id": 1, diff --git a/examples/llava1.5/vicuna_converter.py b/examples/llava1.5/vicuna_converter.py index 04727556..3e464140 100644 --- a/examples/llava1.5/vicuna_converter.py +++ b/examples/llava1.5/vicuna_converter.py @@ -12,7 +12,8 @@ def load_from_hf(load_dir, trust_remote_code): trust_remote_code=trust_remote_code, torch_dtype=torch.bfloat16, local_files_only=True) print(hf_model) - config = AutoConfig.from_pretrained(load_dir, trust_remote_code=trust_remote_code) + config = AutoConfig.from_pretrained( + load_dir, trust_remote_code=trust_remote_code, local_files_only=True) return hf_model, config diff --git a/examples/opensora1.0/inference_model_120x256x256.json b/examples/opensora1.0/inference_model_120x256x256.json index 31fe16d5..e8d7ddc3 100644 --- a/examples/opensora1.0/inference_model_120x256x256.json +++ b/examples/opensora1.0/inference_model_120x256x256.json @@ -10,7 +10,6 @@ "hub_backend": "hf", "model_id": "T5", "from_pretrained": "DeepFloyd/t5-v1_1-xxl", - "local_files_only": false, "low_cpu_mem_usage": true, "dtype": "fp32" }, @@ -18,7 +17,6 @@ "hub_backend": "hf", "autotokenizer_name": "AutoTokenizer", "from_pretrained": "DeepFloyd/t5-v1_1-xxl", - "local_files_only": false, "model_max_length": 120 }, "predictor": { diff --git a/examples/opensora1.0/inference_model_16x512x512.json b/examples/opensora1.0/inference_model_16x512x512.json index d382f266..5965d4b0 100644 --- a/examples/opensora1.0/inference_model_16x512x512.json +++ b/examples/opensora1.0/inference_model_16x512x512.json @@ -10,7 +10,6 @@ "hub_backend": "hf", "model_id": "T5", "from_pretrained": "DeepFloyd/t5-v1_1-xxl", - "local_files_only": false, "low_cpu_mem_usage": true, "dtype": "fp32" }, @@ -18,7 +17,6 @@ "hub_backend": "hf", "autotokenizer_name": "AutoTokenizer", "from_pretrained": "DeepFloyd/t5-v1_1-xxl", - "local_files_only": false, "model_max_length": 120 }, "predictor": { diff --git a/examples/opensora1.2/inference_model_102x720x1280.json b/examples/opensora1.2/inference_model_102x720x1280.json index 928ada67..469e945a 100644 --- a/examples/opensora1.2/inference_model_102x720x1280.json +++ b/examples/opensora1.2/inference_model_102x720x1280.json @@ -15,7 +15,6 @@ "hub_backend": "hf", "model_id": "T5", "from_pretrained": "DeepFloyd/t5-v1_1-xxl", - "local_files_only": false, "low_cpu_mem_usage": true, "dtype": "fp32" }, @@ -23,7 +22,6 @@ "hub_backend": "hf", "autotokenizer_name": "AutoTokenizer", "from_pretrained": "DeepFloyd/t5-v1_1-xxl", - "local_files_only": false, "model_max_length":300 }, "predictor": { diff --git a/examples/opensoraplan1.2/inference_model_29x480x640.json b/examples/opensoraplan1.2/inference_model_29x480x640.json index 4c5548d4..8cc08231 100644 --- a/examples/opensoraplan1.2/inference_model_29x480x640.json +++ b/examples/opensoraplan1.2/inference_model_29x480x640.json @@ -47,7 +47,6 @@ "hub_backend": "hf", "model_id": "MT5", "from_pretrained": "./weights/google/mt5-xxl", - "local_files_only": false, "low_cpu_mem_usage": true, "dtype": "fp16" }, @@ -55,7 +54,6 @@ "hub_backend": "hf", "autotokenizer_name": "AutoTokenizer", "from_pretrained": "./opensoraplanv12/weights/mt5", - "local_files_only": false, "model_max_length": 512 }, "predictor": { diff --git a/examples/opensoraplan1.3/i2v/inference_i2v_model.json b/examples/opensoraplan1.3/i2v/inference_i2v_model.json index cf26436c..a2050288 100644 --- a/examples/opensoraplan1.3/i2v/inference_i2v_model.json +++ b/examples/opensoraplan1.3/i2v/inference_i2v_model.json @@ -1,4 +1,4 @@ -{ +{ "predictor": { "model_id": "videoditsparsei2v", "from_pretrained": "./weights/sparsedit/sparsediti2v_mm.pth", @@ -49,10 +49,9 @@ "hub_backend": "hf", "autotokenizer_name": "AutoTokenizer", "from_pretrained": "./weights/google/mt5-xxl", - "local_files_only": false, "model_max_length": 512 }, - + "diffusion": { "model_id": "EulerAncestralDiscrete", "num_inference_steps":100, diff --git a/examples/opensoraplan1.3/t2v/inference_t2v_model.json b/examples/opensoraplan1.3/t2v/inference_t2v_model.json index bcda965e..249dce8f 100644 --- a/examples/opensoraplan1.3/t2v/inference_t2v_model.json +++ b/examples/opensoraplan1.3/t2v/inference_t2v_model.json @@ -1,4 +1,4 @@ -{ +{ "predictor": { "model_id": "videoditsparse", "from_pretrained": "./weights/sparsedit/sparsedit_mm.pth", @@ -49,10 +49,9 @@ "hub_backend": "hf", "autotokenizer_name": "AutoTokenizer", "from_pretrained": "./weights/google/mt5-xxl", - "local_files_only": false, "model_max_length": 512 }, - + "diffusion": { "model_id": "EulerAncestralDiscrete", "num_inference_steps":100, diff --git a/examples/qihoo_t2x/inference_model_image.json b/examples/qihoo_t2x/inference_model_image.json index 9878e1ed..0494baad 100644 --- a/examples/qihoo_t2x/inference_model_image.json +++ b/examples/qihoo_t2x/inference_model_image.json @@ -47,15 +47,13 @@ "hub_backend": "hf", "model_id": "T5", "from_pretrained": "./pretrain_models/text_encoder", - "local_files_only": false, "low_cpu_mem_usage": true, "dtype": "fp32" }, "tokenizer":{ "hub_backend": "hf", "autotokenizer_name": "AutoTokenizer", - "from_pretrained": "./pretrain_models/tokenizer", - "local_files_only": false + "from_pretrained": "./pretrain_models/tokenizer" }, "predictor": { "dtype": "bf16", diff --git a/examples/qwen2vl/evaluate_qwen2vl_7b.json b/examples/qwen2vl/evaluate_qwen2vl_7b.json index fba234e0..8c77e68a 100644 --- a/examples/qwen2vl/evaluate_qwen2vl_7b.json +++ b/examples/qwen2vl/evaluate_qwen2vl_7b.json @@ -87,8 +87,7 @@ "tokenizer": { "hub_backend": "hf", "autotokenizer_name": "AutoTokenizer", - "from_pretrained": "./Qwen2-VL-7B-Instruct", - "local_files_only":false + "from_pretrained": "./Qwen2-VL-7B-Instruct" }, "generation_config": { "bos_token_id": 151643, diff --git a/examples/qwen2vl/inference_qwen2vl_2b.json b/examples/qwen2vl/inference_qwen2vl_2b.json index 0f9aa052..8e5fdcdd 100644 --- a/examples/qwen2vl/inference_qwen2vl_2b.json +++ b/examples/qwen2vl/inference_qwen2vl_2b.json @@ -85,8 +85,7 @@ "tokenizer": { "hub_backend": "hf", "autotokenizer_name": "AutoTokenizer", - "from_pretrained": "ckpt/hf_path/Qwen2-VL-2B-Instruct", - "local_files_only":false + "from_pretrained": "ckpt/hf_path/Qwen2-VL-2B-Instruct" }, "generation_config": { "bos_token_id": 151643, diff --git a/examples/qwen2vl/inference_qwen2vl_72b.json b/examples/qwen2vl/inference_qwen2vl_72b.json index 64e23985..5a39ca49 100644 --- a/examples/qwen2vl/inference_qwen2vl_72b.json +++ b/examples/qwen2vl/inference_qwen2vl_72b.json @@ -84,8 +84,7 @@ "tokenizer": { "hub_backend": "hf", "autotokenizer_name": "AutoTokenizer", - "from_pretrained": "ckpt/hf_path/Qwen2-VL-72B-Instruct", - "local_files_only":false + "from_pretrained": "ckpt/hf_path/Qwen2-VL-72B-Instruct" }, "generation_config": { "bos_token_id": 151643, diff --git a/examples/qwen2vl/inference_qwen2vl_7b.json b/examples/qwen2vl/inference_qwen2vl_7b.json index 4fc01c72..6421d771 100644 --- a/examples/qwen2vl/inference_qwen2vl_7b.json +++ b/examples/qwen2vl/inference_qwen2vl_7b.json @@ -85,8 +85,7 @@ "tokenizer": { "hub_backend": "hf", "autotokenizer_name": "AutoTokenizer", - "from_pretrained": "ckpt/hf_path/Qwen2-VL-7B-Instruct", - "local_files_only":false + "from_pretrained": "ckpt/hf_path/Qwen2-VL-7B-Instruct" }, "generation_config": { "bos_token_id": 151643, diff --git a/examples/whisper/pretrain_whisper.sh b/examples/whisper/pretrain_whisper.sh index 4fa7cb12..6927fa33 100644 --- a/examples/whisper/pretrain_whisper.sh +++ b/examples/whisper/pretrain_whisper.sh @@ -65,6 +65,7 @@ GPT_ARGS=" --overlap-grad-reduce \ --overlap-param-gather \ --num-workers 4 \ + --trust-remote-code \ " MM_ARGS=" diff --git a/mindspeed b/mindspeed deleted file mode 120000 index 22f84c16..00000000 --- a/mindspeed +++ /dev/null @@ -1 +0,0 @@ -../MindSpeed-3f09d67/mindspeed/ \ No newline at end of file diff --git a/mindspeed_mm/models/text_encoder/text_encoder.py b/mindspeed_mm/models/text_encoder/text_encoder.py index 505a8edd..26aad1bf 100644 --- a/mindspeed_mm/models/text_encoder/text_encoder.py +++ b/mindspeed_mm/models/text_encoder/text_encoder.py @@ -1,8 +1,9 @@ import importlib import torch import torch.nn as nn -from mindspeed_mm.utils.utils import get_dtype + from megatron.training import get_args +from mindspeed_mm.utils.utils import get_dtype TEXT_ENCODER_MAPPING = { diff --git a/mindspeed_mm/models/text_encoder/tokenizer.py b/mindspeed_mm/models/text_encoder/tokenizer.py index 7f72c9e7..00a63682 100644 --- a/mindspeed_mm/models/text_encoder/tokenizer.py +++ b/mindspeed_mm/models/text_encoder/tokenizer.py @@ -1,5 +1,4 @@ import importlib -from torch import nn from megatron.training import get_args diff --git a/tests/st/run_configs/inference_qwen2vl_7B_pp1/inference_qwen2vl_7b.json b/tests/st/run_configs/inference_qwen2vl_7B_pp1/inference_qwen2vl_7b.json index 7df1c60b..4bf6067b 100644 --- a/tests/st/run_configs/inference_qwen2vl_7B_pp1/inference_qwen2vl_7b.json +++ b/tests/st/run_configs/inference_qwen2vl_7B_pp1/inference_qwen2vl_7b.json @@ -85,8 +85,7 @@ "tokenizer": { "hub_backend": "hf", "autotokenizer_name": "AutoTokenizer", - "from_pretrained": "/home/ci_resource/models/qwen2vl_7b/qwen2vl7b", - "local_files_only":false + "from_pretrained": "/home/ci_resource/models/qwen2vl_7b/qwen2vl7b" }, "generation_config": { "bos_token_id": 151643, diff --git a/tests/st/run_configs/inference_qwen2vl_7B_pp4/inference_qwen2vl_7b.json b/tests/st/run_configs/inference_qwen2vl_7B_pp4/inference_qwen2vl_7b.json index 7df1c60b..4bf6067b 100644 --- a/tests/st/run_configs/inference_qwen2vl_7B_pp4/inference_qwen2vl_7b.json +++ b/tests/st/run_configs/inference_qwen2vl_7B_pp4/inference_qwen2vl_7b.json @@ -85,8 +85,7 @@ "tokenizer": { "hub_backend": "hf", "autotokenizer_name": "AutoTokenizer", - "from_pretrained": "/home/ci_resource/models/qwen2vl_7b/qwen2vl7b", - "local_files_only":false + "from_pretrained": "/home/ci_resource/models/qwen2vl_7b/qwen2vl7b" }, "generation_config": { "bos_token_id": 151643, -- Gitee From 46a4bdeed4fc6172a15d07a6d37aa11f1965d0ae Mon Sep 17 00:00:00 2001 From: htwang Date: Tue, 25 Feb 2025 21:19:46 +0800 Subject: [PATCH 4/4] fix error --- mindspeed_mm/models/text_encoder/text_encoder.py | 8 ++++++-- mindspeed_mm/models/text_encoder/tokenizer.py | 7 +++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/mindspeed_mm/models/text_encoder/text_encoder.py b/mindspeed_mm/models/text_encoder/text_encoder.py index 26aad1bf..2aad5aef 100644 --- a/mindspeed_mm/models/text_encoder/text_encoder.py +++ b/mindspeed_mm/models/text_encoder/text_encoder.py @@ -2,7 +2,6 @@ import importlib import torch import torch.nn as nn -from megatron.training import get_args from mindspeed_mm.utils.utils import get_dtype @@ -118,8 +117,13 @@ class TextEncoder(nn.Module): self.automodel_name = TEXT_ENCODER_MAPPING[model_id] config["pretrained_model_name_or_path"] = config.pop("from_pretrained") config["torch_dtype"] = get_dtype(config.pop("dtype")) - config["trust_remote_code"] = get_args().trust_remote_code config["local_files_only"] = True + try: + from megatron.training import get_args + config["trust_remote_code"] = get_args().trust_remote_code + except (ImportError, AssertionError): + config["trust_remote_code"] = False + # Only huggingface backend is supported, OpenMind backend will be supported soon. module = importlib.import_module("transformers") automodel = getattr(module, self.automodel_name) diff --git a/mindspeed_mm/models/text_encoder/tokenizer.py b/mindspeed_mm/models/text_encoder/tokenizer.py index 00a63682..2e9ec259 100644 --- a/mindspeed_mm/models/text_encoder/tokenizer.py +++ b/mindspeed_mm/models/text_encoder/tokenizer.py @@ -1,5 +1,4 @@ import importlib -from megatron.training import get_args class Tokenizer: @@ -45,7 +44,11 @@ class Tokenizer: self.backend = config.pop("hub_backend") tokenizer_name = config.pop("autotokenizer_name") config["pretrained_model_name_or_path"] = config.pop("from_pretrained") - config["trust_remote_code"] = get_args().trust_remote_code config["local_files_only"] = True + try: + from megatron.training import get_args + config["trust_remote_code"] = get_args().trust_remote_code + except (ImportError, AssertionError): + config["trust_remote_code"] = False tokenizer_cls = getattr(module, tokenizer_name) return tokenizer_cls.from_pretrained(**config) \ No newline at end of file -- Gitee