diff --git a/debug/weight_convert/README.md b/debug/weight_convert/README.md
index 6cc4e2481fbee3460fa79d600bd680721db4e6b7..443767799f4bd1bf2e761d58a41fe72c4af82427 100644
--- a/debug/weight_convert/README.md
+++ b/debug/weight_convert/README.md
@@ -22,6 +22,7 @@
 git clone https://gitee.com/ascend/ModelLink.git
 git clone https://gitee.com/ascend/AscendSpeed.git
 cd AscendSpeed
+git checkout 1.0
 pip3 install -r requirements.txt
 pip3 install -e .
 cd ..
diff --git a/debug/weight_convert/bloom.py b/debug/weight_convert/bloom.py
index d884d451f3282be197df967ebdd979a20c6775c1..74931acf88fac63b2a3543d5963233637f448034 100644
--- a/debug/weight_convert/bloom.py
+++ b/debug/weight_convert/bloom.py
@@ -24,10 +24,10 @@ import torch_npu
 
 def add_arguments(parser):
     group = parser.add_argument_group(title='Megatron saver')
-    group.add_argument('--target-tensor-parallel-size', type=int,
+    group.add_argument('--target-tensor-parallel-size', type=int, default=1,
                        help='Target tensor model parallel size, defaults to the tensor parallel size '
                        'in the input checkpoint if provided by the loader, otherwise to 1')
-    group.add_argument('--target-pipeline-parallel-size', type=int,
+    group.add_argument('--target-pipeline-parallel-size', type=int, default=1,
                        help='Target tensor model parallel size, default to the pipeline parall size '
                        'in the input checkpoint if provided by the loader, otherwise to 1')
     group.add_argument("--w-pack", type=bool,
@@ -37,110 +37,18 @@ def add_arguments(parser):
 
 def save_huggingface_bloom(args, model, model_args):
     hf2mg_map = {}
-    for name_param_m in model.named_parameters():
-        print("name_param_m", name_param_m[0])
-        layer_num = name_param_m[0].split(".")[3] if len(name_param_m[0].split(".")) > 3 else name_param_m[0].split(".")[1]
-        nh = model_args.num_attention_heads
-        ng = (
-            model_args.checkpoint_args.num_query_groups
-            if model_args.checkpoint_args.group_query_attention
-            else model_args.num_attention_heads
-        )
-        repeats = nh // ng
-        # word embedding
-        if name_param_m[0] == "language_model.embedding.word_embeddings.weight":
-            hf2mg_map["word_embeddings.weight"] = name_param_m[1]
-            continue
-        if name_param_m[0] == "language_model.embedding.word_embeddings.norm.weight":
-            hf2mg_map["word_embeddings_layernorm.weight"] = name_param_m[1]
-            continue
-        if name_param_m[0] == "language_model.embedding.word_embeddings.norm.bias":
-            hf2mg_map["word_embeddings_layernorm.bias"] = name_param_m[1]
-            continue
-
-        # input layernorm
-        if name_param_m[0] == f"language_model.encoder.layers.{layer_num}.input_norm.weight":
-            hf2mg_map[f"h.{layer_num}.input_layernorm.weight"] = name_param_m[1]
-            continue
-        if name_param_m[0] == f"language_model.encoder.layers.{layer_num}.input_norm.bias":
-            hf2mg_map[f"h.{layer_num}.input_layernorm.bias"] = name_param_m[1]
-            continue
-
-        # qkv
-        if name_param_m[0] == f"language_model.encoder.layers.{layer_num}.self_attention.query_key_value.weight":
-            hf2mg_map[f"h.{layer_num}.self_attention.query_key_value.weight"] = name_param_m[1]
-            continue
-        if name_param_m[0] == f"language_model.encoder.layers.{layer_num}.self_attention.query_key_value.bias":
-            hf2mg_map[f"h.{layer_num}.self_attention.query_key_value.bias"] = name_param_m[1]
-            continue
-
-        # post attention norm
-        if name_param_m[0] == f"language_model.encoder.layers.{layer_num}.post_attention_norm.weight":
-            hf2mg_map[f"h.{layer_num}.post_attention_layernorm.weight"] = name_param_m[1]
-            continue
-        if name_param_m[0] == f"language_model.encoder.layers.{layer_num}.post_attention_norm.bias":
-            hf2mg_map[f"h.{layer_num}.post_attention_layernorm.bias"] = name_param_m[1]
-            continue
-
-        # dense
-        if name_param_m[0] == f"language_model.encoder.layers.{layer_num}.self_attention.dense.weight":
-            hf2mg_map[f"h.{layer_num}.self_attention.dense.weight"] = name_param_m[
-                1
-            ]
-            continue
-        if name_param_m[0] == f"language_model.encoder.layers.{layer_num}.self_attention.dense.bias":
-            hf2mg_map[f"h.{layer_num}.self_attention.dense.bias"] = name_param_m[1]
-            continue
-        # mlp
-        if name_param_m[0] == f"language_model.encoder.layers.{layer_num}.mlp.dense_h_to_4h.weight":
-            hf2mg_map[f"h.{layer_num}.mlp.dense_h_to_4h.weight"] = name_param_m[1]
-            continue
-        if name_param_m[0] == f"language_model.encoder.layers.{layer_num}.mlp.dense_h_to_4h.bias":
-            hf2mg_map[f"h.{layer_num}.mlp.dense_h_to_4h.bias"] = name_param_m[1]
-            continue
-        if name_param_m[0] == f"language_model.encoder.layers.{layer_num}.mlp.dense_4h_to_h.weight":
-            hf2mg_map[f"h.{layer_num}.mlp.dense_4h_to_h.weight"] = name_param_m[1]
-            continue
-        if name_param_m[0] == f"language_model.encoder.layers.{layer_num}.mlp.dense_4h_to_h.bias":
-            hf2mg_map[f"h.{layer_num}.mlp.dense_4h_to_h.bias"] = name_param_m[1]
-            continue
-        # final norm
-        if name_param_m[0] == "language_model.encoder.final_norm.weight":
-            hf2mg_map[f"ln_f.weight"] = name_param_m[1]
-            continue
-        if name_param_m[0] == "language_model.encoder.final_norm.bias":
-            hf2mg_map[f"ln_f.bias"] = name_param_m[1]
-            continue
-    print('hf2mg_map.keys', hf2mg_map.keys())
-
-    gc.collect()
     file_format = "pytorch_model-{:05d}-of-{:05d}.bin"
     file_list = os.listdir(args.output_model_dir)
     output_mg2hg_path = os.path.join(args.output_model_dir, 'mg2hg')
+
+    hf_model = BloomForCausalLM.from_pretrained(args.output_model_dir, device_map="cpu", torch_dtype="auto")
+    for name_param_h, name_param_m in zip(hf_model.named_parameters(), model.named_parameters()):
+        name_param_h[1].data.copy_(name_param_m[1])
+    hf_model.save_pretrained(save_dir)
+
     os.makedirs(output_mg2hg_path, exist_ok=True)
     for filename in file_list:
-        if filename.startswith("pytorch_model-") and filename.endswith(".bin"):
-            try:
-                start_index = len("pytorch_model-")
-                end_index = filename.index("-of-")
-                index1 = int(filename[start_index:end_index])
-                index2 = int(filename[end_index + len("-of-"): -len(".bin")])
-                if 0 <= index1 <= index2 <= 99999:
-                    formatted_filename = file_format.format(index1, index2)
-                    file_path = os.path.join(args.output_model_dir, formatted_filename)
-                    hf_model = torch.load(file_path, map_location=torch.device('cpu'))
-                    for key in hf_model.keys():
-                        print(key)
-                        if key in hf2mg_map.keys():
-                            hf_model[key].data.copy_(hf2mg_map[key])
-                    output_file_path = os.path.join(output_mg2hg_path, formatted_filename)
-                    print(f'save weight to {output_file_path}')
-                    # torch.save(hf_model, output_file_path)
-                else:
-                    print("Invalid file format:", filename)
-            except ValueError:
-                print("Unable to save file:", filename)
-        elif filename.endswith('.json') or filename.endswith('.model'):
+        if filename.endswith('.json') or filename.endswith('.model'):
             source_file = os.path.join(args.output_model_dir, filename)
             destination_file = os.path.join(output_mg2hg_path, filename)
             shutil.copyfile(source_file, destination_file)
diff --git a/debug/weight_convert/llama.py b/debug/weight_convert/llama.py
index 0ae2173c6cd5a1b1ffcb8fd1ee56e58aa05fe646..aa2ef586a5bcd9b670360df8eb85f31f531010ba 100644
--- a/debug/weight_convert/llama.py
+++ b/debug/weight_convert/llama.py
@@ -24,10 +24,10 @@ import torch_npu
 
 def add_arguments(parser):
     group = parser.add_argument_group(title='Megatron saver')
-    group.add_argument('--target-tensor-parallel-size', type=int,
+    group.add_argument('--target-tensor-parallel-size', type=int, default=1,
                        help='Target tensor model parallel size, defaults to the tensor parallel size '
                        'in the input checkpoint if provided by the loader, otherwise to 1')
-    group.add_argument('--target-pipeline-parallel-size', type=int,
+    group.add_argument('--target-pipeline-parallel-size', type=int, default=1,
                        help='Target tensor model parallel size, default to the pipeline parall size '
                        'in the input checkpoint if provided by the loader, otherwise to 1')
     group.add_argument("--w-pack", type=bool,