diff --git a/.jenkins/test/config/chemistry_config/dependent_packages.yaml b/.jenkins/test/config/chemistry_config/dependent_packages.yaml
index 5538514cd4a10c4856ae7c05fea26f887cec358d..14e9969f1471573384288cb69f1b0b2e086a1fa4 100644
--- a/.jenkins/test/config/chemistry_config/dependent_packages.yaml
+++ b/.jenkins/test/config/chemistry_config/dependent_packages.yaml
@@ -1,2 +1,2 @@
 mindspore:
-  '/mindspore/mindspore/version/202407/20240717/master_20240717220023_a93473380da7c5fb0e8d8bed2c45e9b75dbd0330_newest/'
\ No newline at end of file
+  '/mindspore/mindspore/version/202411/20241129/r2.4.1_20241129194610_0fd8a04edb85b498cc12d9c41216e3e78cbc8564_newest/'
\ No newline at end of file
diff --git a/MindChemistry/applications/allegro/src/potential.py b/MindChemistry/applications/allegro/src/potential.py
index 0cf59c750f6bddb57d603b1bfd827bdc6702a6c0..2c1e950498318f7602752bb80188c6de13d099a8 100755
--- a/MindChemistry/applications/allegro/src/potential.py
+++ b/MindChemistry/applications/allegro/src/potential.py
@@ -32,24 +32,6 @@ from src.allegro_embedding import AllegroEmbedding
 
 _cur_dir = os.getcwd()
 
-
-class PotentialForce(Cell):
-    """Potential_Force
-    """
-
-    def __init__(self, potential_net):
-        super().__init__()
-        self.potential_net = potential_net
-        self.grad = ms.grad(self.potential_net, grad_position=1)
-
-    def construct(self, x, pos, edge_index, batch, batch_size):
-        atom_types = x.reshape(-1, 1)
-        pos = pos.reshape(-1, 3)
-        total_energy = self.potential_net(atom_types, pos, edge_index, batch, batch_size)
-        forces = ops.neg(self.grad(atom_types, pos, edge_index, batch, batch_size))
-        return total_energy, forces
-
-
 class Potential(Cell):
     """Potential
     """
diff --git a/MindChemistry/applications/high_entropy_alloy_design/config.yml b/MindChemistry/applications/high_entropy_alloy_design/config.yml
deleted file mode 100644
index e3a4a83d5cc0816d06f7c9899dbbcf54b119cfaa..0000000000000000000000000000000000000000
--- a/MindChemistry/applications/high_entropy_alloy_design/config.yml
+++ /dev/null
@@ -1,41 +0,0 @@
-train_params:
-  visualize: True
-  save_log: True
-eval_params:
-  visualize: True
-  save_log: True
-wae_params:
-  num_epoch: 20
-  batch_size: 20
-  lr: 0.0005
-  weight_decay: 0.0
-  sigma: 8.0
-  MMD_lambda: 0.0001
-  model_name: 'WAE'
-  channels: [6, 80, 64, 48, 2]
-  activation: [True, True, True, False]
-  layer_norm: [True, True, True, False]
-  layer_norm_epsilon: 1e-5
-cls_params:
-  num_epoch: 20
-  batch_size: 16
-  lr: 0.0001
-  num_fold: 5
-  weight_decay: 0.
-  model_name: 'Classifier'
-  channels: [2, 8, 1]
-  dropout: [True, False]
-  activation: [False, True]
-ensem_params:
-  model_name: 'Ensemble'
-  num_feature: [17, 20]
-  num_output: 1
-  weight_decay: 0.0001
-  num_epoch: 5
-  num_group: 5
-  seed_start: 40
-  seed_end: 43
-  NN_stage1_dir: '/Invar_inference_NN_stage1.xlsx'
-  Tree_stage1_dir: '/Invar_inference_GBDT_stage1.xlsx'
-  NN_stage2_dir: '/Invar_inference_NN_stage2.xlsx'
-  Tree_stage2_dir: '/Invar_inference_GBDT_stage2.xlsx'
diff --git a/MindChemistry/applications/high_entropy_alloy_design/src/__init__.py b/MindChemistry/applications/high_entropy_alloy_design/src/__init__.py
deleted file mode 100644
index 0205e7054e7ce87d06ba41161cfbfb0b7f1bbc76..0000000000000000000000000000000000000000
--- a/MindChemistry/applications/high_entropy_alloy_design/src/__init__.py
+++ /dev/null
@@ -1,28 +0,0 @@
-# Copyright 2023 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""init"""
-from .dataset import *
-from .model import *
-from .module import *
-from .trainer import *
-from .utils import *
-
-__all__ = ['HighEntropyAlloy',
-           'GenerationModule', 'RankingModule',
-           'WAE', 'Classifier', 'MlpModel', 'TreeModel',
-           'gaussian_mixture_model', 'sampler',
-           'train_wae', 'train_cls',
-           'train_mlp', 'train_tree'
-           ]
diff --git a/MindChemistry/applications/high_entropy_alloy_design/src/dataset.py b/MindChemistry/applications/high_entropy_alloy_design/src/dataset.py
deleted file mode 100644
index a831088ae57d732e06075c4a361da53eb4e9025e..0000000000000000000000000000000000000000
--- a/MindChemistry/applications/high_entropy_alloy_design/src/dataset.py
+++ /dev/null
@@ -1,84 +0,0 @@
-# Copyright 2023 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""create dataset"""
-import pandas as pd
-import numpy as np
-
-from sklearn import preprocessing
-from sklearn.model_selection import train_test_split
-
-
-class HighEntropyAlloy():
-    def __init__(self, root):
-        super(HighEntropyAlloy, self).__init__()
-        self.root = root
-        self.raw_data = pd.read_excel(root + '/data/Data_base.xlsx', header=0)
-        self.component_name = ['Fe', 'Ni', 'Co', 'Cr', 'V', 'Cu']
-        self.property_name = {
-            'stage1': ['VEC', 'AR1', 'AR2', 'PE', 'Density', 'TermalC', 'MP', 'FI', 'SI', 'TI', 'M'],
-            'stage2': ['TC', 'MS', 'MagS']
-        }
-        self.bins = [18, 35, 48, 109, 202, 234, 525, 687]
-
-    def process_train_gen_data(self):
-        # load data
-        gen_data = self.raw_data.iloc[:, 1:19].to_numpy().astype(np.float32)
-        raw_x = gen_data[:, :6]
-        raw_y = gen_data[:, 17].reshape(-1, 1)
-        # generate label
-        label_y = np.where(raw_y < 5, 1, 0).astype(np.float32)
-        return raw_x, label_y
-
-    def process_train_rank_data(self, stage_num, seed):
-        # load data
-        rank_data_train = self.raw_data[:696]
-        df_all = rank_data_train.drop(columns=['alloy'])
-        # filter adopted properties
-        if stage_num == 1:
-            feature_name = self.property_name['stage1']
-        elif stage_num == 2:
-            feature_name = self.property_name['stage1'] + self.property_name['stage2']
-        # normalize properties
-        min_max_scaler = preprocessing.MinMaxScaler()
-        normalized_atomic_properties = min_max_scaler.fit_transform(df_all[feature_name])
-        # define input and label
-        composition = df_all[self.component_name]
-        raw_x = np.concatenate([composition.values, normalized_atomic_properties], axis=1).astype(np.float32)
-        y = df_all[['TEC']]
-        label_y = y.values.astype(np.float32)
-        # split train and test set with 7-fold stratify
-        stratify_flag = np.digitize(y.index, self.bins, right=True)
-        train_x, test_x, train_labels, test_labels = train_test_split(raw_x, label_y, test_size=0.15,
-                                                                      random_state=seed,
-                                                                      stratify=stratify_flag)
-        return train_x, test_x, train_labels, test_labels
-
-    def process_eval_data(self, stage_num):
-        # load data
-        rank_data_test = self.raw_data[696:]
-        df_all = rank_data_test.drop(columns=['alloy'])
-        # filter adopted properties
-        if stage_num == 1:
-            feature_name = self.property_name['stage1']
-        elif stage_num == 2:
-            feature_name = self.property_name['stage1'] + self.property_name['stage2']
-        # define input and label
-        min_max_scaler = preprocessing.MinMaxScaler()
-        normalized_atomic_properties = min_max_scaler.fit_transform(df_all[feature_name])
-        composition = df_all[self.component_name]
-        raw_x = np.concatenate([composition.values, normalized_atomic_properties], axis=1).astype(np.float32)
-        y = df_all[['TEC']]
-        label_y = y.values.astype(np.float32)
-        return raw_x, label_y
diff --git a/MindChemistry/applications/high_entropy_alloy_design/src/model.py b/MindChemistry/applications/high_entropy_alloy_design/src/model.py
deleted file mode 100644
index 33507844d9219acb76eae50f8480d0c10a0763ef..0000000000000000000000000000000000000000
--- a/MindChemistry/applications/high_entropy_alloy_design/src/model.py
+++ /dev/null
@@ -1,109 +0,0 @@
-# Copyright 2023 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""define models"""
-
-from lightgbm import LGBMRegressor
-
-import mindspore.nn as nn
-from mindspore.common import initializer as init
-from mindspore.common.initializer import HeNormal
-
-from mindchemistry import AutoEncoder, FCNet, MLPNet
-
-
-class WAE(nn.Cell):
-    def __init__(self, params):
-        super(WAE, self).__init__()
-        uniform_scale = [init.Uniform(x ** -0.5) for x in params['channels']]
-        self.wae_model = AutoEncoder(channels=params['channels'],
-                                     weight_init=uniform_scale,
-                                     has_bias=True,
-                                     bias_init=uniform_scale,
-                                     has_layernorm=params['layer_norm'],
-                                     layernorm_epsilon=1e-5,
-                                     has_activation=params['activation'],
-                                     act='relu',
-                                     out_act='softmax')
-
-    def construct(self, inputs):
-        return self.wae_model(inputs)
-
-    def encode(self, inputs):
-        return self.wae_model.encode(inputs)
-
-    def decode(self, inputs):
-        return self.wae_model.decode(inputs)
-
-
-class Classifier(nn.Cell):
-    def __init__(self, params):
-        super(Classifier, self).__init__()
-        uniform_scale = [init.Uniform(x ** -0.5) for x in params['channels']]
-        self.cls_model = FCNet(channels=params['channels'],
-                               weight_init=uniform_scale,
-                               has_bias=True,
-                               bias_init=uniform_scale,
-                               has_dropout=params['dropout'],
-                               has_layernorm=False,
-                               has_activation=params['activation'],
-                               act='sigmoid')
-
-    def construct(self, inputs):
-        return self.cls_model(inputs)
-
-
-class MlpModel(nn.Cell):
-    def __init__(self, params):
-        super(MlpModel, self).__init__()
-        # load BO searched params
-        num_feature = params['num_feature'][int(params['stage_num']) - 1]
-        num_output = params['num_output']
-        layer_num = int(params['module__w'])
-        hidden_num = int(params['module__n_hidden'])
-        # model init
-        self.mlp_model = MLPNet(in_channels=num_feature,
-                                out_channels=num_output,
-                                layers=layer_num,
-                                neurons=hidden_num,
-                                weight_init=HeNormal(),
-                                has_bias=True,
-                                has_dropout=False,
-                                has_layernorm=False,
-                                has_activation=True,
-                                act=['relu'] * (layer_num - 1))
-
-    def construct(self, inputs):
-        return self.mlp_model(inputs)
-
-
-def TreeModel(params):
-    tree_params = {
-        "num_leaves": int(round(params['num_leaves'])),
-        'min_child_samples': int(round(params['min_child_samples'])),
-        'learning_rate': params['learning_rate'],
-        'n_estimators': int(round(params['n_estimators'])),
-        'max_bin': int(round(params['max_bin'])),
-        'colsample_bytree': max(min(params['colsample_bytree'], 1), 0),
-        'subsample': max(min(params['subsample'], 1), 0),
-        'max_depth': int(round(params['max_depth'])),
-        'reg_lambda': max(params['reg_lambda'], 0),
-        'reg_alpha': max(params['reg_alpha'], 0),
-        'min_split_gain': params['min_split_gain'],
-        'min_child_weight': params['min_child_weight'],
-        'objective': 'regression',
-        'verbose': -1
-    }
-    model = LGBMRegressor(**tree_params)
-    return model
diff --git a/MindChemistry/applications/high_entropy_alloy_design/src/module.py b/MindChemistry/applications/high_entropy_alloy_design/src/module.py
deleted file mode 100644
index afc332a509122e0c134ca5b6904c5b2741f2df44..0000000000000000000000000000000000000000
--- a/MindChemistry/applications/high_entropy_alloy_design/src/module.py
+++ /dev/null
@@ -1,85 +0,0 @@
-# Copyright 2023 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""2-phase modules"""
-import os
-import pandas as pd
-
-from src.dataset import HighEntropyAlloy
-from src.model import WAE, Classifier, MlpModel, TreeModel
-from src.utils import sampler, gaussian_mixture_model
-from src.trainer import train_wae, train_cls, train_mlp, train_tree
-
-
-class GenerationModule():
-    """ Generation Module"""
-
-    def __init__(self, wae_params, cls_params):
-        self.wae_params = wae_params
-        self.cls_params = cls_params
-        self.dataset = HighEntropyAlloy(self.wae_params['root'])
-        self.input, self.label = self.dataset.process_train_gen_data()
-        self.wae_model = WAE(self.wae_params)
-        self.cls_model = Classifier(self.cls_params)
-
-    def train(self):
-        # train WAE
-        wae_data = self.input
-        latents = train_wae(self.wae_model, wae_data, self.wae_params)
-        # train CLS
-        cls_data = (latents, self.label)
-        train_cls(self.cls_model, cls_data, self.cls_params)
-        # generate Gaussian Mixture Model
-        gm_model = gaussian_mixture_model(latents, self.wae_params)
-        # generate samples
-        sampler(gm=gm_model, classifier=self.cls_model, n_samples=5000, sigma=0.5)
-
-
-class RankingModule():
-    """Ranking Module"""
-
-    def __init__(self, params):
-        self.params = params
-        self.dataset = HighEntropyAlloy(self.params['root'])
-        self.target_dir = os.path.join(self.params['root'], 'data/')
-        self.mlp_params_stage1 = pd.read_excel(self.target_dir + self.params['NN_stage1_dir'])
-        self.mlp_params_stage2 = pd.read_excel(self.target_dir + self.params['NN_stage2_dir'])
-        self.tree_params_stage1 = pd.read_excel(self.target_dir + self.params['Tree_stage1_dir'])
-        self.tree_params_stage2 = pd.read_excel(self.target_dir + self.params['Tree_stage2_dir'])
-
-    def train(self):
-        # train 1st stage ranking models
-        self.params['stage_num'] = 1
-        self.params['model_name'] += str(self.params['stage_num'])
-        for i in range(self.params['num_group']):
-            for j in range(self.params['seed_start'], self.params['seed_end']):
-                data = self.dataset.process_train_rank_data(stage_num=1, seed=j)
-                self.params.update(self.mlp_params_stage1.iloc[i])
-                mlp_model = MlpModel(self.params)
-                train_mlp(mlp_model, data, j, self.params)
-                self.params.update(self.tree_params_stage1.iloc[i])
-                tree_model = TreeModel(self.params)
-                train_tree(tree_model, data, j - 10, self.params)
-        # train 2nd stage ranking models
-        self.params['stage_num'] = 2
-        self.params['model_name'] += str(self.params['stage_num'])
-        for i in range(self.params['num_group']):
-            for j in range(self.params['seed_start'], self.params['seed_end']):
-                data = self.dataset.process_train_rank_data(stage_num=2, seed=j)
-                self.params.update(self.mlp_params_stage2.iloc[i])
-                mlp_model = MlpModel(self.params)
-                train_mlp(mlp_model, data, j, self.params)
-                self.params.update(self.tree_params_stage2.iloc[i])
-                tree_model = TreeModel(self.params)
-                train_tree(tree_model, data, j - 10, self.params)
diff --git a/MindChemistry/applications/high_entropy_alloy_design/src/trainer.py b/MindChemistry/applications/high_entropy_alloy_design/src/trainer.py
deleted file mode 100644
index bedc6cb6ac2cf1277ca8cc19824f3e999ba3c5c1..0000000000000000000000000000000000000000
--- a/MindChemistry/applications/high_entropy_alloy_design/src/trainer.py
+++ /dev/null
@@ -1,467 +0,0 @@
-# Copyright 2023 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""model trainers"""
-import os
-import time
-import warnings
-import stat
-import joblib
-
-import seaborn as sns
-import matplotlib.pyplot as plt
-from matplotlib.pyplot import MultipleLocator
-
-import numpy as np
-import pandas as pd
-from sklearn.model_selection import KFold
-
-import mindspore as ms
-import mindspore.dataset as ds
-
-
-def train_cls(model, data, params):
-    '''Train classification network'''
-    # load params
-    model_name = params['model_name']
-    exp_name = params['exp_name']
-    num_epoch = params['num_epoch']
-    lr = params['lr']
-    w_decay = params['weight_decay']
-    folder_dir = params['folder_dir']
-    # prepare data split
-    latents, label_y = data
-    kf = KFold(n_splits=params['num_fold'])
-
-    # model training
-    train_acc = []
-    test_acc = []
-    k = 1
-
-    # prepare training
-    optimizer = ms.nn.Adam(params=model.trainable_params(), learning_rate=lr,
-                           weight_decay=w_decay)  # initialize optimizer
-
-    def forward_fn(input_x, label):
-        y_pred = model(input_x)
-        loss = ms.ops.binary_cross_entropy(y_pred, label)
-        return loss, y_pred
-
-    grad_fn = ms.ops.value_and_grad(forward_fn, None, optimizer.parameters)
-
-    @ms.jit()
-    def train_step(step_x, step_y):
-        ((step_loss, step_y_pred), grads) = grad_fn(step_x, step_y)
-        step_loss = ms.ops.depend(step_loss, optimizer(grads))
-        return step_loss, step_y_pred
-
-    for train, test in kf.split(latents):
-        # split train and test data
-        x_train, x_test, y_train, y_test = latents[train], latents[test], label_y[train], label_y[test]
-        # prepare train data
-        train_data = ds.NumpySlicesDataset(data={'x': x_train, 'y': y_train}, shuffle=True)
-        train_data = train_data.batch(batch_size=params['batch_size'])
-        train_iterator = train_data.create_dict_iterator()
-        # prepare save_dir for checkpoint
-        if not os.path.isdir(folder_dir):
-            os.mkdir(folder_dir)
-            warnings.warn('current model file not exists, please check history model training record.')
-        if params['save_log']:
-            flags = os.O_RDWR | os.O_CREAT
-            modes = stat.S_IWUSR | stat.S_IRUSR
-            train_record = os.open(folder_dir + '/' + model_name + '-' + exp_name + '.txt', flags, modes)
-
-        # start model training
-        for epoch in range(num_epoch):
-            start_time = time.time()
-            epoch_acc = []
-            test_epoch_acc = []
-            model.set_train(True)
-            for _, data_ in enumerate(train_iterator):
-                x = data_['x']
-                y = data_['y']
-                iter_y_pred = train_step(x, y)[1]
-                # train accuracy
-                iter_acc = ms.numpy.equal(
-                    ms.numpy.where(iter_y_pred >= ms.Tensor(0.5), ms.Tensor(1.), ms.Tensor(0.)),
-                    y, ms.float32).mean().asnumpy()
-                epoch_acc.append(iter_acc)
-            # test
-            # prepare test data
-            test_data = ds.NumpySlicesDataset(data={'x': x_test, 'y': y_test}, shuffle=False)
-            test_data = test_data.batch(batch_size=len(y_test))
-            test_iterator = test_data.create_dict_iterator()
-
-            for _, data_ in enumerate(test_iterator):
-                x = data_['x']
-                y = data_['y']
-                test_y_pred = model(x)
-                # test accuracy
-                test_iter_acc = ms.numpy.equal(
-                    ms.numpy.where(test_y_pred >= ms.Tensor(0.5), ms.Tensor(1.), ms.Tensor(0.)),
-                    y, ms.float32).mean().asnumpy()
-                test_epoch_acc.append(test_iter_acc)
-            # print training info
-            record = '[{}/{}/{}] train_acc: {:.04f} || test_acc: {:.04f}, time: {:.3f} sec'.format(epoch,
-                                                                                                   k,
-                                                                                                   params['num_fold'],
-                                                                                                   sum(epoch_acc) /
-                                                                                                   len(epoch_acc),
-                                                                                                   sum(test_epoch_acc) /
-                                                                                                   len(test_epoch_acc),
-                                                                                                   time.time() -
-                                                                                                   start_time)
-            print(record)
-            if params['save_log']:
-                # save loss record
-                os.write(train_record, str.encode(record + '\n'))
-        train_acc_ = sum(epoch_acc) / len(epoch_acc)
-        test_acc_ = sum(test_epoch_acc) / len(test_epoch_acc)
-        train_acc.append(train_acc_)
-        test_acc.append(test_acc_)
-        k += 1
-    record = 'average acc: train_acc: {:.04f} || test_acc: {:.04f}'.format(sum(train_acc) / len(train_acc),
-                                                                           sum(test_acc) / len(test_acc))
-    print(record)
-    # save model checkpoint
-    save_model_file = str(model_name + ".ckpt")
-    save_model_dir = os.path.join(folder_dir, save_model_file)
-    ms.save_checkpoint(model, save_model_dir)
-
-    # save training info
-    if params['save_log']:
-        os.write(train_record, str.encode(record + '\n'))
-        # loss record saved
-        train_record.close()
-
-    # visualize classifier
-    if params['visualize']:
-        plt.figure()
-        sns.set_style()
-        plt.xlabel('number of folds')
-        plt.ylabel('loss')
-        x = range(1, params['num_fold'] + 1)
-        sns.set_style("darkgrid")
-        x_major_locator = MultipleLocator(1)
-        ax = plt.gca()
-        plt.plot(x, train_acc)
-        plt.plot(x, test_acc, linestyle=':', c='steelblue')
-        plt.legend(["train_accuracy", "test_accuracy"])
-        ax.xaxis.set_major_locator(x_major_locator)
-        plt.savefig(folder_dir + '/binary_classifier.png', dpi=300)
-    print('=' * 200 + '\n' + 'Training Complete! Model file saved at' + save_model_dir + '\n' + '==' * 200)
-
-
-def imq_kernel(input_x, output_y, h_dim):
-    '''Compute maximum mean discrepancy using inverse multiquadric kernel'''
-    batch_size = input_x.shape[0]
-    norms_x = input_x.pow(2).sum(axis=1, keepdims=True)
-    prods_x = ms.ops.MatMul()(input_x, input_x.T)
-    dists_x = norms_x + norms_x.T - 2 * prods_x
-    norms_y = output_y.pow(2).sum(axis=1, keepdims=True)
-    prods_y = ms.ops.MatMul()(output_y, output_y.T)
-    dists_y = norms_y + norms_y.T - 2 * prods_y
-    dot_prd = ms.ops.MatMul()(input_x, output_y.T)
-    dists_c = norms_x + norms_y.T - 2 * dot_prd
-    stats = 0
-    for scale in [.1, .2, .5, 1., 2., 5., 10.]:
-        c = 2 * h_dim * 1.0 * scale
-        res1 = c / (c + dists_x)
-        res1 += c / (c + dists_y)
-        res1 = (1 - ms.ops.eye(batch_size, batch_size, ms.float32)) * res1
-        res1 = res1.sum() / (batch_size - 1)
-        res2 = c / (c + dists_c)
-        res2 = res2.sum() * 2. / batch_size
-        stats += res1 - res2
-    return stats
-
-
-def get_latents(model, iterator):
-    model.set_train(mode=False)
-    latents = []
-    for _, data in enumerate(iterator):
-        x = data['x']
-        z = model.encode(x)
-        latents.append(z.asnumpy().astype(np.float32))
-    return np.concatenate(latents, axis=0)
-
-
-def train_wae(model, data, params):
-    ''' Train WAE generation network'''
-    # load params
-    model_name = params['model_name']
-    exp_name = params['exp_name']
-    num_epoch = params['num_epoch']
-    batch_size = params['batch_size']
-    sigma = params['sigma']
-    mmd_lambda = params['MMD_lambda']
-    folder_dir = params['folder_dir']
-    lr = params['lr']
-    w_decay = params['weight_decay']
-    raw_x = data
-    # prepare train data
-    train_data = ds.NumpySlicesDataset(data={'x': raw_x[:]}, shuffle=True)
-    train_data = train_data.batch(batch_size=batch_size)
-    train_iterator = train_data.create_dict_iterator()
-
-    # prepare save_dir for checkpoint
-    if not os.path.isdir(folder_dir):
-        os.mkdir(folder_dir)
-        warnings.warn('current model file not exists, please check history model training record.')
-    if params['save_log']:
-        flags = os.O_RDWR | os.O_CREAT
-        modes = stat.S_IWUSR | stat.S_IRUSR
-        train_record = os.open(folder_dir + '/' + model_name + '-' + exp_name + '.txt', flags, modes)
-
-    # prepare model training
-    optimizer = ms.nn.Adam(params=model.trainable_params(), learning_rate=lr,
-                           weight_decay=w_decay)
-
-    def forward_fn(x):
-        recon_x, z_tilde = model(x)
-        z = sigma * ms.ops.StandardNormal()(z_tilde.shape)
-        recon_loss = ms.ops.binary_cross_entropy(recon_x, x)
-        mmd_loss = imq_kernel(z_tilde, z, h_dim=2)
-        mmd_loss = mmd_loss / x.shape[0]
-        return recon_loss, mmd_loss * mmd_lambda
-
-    grad_fn = ms.ops.value_and_grad(forward_fn, None, optimizer.parameters)
-
-    @ms.jit()
-    def train_step(x):
-        ((step_recon_loss, step_mmd_loss), grads) = grad_fn(x)
-        step_loss = step_recon_loss + step_mmd_loss
-        step_loss = ms.ops.depend(step_loss, optimizer(grads))
-        return step_loss, step_recon_loss, step_mmd_loss / mmd_lambda
-
-    # start model training
-    loss_ = []
-    for epoch in range(num_epoch):
-        start_time = time.time()
-        epoch_loss = []
-        epoch_recon = []
-        epoch_mmd = []
-        model.set_train(True)
-        for _, data_ in enumerate(train_iterator):
-            data_x = data_['x']
-            (iter_loss, iter_recon_loss, iter_mmd_loss) = train_step(data_x)
-            epoch_loss.append(iter_loss.asnumpy())
-            epoch_recon.append(iter_recon_loss.asnumpy())
-            epoch_mmd.append(iter_mmd_loss.asnumpy())
-        # loss record
-        avg_loss = np.sum(epoch_loss) / len(epoch_loss)
-        avg_recon = np.sum(epoch_recon) / len(epoch_recon)
-        avg_mmd = np.sum(epoch_mmd) / len(epoch_mmd)
-        loss_.append(avg_loss)
-
-        # print training info
-        record = '[{:03}/{:03}] Total_loss: {:.6f} Recon_loss: {:.6f}, MMD_loss:{:.6f}, time: {:.3f} sec'.format(
-            epoch + 1,
-            num_epoch,
-            avg_loss,
-            avg_recon,
-            avg_mmd,
-            time.time() - start_time)
-        print(record)
-
-        # save training info
-        if params['save_log']:
-            # save loss record
-            os.write(train_record, str.encode(record + '\n'))
-
-    # save model checkpoint
-    save_model_file = str(model_name + ".ckpt")
-    save_model_dir = os.path.join(folder_dir, save_model_file)
-    ms.save_checkpoint(model, save_model_dir)
-    # save training info
-    if params['save_log']:
-        # loss record saved
-        train_record.close()
-
-    # prepare test data
-    sampler = ds.SequentialSampler()
-    test_data = ds.NumpySlicesDataset(data={'x': raw_x[:]}, sampler=sampler)
-    test_data = test_data.batch(batch_size=2)
-    test_iterator = test_data.create_dict_iterator()
-    # save generated latents for GM eval
-    latents = get_latents(model, test_iterator)
-    latents_ = pd.DataFrame(latents)
-    latents_.to_csv(folder_dir + '/latents.csv', index=False)
-
-    # visualize latent space
-    if params['visualize']:
-        sns.set_style('ticks')
-        # assign different colors to alloy with and without Copper,
-        low_cu = raw_x[:, 5] < 0.05
-        low_cu_latent = latents[low_cu]
-        high_cu = raw_x[:, 5] >= 0.05
-        high_cu_latent = latents[high_cu]
-        fig, axs = plt.subplots(figsize=(3, 3), dpi=200)
-        axs.set_yticks(np.arange(-6, 8, step=2))
-        axs.set_xticks(np.arange(-10, 5, step=2))
-        axs.set_yticklabels(np.arange(-6, 8, step=2), fontsize=7)
-        axs.set_xticklabels(np.arange(-10, 5, step=2), fontsize=7)
-        for axis in ['top', 'bottom', 'left', 'right']:
-            axs.spines[axis].set_linewidth(1.)
-        axs.tick_params(axis='both', which='major', top=False, labeltop=False, direction='out', width=1., length=4)
-        axs.tick_params(axis='both', which='major', right=False, labelright=False, direction='out', width=1., length=4)
-
-        axs.scatter(low_cu_latent[:, 0], low_cu_latent[:, 1], c='steelblue', alpha=.55, s=8, linewidths=0,
-                    label='Alloys w/o Cu')
-        axs.scatter(high_cu_latent[:, 0], high_cu_latent[:, 1], c='firebrick', alpha=.65, s=14, linewidths=0,
-                    marker='^', label='Alloys w/ Cu')
-        handles, labels = axs.get_legend_handles_labels()
-        handles = handles[::1]
-        labels = labels[::1]
-        legend_properties = {'size': 7.5}
-        axs.legend(handles, labels, loc='upper right', bbox_to_anchor=(1.015, 1.017), handletextpad=-0.3, frameon=False,
-                   prop=legend_properties)
-        fig.savefig(folder_dir + '/latents.tif', bbox_inches='tight', pad_inches=0.01)
-    print('=' * 200 + '\n' + 'Training Complete! Model file saved at' + save_model_dir + '\n' + '==' * 200)
-    return latents
-
-
-def train_mlp(model, data, seed, params):
-    ''' Train MLP ranking network'''
-    # load params:
-    w_decay = params['weight_decay']
-    num_epoch = params['num_epoch']
-    folder_dir = params['folder_dir']
-    model_name = 'MLP_' + params['model_name']
-    exp_name = params['exp_name']
-    batch_size = int(params['batch_size'])
-    lr = params['lr']
-    search_params_no = int(params['no'])
-    # prepare train data
-    train_x, test_x, train_labels, test_labels = data
-    train_data = ds.NumpySlicesDataset(data={'x': train_x, 'y': train_labels}, shuffle=True)
-    train_data = train_data.batch(batch_size=batch_size)
-    train_iterator = train_data.create_dict_iterator()
-    # prepare save_dir for checkpoint
-    if not os.path.isdir(folder_dir):
-        os.mkdir(folder_dir)
-        warnings.warn('current model file not exists, please check history model training record.')
-    if params['save_log']:
-        flags = os.O_RDWR | os.O_CREAT
-        modes = stat.S_IWUSR | stat.S_IRUSR
-        train_record = os.open(folder_dir + '/' + model_name + '-' + exp_name + '.txt', flags, modes)
-
-    # prepare model training
-    optimizer = ms.nn.Adam(params=model.trainable_params(), learning_rate=lr,
-                           weight_decay=w_decay)
-
-    def forward_fn(x, y):
-        y_predict = model(x)
-        forward_loss = (y_predict - y).square().mean()
-        return forward_loss
-
-    grad_fn = ms.ops.value_and_grad(forward_fn, None, optimizer.parameters)
-
-    @ms.jit()
-    def train_step(x, y):
-        (step_loss, grads) = grad_fn(x, y)
-        step_loss = ms.ops.depend(step_loss, optimizer(grads))
-        return step_loss
-
-    # start model training
-    epoch_losses = []
-    for epoch in range(num_epoch):
-        start_time = time.time()
-        iter_losses = []
-        model.set_train(True)
-        for _, data_ in enumerate(train_iterator):
-            data_x = data_['x']
-            data_y = data_['y']
-            iter_loss = train_step(data_x, data_y)
-            iter_losses.append(iter_loss.asnumpy())
-        # train loss
-        epoch_loss = np.mean(iter_losses)
-        epoch_losses.append(epoch_loss)
-
-        # eval
-        # prepare test data
-        test_data = ds.NumpySlicesDataset(data={'x': test_x, 'y': test_labels})
-        test_data = test_data.batch(batch_size=len(test_labels))
-        test_iterator = test_data.create_dict_iterator()
-        model.set_train(False)
-        for _, data_ in enumerate(test_iterator):
-            data_x_ = data_['x']
-            data_y_ = data_['y']
-            y_predict_ = model(data_x_)
-            # test loss
-            test_loss = (y_predict_ - data_y_).square().mean()
-
-        # print training info
-        record = '[{:03}/{:03}] train loss: {:.6f} , test loss: {:.6f}, time: {:.3f} sec'.format(
-            epoch + 1,
-            num_epoch,
-            epoch_loss,
-            test_loss.asnumpy(),
-            time.time() - start_time)
-        print(record)
-        # save training info
-        if params['save_log']:
-            os.write(train_record, str.encode(record + '\n'))
-
-    # save model checkpoint
-    save_model_file = str(model_name + "_{}_{}.ckpt".format(seed, search_params_no))
-    save_model_dir = os.path.join(folder_dir, save_model_file)
-    ms.save_checkpoint(model, save_model_dir)
-    # save training info
-    if params['save_log']:
-        # loss record saved
-        train_record.close()
-    print('=' * 200 + '\n' + 'Training Complete! Model file saved at' + save_model_dir + '\n' + '==' * 200)
-
-
-def train_tree(model, data, seed, params):
-    ''' Train Tree ranking network'''
-    # load params
-    folder_dir = params['folder_dir']
-    model_name = 'Tree_' + params['model_name']
-    exp_name = params['exp_name']
-    search_params_no = int(params['no'])
-
-    # prepare train data
-    train_features, test_features, train_labels, test_labels = data
-    train_labels, test_labels = train_labels.reshape(-1), test_labels.reshape(-1)
-    # prepare save_dir for checkpoint
-    if not os.path.isdir(folder_dir):
-        os.mkdir(folder_dir)
-        warnings.warn('current model file not exists, please check history model training record.')
-    if params['save_log']:
-        flags = os.O_RDWR | os.O_CREAT
-        modes = stat.S_IWUSR | stat.S_IRUSR
-        train_record = os.open(folder_dir + '/' + model_name + '-' + exp_name + '.txt', flags, modes)
-
-    # start model training
-    model.fit(train_features, train_labels)
-
-    # save model checkpoint
-    save_model_file = str(model_name + "_{}_{}.pkl".format(seed, search_params_no))
-    save_model_dir = os.path.join(folder_dir, save_model_file)
-    joblib.dump(model, save_model_dir)
-
-    # model testing
-    preds = model.predict(test_features)
-    test_loss = np.mean(np.square((preds - test_labels)))
-
-    # print training info
-    record = '[Gradient boosting decision tree test loss: {:.6f}'.format(test_loss)
-    print(record)
-    # save training info
-    if params['save_log']:
-        os.write(train_record, str.encode(record + '\n'))
-        # loss record saved
-        train_record.close()
-    print('=' * 200 + '\n' + 'Training Complete! Model file saved at' + save_model_dir + '\n' + '==' * 200)
diff --git a/MindChemistry/applications/high_entropy_alloy_design/src/utils.py b/MindChemistry/applications/high_entropy_alloy_design/src/utils.py
deleted file mode 100644
index 94eccaa770cd3994db0ee2b3c24f97626b52e22d..0000000000000000000000000000000000000000
--- a/MindChemistry/applications/high_entropy_alloy_design/src/utils.py
+++ /dev/null
@@ -1,88 +0,0 @@
-# Copyright 2023 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""utils for 1st phase generation"""
-import numpy as np
-import matplotlib.pyplot as plt
-import seaborn as sns
-
-from sklearn.mixture import GaussianMixture
-from matplotlib.patches import Ellipse
-
-import mindspore as ms
-
-
-def draw_ellipse(position, covariance, ax=None, **kwargs):
-    ax = ax or plt.gca()
-    if covariance.shape == (2, 2):
-        u, s, _ = np.linalg.svd(covariance)
-        angle = np.degrees(np.arctan2(u[1, 0], u[0, 0]))
-        width, height = 2 * np.sqrt(s)
-    else:
-        angle = 0
-        width, height = 2 * np.sqrt(covariance)
-    for nsig in range(1, 4):
-        ax.add_patch(Ellipse(position, nsig * width, nsig * height,
-                             angle, **kwargs))
-
-
-def plot_gmm(gm, latents):
-    fig, axs = plt.subplots(1, 1, figsize=(2, 2), dpi=200)
-    ax = axs or plt.gca()
-    ax.scatter(latents[:, 0], latents[:, 1], s=5, zorder=2)
-    ax.axis('equal')
-
-    w_factor = 0.2 / gm.weights_.max()
-    for pos, covar, w in zip(gm.means_, gm.covariances_, gm.weights_):
-        draw_ellipse(pos, covar, alpha=0.75 * w * w_factor, facecolor='slategrey', zorder=-10)
-
-
-def gaussian_mixture_model(latents, params):
-    gm = GaussianMixture(n_components=4, random_state=0, init_params='kmeans').fit(latents)
-    print('Average negative log likelihood:', -1 * gm.score(latents))
-    if params['visualize']:
-        plot_gmm(gm, latents)
-        scores = []
-        for i in range(1, 8):
-            gm = GaussianMixture(n_components=i, random_state=0, init_params='kmeans').fit(latents)
-            scores.append(-1 * gm.score(latents))
-        sns.set_style("darkgrid")
-        plt.figure()
-        plt.scatter(range(1, 8), scores, color='green')
-        plt.plot(range(1, 8), scores)
-        plt.savefig(params['folder_dir'] + '/gaussian_mixture_model.png', format='png', dpi=300)
-        plt.show()
-    return gm
-
-
-def sampler(gm, classifier, n_samples, sigma=0.1):
-    sample_z = []
-    z = gm.sample(1)[0]
-    for i in range(n_samples):
-        uniform_rand = np.random.uniform(size=1)
-        z_next = np.random.multivariate_normal(z.squeeze(), sigma * np.eye(2)).reshape(1, -1)
-        z_combined = np.concatenate((z, z_next), axis=0)
-        scores = classifier(ms.Tensor(z_combined, ms.float32)).asnumpy().squeeze()
-        z_score, z_next_score = np.log(scores[0]), np.log(scores[1])
-        z_prob, z_next_prob = (gm.score(z) + z_score), (gm.score(z_next) + z_next_score)
-        acceptance = min(0, (z_next_prob - z_prob))
-        if i == 0:
-            sample_z.append(z.squeeze())
-
-        if np.log(uniform_rand) < acceptance:
-            sample_z.append(z_next.squeeze())
-            z = z_next
-        else:
-            pass
-    return np.stack(sample_z)
diff --git a/MindChemistry/applications/high_entropy_alloy_design/train.py b/MindChemistry/applications/high_entropy_alloy_design/train.py
deleted file mode 100644
index 89429ba776757dfd263d2232291e33ee725c7daf..0000000000000000000000000000000000000000
--- a/MindChemistry/applications/high_entropy_alloy_design/train.py
+++ /dev/null
@@ -1,76 +0,0 @@
-# Copyright 2023 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""train process"""
-import os
-import io
-import warnings
-import datetime
-import argparse
-import yaml
-
-import mindspore as ms
-
-from src import GenerationModule, RankingModule
-
-if __name__ == "__main__":
-    # set params
-    # load arg parser
-    parser = argparse.ArgumentParser(description="Argparser for Train")
-    parser.add_argument("-s", "--stage", type=list, default=[2])
-    parser.add_argument("-r", "--root", type=str, default=os.path.abspath('.'))
-    parser.add_argument("-n", "--exp_name", type=str, default=str(datetime.datetime.now())[:19].replace(" ", "-"))
-    parser.add_argument("-m", "--mode", default=ms.GRAPH_MODE)
-    parser.add_argument("-dt", "--device_target", type=str, default='Ascend')
-    parser.add_argument("-di", "--device_id", type=int, default=6)
-    parser.add_argument("-c", "--config_path", type=str, default=os.path.abspath('.') + '/config.yml')
-    args = parser.parse_args()
-
-    # load config
-    with io.open(args.config_path, 'r') as stream:
-        params = yaml.safe_load(stream)
-    train_params = params['train_params']
-    wae_params = params['wae_params']
-    cls_params = params['cls_params']
-    ensem_params = params['ensem_params']
-    train_params['root'] = args.root
-    train_params['exp_name'] = args.exp_name
-
-    # create save directory for current experiment
-    exp_dir = os.path.join(args.root, 'save_dir', args.exp_name)
-    if not os.path.isdir(exp_dir):
-        os.mkdir(exp_dir)
-    else:
-        warnings.warn('Current experiment file exists.')
-    train_params['folder_dir'] = exp_dir
-
-    # set context
-    ms.set_context(mode=args.mode, device_target=args.device_target, device_id=args.device_id)
-
-    # generation model train:
-    if 1 in args.stage:
-        # set generation models trainer
-        wae_params.update(train_params)
-        cls_params.update(train_params)
-        gen_trainer = GenerationModule(wae_params, cls_params)
-        # generation models training
-        gen_trainer.train()
-
-    # ranking model train:
-    if 2 in args.stage:
-        # set ranking model trainer
-        ensem_params.update(train_params)
-        rank_trainer = RankingModule(ensem_params)
-        # ranking model training
-        rank_trainer.train()
diff --git a/tests/st/mindchemistry/cell/test_cdvae/test_cdvae.py b/tests/st/mindchemistry/cell/test_cdvae/test_cdvae.py
index a1afd77aefc60444421ffb26fd78336d40e982d4..8751dbaa8c86dc5023f56b970afbf9940e436627 100644
--- a/tests/st/mindchemistry/cell/test_cdvae/test_cdvae.py
+++ b/tests/st/mindchemistry/cell/test_cdvae/test_cdvae.py
@@ -21,10 +21,8 @@ import pytest
 import mindspore as ms
 from mindspore import context, Tensor, mint
 
-from mindchemistry.cell import CDVAE
 from mindchemistry.cell.dimenet.dimenet_wrap import DimeNetWrap
 from mindchemistry.cell.gemnet.gemnet_wrap import GemNetWrap
-from mindchemistry.cell.gemnet.data_utils import StandardScalerMindspore
 
 ms.set_seed(1234)
 np.random.seed(1234)
@@ -34,58 +32,6 @@ context.set_context(mode=context.PYNATIVE_MODE)
 config_path = "./configs.yaml"
 data_config_path = "./perov_5.yaml"
 
-
-@pytest.mark.level0
-@pytest.mark.platform_arm_ascend910b_training
-@pytest.mark.env_onecard
-def test_cdvae():
-    """
-    Feature: Test CDVAE in platform ascend.
-    Description: The forward output should has expected shape.
-    Expectation: Success or throw AssertionError.
-    """
-
-    cdvae_model = CDVAE(config_path, data_config_path)
-
-    # input data
-    batch_size = 2
-    atom_types = Tensor([6, 7, 6, 8], ms.int32)
-    dist = Tensor([1.4, 1.7, 1.8, 1.9, 2.0, 2.1, 1.8, 1.6], ms.float32)
-    idx_kj = Tensor([0, 1, 2, 3, 4, 5, 5, 4, 3, 2, 1, 0, 7, 6, 6, 7], ms.int32)
-    idx_ji = Tensor([1, 0, 3, 2, 5, 4, 4, 5, 2, 3, 0, 1, 6, 7, 7, 6], ms.int32)
-    edge_j = Tensor([0, 1, 1, 0, 2, 3, 3, 2], ms.int32)
-    edge_i = Tensor([1, 0, 0, 1, 3, 2, 2, 3], ms.int32)
-    batch = Tensor([0, 0, 1, 1], ms.int32)
-    lengths = Tensor([[2.5, 2.5, 2.5],
-                      [2.5, 2.5, 2.5]], ms.float32)
-    angles = Tensor([[90.0, 90.0, 90.0],
-                     [90.0, 90.0, 90.0]], ms.float32)
-    frac_coords = Tensor([[0.0, 0.0, 0.0],
-                          [0.5, 0.5, 0.5],
-                          [0.7, 0.7, 0.7],
-                          [0.5, 0.5, 0.5]], ms.float32)
-    num_atoms = Tensor([2, 2], ms.int32)
-    y = Tensor([0.08428, 0.01353], ms.float32)
-    total_atoms = 4
-    np.random.seed(1234)
-    sbf = Tensor(np.random.randn(16, 42), ms.float32)
-    cdvae_model.lattice_scaler = StandardScalerMindspore(
-        Tensor([2.5, 2.5, 2.5, 90.0, 90.0, 90.0], ms.float32),
-        Tensor([1.0, 1.0, 1.0, 1.0, 1.0, 1.0], ms.float32))
-    cdvae_model.scaler = StandardScalerMindspore(
-        Tensor([2.62], ms.float32),
-        Tensor([1.0], ms.float32))
-
-    out = cdvae_model(atom_types, dist,
-                      idx_kj, idx_ji, edge_j, edge_i,
-                      batch, lengths, num_atoms,
-                      angles, frac_coords, y, batch_size,
-                      sbf, total_atoms, False, True)
-
-    assert mint.isclose(out, ms.Tensor(29.453514), rtol=1e-4, atol=1e-4), f"For `CDVAE`, the output should be\
-        29.4535, but got {out}."
-
-
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend910b_training
 @pytest.mark.env_onecard
diff --git a/tests/st/mindchemistry/cell/test_diffcsp.py b/tests/st/mindchemistry/cell/test_diffcsp.py
index 7756c89f4226a35413990b58564804b45c9763eb..8f279bd53e225a1ff59936c620263abe7acc4712 100644
--- a/tests/st/mindchemistry/cell/test_diffcsp.py
+++ b/tests/st/mindchemistry/cell/test_diffcsp.py
@@ -26,8 +26,8 @@ from mindchemistry.cell import CSPNet
 from mindchemistry.graph.graph import LiftGlobalToNode
 from mindchemistry.graph.loss import L2LossMask
 
-LTOL = 0.6
-FTOL = 0.6
+LTOL = 0.7
+FTOL = 0.7
 
 class SinusoidalTimeEmbeddings(nn.Cell):
     """ Embedding for the time step in diffution."""