diff --git a/ACL_TensorFlow/contrib/cv/OOD_ID2046_for_ACL/.keep b/ACL_TensorFlow/contrib/cv/OOD_ID2046_for_ACL/.keep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/ACL_TensorFlow/contrib/cv/OOD_ID2046_for_ACL/LICENSE b/ACL_TensorFlow/contrib/cv/OOD_ID2046_for_ACL/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..5ea8a5f7b6ae91ebb12b7f2fa71a5432bb89de63 --- /dev/null +++ b/ACL_TensorFlow/contrib/cv/OOD_ID2046_for_ACL/LICENSE @@ -0,0 +1,284 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +------------------ +Files: third_party/compute_library/... + +MIT License + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +------------------ +Files: ACKNOWLEDGEMENTS +LICENSE + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +------------------ +Files: third_party/hexagon + +Copyright (c) 2016-2019, The Linux Foundation. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted (subject to the limitations in the +disclaimer below) provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of The Linux Foundation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE +GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT +HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN +IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
\ No newline at end of file
diff --git a/ACL_TensorFlow/contrib/cv/OOD_ID2046_for_ACL/ModelZoo_level.txt b/ACL_TensorFlow/contrib/cv/OOD_ID2046_for_ACL/ModelZoo_level.txt
new file mode 100644
index 0000000000000000000000000000000000000000..319c4d1963a64bdc5cd00cebcfd015b2935cb760
--- /dev/null
+++ b/ACL_TensorFlow/contrib/cv/OOD_ID2046_for_ACL/ModelZoo_level.txt
@@ -0,0 +1,4 @@
+ModelConvert:OK
+QuantStatus:OK
+FuncStatus:OK
+PerfStatus:OK
diff --git a/ACL_TensorFlow/contrib/cv/OOD_ID2046_for_ACL/README.md b/ACL_TensorFlow/contrib/cv/OOD_ID2046_for_ACL/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..8a138041eff0b2ffe588fce028580252b9724855
--- /dev/null
+++ b/ACL_TensorFlow/contrib/cv/OOD_ID2046_for_ACL/README.md
@@ -0,0 +1,117 @@
+# README
+
+## 1. About the project
+OOD is a genomics out-of-distribution (OOD) detection network for bacteria. Its key contribution is the Likelihood Ratios method, which improves the network's accuracy at detecting out-of-distribution bacterial sequences.
+
+Paper: [paper](https://arxiv.org/abs/1906.02845)
+
+Original source code: [code](https://github.com/google-research/google-research/tree/master/genomics_ood)
+
+## 2. Code structure
+```
+├── config
+│   ├── om_in.sh                       # msame inference on in_val_data
+│   ├── om_ood.sh                      # msame inference on ood_val_data
+│   └── pb2om.sh                       # atc command
+├── log
+│   ├── bin_data                       # data converted to .bin format
+│   │   ├── in_val_data                # in-distribution data
+│   │   └── ood_val_data               # out-of-distribution data
+│   ├── ckpt_file                      # ckpt files
+│   ├── frozen_pb_file                 # intermediate (unfrozen) graph
+│   ├── om_file                        # om model
+│   ├── original_data                  # raw data
+│   │   ├── between_2011-2016_in_val   # in-distribution data
+│   │   └── between_2011-2016_ood_val  # out-of-distribution data
+│   ├── pb_file                        # frozen pb model
+│   └── result                         # offline inference results
+├── auroc.py                           # computes the final AUROC metric from the inference results
+├── ckpt2pb.py                         # converts ckpt files to pb
+├── data2bin.py                        # converts raw data to .bin format
+├── generative.py                      # network definition, used for ckpt-to-pb conversion
+├── LICENSE
+├── ModelZoo_level.txt
+├── msame                              # msame binary; build it for your own environment
+├── README.md
+├── requirements.txt                   # required third-party packages
+└── utils.py
+```
+## 3. Test set and models
+
+The test set goes under ./log/original_data/.
+Both the test datasets and the model files are hosted on Baidu Netdisk: [Baidu Netdisk link](https://pan.baidu.com/s/1wDSn-rkcyE2Hjr6lQCxw9w?pwd=mqpt)
+Extraction code: mqpt
+The corresponding Netdisk directories also contain already-converted pb and om models as well as the .bin data files.
+## 4. pb model
+
+Download the original ckpt files into log/ckpt_file/; ckpt-218000 is used. Run the following command to convert the checkpoint and generate the frozen pb model.
+
+```
+python3 ckpt2pb.py
+```
+ckpt2pb.py hard-codes the output paths: the frozen pb model (the one consumed by the pb-to-om conversion) is written to ./log/pb_file/result.pb, \
+and the intermediate unfrozen graph is staged as ./log/frozen_pb_file/model.pb.
+
+
+## 5. Generating the om model
+
+Use the atc command to convert the pb model into an om model:
+
+```
+bash ./config/pb2om.sh
+```
+
+Make sure the file paths in the script match your setup; soc_version defaults to Ascend910.
+
+## 6. Converting the test set to bin files
+
+Run the following commands to preprocess the test data into the .bin files used as network input. data2bin.py hard-codes ./log/bin_data as the parent directory for the generated files.
+The test set consists of in-distribution data (under ./log/original_data/between_2011-2016_in_val) and out-of-distribution data (under ./log/original_data/between_2011-2016_ood_val).
+Each part contains 10,000 samples, and one .bin file is generated per 100 samples.
+Convert the in-distribution data with:
+```
+python3 data2bin.py --in_val_data=True --out_dir ./log/bin_data/in_val_data
+```
+Convert the out-of-distribution data with:
+```
+python3 data2bin.py --in_val_data=False --out_dir ./log/bin_data/ood_val_data
+```
+The resulting .bin files are stored under ./log/bin_data/in_val_data (in-distribution) and ./log/bin_data/ood_val_data (out-of-distribution); a quick way to sanity-check them is sketched in section 10 below.
+## 7. Running inference with the om model
+
+Use the msame tool for inference; see the [msame introduction](https://gitee.com/ascend/tools/tree/master/msame). Reference commands follow.
+Inference on in-distribution data:
+
+```
+bash config/om_in.sh
+```
+The results are written to ./log/result. Rename the result directory to in_val_result, or update the corresponding path in auroc.py.
+
+Inference on out-of-distribution data:
+```
+bash config/om_ood.sh
+```
+The results are written to ./log/result. Rename the result directory to ood_val_result, or update the corresponding path in auroc.py.
+Make sure the file paths in the scripts match your setup.
+
+
+## 8. Offline inference performance of the om model
+
+The average inference time is 858.35 ms per batch of 100 samples.
+
+
+## 9. Offline inference results: om vs. NPU and GPU
+
+Run the following command to compute the AUROC metric:
+
+```
+ python3 auroc.py
+```
+|       | Paper | GPU  | NPU  | om   |
+|-------|-------|------|------|------|
+| AUROC | 0.626 | 0.677 | 0.641 | 0.665 |
+
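+## 10. Verifying a generated bin file (optional)
+
+A minimal sanity-check sketch for a converted .bin file. It assumes the file was produced by data2bin.py above (each file packs 100 int32 sequences of length 250, matching the om input shape x_in:[100, 250]); the file name 100.bin is just an example.
+
+```
+import numpy as np
+
+# data2bin.py writes raw int32 values with numpy.tofile
+x = np.fromfile('./log/bin_data/in_val_data/100.bin', dtype=np.int32)
+assert x.size == 100 * 250
+x = x.reshape(100, 250)
+print(x[0, :10])  # token ids, values in {0, 1, 2, 3} (vocab_size=4)
+```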
diff --git a/ACL_TensorFlow/contrib/cv/OOD_ID2046_for_ACL/auroc.py b/ACL_TensorFlow/contrib/cv/OOD_ID2046_for_ACL/auroc.py
new file mode 100644
index 0000000000000000000000000000000000000000..f5c342609c394db10f2aed92b85184e0ef2809b7
--- /dev/null
+++ b/ACL_TensorFlow/contrib/cv/OOD_ID2046_for_ACL/auroc.py
@@ -0,0 +1,69 @@
+# coding=utf-8
+# Copyright 2022 The Google Research Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+
+import numpy as np
+import tensorflow as tf
+from numpy import mean
+from sklearn.metrics import roc_auc_score
+
+in_val_output_dir = "./log/result/in_val_result"
+ood_val_output_dir = "./log/result/ood_val_result"
+
+
+def main():
+  # sort so that the in-distribution and OOD batch files pair up
+  # deterministically
+  in_val_output_file_list = sorted(
+      os.path.join(in_val_output_dir, x)
+      for x in tf.gfile.ListDirectory(in_val_output_dir)
+      if '.txt' in x)
+
+  ood_val_output_file_list = sorted(
+      os.path.join(ood_val_output_dir, x)
+      for x in tf.gfile.ListDirectory(ood_val_output_dir)
+      if '.txt' in x)
+
+  auc_list = []
+  for i in range(100):
+    in_loss_i = []
+    ood_loss_i = []
+    # each msame output txt holds the per-sample losses of one 100-sample batch
+    with open(in_val_output_file_list[i], 'r') as f:
+      for line in f.readlines():
+        # split the line and convert the values to float
+        in_loss_i = list(map(float, line.split()))
+    with open(ood_val_output_file_list[i], 'r') as f:
+      for line in f.readlines():
+        ood_loss_i = list(map(float, line.split()))
+    # auc using raw likelihood, larger for OOD
+    neg = np.array(in_loss_i)
+    pos = np.array(ood_loss_i)
+    auc = roc_auc_score([0] * neg.shape[0] + [1] * pos.shape[0],
+                        np.concatenate((neg, pos), axis=0))
+    auc_list.append(auc)
+
+  print(mean(auc_list))
+
+
+if __name__ == '__main__':
+  main()
diff --git a/ACL_TensorFlow/contrib/cv/OOD_ID2046_for_ACL/ckpt2pb.py b/ACL_TensorFlow/contrib/cv/OOD_ID2046_for_ACL/ckpt2pb.py
new file mode 100644
index 0000000000000000000000000000000000000000..23518e52177a5a79d2ee0c2b855f7ef7d4d7afd2
--- /dev/null
+++ b/ACL_TensorFlow/contrib/cv/OOD_ID2046_for_ACL/ckpt2pb.py
@@ -0,0 +1,106 @@
+# coding=utf-8
+# Copyright 2022 The Google Research Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
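+"""Freeze the OOD generative model: checkpoint -> GraphDef -> frozen pb.
+
+Rebuilds the SeqModel graph with fixed input shapes (x_in: [100, 250]) and
+freezes it against the checkpoint given by --ckpt_path, keeping 'loss_i' as
+the output node consumed by the ATC conversion in config/pb2om.sh.
+"""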
+import tensorflow as tf
+from tensorflow.python.tools import freeze_graph
+from tensorflow.python.framework import graph_util
+
+# NPU-related imports (needed so the graph builds with the NPU bridge)
+import npu_bridge
+from npu_bridge.npu_init import *
+from npu_bridge.estimator import npu_ops
+from npu_bridge.estimator.npu import util
+from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig
+
+from absl import flags
+from tensorflow.contrib import training as contrib_training
+from generative import SeqModel
+from generative import load_datasets
+from generative import create_out_dir
+
+# parameters (the remaining flags are defined in generative.py)
+FLAGS = tf.app.flags.FLAGS
+
+flags.DEFINE_string('ckpt_path', './log/ckpt_file/model_218000.ckpt',
+                    'directory of ckpt')
+
+config = tf.ConfigProto()
+# NPU session configuration
+custom_op = config.graph_options.rewrite_options.custom_optimizers.add()
+custom_op.name = "NpuOptimizer"
+custom_op.parameter_map["use_off_line"].b = True
+config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF
+config.graph_options.rewrite_options.remapping = RewriterConfig.OFF  # remapping must be explicitly disabled
+
+
+def main():
+  tf.reset_default_graph()
+  ckpt_path = FLAGS.ckpt_path
+  params = contrib_training.HParams(
+      num_steps=FLAGS.num_steps,
+      val_freq=FLAGS.val_freq,
+      seq_len=FLAGS.seq_len,
+      batch_size=FLAGS.batch_size,
+      emb_variable=FLAGS.emb_variable,
+      emb_size=FLAGS.emb_size,
+      vocab_size=4,
+      hidden_lstm_size=FLAGS.hidden_lstm_size,
+      norm_lstm=FLAGS.norm_lstm,
+      dropout_rate=FLAGS.dropout_rate,
+      learning_rate=FLAGS.learning_rate,
+      reg_type=FLAGS.reg_type,
+      reg_weight=FLAGS.reg_weight,
+      out_dir=FLAGS.out_dir,
+      in_tr_data_dir=FLAGS.in_tr_data_dir,
+      in_val_data_dir=FLAGS.in_val_data_dir,
+      ood_val_data_dir=FLAGS.ood_val_data_dir,
+      master=FLAGS.master,
+      save_meta=FLAGS.save_meta,
+      filter_label=FLAGS.filter_label,
+      mutation_rate=FLAGS.mutation_rate,
+  )
+  create_out_dir(params)
+  x_in = tf.placeholder(tf.int32, shape=[100, 250], name="x_in")
+  y_in = tf.placeholder(tf.int32, shape=[100, ], name="y_in")
+  model = SeqModel(params, x_in, y_in)
+  model.reset()
+  loss_i = tf.identity(model.loss_i, name='loss_i')
+  with tf.Session(config=config) as sess:
+    graph_def = tf.get_default_graph().as_graph_def(add_shapes=True)
+    for node in [n.name for n in graph_def.node]:
+      print("node_name", node)
+    # the unfrozen GraphDef is staged under frozen_pb_file/; the frozen
+    # result below goes to pb_file/, which is what pb2om.sh consumes
+    tf.train.write_graph(sess.graph_def, './log/frozen_pb_file', 'model.pb')
+    freeze_graph.freeze_graph(
+        input_graph='./log/frozen_pb_file/model.pb',
+        input_saver='',
+        input_binary=False,
+        input_checkpoint=ckpt_path,
+        output_node_names='loss_i',
+        restore_op_name='save/restore_all',
+        filename_tensor_name='save/Const:0',
+        output_graph='./log/pb_file/result.pb',
+        clear_devices=False,
+        initializer_nodes='')
+  print("done")
+
+
+if __name__ == '__main__':
+  main()
diff --git a/ACL_TensorFlow/contrib/cv/OOD_ID2046_for_ACL/config/om_in.sh b/ACL_TensorFlow/contrib/cv/OOD_ID2046_for_ACL/config/om_in.sh
new file mode 100644
index 0000000000000000000000000000000000000000..d4b93c07f74f7cdc7bf85e1c12e6d84932f201b3
--- /dev/null
+++ b/ACL_TensorFlow/contrib/cv/OOD_ID2046_for_ACL/config/om_in.sh
@@ -0,0 +1 @@
+./msame --model "./log/om_file/result.om" --input "./log/bin_data/in_val_data/" --output "./log/result/" --outfmt TXT
diff --git a/ACL_TensorFlow/contrib/cv/OOD_ID2046_for_ACL/config/om_ood.sh b/ACL_TensorFlow/contrib/cv/OOD_ID2046_for_ACL/config/om_ood.sh
new file mode
100644 index 0000000000000000000000000000000000000000..e58e427b0eb20c90d3a6d80fe362dd78008d56da --- /dev/null +++ b/ACL_TensorFlow/contrib/cv/OOD_ID2046_for_ACL/config/om_ood.sh @@ -0,0 +1 @@ +./msame --model "./log/om_file/result.om" --input "./log/bin_data/ood_val_data/" --output "./log/result/" --outfmt TXT diff --git a/ACL_TensorFlow/contrib/cv/OOD_ID2046_for_ACL/config/pb2om.sh b/ACL_TensorFlow/contrib/cv/OOD_ID2046_for_ACL/config/pb2om.sh new file mode 100644 index 0000000000000000000000000000000000000000..12fe9e8a5859146d6a10e7c2e5a02ff9e497bec4 --- /dev/null +++ b/ACL_TensorFlow/contrib/cv/OOD_ID2046_for_ACL/config/pb2om.sh @@ -0,0 +1,8 @@ +export PATH=/usr/local/python3.7.5/bin:$PATH +export PYTHONPATH=/usr/local/Ascend/ascend-toolkit/latest/atc/python/site-packages/te:$PYTHONPATH +export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/atc/lib64:${LD_LIBRARY_PATH} + +/usr/local/Ascend/ascend-toolkit/latest/atc/bin/atc --model=./log/pb_file/result.pb --framework=3 --output=./log/om_file/result --soc_version=Ascend910 \ + --input_shape="x_in:100,250" \ + --log=info \ + --out_nodes="loss_i:0" diff --git a/ACL_TensorFlow/contrib/cv/OOD_ID2046_for_ACL/data2bin.py b/ACL_TensorFlow/contrib/cv/OOD_ID2046_for_ACL/data2bin.py new file mode 100644 index 0000000000000000000000000000000000000000..d44c3f2f7bb3294fdd9217c43d924d53e9e676af --- /dev/null +++ b/ACL_TensorFlow/contrib/cv/OOD_ID2046_for_ACL/data2bin.py @@ -0,0 +1,90 @@ +# coding=utf-8 +# Copyright 2022 The Google Research Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
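+"""Convert the validation TFRecords into raw .bin inputs for msame.
+
+Each generated file packs 100 sequences of length 250 as int32, which
+matches the om model input shape x_in: [100, 250] set in config/pb2om.sh.
+"""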
+import os
+
+import absl.flags as flags
+import numpy
+import tensorflow as tf
+
+import utils
+
+FLAGS = tf.app.flags.FLAGS
+flags.DEFINE_boolean(
+    'in_val_data', True,
+    'False is ood_val_data, True is in_val_data')
+flags.DEFINE_string('out_dir', './log/bin_data/in_val_data',
+                    'Directory where to write the .bin files.')
+in_val_data_dir = r"./log/original_data/between_2011-2016_in_val"
+ood_val_data_dir = r"./log/original_data/between_2011-2016_ood_val"
+in_val_file_pattern = 'in_val'
+ood_val_file_pattern = 'ood_val'
+
+
+def main():
+  num_epoch = 1
+  batch_size = 1
+  in_val_data = FLAGS.in_val_data
+  out_dir = FLAGS.out_dir
+  if in_val_data:
+    data_dir = in_val_data_dir
+    file_pattern = in_val_file_pattern
+  else:
+    data_dir = ood_val_data_dir
+    file_pattern = ood_val_file_pattern
+
+  # make sure the output directory exists
+  if not tf.gfile.Exists(out_dir):
+    tf.gfile.MakeDirs(out_dir)
+
+  data_file_list = [
+      os.path.join(data_dir, x)
+      for x in tf.gfile.ListDirectory(data_dir)
+      if file_pattern in x and '.tfrecord' in x
+  ]
+  tf.logging.info('data_file_list=%s', data_file_list)
+  dataset = tf.data.TFRecordDataset(
+      data_file_list).map(lambda v: utils.parse_single_tfexample(v, v))
+
+  dataset = dataset.repeat(num_epoch)
+  dataset = dataset.batch(batch_size)  # Batch size to use
+  iterator = dataset.make_one_shot_iterator()
+  features = iterator.get_next()
+
+  val_x = []
+  val_y = []
+
+  i = 1
+  with tf.Session() as sess:
+    try:
+      # take the first 10,000 samples and dump one .bin per 100 samples
+      while i < 10001:
+        x, y = sess.run([features['x'], features['y']])
+        val_x.append(x)
+        val_y.append(y)
+        if i % 100 == 0:
+          val_x = numpy.array(val_x)
+          val_x = val_x.reshape([100, 250])
+          # labels are collected but not written: offline inference only
+          # needs the sequences
+          val_y = numpy.array(val_y)
+          val_y = val_y.reshape([100, 1])
+          val_x.tofile(os.path.join(out_dir, "{}.bin".format(i)))
+          val_x = []
+          val_y = []
+          print(i)
+        i = i + 1
+    except tf.errors.OutOfRangeError:
+      print(i)
+
+
+if __name__ == '__main__':
+  main()
diff --git a/ACL_TensorFlow/contrib/cv/OOD_ID2046_for_ACL/generative.py b/ACL_TensorFlow/contrib/cv/OOD_ID2046_for_ACL/generative.py
new file mode 100644
index 0000000000000000000000000000000000000000..80851ef01350e71266903b62b464c2ca37b9f364
--- /dev/null
+++ b/ACL_TensorFlow/contrib/cv/OOD_ID2046_for_ACL/generative.py
@@ -0,0 +1,539 @@
+# coding=utf-8
+# Copyright 2022 The Google Research Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +r"""Build an autoregressive generative model for DNA sequences.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from npu_bridge.npu_init import * +import json +import os +import random +from absl import flags +import numpy as np +from sklearn.metrics import roc_auc_score +import tensorflow as tf +import utils +from tensorflow.contrib import rnn as contrib_rnn +from tensorflow.contrib import training as contrib_training + +import time + +# parameters +FLAGS = flags.FLAGS + +flags.DEFINE_integer('random_seed', 1234, 'The random seed') +flags.DEFINE_integer('batch_size', 100, 'The number of images in each batch.') +flags.DEFINE_integer('num_steps', 1000000, 'The number of trainig steps') +flags.DEFINE_integer('val_freq', 1000, 'How often to eval validation (# steps)') +flags.DEFINE_float('learning_rate', 0.0005, 'The learning rate') +flags.DEFINE_boolean( + 'emb_variable', False, + 'If the word embedding is variables. If not, use one-hot encoding.') +flags.DEFINE_integer('emb_size', 4, 'The word embedding dimensions') +flags.DEFINE_integer('hidden_lstm_size', 2000, + 'The number of hidden units in LSTM.') +flags.DEFINE_boolean('norm_lstm', False, + 'If turn on the layer normalization for LSTM.') +flags.DEFINE_float('dropout_rate', 0.1, 'The learning rate') +flags.DEFINE_string( + 'reg_type', 'l2', + 'l2 or l1 regularization for parameters in lstm and dense layers.') +flags.DEFINE_float( + 'reg_weight', 0.0, + 'The regularization weight for parameters in lstm and dense layers.') +flags.DEFINE_integer('seq_len', 250, 'sequence length') +flags.DEFINE_float('mutation_rate', 0.0, 'Mutation rate for data augmentation.') +flags.DEFINE_integer( + 'filter_label', -1, + ('If only sequences from the class=filter_label are used for training.' 
+ 'if -1, no filter.')) +flags.DEFINE_string('in_tr_data_dir', '/home/test_user04/dataset/before_2011_in_tr', + 'data directory of in-distribution training') +flags.DEFINE_string('in_val_data_dir', '/home/test_user04/dataset/between_2011-2016_in_val', + 'data directory of in-distribution validation') +flags.DEFINE_string('ood_val_data_dir', '/home/test_user04/dataset/between_2011-2016_ood_val', + 'data directory of OOD validation') +flags.DEFINE_string('out_dir', '/tmp/out_generative', + 'Directory where to write log and models.') +flags.DEFINE_boolean('save_meta', False, 'Save meta graph file for each ckpt.') +flags.DEFINE_string('master', '', 'TensorFlow master to use.') + +FLAGS = flags.FLAGS + + +def create_out_dir(params): + """Setup the output directory.""" + params.in_tr_data_dir = utils.clean_last_slash_if_any(params.in_tr_data_dir) + params.in_val_data_dir = utils.clean_last_slash_if_any(params.in_val_data_dir) + params.ood_val_data_dir = utils.clean_last_slash_if_any( + params.ood_val_data_dir) + + sub_dir = ('generative_l%d_bs%d_lr%.4f' + '_hr%d_nr%s_reg%s_regw%.6f_fi%d_mt%.2f') % ( + params.seq_len, params.batch_size, params.learning_rate, + params.hidden_lstm_size, params.norm_lstm, params.reg_type, + params.reg_weight, params.filter_label, params.mutation_rate) + log_dir = os.path.join(params.out_dir, sub_dir, 'log') + params.add_hparam('log_dir_in_tr', os.path.join(log_dir, 'in_tr')) + params.add_hparam('log_dir_in_val', os.path.join(log_dir, 'in_val')) + params.add_hparam('model_dir', log_dir.replace('log', 'model')) + + if not tf.gfile.Exists(params.out_dir): + tf.gfile.MakeDirs(params.out_dir) + if not tf.gfile.Exists(params.log_dir_in_tr): + tf.gfile.MakeDirs(params.log_dir_in_tr) + if not tf.gfile.Exists(params.log_dir_in_val): + tf.gfile.MakeDirs(params.log_dir_in_val) + if not tf.gfile.Exists(params.model_dir): + tf.gfile.MakeDirs(params.model_dir) + + tf.logging.info('model_dir=%s', params.model_dir) + + +def filter_for_label(features, target_label): + """A filter for dataset to get seqs with a specific label.""" + ### TODO(jjren) not working + return tf.equal(features['y'], + tf.convert_to_tensor(target_label, dtype=tf.int32)) + + +def load_datasets(params, mode_eval=False): + """load class labels, in_tr_data, in_val_data, ood_val_data.""" + if mode_eval: # For evaluation, no need to prepare training data + in_tr_dataset = None + else: + in_tr_file_list = [ + os.path.join(params.in_tr_data_dir, x) + for x in tf.gfile.ListDirectory(params.in_tr_data_dir) + if params.in_tr_file_pattern in x + ] + + # load in-distribution training sequence + in_tr_data_file_list = [x for x in in_tr_file_list if '.tfrecord' in x] + tf.logging.info('in_tr_data_file_list=%s', in_tr_data_file_list) + + def parse_single_tfexample_addmutations_short(unused_key, v): + return utils.parse_single_tfexample_addmutations(unused_key, v, + params.mutation_rate, + params.seq_len) + + # for training a background model, we mutate input sequences + if params.mutation_rate == 0: + in_tr_dataset = tf.data.TFRecordDataset(in_tr_data_file_list).map( + lambda v: utils.parse_single_tfexample(v, v)) + else: + in_tr_dataset = tf.data.TFRecordDataset(in_tr_data_file_list).map( + lambda v: parse_single_tfexample_addmutations_short(v, v)) + + if params.filter_label != -1: + + def filter_fn(v): + return filter_for_label(v, params.filter_label) + + in_tr_dataset = in_tr_dataset.filter(filter_fn) + + # in-distribution validation + in_val_data_file_list = [ + os.path.join(params.in_val_data_dir, x) + for x in 
tf.gfile.ListDirectory(params.in_val_data_dir)
+      if params.in_val_file_pattern in x and '.tfrecord' in x
+  ]
+  tf.logging.info('in_val_data_file_list=%s', in_val_data_file_list)
+  in_val_dataset = tf.data.TFRecordDataset(
+      in_val_data_file_list).map(lambda v: utils.parse_single_tfexample(v, v))
+
+  # ood validation
+  ood_val_data_file_list = [
+      os.path.join(params.ood_val_data_dir, x)
+      for x in tf.gfile.ListDirectory(params.ood_val_data_dir)
+      if params.ood_val_file_pattern in x and '.tfrecord' in x
+  ]
+  tf.logging.info('ood_val_data_file_list=%s', ood_val_data_file_list)
+  ood_val_dataset = tf.data.TFRecordDataset(
+      ood_val_data_file_list).map(lambda v: utils.parse_single_tfexample(v, v))
+
+  return in_tr_dataset, in_val_dataset, ood_val_dataset
+
+
+class SeqModel(object):
+  """DNA sequence modeling."""
+
+  def __init__(self, params, x=None, y0=None):
+    """Create the model.
+
+    If x and y0 tensors are given (e.g. the fixed-shape placeholders used to
+    freeze the graph in ckpt2pb.py), they are used directly; otherwise the
+    string-handle dataset iterator needed for training is built.
+    """
+    self._params = params
+
+    if x is None:
+      self._make_dataset()
+    else:
+      self.x = x
+      self.y0 = y0
+    self._make_placeholders()
+    if self._params.emb_variable:
+      self._make_variables()
+    else:
+      self._one_hot_encode_x()
+    self._make_rnn_model()
+    self._make_losses()
+    self._make_summary_stats()
+    self._make_train_op()
+
+  def _make_dataset(self):
+    """make data generators."""
+    self.handle = tf.placeholder(tf.string, shape=[])
+    self.iterator = tf.data.Iterator.from_string_handle(self.handle, {
+        'x': tf.int32,
+        'y': tf.int32
+    }, {
+        'x': [None, self._params.seq_len],
+        'y': [None]
+    })
+    features = self.iterator.get_next()
+    self.x, self.y0 = features['x'], features['y']
+
+  def _make_placeholders(self):
+    """Make placeholders for dropout rate."""
+    self.dropout_rate = tf.placeholder_with_default(
+        self._params.dropout_rate, shape=(), name='dropout_rnn')
+
+  def _make_variables(self):
+    """make variables."""
+    # emb_size must equal vocab_size,
+    # otherwise out-of-vocab tokens will be encoded as zeros
+    tf.logging.info('using variable dict for embedding')
+    self.emb_dict = tf.Variable(
+        tf.one_hot(
+            list(range(self._params.vocab_size)), depth=self._params.emb_size))
+    self.x_emb = tf.nn.embedding_lookup(
+        self.emb_dict, tf.cast(self.x, dtype=tf.int64), name='embx')
+
+  def _one_hot_encode_x(self):
+    """Make embedding layer."""
+    # input for encoder
+    tf.logging.info('use one hot encoding')
+    self.x_emb = tf.one_hot(
+        tf.cast(self.x, dtype=tf.int64), depth=self._params.vocab_size)
+    tf.logging.info('shape of x_emb=%s', self.x_emb.shape)
+
+  def _make_rnn_model(self):
+    """Make rnn model."""
+    self.y = tf.cast(self.x[:, 1:], dtype=tf.int64)
+    self.y_emb = tf.one_hot(self.y, depth=self._params.emb_size)
+    tf.logging.info('y.shape=%s', self.y.shape)
+
+    lstm_fw_cell_g = contrib_rnn.LayerNormBasicLSTMCell(
+        self._params.hidden_lstm_size,
+        layer_norm=self._params.norm_lstm,
+        dropout_keep_prob=1 - self.dropout_rate)
+    lstm_hidden, _ = tf.nn.dynamic_rnn(
+        lstm_fw_cell_g, self.x_emb, dtype=tf.float32)
+    # stagger inputs and targets by one position so the model predicts the
+    # next token; see
+    # medium.com/@plusepsilon/the-bidirectional-language-model-1f3961d1fb27
+    self.logits = tf.layers.dense(
+        lstm_hidden[:, :-1, :],
+        units=self._params.vocab_size,
+        activation=None,
+        name='logits')
+    tf.logging.info('shape of logits=%s', self.logits.shape)
+
+    # cross entropy
+    self.loss_i_t = tf.nn.softmax_cross_entropy_with_logits(
+        labels=self.y_emb, logits=self.logits)
+    self.loss_i = tf.reduce_mean(self.loss_i_t, axis=1)
+
+  def _make_losses(self):
+    """make loss functions."""
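+    # loss_i (built in _make_rnn_model) is the per-sequence mean negative
+    # log-likelihood; here it is averaged over the batch and combined with
+    # an L1/L2 weight penalty to form the training objective.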
+    self.loss = tf.reduce_mean(self.loss_i)
+    # l2 norm
+    self.variables = tf.trainable_variables()
+    if self._params.reg_type == 'l2':
+      self.loss_reg = tf.add_n(
+          [tf.nn.l2_loss(v) for v in self.variables if 'bias' not in v.name])
+    else:
+      self.loss_reg = tf.add_n([
+          tf.reduce_sum(tf.abs(v))
+          for v in self.variables
+          if 'bias' not in v.name
+      ])
+    # total loss
+    self.loss_total = self.loss + self._params.reg_weight * self.loss_reg
+
+  def _make_summary_stats(self):
+    """make summary stats."""
+    probs = tf.nn.softmax(self.logits)
+    pred_words = tf.argmax(probs, axis=2)
+    self.acc_i_t = tf.equal(pred_words, tf.cast(self.y, dtype=tf.int64))
+    self.acc_i = tf.reduce_mean(tf.cast(self.acc_i_t, dtype=tf.float32), axis=1)
+    self.acc = tf.reduce_mean(self.acc_i)
+
+    self.summary = tf.summary.merge([
+        tf.summary.scalar('loss', self.loss),
+        tf.summary.scalar('acc', self.acc),
+        tf.summary.scalar('loss_total', self.loss_total),
+        tf.summary.scalar('loss_reg', self.loss_reg)
+    ])
+
+  def _make_train_op(self):
+    """make train op."""
+    # training operations
+    optimizer = tf.train.AdamOptimizer(self._params.learning_rate)
+    grads = optimizer.compute_gradients(
+        self.loss_total, var_list=self.variables)
+    self.minimize = optimizer.apply_gradients(grads)
+
+  def reset(self):
+    """prepare sess."""
+    # set up the NPU session
+    config = tf.ConfigProto()
+    custom_op = config.graph_options.rewrite_options.custom_optimizers.add()
+    custom_op.name = "NpuOptimizer"
+    custom_op.parameter_map["use_off_line"].b = True  # run on the Ascend AI processor
+    #custom_op.parameter_map["mix_compile_mode"].b = False  # disable mixed CPU/NPU compilation
+    custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision")  # enable mixed precision
+    #custom_op.parameter_map["auto_tune_mode"].s = tf.compat.as_bytes("RL,GA")  # enable Auto Tune
+    #custom_op.parameter_map["customize_dtypes"].s = tf.compat.as_bytes("./customize_dtypes.cfg")
+    config.graph_options.rewrite_options.remapping = RewriterConfig.OFF  # must be explicitly disabled
+    config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF  # must be explicitly disabled
+    self.sess = tf.Session(config=config)
+    self.sess.run(tf.global_variables_initializer())
+    self.tr_writer = tf.summary.FileWriter(self._params.log_dir_in_tr,
+                                           self.sess.graph)
+    self.val_writer = tf.summary.FileWriter(self._params.log_dir_in_val,
+                                            self.sess.graph)
+    self.saver = tf.train.Saver(max_to_keep=500)
+
+  def print_log(self, msg, print_time=True):
+    """Append a message to the training log file."""
+    if print_time:
+      msg = time.strftime("[%m.%d.%y|%X] ", time.localtime()) + msg
+    with open("work_dir/oodback.log", 'a') as f:
+      print(msg, file=f)
+
+  def train(self, in_tr_dataset, in_val_dataset, ood_val_dataset, prev_steps):
+    """training steps."""
+    in_tr_dataset = in_tr_dataset.repeat().shuffle(1000).batch(
+        self._params.batch_size)
+    in_val_dataset = in_val_dataset.repeat().shuffle(1000).batch(
+        self._params.batch_size)
+    ood_val_dataset = ood_val_dataset.repeat().shuffle(1000).batch(
+        self._params.batch_size)
+
+    in_tr_iterator = in_tr_dataset.make_one_shot_iterator()
+    in_val_iterator = in_val_dataset.make_one_shot_iterator()
+    ood_val_iterator = ood_val_dataset.make_one_shot_iterator()
+
+    self.in_tr_handle = self.sess.run(in_tr_iterator.string_handle())
+    self.in_val_handle = self.sess.run(in_val_iterator.string_handle())
+    self.ood_val_handle = self.sess.run(ood_val_iterator.string_handle())
+    num_steps = self._params.num_steps
+    for i in range(prev_steps, num_steps, 1):
+      print(i, "step")
+      start = time.time()
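+      # One training step; the string handle selects the in-distribution
+      # training iterator built in _make_dataset().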
+      _, in_tr_loss, _, in_tr_acc, in_tr_summary = self.sess.run(
+          [self.minimize, self.loss, self.loss_i, self.acc, self.summary],
+          feed_dict={
+              self.handle: self.in_tr_handle,
+              self.dropout_rate: self._params.dropout_rate
+          })
+      end = time.time()
+      print(end - start)
+      if i % self._params.val_freq == 0:
+        in_val_loss, in_val_loss_i, in_val_acc, in_val_summary = self.sess.run(
+            [self.loss, self.loss_i, self.acc, self.summary],
+            feed_dict={
+                self.handle: self.in_val_handle,
+                self.dropout_rate: 0
+            })
+
+        ood_val_loss, ood_val_loss_i, ood_val_acc, _ = self.sess.run(
+            [self.loss, self.loss_i, self.acc, self.summary],
+            feed_dict={
+                self.handle: self.ood_val_handle,
+                self.dropout_rate: 0
+            })
+
+        # auc using raw likelihood, larger for OOD
+        neg = in_val_loss_i
+        pos = ood_val_loss_i
+        auc = roc_auc_score([0] * neg.shape[0] + [1] * pos.shape[0],
+                            np.concatenate((neg, pos), axis=0))
+
+        tf.logging.info(
+            ('i=%d \t in_tr_loss=%.4f, in_val_loss=%.4f, ood_val_loss=%.4f\n'
+             'in_tr_acc=%.4f, in_val_acc=%.4f, ood_val_acc=%.4f\n'
+             'auc=%.4f'), i, in_tr_loss, in_val_loss, ood_val_loss, in_tr_acc,
+            in_val_acc, ood_val_acc, auc)
+        self.print_log('i={} \t in_tr_loss={:.4f}, in_val_loss={:.4f}, ood_val_loss={:.4f}\n'
+                       'in_tr_acc={:.4f}, in_val_acc={:.4f}, ood_val_acc={:.4f}\n'
+                       'auc={:.4f}'.format(i, in_tr_loss, in_val_loss, ood_val_loss, in_tr_acc,
+                                           in_val_acc, ood_val_acc, auc))
+
+        _ = self.saver.save(
+            self.sess,
+            os.path.join(self._params.model_dir, 'model_{}.ckpt'.format(i)),
+            write_meta_graph=self._params.save_meta)  # skip meta if the file is too big
+
+        self.tr_writer.add_summary(in_tr_summary, i)
+        self.tr_writer.flush()
+        self.val_writer.add_summary(in_val_summary, i)
+        self.val_writer.flush()
+
+        auc_summary = tf.Summary()
+        auc_summary.value.add(
+            tag='AUROC_using_raw_likelihood', simple_value=auc)
+        self.val_writer.add_summary(auc_summary, i)
+        self.val_writer.flush()
+
+  def finish(self):
+    tf.logging.info('training is done')
+    self.tr_writer.close()
+    self.val_writer.close()
+
+  def restore_from_ckpt(self, ckpt_path):
+    """restore model from a ckpt."""
+    self.saver.restore(self.sess, ckpt_path)
+
+  def pred_from_ckpt(self, x_in, y_in, x_out, y_out, num_samples):
+    """Evaluate in-distribution and OOD batches with the restored model."""
+    x_in = x_in.eval(session=self.sess)
+    y_in = y_in.eval(session=self.sess)
+    x_out = x_out.eval(session=self.sess)
+    y_out = y_out.eval(session=self.sess)
+    for _ in range(num_samples // self._params.batch_size):
+      in_val_loss, in_val_loss_i, in_val_acc, in_val_summary = self.sess.run(
+          [self.loss, self.loss_i, self.acc, self.summary],
+          feed_dict={
+              self.x: x_in,
+              self.y0: y_in,
+              self.dropout_rate: 0  # no dropout at evaluation time
+          })
+
+      ood_val_loss, ood_val_loss_i, ood_val_acc, _ = self.sess.run(
+          [self.loss, self.loss_i, self.acc, self.summary],
+          feed_dict={
+              self.x: x_out,
+              self.y0: y_out,
+              self.dropout_rate: 0  # no dropout at evaluation time
+          })
+
+      # auc using raw likelihood, larger for OOD
+      neg = in_val_loss_i
+      pos = ood_val_loss_i
+      auc = roc_auc_score([0] * neg.shape[0] + [1] *
+                          pos.shape[0],
+                          np.concatenate((neg, pos), axis=0))
+      self.print_log(' in_val_loss={:.4f}, ood_val_loss={:.4f}\n'
+                     ' in_val_acc={:.4f}, ood_val_acc={:.4f}\n'
+                     'auc={:.4f}'.format(in_val_loss, ood_val_loss, in_val_acc, ood_val_acc, auc))
+
+
+def main(_):
+
+  tf.logging.set_verbosity(tf.logging.INFO)
+  random.seed(FLAGS.random_seed)
+
+  params = contrib_training.HParams(
+      num_steps=FLAGS.num_steps,
+      val_freq=FLAGS.val_freq,
+      seq_len=FLAGS.seq_len,
+      batch_size=FLAGS.batch_size,
+      emb_variable=FLAGS.emb_variable,
+      emb_size=FLAGS.emb_size,
+      vocab_size=4,
+      hidden_lstm_size=FLAGS.hidden_lstm_size,
+      norm_lstm=FLAGS.norm_lstm,
+      dropout_rate=FLAGS.dropout_rate,
+      learning_rate=FLAGS.learning_rate,
+      reg_type=FLAGS.reg_type,
+      reg_weight=FLAGS.reg_weight,
+      out_dir=FLAGS.out_dir,
+      in_tr_data_dir=FLAGS.in_tr_data_dir,
+      in_val_data_dir=FLAGS.in_val_data_dir,
+      ood_val_data_dir=FLAGS.ood_val_data_dir,
+      master=FLAGS.master,
+      save_meta=FLAGS.save_meta,
+      filter_label=FLAGS.filter_label,
+      mutation_rate=FLAGS.mutation_rate,
+  )
+
+  # setup output directory
+  create_out_dir(params)
+
+  # load datasets
+  params.add_hparam('in_tr_file_pattern', 'in_tr')
+  params.add_hparam('in_val_file_pattern', 'in_val')
+  params.add_hparam('ood_val_file_pattern', 'ood_val')
+  (in_tr_dataset, in_val_dataset, ood_val_dataset) = load_datasets(params)
+
+  # print parameter settings
+  tf.logging.info(params)
+  with tf.gfile.GFile(
+      os.path.join(params.model_dir, 'params.json'), mode='w') as f:
+    # to_json() already returns a JSON string, so no extra json.dumps
+    f.write(params.to_json(sort_keys=True))
+
+  # construct model; without explicit input tensors SeqModel builds its
+  # dataset iterator (see SeqModel.__init__)
+  model = SeqModel(params)
+  model.reset()
+
+  ## if previous model ckpt exists, restore the model from there
+  tf.logging.info('model dir=%s', os.path.join(params.out_dir, '*.ckpt.index'))
+  prev_steps, ckpt_file = utils.get_latest_ckpt(params.model_dir)
+  if ckpt_file:
+    tf.logging.info('previous ckpt exist, prev_steps=%s', prev_steps)
+    model.restore_from_ckpt(ckpt_file)
+
+  # training
+  model.train(in_tr_dataset, in_val_dataset, ood_val_dataset, prev_steps)
+
+
+if __name__ == '__main__':
+  tf.app.run()
diff --git a/ACL_TensorFlow/contrib/cv/OOD_ID2046_for_ACL/msame b/ACL_TensorFlow/contrib/cv/OOD_ID2046_for_ACL/msame
new file mode 100644
index 0000000000000000000000000000000000000000..23f78bc08b510a5631f695639ade3a25c7ee85a0
Binary files /dev/null and b/ACL_TensorFlow/contrib/cv/OOD_ID2046_for_ACL/msame differ
diff --git a/ACL_TensorFlow/contrib/cv/OOD_ID2046_for_ACL/utils.py b/ACL_TensorFlow/contrib/cv/OOD_ID2046_for_ACL/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..5604632c9e064a22ad901eccbb19a4a29f8382ff
--- /dev/null
+++ b/ACL_TensorFlow/contrib/cv/OOD_ID2046_for_ACL/utils.py
@@ -0,0 +1,205 @@
+# coding=utf-8
+# Copyright 2021 The Google Research Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Functions for loading and encoding sequences."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import json
+import os
+import time
+import tensorflow as tf
+import yaml
+from tensorflow.contrib import training as contrib_training  # used by generate_hparams
+
+
+def parse_single_tfexample(_, serialized_example):
+  """Parsing serialized pb2 example."""
+  # read data from serialized examples
+  features = tf.parse_single_example(
+      serialized_example,
+      features={
+          'x': tf.FixedLenFeature([], tf.string),
+          'y': tf.FixedLenFeature([], tf.int64),
+          # z is for sequence origins,
+          # i.e. which genome and which position the seq is from
+          # 'z': tf.VarLenFeature(tf.string)
+      })
+  seq_str = features['x']
+
+  x_str = tf.string_split([seq_str], delimiter=' ').values
+  features['x'] = tf.string_to_number(x_str, out_type=tf.int32)
+  features['y'] = tf.cast(features['y'], dtype=tf.int32)
+
+  return features
+
+
+def mutate_x(x, mutation_rate, seq_len):
+  """Randomly and independently mutate a sequence based on a mutation rate."""
+  # generate mutation offsets for all positions; to differ from the original
+  # token, an offset has to be >= 1. Untargeted positions are masked out
+  # (the mask is 1 for targeted ones), and the offsets are added to the
+  # original tokens modulo 4.
+  tf.set_random_seed(time.time())
+
+  def true_fn():
+    """no mutations."""
+    return x
+
+  def false_fn():
+    """add mutations."""
+    mask = tf.cast(
+        tf.multinomial(tf.log([[1 - mutation_rate, mutation_rate]]), seq_len),
+        tf.int32)[0]
+    possible_mutations = tf.random_uniform([seq_len],
+                                           minval=1,
+                                           maxval=4,
+                                           dtype=tf.int32)
+    x_new = tf.mod(x + mask * possible_mutations, 4)
+    return x_new
+
+  return tf.cond(tf.equal(mutation_rate, 0), true_fn, false_fn)
+
+
+def parse_single_tfexample_addmutations(_, serialized_example, mutation_rate,
+                                        seq_len):
+  """Parsing serialized pb2 example and add mutations."""
+  # read data from serialized examples
+  features = tf.parse_single_example(
+      serialized_example,
+      features={
+          'x': tf.FixedLenFeature([], tf.string),
+          'y': tf.FixedLenFeature([], tf.int64),
+          # z is for sequence origins,
+          # i.e.
which genome and which position the seq is from + # 'z': tf.VarLenFeature(tf.string) + }) + seq_str = features['x'] + + x_str0 = tf.string_split([seq_str], delimiter=' ').values + x = tf.string_to_number(x_str0, out_type=tf.int32) + + x_new = mutate_x(x, mutation_rate, seq_len) + + features['x'] = x_new + features['y'] = tf.cast(features['y'], dtype=tf.int32) + + return features + + +def compute_label_weights_using_sample_size(label_dict, label_sample_size): + """Compute weights for each class according to their sample sizes. + + Args: + label_dict: a dictionary with class labels as keys (strings) and encoded + label index as values (ints). + label_sample_size: a dictionary with class labels as keys (strings) and + sample size as values (ints). + + Returns: + label_weights: weights for labels. + """ + + # keys: encoded class labels, values: sample size + label_code_sample_size = { + label_dict[label]: label_sample_size[label] + for label in label_sample_size.keys() + } + print('label_code_sample_size={}'.format(label_code_sample_size)) + # create label weights = 1/label_sample_size + label_weights = [ + 1 / float(label_code_sample_size[idx]) + for idx in range(len(label_code_sample_size)) + ] + # label_weights = [ + # x / float(sum(label_weights0)) * len(label_weights0) / float( + # params.batch_size) for x in label_weights0 + # ] + return label_weights + + +def get_latest_ckpt(tr_model_dir): + """find previous ckpt and return the filename of the latest ckpt.""" + tf.logging.info('model dir={}'.format( + os.path.join(tr_model_dir, '*.ckpt.index'))) + list_of_ckpt = tf.gfile.Glob(os.path.join( + tr_model_dir, + '*.ckpt.index')) # * means all if need specific format then *.csv + # tf.logging.info('list_of_ckpt={}'.format(list_of_ckpt)) + if list_of_ckpt: + steps = [ + int(os.path.basename(x).split('model_')[1].split('.ckpt')[0]) + for x in list_of_ckpt + ] + prev_steps, latest_file0 = [ + (x, y) for x, y in sorted(zip(steps, list_of_ckpt), reverse=True) + ][0] + latest_file = latest_file0.replace('.index', '') + # latest_file = max(list_of_ckpt, key=os.path.getctime) does not work + tf.logging.info('previous model exists: {}'.format(latest_file)) + # prev_steps = int(latest_file.split('.')[0].split('_')[1]) + return prev_steps, latest_file.replace('.meta', '') + else: + prev_steps = 0 + return 0, None + + +def get_ckpt_at_step(tr_model_dir, step): + """find previous ckpt and return the filename of the latest ckpt.""" + tf.logging.info('model dir={}'.format( + os.path.join(tr_model_dir, '*.ckpt.index'))) + ckpt_file_pattern = os.path.join(tr_model_dir, '*_{}.ckpt.index'.format(step)) + ckpt_file = tf.gfile.Glob( + ckpt_file_pattern) # * means all if need specific format then *.csv + if ckpt_file: + return step, ckpt_file[0].replace('.index', '') + else: + tf.logging.info('Cannot find the ckpt file at step {}'.format(step)) + return step, None + + +def clean_last_slash_if_any(path): + return path[:-1] if path.endswith('/') else path + + +def generate_hparams(params_yaml_file): + """Create tf.HParams object based on params loaded from yaml file.""" + with tf.gfile.Open(params_yaml_file, mode='rb') as f: + params_json = yaml.safe_load(f) + params_dict = json.loads(params_json) + + params = contrib_training.HParams() + for key, value in params_dict.items(): + params.add_hparam(key, value) + params.master = '' # should be 'local' or '' + params.dropout_rate = 0.0 # turn off dropout for eval + + return params
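+
+
+# Example usage (hypothetical paths), e.g. to locate the checkpoint that
+# ckpt2pb.py freezes; get_ckpt_at_step matches files named '*_<step>.ckpt.index':
+#   step, ckpt = get_ckpt_at_step('./log/ckpt_file', 218000)
+#   # -> ckpt == './log/ckpt_file/model_218000.ckpt'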